diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index 4a881ef5ff56a..1e9367732e591 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -38,11 +38,11 @@ env: # LLVM POST-BRANCH bump version # LLVM POST-BRANCH add compiler test for ToT - 1, e.g. "Clang 17" # LLVM RELEASE bump remove compiler ToT - 3, e.g. "Clang 15" - LLVM_HEAD_VERSION: "18" # Used compiler, update POST-BRANCH. - LLVM_PREVIOUS_VERSION: "17" - LLVM_OLDEST_VERSION: "16" + LLVM_HEAD_VERSION: "19" # Used compiler, update POST-BRANCH. + LLVM_PREVIOUS_VERSION: "18" + LLVM_OLDEST_VERSION: "17" GCC_STABLE_VERSION: "13" - LLVM_SYMBOLIZER_PATH: "/usr/bin/llvm-symbolizer-18" + LLVM_SYMBOLIZER_PATH: "/usr/bin/llvm-symbolizer-19" CLANG_CRASH_DIAGNOSTICS_DIR: "crash_diagnostics" @@ -59,8 +59,8 @@ jobs: 'generic-cxx26', 'generic-modules' ] - cc: [ 'clang-18' ] - cxx: [ 'clang++-18' ] + cc: [ 'clang-19' ] + cxx: [ 'clang++-19' ] clang_tidy: [ 'ON' ] include: - config: 'generic-gcc' @@ -100,22 +100,22 @@ jobs: 'generic-cxx20', 'generic-cxx23' ] - cc: [ 'clang-18' ] - cxx: [ 'clang++-18' ] + cc: [ 'clang-19' ] + cxx: [ 'clang++-19' ] clang_tidy: [ 'ON' ] include: - config: 'generic-gcc-cxx11' cc: 'gcc-13' cxx: 'g++-13' clang_tidy: 'OFF' - - config: 'generic-cxx23' - cc: 'clang-16' - cxx: 'clang++-16' - clang_tidy: 'OFF' - config: 'generic-cxx23' cc: 'clang-17' cxx: 'clang++-17' clang_tidy: 'OFF' + - config: 'generic-cxx26' + cc: 'clang-18' + cxx: 'clang++-18' + clang_tidy: 'ON' steps: - uses: actions/checkout@v4 - name: ${{ matrix.config }} @@ -186,8 +186,8 @@ jobs: - name: ${{ matrix.config }} run: libcxx/utils/ci/run-buildbot ${{ matrix.config }} env: - CC: clang-18 - CXX: clang++-18 + CC: clang-19 + CXX: clang++-19 ENABLE_CLANG_TIDY: "OFF" - uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # v4.3.0 if: always() diff --git a/.github/workflows/release-lit.yml b/.github/workflows/release-lit.yml index 36b0b6edd518f..0316ba406041d 100644 --- a/.github/workflows/release-lit.yml +++ b/.github/workflows/release-lit.yml @@ -58,7 +58,7 @@ jobs: cd llvm/utils/lit # Remove 'dev' suffix from lit version. sed -i 's/ + "dev"//g' lit/__init__.py - python3 setup.py sdist + python3 setup.py sdist bdist_wheel - name: Upload lit to test.pypi.org uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/bolt/include/bolt/Profile/BoltAddressTranslation.h b/bolt/include/bolt/Profile/BoltAddressTranslation.h index caf907cc43da3..6a0a477f27583 100644 --- a/bolt/include/bolt/Profile/BoltAddressTranslation.h +++ b/bolt/include/bolt/Profile/BoltAddressTranslation.h @@ -19,6 +19,7 @@ #include namespace llvm { +class MCSymbol; class raw_ostream; namespace object { @@ -123,6 +124,13 @@ class BoltAddressTranslation { std::unordered_map> getBFBranches(uint64_t FuncOutputAddress) const; + /// For a given \p Symbol in the output binary and known \p InputOffset + /// return a corresponding pair of parent BinaryFunction and secondary entry + /// point in it. + std::pair + translateSymbol(const BinaryContext &BC, const MCSymbol &Symbol, + uint32_t InputOffset) const; + private: /// Helper to update \p Map by inserting one or more BAT entries reflecting /// \p BB for function located at \p FuncAddress. 
At least one entry will be @@ -158,6 +166,10 @@ class BoltAddressTranslation { /// Map a function to its secondary entry points vector std::unordered_map> SecondaryEntryPointsMap; + /// Return a secondary entry point ID for a function located at \p Address and + /// \p Offset within that function. + unsigned getSecondaryEntryPointId(uint64_t Address, uint32_t Offset) const; + /// Links outlined cold bocks to their original function std::map ColdPartSource; diff --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h index 882748627e7f5..4a9355dfceac9 100644 --- a/bolt/include/bolt/Profile/YAMLProfileWriter.h +++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h @@ -15,6 +15,7 @@ namespace llvm { namespace bolt { +class BoltAddressTranslation; class RewriteInstance; class YAMLProfileWriter { @@ -31,8 +32,16 @@ class YAMLProfileWriter { /// Save execution profile for that instance. std::error_code writeProfile(const RewriteInstance &RI); - static yaml::bolt::BinaryFunctionProfile convert(const BinaryFunction &BF, - bool UseDFS); + static yaml::bolt::BinaryFunctionProfile + convert(const BinaryFunction &BF, bool UseDFS, + const BoltAddressTranslation *BAT = nullptr); + + /// Set CallSiteInfo destination fields from \p Symbol and return a target + /// BinaryFunction for that symbol. + static const BinaryFunction * + setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI, + const MCSymbol *Symbol, const BoltAddressTranslation *BAT, + uint32_t Offset = 0); }; } // namespace bolt diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp index cbf95a7db08b5..d16b7a94787c6 100644 --- a/bolt/lib/Core/Relocation.cpp +++ b/bolt/lib/Core/Relocation.cpp @@ -774,60 +774,95 @@ static bool isPCRelativeRISCV(uint64_t Type) { } bool Relocation::isSupported(uint64_t Type) { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + return false; + case Triple::aarch64: return isSupportedAArch64(Type); - if (Arch == Triple::riscv64) + case Triple::riscv64: return isSupportedRISCV(Type); - return isSupportedX86(Type); + case Triple::x86_64: + return isSupportedX86(Type); + } } size_t Relocation::getSizeForType(uint64_t Type) { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return getSizeForTypeAArch64(Type); - if (Arch == Triple::riscv64) + case Triple::riscv64: return getSizeForTypeRISCV(Type); - return getSizeForTypeX86(Type); + case Triple::x86_64: + return getSizeForTypeX86(Type); + } } bool Relocation::skipRelocationType(uint64_t Type) { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return skipRelocationTypeAArch64(Type); - if (Arch == Triple::riscv64) + case Triple::riscv64: return skipRelocationTypeRISCV(Type); - return skipRelocationTypeX86(Type); + case Triple::x86_64: + return skipRelocationTypeX86(Type); + } } bool Relocation::skipRelocationProcess(uint64_t &Type, uint64_t Contents) { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return skipRelocationProcessAArch64(Type, Contents); - if (Arch == Triple::riscv64) - skipRelocationProcessRISCV(Type, Contents); - return skipRelocationProcessX86(Type, Contents); + case Triple::riscv64: + return skipRelocationProcessRISCV(Type, Contents); + case Triple::x86_64: + return skipRelocationProcessX86(Type, Contents); + } } uint64_t 
Relocation::encodeValue(uint64_t Type, uint64_t Value, uint64_t PC) { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return encodeValueAArch64(Type, Value, PC); - if (Arch == Triple::riscv64) + case Triple::riscv64: return encodeValueRISCV(Type, Value, PC); - return encodeValueX86(Type, Value, PC); + case Triple::x86_64: + return encodeValueX86(Type, Value, PC); + } } uint64_t Relocation::extractValue(uint64_t Type, uint64_t Contents, uint64_t PC) { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return extractValueAArch64(Type, Contents, PC); - if (Arch == Triple::riscv64) + case Triple::riscv64: return extractValueRISCV(Type, Contents, PC); - return extractValueX86(Type, Contents, PC); + case Triple::x86_64: + return extractValueX86(Type, Contents, PC); + } } bool Relocation::isGOT(uint64_t Type) { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return isGOTAArch64(Type); - if (Arch == Triple::riscv64) + case Triple::riscv64: return isGOTRISCV(Type); - return isGOTX86(Type); + case Triple::x86_64: + return isGOTX86(Type); + } } bool Relocation::isX86GOTPCRELX(uint64_t Type) { @@ -845,27 +880,42 @@ bool Relocation::isX86GOTPC64(uint64_t Type) { bool Relocation::isNone(uint64_t Type) { return Type == getNone(); } bool Relocation::isRelative(uint64_t Type) { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return Type == ELF::R_AARCH64_RELATIVE; - if (Arch == Triple::riscv64) + case Triple::riscv64: return Type == ELF::R_RISCV_RELATIVE; - return Type == ELF::R_X86_64_RELATIVE; + case Triple::x86_64: + return Type == ELF::R_X86_64_RELATIVE; + } } bool Relocation::isIRelative(uint64_t Type) { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return Type == ELF::R_AARCH64_IRELATIVE; - if (Arch == Triple::riscv64) + case Triple::riscv64: llvm_unreachable("not implemented"); - return Type == ELF::R_X86_64_IRELATIVE; + case Triple::x86_64: + return Type == ELF::R_X86_64_IRELATIVE; + } } bool Relocation::isTLS(uint64_t Type) { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return isTLSAArch64(Type); - if (Arch == Triple::riscv64) + case Triple::riscv64: return isTLSRISCV(Type); - return isTLSX86(Type); + case Triple::x86_64: + return isTLSX86(Type); + } } bool Relocation::isInstructionReference(uint64_t Type) { @@ -882,49 +932,81 @@ bool Relocation::isInstructionReference(uint64_t Type) { } uint64_t Relocation::getNone() { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return ELF::R_AARCH64_NONE; - if (Arch == Triple::riscv64) + case Triple::riscv64: return ELF::R_RISCV_NONE; - return ELF::R_X86_64_NONE; + case Triple::x86_64: + return ELF::R_X86_64_NONE; + } } uint64_t Relocation::getPC32() { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return ELF::R_AARCH64_PREL32; - if (Arch == Triple::riscv64) + case Triple::riscv64: return ELF::R_RISCV_32_PCREL; - return ELF::R_X86_64_PC32; + case Triple::x86_64: + return ELF::R_X86_64_PC32; + } } 
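Aside, not part of the patch: the switch rewrite in these dispatchers is more than cosmetic. In the old if-chain form of Relocation::skipRelocationProcess, the riscv64 branch was missing a `return`, so RISC-V queries fell through to the x86-64 handler; a switch over Triple::ArchType with an llvm_unreachable default makes the supported set explicit and turns any such fallthrough into a hard failure. A minimal standalone C++ sketch of the pattern and the bug it prevents — toy Arch enum and handler stubs, not the actual BOLT code:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Toy stand-ins for llvm::Triple::ArchType and the per-arch handlers.
enum class Arch { aarch64, riscv64, x86_64, ppc64 };

static bool skipAArch64(uint64_t &, uint64_t) { return false; }
static bool skipRISCV(uint64_t &, uint64_t) { return true; }
static bool skipX86(uint64_t &, uint64_t) { return false; }

// Before: an if-chain. The missing `return` on the riscv64 branch silently
// discards the RISC-V result and falls through to the x86-64 handler.
bool skipRelocationProcessBuggy(Arch A, uint64_t &Type, uint64_t Contents) {
  if (A == Arch::aarch64)
    return skipAArch64(Type, Contents);
  if (A == Arch::riscv64)
    skipRISCV(Type, Contents); // result discarded!
  return skipX86(Type, Contents);
}

// After: a switch with an asserting default (llvm_unreachable in BOLT).
// Unsupported architectures fail loudly instead of inheriting x86-64 behavior.
bool skipRelocationProcessFixed(Arch A, uint64_t &Type, uint64_t Contents) {
  switch (A) {
  default:
    assert(false && "Unsupported architecture");
    return false;
  case Arch::aarch64:
    return skipAArch64(Type, Contents);
  case Arch::riscv64:
    return skipRISCV(Type, Contents);
  case Arch::x86_64:
    return skipX86(Type, Contents);
  }
}

int main() {
  uint64_t Type = 0, Contents = 0;
  // The buggy version answers for riscv64 with the x86-64 handler's result.
  std::printf("buggy riscv64: %d\n",
              skipRelocationProcessBuggy(Arch::riscv64, Type, Contents)); // 0
  std::printf("fixed riscv64: %d\n",
              skipRelocationProcessFixed(Arch::riscv64, Type, Contents)); // 1
}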
uint64_t Relocation::getPC64() { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return ELF::R_AARCH64_PREL64; - if (Arch == Triple::riscv64) + case Triple::riscv64: llvm_unreachable("not implemented"); - return ELF::R_X86_64_PC64; + case Triple::x86_64: + return ELF::R_X86_64_PC64; + } } bool Relocation::isPCRelative(uint64_t Type) { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return isPCRelativeAArch64(Type); - if (Arch == Triple::riscv64) + case Triple::riscv64: return isPCRelativeRISCV(Type); - return isPCRelativeX86(Type); + case Triple::x86_64: + return isPCRelativeX86(Type); + } } uint64_t Relocation::getAbs64() { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return ELF::R_AARCH64_ABS64; - if (Arch == Triple::riscv64) + case Triple::riscv64: return ELF::R_RISCV_64; - return ELF::R_X86_64_64; + case Triple::x86_64: + return ELF::R_X86_64_64; + } } uint64_t Relocation::getRelative() { - if (Arch == Triple::aarch64) + switch (Arch) { + default: + llvm_unreachable("Unsupported architecture"); + case Triple::aarch64: return ELF::R_AARCH64_RELATIVE; - return ELF::R_X86_64_RELATIVE; + case Triple::riscv64: + llvm_unreachable("not implemented"); + case Triple::x86_64: + return ELF::R_X86_64_RELATIVE; + } } size_t Relocation::emit(MCStreamer *Streamer) const { @@ -991,9 +1073,16 @@ void Relocation::print(raw_ostream &OS) const { static const char *AArch64RelocNames[] = { #include "llvm/BinaryFormat/ELFRelocs/AArch64.def" }; - if (Arch == Triple::aarch64) + switch (Arch) { + default: + OS << "RType:" << Twine::utohexstr(Type); + break; + + case Triple::aarch64: OS << AArch64RelocNames[Type]; - else if (Arch == Triple::riscv64) { + break; + + case Triple::riscv64: // RISC-V relocations are not sequentially numbered so we cannot use an // array switch (Type) { @@ -1006,8 +1095,12 @@ void Relocation::print(raw_ostream &OS) const { break; #include "llvm/BinaryFormat/ELFRelocs/RISCV.def" } - } else + break; + + case Triple::x86_64: OS << X86RelocNames[Type]; + break; + } OS << ", 0x" << Twine::utohexstr(Offset); if (Symbol) { OS << ", " << Symbol->getName(); diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp index bcd4a457ce3b4..59d499f97be72 100644 --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -100,7 +100,7 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) { LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n"); LLVM_DEBUG(dbgs() << " Address reference: 0x" << Twine::utohexstr(Function.getOutputAddress()) << "\n"); - LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n", getBFHash(OutputAddress))); + LLVM_DEBUG(dbgs() << formatv(" Hash: {0:x}\n", getBFHash(InputAddress))); LLVM_DEBUG(dbgs() << " Secondary Entry Points: " << NumSecondaryEntryPoints << '\n'); @@ -197,8 +197,9 @@ void BoltAddressTranslation::writeMaps(std::map &Maps, ? 
SecondaryEntryPointsMap[Address].size() : 0; if (Cold) { - size_t HotIndex = - std::distance(ColdPartSource.begin(), ColdPartSource.find(Address)); + auto HotEntryIt = Maps.find(ColdPartSource[Address]); + assert(HotEntryIt != Maps.end()); + size_t HotIndex = std::distance(Maps.begin(), HotEntryIt); encodeULEB128(HotIndex - PrevIndex, OS); PrevIndex = HotIndex; } else { @@ -207,7 +208,7 @@ void BoltAddressTranslation::writeMaps(std::map &Maps, LLVM_DEBUG(dbgs() << "Hash: " << formatv("{0:x}\n", BFHash)); OS.write(reinterpret_cast(&BFHash), 8); // Number of basic blocks - size_t NumBasicBlocks = getBBHashMap(HotInputAddress).getNumBasicBlocks(); + size_t NumBasicBlocks = NumBasicBlocksMap[HotInputAddress]; LLVM_DEBUG(dbgs() << "Basic blocks: " << NumBasicBlocks << '\n'); encodeULEB128(NumBasicBlocks, OS); // Secondary entry points @@ -425,8 +426,9 @@ void BoltAddressTranslation::dump(raw_ostream &OS) { for (const auto &MapEntry : Maps) { const uint64_t Address = MapEntry.first; const uint64_t HotAddress = fetchParentAddress(Address); + const bool IsHotFunction = HotAddress == 0; OS << "Function Address: 0x" << Twine::utohexstr(Address); - if (HotAddress == 0) + if (IsHotFunction) OS << formatv(", hash: {0:x}", getBFHash(Address)); OS << "\n"; OS << "BB mappings:\n"; @@ -443,6 +445,8 @@ void BoltAddressTranslation::dump(raw_ostream &OS) { OS << formatv(" hash: {0:x}", BBHashMap.getBBHash(Val)); OS << "\n"; } + if (IsHotFunction) + OS << "NumBlocks: " << NumBasicBlocksMap[Address] << '\n'; if (SecondaryEntryPointsMap.count(Address)) { const std::vector &SecondaryEntryPoints = SecondaryEntryPointsMap[Address]; @@ -574,6 +578,7 @@ void BoltAddressTranslation::saveMetadata(BinaryContext &BC) { // Set BF/BB metadata for (const BinaryBasicBlock &BB : BF) BBHashMap.addEntry(BB.getInputOffset(), BB.getIndex(), BB.getHash()); + NumBasicBlocksMap.emplace(BF.getAddress(), BF.size()); } } @@ -597,5 +602,49 @@ BoltAddressTranslation::getBFBranches(uint64_t OutputAddress) const { return Branches; } +unsigned +BoltAddressTranslation::getSecondaryEntryPointId(uint64_t Address, + uint32_t Offset) const { + auto FunctionIt = SecondaryEntryPointsMap.find(Address); + if (FunctionIt == SecondaryEntryPointsMap.end()) + return 0; + const std::vector &Offsets = FunctionIt->second; + auto OffsetIt = std::find(Offsets.begin(), Offsets.end(), Offset); + if (OffsetIt == Offsets.end()) + return 0; + // Adding one here because main entry point is not stored in BAT, and + // enumeration for secondary entry points starts with 1. + return OffsetIt - Offsets.begin() + 1; +} + +std::pair +BoltAddressTranslation::translateSymbol(const BinaryContext &BC, + const MCSymbol &Symbol, + uint32_t Offset) const { + // The symbol could be a secondary entry in a cold fragment. + uint64_t SymbolValue = cantFail(errorOrToExpected(BC.getSymbolValue(Symbol))); + + const BinaryFunction *Callee = BC.getFunctionForSymbol(&Symbol); + assert(Callee); + + // Containing function, not necessarily the same as symbol value. + const uint64_t CalleeAddress = Callee->getAddress(); + const uint32_t OutputOffset = SymbolValue - CalleeAddress; + + const uint64_t ParentAddress = fetchParentAddress(CalleeAddress); + const uint64_t HotAddress = ParentAddress ? 
ParentAddress : CalleeAddress; + + const BinaryFunction *ParentBF = BC.getBinaryFunctionAtAddress(HotAddress); + + const uint32_t InputOffset = + translate(CalleeAddress, OutputOffset, /*IsBranchSrc*/ false) + Offset; + + unsigned SecondaryEntryId{0}; + if (InputOffset) + SecondaryEntryId = getSecondaryEntryPointId(HotAddress, InputOffset); + + return std::pair(ParentBF, SecondaryEntryId); +} + } // namespace bolt } // namespace llvm diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 05099aa25ce22..71824e2cc0e97 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -2333,7 +2333,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, if (BAT->isBATFunction(Function.getAddress())) continue; BP.Functions.emplace_back( - YAMLProfileWriter::convert(Function, /*UseDFS=*/false)); + YAMLProfileWriter::convert(Function, /*UseDFS=*/false, BAT)); } for (const auto &KV : NamesToBranches) { diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index 0f082086c1fc2..ef04ba0d21ad7 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -9,6 +9,7 @@ #include "bolt/Profile/YAMLProfileWriter.h" #include "bolt/Core/BinaryBasicBlock.h" #include "bolt/Core/BinaryFunction.h" +#include "bolt/Profile/BoltAddressTranslation.h" #include "bolt/Profile/ProfileReaderBase.h" #include "bolt/Rewrite/RewriteInstance.h" #include "llvm/Support/CommandLine.h" @@ -25,17 +26,19 @@ extern llvm::cl::opt ProfileUseDFS; namespace llvm { namespace bolt { -/// Set CallSiteInfo destination fields from \p Symbol and return a target -/// BinaryFunction for that symbol. -static const BinaryFunction *setCSIDestination(const BinaryContext &BC, - yaml::bolt::CallSiteInfo &CSI, - const MCSymbol *Symbol) { +const BinaryFunction *YAMLProfileWriter::setCSIDestination( + const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI, + const MCSymbol *Symbol, const BoltAddressTranslation *BAT, + uint32_t Offset) { CSI.DestId = 0; // designated for unknown functions CSI.EntryDiscriminator = 0; + if (Symbol) { uint64_t EntryID = 0; - if (const BinaryFunction *const Callee = + if (const BinaryFunction *Callee = BC.getFunctionForSymbol(Symbol, &EntryID)) { + if (BAT && BAT->isBATFunction(Callee->getAddress())) + std::tie(Callee, EntryID) = BAT->translateSymbol(BC, *Symbol, Offset); CSI.DestId = Callee->getFunctionNumber(); CSI.EntryDiscriminator = EntryID; return Callee; @@ -45,7 +48,8 @@ static const BinaryFunction *setCSIDestination(const BinaryContext &BC, } yaml::bolt::BinaryFunctionProfile -YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) { +YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, + const BoltAddressTranslation *BAT) { yaml::bolt::BinaryFunctionProfile YamlBF; const BinaryContext &BC = BF.getBinaryContext(); @@ -98,7 +102,8 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) { continue; for (const IndirectCallProfile &CSP : ICSP.get()) { StringRef TargetName = ""; - const BinaryFunction *Callee = setCSIDestination(BC, CSI, CSP.Symbol); + const BinaryFunction *Callee = + setCSIDestination(BC, CSI, CSP.Symbol, BAT); if (Callee) TargetName = Callee->getOneName(); CSI.Count = CSP.Count; @@ -109,7 +114,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS) { StringRef TargetName = ""; const MCSymbol *CalleeSymbol = BC.MIB->getTargetSymbol(Instr); const BinaryFunction *const Callee = - setCSIDestination(BC, 
CSI, CalleeSymbol); + setCSIDestination(BC, CSI, CalleeSymbol, BAT); if (Callee) TargetName = Callee->getOneName(); diff --git a/bolt/test/X86/bolt-address-translation-yaml.test b/bolt/test/X86/bolt-address-translation-yaml.test index 7fdf7709a8b9d..af24c3d84a0f1 100644 --- a/bolt/test/X86/bolt-address-translation-yaml.test +++ b/bolt/test/X86/bolt-address-translation-yaml.test @@ -36,9 +36,14 @@ YAML-BAT-CHECK-NEXT: - bid: 0 YAML-BAT-CHECK-NEXT: insns: 26 YAML-BAT-CHECK-NEXT: hash: 0xA900AE79CFD40000 YAML-BAT-CHECK-NEXT: succ: [ { bid: 3, cnt: 0 }, { bid: 1, cnt: 0 } ] +# Calls from no-BAT to BAT function +YAML-BAT-CHECK: - bid: 28 +YAML-BAT-CHECK-NEXT: insns: 13 +YAML-BAT-CHECK-NEXT: hash: 0xB2F04C1F25F00400 +YAML-BAT-CHECK-NEXT: calls: [ { off: 0x21, fid: [[#SOLVECUBIC:]], cnt: 25 }, { off: 0x2D, fid: [[#]], cnt: 9 } ] # Function covered by BAT with calls YAML-BAT-CHECK: - name: SolveCubic -YAML-BAT-CHECK-NEXT: fid: [[#]] +YAML-BAT-CHECK-NEXT: fid: [[#SOLVECUBIC]] YAML-BAT-CHECK-NEXT: hash: 0x6AF7E61EA3966722 YAML-BAT-CHECK-NEXT: exec: 25 YAML-BAT-CHECK-NEXT: nblocks: 15 diff --git a/bolt/test/X86/yaml-secondary-entry-discriminator.s b/bolt/test/X86/yaml-secondary-entry-discriminator.s index 43c2e2a7f0554..78e7e55aa98eb 100644 --- a/bolt/test/X86/yaml-secondary-entry-discriminator.s +++ b/bolt/test/X86/yaml-secondary-entry-discriminator.s @@ -11,17 +11,17 @@ # RUN: FileCheck %s -input-file %t.yaml # CHECK: - name: main # CHECK-NEXT: fid: 2 -# CHECK-NEXT: hash: 0xADF270D550151185 +# CHECK-NEXT: hash: {{.*}} # CHECK-NEXT: exec: 0 # CHECK-NEXT: nblocks: 4 # CHECK-NEXT: blocks: # CHECK: - bid: 1 # CHECK-NEXT: insns: 1 -# CHECK-NEXT: hash: 0x36A303CBA4360014 +# CHECK-NEXT: hash: {{.*}} # CHECK-NEXT: calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1 } ] # CHECK: - bid: 2 # CHECK-NEXT: insns: 5 -# CHECK-NEXT: hash: 0x8B2F5747CD0019 +# CHECK-NEXT: hash: {{.*}} # CHECK-NEXT: calls: [ { off: 0x0, fid: 1, disc: 1, cnt: 1, mis: 1 } ] # Make sure that the profile is attached correctly @@ -33,6 +33,11 @@ # CHECK-CFG: callq *%rax # Offset: [[#]] # CallProfile: 1 (1 misses) : # CHECK-CFG-NEXT: { secondary_entry: 1 (1 misses) } +# YAML BAT test of calling BAT secondary entry from non-BAT function +# Now force-split func and skip main (making it call secondary entries) +# RUN: llvm-bolt %t.exe -o %t.bat --data %t.fdata --funcs=func \ +# RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat + .globl func .type func, @function func: @@ -40,8 +45,16 @@ func: .cfi_startproc pushq %rbp movq %rsp, %rbp + # Placeholder code to make splitting profitable +.rept 5 + testq %rax, %rax +.endr .globl secondary_entry secondary_entry: + # Placeholder code to make splitting profitable +.rept 5 + testq %rax, %rax +.endr popq %rbp retq nopl (%rax) diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp index 87fd8adf99708..bbb35228ce47f 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/MissingStdForwardCheck.cpp @@ -9,8 +9,8 @@ #include "MissingStdForwardCheck.h" #include "../utils/Matchers.h" #include "clang/AST/ASTContext.h" -#include "clang/AST/ExprConcepts.h" #include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Basic/IdentifierTable.h" using namespace clang::ast_matchers; @@ -79,6 +79,11 @@ AST_MATCHER_P(LambdaExpr, hasCaptureDefaultKind, LambdaCaptureDefault, Kind) { return Node.getCaptureDefault() 
== Kind; } +AST_MATCHER(VarDecl, hasIdentifier) { + const IdentifierInfo *ID = Node.getIdentifier(); + return ID != NULL && !ID->isPlaceholder(); +} + } // namespace void MissingStdForwardCheck::registerMatchers(MatchFinder *Finder) { @@ -125,12 +130,14 @@ void MissingStdForwardCheck::registerMatchers(MatchFinder *Finder) { hasAncestor(expr(hasUnevaluatedContext()))))); Finder->addMatcher( - parmVarDecl(parmVarDecl().bind("param"), isTemplateTypeParameter(), - hasAncestor(functionDecl().bind("func")), - hasAncestor(functionDecl( - isDefinition(), equalsBoundNode("func"), ToParam, - unless(anyOf(isDeleted(), hasDescendant(std::move( - ForwardCallMatcher))))))), + parmVarDecl( + parmVarDecl().bind("param"), hasIdentifier(), + unless(hasAttr(attr::Kind::Unused)), isTemplateTypeParameter(), + hasAncestor(functionDecl().bind("func")), + hasAncestor(functionDecl( + isDefinition(), equalsBoundNode("func"), ToParam, + unless(anyOf(isDeleted(), + hasDescendant(std::move(ForwardCallMatcher))))))), this); } diff --git a/clang-tools-extra/clang-tidy/hicpp/IgnoredRemoveResultCheck.cpp b/clang-tools-extra/clang-tidy/hicpp/IgnoredRemoveResultCheck.cpp index 8020f8cd06251..b1a18485ce168 100644 --- a/clang-tools-extra/clang-tidy/hicpp/IgnoredRemoveResultCheck.cpp +++ b/clang-tools-extra/clang-tidy/hicpp/IgnoredRemoveResultCheck.cpp @@ -14,9 +14,9 @@ IgnoredRemoveResultCheck::IgnoredRemoveResultCheck(llvm::StringRef Name, ClangTidyContext *Context) : UnusedReturnValueCheck(Name, Context, { - "::std::remove", - "::std::remove_if", - "::std::unique", + "::std::remove$", + "::std::remove_if$", + "::std::unique$", }) { // The constructor for ClangTidyCheck needs to have been called // before we can access options via Options.get(). diff --git a/clang-tools-extra/clang-tidy/readability/AvoidReturnWithVoidValueCheck.cpp b/clang-tools-extra/clang-tidy/readability/AvoidReturnWithVoidValueCheck.cpp index e3400f614fa56..48bca41f4a3b1 100644 --- a/clang-tools-extra/clang-tidy/readability/AvoidReturnWithVoidValueCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/AvoidReturnWithVoidValueCheck.cpp @@ -7,19 +7,18 @@ //===----------------------------------------------------------------------===// #include "AvoidReturnWithVoidValueCheck.h" -#include "clang/AST/Stmt.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/ASTMatchers/ASTMatchers.h" +#include "../utils/BracesAroundStatement.h" +#include "../utils/LexerUtils.h" using namespace clang::ast_matchers; namespace clang::tidy::readability { -static constexpr auto IgnoreMacrosName = "IgnoreMacros"; -static constexpr auto IgnoreMacrosDefault = true; +static constexpr char IgnoreMacrosName[] = "IgnoreMacros"; +static const bool IgnoreMacrosDefault = true; -static constexpr auto StrictModeName = "StrictMode"; -static constexpr auto StrictModeDefault = true; +static constexpr char StrictModeName[] = "StrictMode"; +static const bool StrictModeDefault = true; AvoidReturnWithVoidValueCheck::AvoidReturnWithVoidValueCheck( StringRef Name, ClangTidyContext *Context) @@ -32,7 +31,10 @@ void AvoidReturnWithVoidValueCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher( returnStmt( hasReturnValue(allOf(hasType(voidType()), unless(initListExpr()))), - optionally(hasParent(compoundStmt().bind("compound_parent")))) + optionally(hasParent( + compoundStmt( + optionally(hasParent(functionDecl().bind("function_parent")))) + .bind("compound_parent")))) .bind("void_return"), this); } @@ -42,10 +44,30 @@ void AvoidReturnWithVoidValueCheck::check( const auto 
*VoidReturn = Result.Nodes.getNodeAs("void_return"); if (IgnoreMacros && VoidReturn->getBeginLoc().isMacroID()) return; - if (!StrictMode && !Result.Nodes.getNodeAs("compound_parent")) + const auto *SurroundingBlock = + Result.Nodes.getNodeAs("compound_parent"); + if (!StrictMode && !SurroundingBlock) return; - diag(VoidReturn->getBeginLoc(), "return statement within a void function " - "should not have a specified return value"); + DiagnosticBuilder Diag = diag(VoidReturn->getBeginLoc(), + "return statement within a void function " + "should not have a specified return value"); + const SourceLocation SemicolonPos = utils::lexer::findNextTerminator( + VoidReturn->getEndLoc(), *Result.SourceManager, getLangOpts()); + if (SemicolonPos.isInvalid()) + return; + if (!SurroundingBlock) { + const auto BraceInsertionHints = utils::getBraceInsertionsHints( + VoidReturn, getLangOpts(), *Result.SourceManager, + VoidReturn->getBeginLoc()); + if (BraceInsertionHints) + Diag << BraceInsertionHints.openingBraceFixIt() + << BraceInsertionHints.closingBraceFixIt(); + } + Diag << FixItHint::CreateRemoval(VoidReturn->getReturnLoc()); + if (!Result.Nodes.getNodeAs("function_parent") || + SurroundingBlock->body_back() != VoidReturn) + Diag << FixItHint::CreateInsertion(SemicolonPos.getLocWithOffset(1), + " return;", true); } void AvoidReturnWithVoidValueCheck::storeOptions( diff --git a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp index 81ca33cbbdfb4..85bd9c1e4f9a0 100644 --- a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "BracesAroundStatementsCheck.h" +#include "../utils/BracesAroundStatement.h" #include "../utils/LexerUtils.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchers.h" @@ -17,12 +18,10 @@ using namespace clang::ast_matchers; namespace clang::tidy::readability { static tok::TokenKind getTokenKind(SourceLocation Loc, const SourceManager &SM, - const ASTContext *Context) { + const LangOptions &LangOpts) { Token Tok; - SourceLocation Beginning = - Lexer::GetBeginningOfToken(Loc, SM, Context->getLangOpts()); - const bool Invalid = - Lexer::getRawToken(Beginning, Tok, SM, Context->getLangOpts()); + SourceLocation Beginning = Lexer::GetBeginningOfToken(Loc, SM, LangOpts); + const bool Invalid = Lexer::getRawToken(Beginning, Tok, SM, LangOpts); assert(!Invalid && "Expected a valid token."); if (Invalid) @@ -33,64 +32,21 @@ static tok::TokenKind getTokenKind(SourceLocation Loc, const SourceManager &SM, static SourceLocation forwardSkipWhitespaceAndComments(SourceLocation Loc, const SourceManager &SM, - const ASTContext *Context) { + const LangOptions &LangOpts) { assert(Loc.isValid()); for (;;) { while (isWhitespace(*SM.getCharacterData(Loc))) Loc = Loc.getLocWithOffset(1); - tok::TokenKind TokKind = getTokenKind(Loc, SM, Context); + tok::TokenKind TokKind = getTokenKind(Loc, SM, LangOpts); if (TokKind != tok::comment) return Loc; // Fast-forward current token. 
- Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, Context->getLangOpts()); + Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); } } -static SourceLocation findEndLocation(const Stmt &S, const SourceManager &SM, - const ASTContext *Context) { - SourceLocation Loc = - utils::lexer::getUnifiedEndLoc(S, SM, Context->getLangOpts()); - if (!Loc.isValid()) - return Loc; - - // Start searching right after S. - Loc = Loc.getLocWithOffset(1); - - for (;;) { - assert(Loc.isValid()); - while (isHorizontalWhitespace(*SM.getCharacterData(Loc))) { - Loc = Loc.getLocWithOffset(1); - } - - if (isVerticalWhitespace(*SM.getCharacterData(Loc))) { - // EOL, insert brace before. - break; - } - tok::TokenKind TokKind = getTokenKind(Loc, SM, Context); - if (TokKind != tok::comment) { - // Non-comment token, insert brace before. - break; - } - - SourceLocation TokEndLoc = - Lexer::getLocForEndOfToken(Loc, 0, SM, Context->getLangOpts()); - SourceRange TokRange(Loc, TokEndLoc); - StringRef Comment = Lexer::getSourceText( - CharSourceRange::getTokenRange(TokRange), SM, Context->getLangOpts()); - if (Comment.starts_with("/*") && Comment.contains('\n')) { - // Multi-line block comment, insert brace before. - break; - } - // else: Trailing comment, insert brace after the newline. - - // Fast-forward current token. - Loc = TokEndLoc; - } - return Loc; -} - BracesAroundStatementsCheck::BracesAroundStatementsCheck( StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), @@ -124,7 +80,7 @@ void BracesAroundStatementsCheck::check( } else if (const auto *S = Result.Nodes.getNodeAs("do")) { checkStmt(Result, S->getBody(), S->getDoLoc(), S->getWhileLoc()); } else if (const auto *S = Result.Nodes.getNodeAs("while")) { - SourceLocation StartLoc = findRParenLoc(S, SM, Context); + SourceLocation StartLoc = findRParenLoc(S, SM, Context->getLangOpts()); if (StartLoc.isInvalid()) return; checkStmt(Result, S->getBody(), StartLoc); @@ -133,7 +89,7 @@ void BracesAroundStatementsCheck::check( if (S->isConsteval()) return; - SourceLocation StartLoc = findRParenLoc(S, SM, Context); + SourceLocation StartLoc = findRParenLoc(S, SM, Context->getLangOpts()); if (StartLoc.isInvalid()) return; if (ForceBracesStmts.erase(S)) @@ -156,7 +112,7 @@ template SourceLocation BracesAroundStatementsCheck::findRParenLoc(const IfOrWhileStmt *S, const SourceManager &SM, - const ASTContext *Context) { + const LangOptions &LangOpts) { // Skip macros. 
if (S->getBeginLoc().isMacroID()) return {}; @@ -170,14 +126,14 @@ BracesAroundStatementsCheck::findRParenLoc(const IfOrWhileStmt *S, } SourceLocation PastCondEndLoc = - Lexer::getLocForEndOfToken(CondEndLoc, 0, SM, Context->getLangOpts()); + Lexer::getLocForEndOfToken(CondEndLoc, 0, SM, LangOpts); if (PastCondEndLoc.isInvalid()) return {}; SourceLocation RParenLoc = - forwardSkipWhitespaceAndComments(PastCondEndLoc, SM, Context); + forwardSkipWhitespaceAndComments(PastCondEndLoc, SM, LangOpts); if (RParenLoc.isInvalid()) return {}; - tok::TokenKind TokKind = getTokenKind(RParenLoc, SM, Context); + tok::TokenKind TokKind = getTokenKind(RParenLoc, SM, LangOpts); if (TokKind != tok::r_paren) return {}; return RParenLoc; @@ -188,86 +144,23 @@ BracesAroundStatementsCheck::findRParenLoc(const IfOrWhileStmt *S, bool BracesAroundStatementsCheck::checkStmt( const MatchFinder::MatchResult &Result, const Stmt *S, SourceLocation StartLoc, SourceLocation EndLocHint) { - while (const auto *AS = dyn_cast(S)) S = AS->getSubStmt(); - const SourceManager &SM = *Result.SourceManager; - const ASTContext *Context = Result.Context; - - // 1) If there's a corresponding "else" or "while", the check inserts "} " - // right before that token. - // 2) If there's a multi-line block comment starting on the same line after - // the location we're inserting the closing brace at, or there's a non-comment - // token, the check inserts "\n}" right before that token. - // 3) Otherwise the check finds the end of line (possibly after some block or - // line comments) and inserts "\n}" right before that EOL. - if (!S || isa(S)) { - // Already inside braces. - return false; - } - - // When TreeTransform, Stmt in constexpr IfStmt will be transform to NullStmt. - // This NullStmt can be detected according to beginning token. - const SourceLocation StmtBeginLoc = S->getBeginLoc(); - if (isa(S) && StmtBeginLoc.isValid() && - getTokenKind(StmtBeginLoc, SM, Context) == tok::l_brace) - return false; - - if (StartLoc.isInvalid()) - return false; - - // Convert StartLoc to file location, if it's on the same macro expansion - // level as the start of the statement. We also need file locations for - // Lexer::getLocForEndOfToken working properly. - StartLoc = Lexer::makeFileCharRange( - CharSourceRange::getCharRange(StartLoc, S->getBeginLoc()), SM, - Context->getLangOpts()) - .getBegin(); - if (StartLoc.isInvalid()) - return false; - StartLoc = - Lexer::getLocForEndOfToken(StartLoc, 0, SM, Context->getLangOpts()); - - // StartLoc points at the location of the opening brace to be inserted. - SourceLocation EndLoc; - std::string ClosingInsertion; - if (EndLocHint.isValid()) { - EndLoc = EndLocHint; - ClosingInsertion = "} "; - } else { - EndLoc = findEndLocation(*S, SM, Context); - ClosingInsertion = "\n}"; - } - - assert(StartLoc.isValid()); - - // Don't require braces for statements spanning less than certain number of - // lines. 
- if (ShortStatementLines && !ForceBracesStmts.erase(S)) { - unsigned StartLine = SM.getSpellingLineNumber(StartLoc); - unsigned EndLine = SM.getSpellingLineNumber(EndLoc); - if (EndLine - StartLine < ShortStatementLines) + const auto BraceInsertionHints = utils::getBraceInsertionsHints( + S, Result.Context->getLangOpts(), *Result.SourceManager, StartLoc, + EndLocHint); + if (BraceInsertionHints) { + if (ShortStatementLines && !ForceBracesStmts.erase(S) && + BraceInsertionHints.resultingCompoundLineExtent(*Result.SourceManager) < + ShortStatementLines) return false; + auto Diag = diag(BraceInsertionHints.DiagnosticPos, + "statement should be inside braces"); + if (BraceInsertionHints.offersFixIts()) + Diag << BraceInsertionHints.openingBraceFixIt() + << BraceInsertionHints.closingBraceFixIt(); } - - auto Diag = diag(StartLoc, "statement should be inside braces"); - - // Change only if StartLoc and EndLoc are on the same macro expansion level. - // This will also catch invalid EndLoc. - // Example: LLVM_DEBUG( for(...) do_something() ); - // In this case fix-it cannot be provided as the semicolon which is not - // visible here is part of the macro. Adding braces here would require adding - // another semicolon. - if (Lexer::makeFileCharRange( - CharSourceRange::getTokenRange(SourceRange( - SM.getSpellingLoc(StartLoc), SM.getSpellingLoc(EndLoc))), - SM, Context->getLangOpts()) - .isInvalid()) - return false; - - Diag << FixItHint::CreateInsertion(StartLoc, " {") - << FixItHint::CreateInsertion(EndLoc, ClosingInsertion); return true; } diff --git a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.h b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.h index 249aa1aaaa915..4cd37a7b2dd6c 100644 --- a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.h +++ b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.h @@ -52,7 +52,7 @@ class BracesAroundStatementsCheck : public ClangTidyCheck { SourceLocation EndLocHint = SourceLocation()); template SourceLocation findRParenLoc(const IfOrWhileStmt *S, const SourceManager &SM, - const ASTContext *Context); + const LangOptions &LangOpts); std::optional getCheckTraversalKind() const override { return TK_IgnoreUnlessSpelledInSource; } diff --git a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp index 67147164946ab..229e5583846b9 100644 --- a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp @@ -79,6 +79,10 @@ void DuplicateIncludeCallbacks::InclusionDirective( bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath, const Module *SuggestedModule, bool ModuleImported, SrcMgr::CharacteristicKind FileType) { + // Skip includes behind macros + if (FilenameRange.getBegin().isMacroID() || + FilenameRange.getEnd().isMacroID()) + return; if (llvm::is_contained(Files.back(), FileName)) { // We want to delete the entire line, so make sure that [Start,End] covers // everything. 
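A note on the DuplicateIncludeCheck change above, which bails out when either end of the filename range comes from a macro expansion: duplicate detection — and, more importantly, the line-deleting fix-it — no longer fires for directives whose filename is spelled through a macro. A tiny illustration with made-up names, mirroring the Issue_87303 test added later in this patch:

// Both directives name the same header, but the first spells it via a macro,
// so the check skips it and neither directive is reported or deleted.
#define CONFIG_HEADER "config.h" // illustrative macro/header, not from the patch
#include CONFIG_HEADER
#include "config.h"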
diff --git a/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.cpp b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.cpp new file mode 100644 index 0000000000000..2a3b7bed08c1e --- /dev/null +++ b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.cpp @@ -0,0 +1,168 @@ +//===--- BracesAroundStatement.cpp - clang-tidy -------- ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides utilities to put braces around a statement. +/// +//===----------------------------------------------------------------------===// + +#include "BracesAroundStatement.h" +#include "../utils/LexerUtils.h" +#include "LexerUtils.h" +#include "clang/AST/ASTContext.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Lex/Lexer.h" + +namespace clang::tidy::utils { + +BraceInsertionHints::operator bool() const { return DiagnosticPos.isValid(); } + +bool BraceInsertionHints::offersFixIts() const { + return OpeningBracePos.isValid() && ClosingBracePos.isValid(); +} + +unsigned BraceInsertionHints::resultingCompoundLineExtent( + const SourceManager &SourceMgr) const { + return SourceMgr.getSpellingLineNumber(ClosingBracePos) - + SourceMgr.getSpellingLineNumber(OpeningBracePos); +} + +FixItHint BraceInsertionHints::openingBraceFixIt() const { + return OpeningBracePos.isValid() + ? FixItHint::CreateInsertion(OpeningBracePos, " {") + : FixItHint(); +} + +FixItHint BraceInsertionHints::closingBraceFixIt() const { + return ClosingBracePos.isValid() + ? FixItHint::CreateInsertion(ClosingBracePos, ClosingBrace) + : FixItHint(); +} + +static tok::TokenKind getTokenKind(SourceLocation Loc, const SourceManager &SM, + const LangOptions &LangOpts) { + Token Tok; + SourceLocation Beginning = Lexer::GetBeginningOfToken(Loc, SM, LangOpts); + const bool Invalid = Lexer::getRawToken(Beginning, Tok, SM, LangOpts); + assert(!Invalid && "Expected a valid token."); + + if (Invalid) + return tok::NUM_TOKENS; + + return Tok.getKind(); +} + +static SourceLocation findEndLocation(const Stmt &S, const SourceManager &SM, + const LangOptions &LangOpts) { + SourceLocation Loc = lexer::getUnifiedEndLoc(S, SM, LangOpts); + if (!Loc.isValid()) + return Loc; + + // Start searching right after S. + Loc = Loc.getLocWithOffset(1); + + for (;;) { + assert(Loc.isValid()); + while (isHorizontalWhitespace(*SM.getCharacterData(Loc))) { + Loc = Loc.getLocWithOffset(1); + } + + if (isVerticalWhitespace(*SM.getCharacterData(Loc))) { + // EOL, insert brace before. + break; + } + tok::TokenKind TokKind = getTokenKind(Loc, SM, LangOpts); + if (TokKind != tok::comment) { + // Non-comment token, insert brace before. + break; + } + + SourceLocation TokEndLoc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); + SourceRange TokRange(Loc, TokEndLoc); + StringRef Comment = Lexer::getSourceText( + CharSourceRange::getTokenRange(TokRange), SM, LangOpts); + if (Comment.starts_with("/*") && Comment.contains('\n')) { + // Multi-line block comment, insert brace before. + break; + } + // else: Trailing comment, insert brace after the newline. + + // Fast-forward current token. 
+ Loc = TokEndLoc; + } + return Loc; +} + +BraceInsertionHints getBraceInsertionsHints(const Stmt *const S, + const LangOptions &LangOpts, + const SourceManager &SM, + SourceLocation StartLoc, + SourceLocation EndLocHint) { + // 1) If there's a corresponding "else" or "while", the check inserts "} " + // right before that token. + // 2) If there's a multi-line block comment starting on the same line after + // the location we're inserting the closing brace at, or there's a non-comment + // token, the check inserts "\n}" right before that token. + // 3) Otherwise the check finds the end of line (possibly after some block or + // line comments) and inserts "\n}" right before that EOL. + if (!S || isa(S)) { + // Already inside braces. + return {}; + } + + // When TreeTransform, Stmt in constexpr IfStmt will be transform to NullStmt. + // This NullStmt can be detected according to beginning token. + const SourceLocation StmtBeginLoc = S->getBeginLoc(); + if (isa(S) && StmtBeginLoc.isValid() && + getTokenKind(StmtBeginLoc, SM, LangOpts) == tok::l_brace) + return {}; + + if (StartLoc.isInvalid()) + return {}; + + // Convert StartLoc to file location, if it's on the same macro expansion + // level as the start of the statement. We also need file locations for + // Lexer::getLocForEndOfToken working properly. + StartLoc = Lexer::makeFileCharRange( + CharSourceRange::getCharRange(StartLoc, S->getBeginLoc()), SM, + LangOpts) + .getBegin(); + if (StartLoc.isInvalid()) + return {}; + StartLoc = Lexer::getLocForEndOfToken(StartLoc, 0, SM, LangOpts); + + // StartLoc points at the location of the opening brace to be inserted. + SourceLocation EndLoc; + std::string ClosingInsertion; + if (EndLocHint.isValid()) { + EndLoc = EndLocHint; + ClosingInsertion = "} "; + } else { + EndLoc = findEndLocation(*S, SM, LangOpts); + ClosingInsertion = "\n}"; + } + + assert(StartLoc.isValid()); + + // Change only if StartLoc and EndLoc are on the same macro expansion level. + // This will also catch invalid EndLoc. + // Example: LLVM_DEBUG( for(...) do_something() ); + // In this case fix-it cannot be provided as the semicolon which is not + // visible here is part of the macro. Adding braces here would require adding + // another semicolon. + if (Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(SourceRange( + SM.getSpellingLoc(StartLoc), SM.getSpellingLoc(EndLoc))), + SM, LangOpts) + .isInvalid()) + return {StartLoc}; + return {StartLoc, EndLoc, ClosingInsertion}; +} + +} // namespace clang::tidy::utils diff --git a/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h new file mode 100644 index 0000000000000..cb1c06c7aa1a1 --- /dev/null +++ b/clang-tools-extra/clang-tidy/utils/BracesAroundStatement.h @@ -0,0 +1,75 @@ +//===--- BracesAroundStatement.h - clang-tidy ------- -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file provides utilities to put braces around a statement. 
+/// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Stmt.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/SourceManager.h" + +namespace clang::tidy::utils { + +/// A provider of fix-it hints to insert opening and closing braces. An instance +/// of this type is the result of calling `getBraceInsertionsHints` below. +struct BraceInsertionHints { + /// The position of a potential diagnostic. It coincides with the position of + /// the opening brace to insert, but can also just be the place to show a + /// diagnostic in case braces cannot be inserted automatically. + SourceLocation DiagnosticPos; + + /// Constructor for a no-hint. + BraceInsertionHints() = default; + + /// Constructor for a valid hint that cannot insert braces automatically. + BraceInsertionHints(SourceLocation DiagnosticPos) + : DiagnosticPos(DiagnosticPos) {} + + /// Constructor for a hint offering fix-its for brace insertion. Both + /// positions must be valid. + BraceInsertionHints(SourceLocation OpeningBracePos, + SourceLocation ClosingBracePos, std::string ClosingBrace) + : DiagnosticPos(OpeningBracePos), OpeningBracePos(OpeningBracePos), + ClosingBracePos(ClosingBracePos), ClosingBrace(ClosingBrace) { + assert(offersFixIts()); + } + + /// Indicates whether the hint provides at least the position of a diagnostic. + operator bool() const; + + /// Indicates whether the hint provides fix-its to insert braces. + bool offersFixIts() const; + + /// The number of lines between the inserted opening brace and its closing + /// counterpart. + unsigned resultingCompoundLineExtent(const SourceManager &SourceMgr) const; + + /// Fix-it to insert an opening brace. + FixItHint openingBraceFixIt() const; + + /// Fix-it to insert a closing brace. + FixItHint closingBraceFixIt() const; + +private: + SourceLocation OpeningBracePos; + SourceLocation ClosingBracePos; + std::string ClosingBrace; +}; + +/// Create fix-it hints for braces that wrap the given statement when applied. +/// The algorithm computing them respects comment before and after the statement +/// and adds line breaks before the braces accordingly. 
+BraceInsertionHints +getBraceInsertionsHints(const Stmt *const S, const LangOptions &LangOpts, + const SourceManager &SM, SourceLocation StartLoc, + SourceLocation EndLocHint = SourceLocation()); + +} // namespace clang::tidy::utils diff --git a/clang-tools-extra/clang-tidy/utils/CMakeLists.txt b/clang-tools-extra/clang-tidy/utils/CMakeLists.txt index f0160fa9df748..9cff7d475425d 100644 --- a/clang-tools-extra/clang-tidy/utils/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/utils/CMakeLists.txt @@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS add_clang_library(clangTidyUtils Aliasing.cpp ASTUtils.cpp + BracesAroundStatement.cpp DeclRefExprUtils.cpp DesignatedInitializers.cpp ExceptionAnalyzer.cpp diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp index da1433aa2d05d..69b7d40ef628d 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp @@ -31,13 +31,12 @@ struct DenseMapInfo { using NamingCheckId = clang::tidy::RenamerClangTidyCheck::NamingCheckId; static inline NamingCheckId getEmptyKey() { - return {DenseMapInfo::getEmptyKey(), - "EMPTY"}; + return {DenseMapInfo::getEmptyKey(), "EMPTY"}; } static inline NamingCheckId getTombstoneKey() { return {DenseMapInfo::getTombstoneKey(), - "TOMBSTONE"}; + "TOMBSTONE"}; } static unsigned getHashValue(NamingCheckId Val) { @@ -367,6 +366,23 @@ class RenamerClangTidyVisitor return true; } + bool VisitDesignatedInitExpr(DesignatedInitExpr *Expr) { + for (const DesignatedInitExpr::Designator &D : Expr->designators()) { + if (!D.isFieldDesignator()) + continue; + const FieldDecl *FD = D.getFieldDecl(); + if (!FD) + continue; + const IdentifierInfo *II = FD->getIdentifier(); + if (!II) + continue; + SourceRange FixLocation{D.getFieldLoc(), D.getFieldLoc()}; + Check->addUsage(FD, FixLocation, SM); + } + + return true; + } + private: RenamerClangTidyCheck *Check; const SourceManager *SM; diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index 5b8128fca6266..fddfffe7523d9 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -466,7 +466,8 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { static constexpr llvm::ArrayRef NAME( \ NAME##_init, std::size(NAME##_init) - 1); #define OPTION(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELP, METAVAR, VALUES) \ + FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES) \ Prefixes[DriverID::OPT_##ID] = PREFIX; #include "clang/Driver/Options.inc" #undef OPTION @@ -478,7 +479,8 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { const void *AliasArgs; } AliasTable[] = { #define OPTION(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELP, METAVAR, VALUES) \ + FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ + METAVAR, VALUES) \ {DriverID::OPT_##ID, DriverID::OPT_##ALIAS, ALIASARGS}, #include "clang/Driver/Options.inc" #undef OPTION diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 309b844615a12..a7193e90c38da 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -179,8 +179,9 @@ Changes in existing checks - Improved :doc:`cppcoreguidelines-missing-std-forward ` check by no longer - giving false positives for 
deleted functions and fix false negative when some - parameters are forwarded, but other aren't. + giving false positives for deleted functions, by fixing false negatives when only + a few parameters are forwarded and by ignoring parameters without a name (unused + arguments). - Improved :doc:`cppcoreguidelines-owning-memory ` check to properly handle @@ -210,6 +211,10 @@ Changes in existing checks - Improved :doc:`google-runtime-int ` check performance through optimizations. +- Improved :doc:`hicpp-ignored-remove-result ` + check by ignoring other functions with same prefixes as the target specific + functions. + - Improved :doc:`llvm-header-guard ` check by replacing the local option `HeaderFileExtensions` by the global option of the same name. @@ -251,10 +256,19 @@ Changes in existing checks analyzed, se the check now handles the common patterns `const auto e = (*vector_ptr)[i]` and `const auto e = vector_ptr->at(i);`. +- Improved :doc:`readability-avoid-return-with-void-value + ` check by adding + fix-its. + +- Improved :doc:`readability-duplicate-include + ` check by excluding include + directives that form the filename using macro. + - Improved :doc:`readability-identifier-naming ` check in `GetConfigPerFile` mode by resolving symbolic links to header files. Fixed handling of Hungarian - Prefix when configured to `LowerCase`. + Prefix when configured to `LowerCase`. Added support for renaming designated + initializers. - Improved :doc:`readability-implicit-bool-conversion ` check to provide diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/missing-std-forward.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/missing-std-forward.cpp index 9a50eabf619bd..8116db58c937d 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/missing-std-forward.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines/missing-std-forward.cpp @@ -198,3 +198,16 @@ struct S { }; } // namespace deleted_functions + +namespace unused_arguments { + +template +void unused_argument1(F&&) {} + +template +void unused_argument2([[maybe_unused]] F&& f) {} + +template +void unused_argument3(F&& _) {} + +} // namespace unused_arguments diff --git a/clang-tools-extra/test/clang-tidy/checkers/hicpp/ignored-remove-result.cpp b/clang-tools-extra/test/clang-tidy/checkers/hicpp/ignored-remove-result.cpp index b068f08590989..fc431024303ab 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/hicpp/ignored-remove-result.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/hicpp/ignored-remove-result.cpp @@ -15,6 +15,10 @@ ForwardIt unique(ForwardIt, ForwardIt); template InputIt find(InputIt, InputIt, const T&); +struct unique_disposable { + void* release(); +}; + class error_code { }; @@ -63,4 +67,6 @@ void noWarning() { // bugprone-unused-return-value's checked return types. 
errorFunc(); (void) errorFunc(); + + std::unique_disposable{}.release(); } diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/avoid-return-with-void-value.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/avoid-return-with-void-value.cpp index f00407c99ce57..7c948dba3e8f7 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/avoid-return-with-void-value.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/avoid-return-with-void-value.cpp @@ -12,23 +12,30 @@ void f2() { return f1(); // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] // CHECK-MESSAGES-LENIENT: :[[@LINE-2]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-FIXES: f1(); } void f3(bool b) { if (b) return f1(); // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-FIXES: if (b) { f1(); return; + // CHECK-NEXT: } return f2(); // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] // CHECK-MESSAGES-LENIENT: :[[@LINE-2]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-FIXES: f2(); + // CHECK-FIXES-LENIENT: f2(); } template T f4() {} void f5() { - return f4(); - // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] - // CHECK-MESSAGES-LENIENT: :[[@LINE-2]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + { return f4(); } + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-MESSAGES-LENIENT: :[[@LINE-2]]:7: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-FIXES: { f4(); return; } + // CHECK-FIXES-LENIENT: { f4(); return; } } void f6() { return; } @@ -41,6 +48,8 @@ void f9() { return (void)f7(); // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] // CHECK-MESSAGES-LENIENT: :[[@LINE-2]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-FIXES: (void)f7(); + // CHECK-FIXES-LENIENT: (void)f7(); } #define RETURN_VOID return (void)1 @@ -50,12 +59,12 @@ void f10() { // CHECK-MESSAGES-INCLUDE-MACROS: :[[@LINE-1]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] } -template +template struct C { C(A) {} }; -template +template C f11() { return {}; } using VOID = void; @@ -66,4 +75,36 @@ VOID f13() { return f12(); // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] // CHECK-MESSAGES-LENIENT: :[[@LINE-2]]:5: warning: return 
statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-FIXES: f12(); return; + // CHECK-FIXES-LENIENT: f12(); return; + (void)1; +} + +void f14() { + return /* comment */ f1() /* comment */ ; + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-MESSAGES-LENIENT: :[[@LINE-2]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-FIXES: /* comment */ f1() /* comment */ ; return; + // CHECK-FIXES-LENIENT: /* comment */ f1() /* comment */ ; return; + (void)1; +} + +void f15() { + return/*comment*/f1()/*comment*/;//comment + // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-MESSAGES-LENIENT: :[[@LINE-2]]:5: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-FIXES: /*comment*/f1()/*comment*/; return;//comment + // CHECK-FIXES-LENIENT: /*comment*/f1()/*comment*/; return;//comment + (void)1; +} + +void f16(bool b) { + if (b) return f1(); + // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-FIXES: if (b) { f1(); return; + // CHECK-NEXT: } + else return f2(); + // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: return statement within a void function should not have a specified return value [readability-avoid-return-with-void-value] + // CHECK-FIXES: else { f2(); return; + // CHECK-NEXT: } } diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/duplicate-include.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/duplicate-include.cpp index dd954c705514f..2119602ba454b 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/duplicate-include.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/duplicate-include.cpp @@ -70,3 +70,18 @@ int r; // CHECK-FIXES: {{^int q;$}} // CHECK-FIXES-NEXT: {{^#include $}} // CHECK-FIXES-NEXT: {{^int r;$}} + +namespace Issue_87303 { +#define RESET_INCLUDE_CACHE +// Expect no warnings + +#define MACRO_FILENAME "duplicate-include.h" +#include MACRO_FILENAME +#include "duplicate-include.h" + +#define MACRO_FILENAME_2 +#include +#include MACRO_FILENAME_2 + +#undef RESET_INCLUDE_CACHE +} // Issue_87303 diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp index d2e89a7c9855c..57ef4aae5ddb7 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming.cpp @@ -766,3 +766,13 @@ STATIC_MACRO void someFunc(MyFunPtr, const MyFunPtr****) {} // CHECK-FIXES: {{^}}STATIC_MACRO void someFunc(my_fun_ptr_t, const my_fun_ptr_t****) {} #undef STATIC_MACRO } + +struct Some_struct { + int SomeMember; +// CHECK-MESSAGES: :[[@LINE-1]]:7: warning: invalid case style for public member 'SomeMember' [readability-identifier-naming] +// CHECK-FIXES: {{^}} int some_member; +}; +Some_struct g_s1{ .SomeMember = 1 }; +// CHECK-FIXES: {{^}}Some_struct g_s1{ .some_member = 1 }; +Some_struct g_s2{.SomeMember=1}; 
+// CHECK-FIXES: {{^}}Some_struct g_s2{.some_member=1}; diff --git a/clang/cmake/caches/Apple-stage2.cmake index 72cdedd611bc9..ede256a2da6b8 100644 --- a/clang/cmake/caches/Apple-stage2.cmake +++ b/clang/cmake/caches/Apple-stage2.cmake @@ -15,6 +15,7 @@ set(LLVM_ENABLE_ZLIB ON CACHE BOOL "") set(LLVM_ENABLE_BACKTRACES OFF CACHE BOOL "") set(LLVM_ENABLE_MODULES ON CACHE BOOL "") set(LLVM_EXTERNALIZE_DEBUGINFO ON CACHE BOOL "") +set(LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES OFF CACHE BOOL "") set(CLANG_PLUGIN_SUPPORT OFF CACHE BOOL "") set(CLANG_SPAWN_CC1 ON CACHE BOOL "") set(BUG_REPORT_URL "http://developer.apple.com/bugreporter/" CACHE STRING "") diff --git a/clang/docs/ReleaseNotes.rst index 8fc925350849c..f96cebbde3d82 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -110,6 +110,10 @@ C++20 Feature Support templates (`P1814R0 `_). (#GH54051). +- We have sufficient confidence and experience with the concepts implementation + to update the ``__cpp_concepts`` macro to `202002L`. This enables + ```` from libstdc++ to work correctly with Clang. + C++23 Feature Support ^^^^^^^^^^^^^^^^^^^^^ @@ -198,6 +202,10 @@ Non-comprehensive list of changes in this release New Compiler Flags ------------------ +- ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and + sign change. +- ``-fsanitize=implicit-integer-conversion`` is a group that replaces the previous + group ``-fsanitize=implicit-conversion``. - ``-Wmissing-designated-field-initializers``, grouped under ``-Wmissing-field-initializers``. This diagnostic can be disabled to make ``-Wmissing-field-initializers`` behave @@ -211,6 +219,9 @@ Modified Compiler Flags - Added a new diagnostic flag ``-Wreturn-mismatch`` which is grouped under ``-Wreturn-type``, and moved some of the diagnostics previously controlled by ``-Wreturn-type`` under this new flag. Fixes #GH72116. +- ``-fsanitize=implicit-conversion`` is now a group for both + ``-fsanitize=implicit-integer-conversion`` and + ``-fsanitize=implicit-bitfield-conversion``. - Added ``-Wcast-function-type-mismatch`` under the ``-Wcast-function-type`` warning group. Moved the diagnostic previously controlled by @@ -334,11 +345,27 @@ Improvements to Clang's diagnostics - Clang now emits ``unused argument`` warning when the -fmodule-output flag is used with an input that is not of type c++-module. +- Clang emits a ``-Wreturn-stack-address`` warning if a function returns a pointer or + reference to a struct literal. Fixes #GH8678 + +- Clang emits a ``-Wunused-but-set-variable`` warning on C++ variables whose declaration + (with initializer) entirely constitutes the condition expression of an if/while/for construct + but which are not actually used in the body of that construct. Fixes #GH41447 + +- Clang emits a diagnostic when a tentative array definition is assumed to have + a single element, but that diagnostic was never given a diagnostic group. + Added the ``-Wtentative-definition-array`` warning group to cover this. + Fixes #GH87766 + Improvements to Clang's time-trace ---------------------------------- Bug Fixes in This Version ------------------------- +- Clang's ``-Wundefined-func-template`` no longer warns on pure virtual + functions. + (`#74016 `_) + - Fixed missing warnings when comparing mismatched enumeration constants in C (`#29217 `). @@ -473,6 +500,8 @@ Bug Fixes to C++ Support when one of the function had more specialized templates.
Fixes (`#82509 `_) and (`#74494 `_) +- Clang now supports direct lambda calls inside type alias template declarations. + This addresses (#GH70601), (#GH76674), (#GH79555), (#GH81145) and (#GH82104). - Allow access to a public template alias declaration that refers to friend's private nested type. (#GH25708). - Fixed a crash in constant evaluation when trying to access a @@ -491,9 +520,15 @@ Bug Fixes to C++ Support - Fix an issue caused by not handling invalid cases when substituting into the parameter mapping of a constraint. Fixes (#GH86757). - Fixed a bug that prevented member function templates of class templates declared with a deduced return type from being explicitly specialized for a given implicit instantiation of the class template. +- Fixed a crash when ``this`` is used in a dependent class scope function template specialization + that instantiates to a static member function. - Fix crash when inheriting from a cv-qualified type. Fixes: (`#35603 `_) +- Fix a crash when a ``using enum`` declaration uses an anonymous enumeration. Fixes (#GH86790). +- Clang now correctly tracks type dependence of by-value captures in lambdas with an explicit + object parameter. + Fixes (#GH70604), (#GH79754), (#GH84163), (#GH84425), (#GH86054), (#GH86398), and (#GH86399). Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -616,6 +651,7 @@ AST Matchers - Add ``isExplicitObjectMemberFunction``. - Fixed ``forEachArgumentWithParam`` and ``forEachArgumentWithParamType`` to not skip the explicit object parameter for operator calls. +- Fixed a ``capturesVar`` assertion failure when the lambda capture does not capture a variable. (#GH76425) clang-format ------------ diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst index 8f58c92bd2a16..531d56e313826 100644 --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -148,6 +148,11 @@ Available checks are: Issues caught by this sanitizer are not undefined behavior, but are often unintentional. - ``-fsanitize=integer-divide-by-zero``: Integer division by zero. + - ``-fsanitize=implicit-bitfield-conversion``: Implicit conversion from an + integer of larger bit width to a smaller bitfield, if that results in data + loss. This includes unsigned/signed truncations and sign changes, similarly + to how the ``-fsanitize=implicit-integer-conversion`` group works, but + explicitly for bitfields. - ``-fsanitize=nonnull-attribute``: Passing null pointer as a function parameter which is declared to never be null. - ``-fsanitize=null``: Use of a null pointer or creation of a null @@ -193,8 +198,8 @@ Available checks are: signed division overflow (``INT_MIN/-1``). Note that checks are still added even when ``-fwrapv`` is enabled. This sanitizer does not check for lossy implicit conversions performed before the computation (see - ``-fsanitize=implicit-conversion``). Both of these two issues are handled - by ``-fsanitize=implicit-conversion`` group of checks. + ``-fsanitize=implicit-integer-conversion``). Both of these issues are handled + by the ``-fsanitize=implicit-integer-conversion`` group of checks. - ``-fsanitize=unreachable``: If control flow reaches an unreachable program point. - ``-fsanitize=unsigned-integer-overflow``: Unsigned integer overflow, where @@ -202,7 +207,7 @@ Available checks are: type. Unlike signed integer overflow, this is not undefined behavior, but it is often unintentional.
This sanitizer does not check for lossy implicit conversions performed before such a computation - (see ``-fsanitize=implicit-conversion``). + (see ``-fsanitize=implicit-integer-conversion``). - ``-fsanitize=vla-bound``: A variable-length array whose bound does not evaluate to a positive value. - ``-fsanitize=vptr``: Use of an object whose vptr indicates that it is of @@ -224,11 +229,15 @@ You can also use the following check groups: - ``-fsanitize=implicit-integer-arithmetic-value-change``: Catches implicit conversions that change the arithmetic value of the integer. Enables ``implicit-signed-integer-truncation`` and ``implicit-integer-sign-change``. - - ``-fsanitize=implicit-conversion``: Checks for suspicious - behavior of implicit conversions. Enables + - ``-fsanitize=implicit-integer-conversion``: Checks for suspicious + behavior of implicit integer conversions. Enables ``implicit-unsigned-integer-truncation``, ``implicit-signed-integer-truncation``, and ``implicit-integer-sign-change``. + - ``-fsanitize=implicit-conversion``: Checks for suspicious + behavior of implicit conversions. Enables + ``implicit-integer-conversion`` and + ``implicit-bitfield-conversion``. - ``-fsanitize=integer``: Checks for undefined or suspicious integer behavior (e.g. unsigned integer overflow). Enables ``signed-integer-overflow``, ``unsigned-integer-overflow``, diff --git a/clang/docs/analyzer/checkers.rst index f188f18ba5557..fb748d23a53d0 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -3138,10 +3138,16 @@ are detected: allowed in this state. * Invalid 3rd ("``whence``") argument to ``fseek``. -The checker does not track the correspondence between integer file descriptors -and ``FILE *`` pointers. Operations on standard streams like ``stdin`` are not -treated specially and are therefore often not recognized (because these streams -are usually not opened explicitly by the program, and are global variables). +This checker usually splits stream operations into two cases: a success +and a failure case. However, for write operations (like ``fwrite``, +``fprintf`` and even ``fsetpos``) this behavior could produce a large number of +unwanted reports on projects that don't have error checks around the write +operations, so by default the checker assumes that write operations always succeed. +This behavior can be controlled by the ``Pedantic`` flag: with +``-analyzer-config alpha.unix.Stream:Pedantic=true`` the checker will model the +cases where a write operation fails and report situations where this leads to +erroneous behavior. (The default is ``Pedantic=false``, where write operations +are assumed to succeed.) .. code-block:: c @@ -3196,6 +3202,13 @@ are usually not opened explicitly by the program, and are global variables). fclose(p); } +**Limitations** + +The checker does not track the correspondence between integer file descriptors +and ``FILE *`` pointers. Operations on standard streams like ``stdin`` are not +treated specially and are therefore often not recognized (because these streams +are usually not opened explicitly by the program, and are global variables). + ..
_alpha-unix-cstring-BufferOverlap: alpha.unix.cstring.BufferOverlap (C) diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h index 06d67e9cba953..94e7dd817809d 100644 --- a/clang/include/clang/AST/ASTNodeTraverser.h +++ b/clang/include/clang/AST/ASTNodeTraverser.h @@ -53,6 +53,7 @@ struct { void Visit(TypeLoc); void Visit(const Decl *D); void Visit(const CXXCtorInitializer *Init); + void Visit(const OpenACCClause *C); void Visit(const OMPClause *C); void Visit(const BlockDecl::Capture &C); void Visit(const GenericSelectionExpr::ConstAssociation &A); @@ -239,6 +240,13 @@ class ASTNodeTraverser }); } + void Visit(const OpenACCClause *C) { + getNodeDelegate().AddChild([=] { + getNodeDelegate().Visit(C); + // TODO OpenACC: Switch on clauses that have children, and add them. + }); + } + void Visit(const OMPClause *C) { getNodeDelegate().AddChild([=] { getNodeDelegate().Visit(C); @@ -799,6 +807,11 @@ class ASTNodeTraverser Visit(C); } + void VisitOpenACCConstructStmt(const OpenACCConstructStmt *Node) { + for (const auto *C : Node->clauses()) + Visit(C); + } + void VisitInitListExpr(const InitListExpr *ILE) { if (auto *Filler = ILE->getArrayFiller()) { Visit(Filler, "array_filler"); diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 30bd0a118497a..31bacacd879ba 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -1101,6 +1101,9 @@ class VarDecl : public DeclaratorDecl, public Redeclarable { LLVM_PREFERRED_TYPE(bool) unsigned EscapingByref : 1; + + LLVM_PREFERRED_TYPE(bool) + unsigned IsCXXCondDecl : 1; }; union { @@ -1590,6 +1593,15 @@ class VarDecl : public DeclaratorDecl, public Redeclarable { NonParmVarDeclBits.EscapingByref = true; } + bool isCXXCondDecl() const { + return isa(this) ? false : NonParmVarDeclBits.IsCXXCondDecl; + } + + void setCXXCondDecl() { + assert(!isa(this)); + NonParmVarDeclBits.IsCXXCondDecl = true; + } + /// Determines if this variable's alignment is dependent. 
bool hasDependentAlignment() const; diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index 9cebaff63bb0d..7aed4d5cbc002 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -1869,6 +1869,10 @@ class CXXRecordDecl : public RecordDecl { DL.MethodTyInfo = TS; } + void setLambdaDependencyKind(unsigned Kind) { + getLambdaData().DependencyKind = Kind; + } + void setLambdaIsGeneric(bool IsGeneric) { assert(DefinitionData && DefinitionData->IsLambda && "setting lambda property of non-lambda class"); diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index e953044ff3333..3e3edc30702dd 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -1149,6 +1149,7 @@ class CXXThisExpr : public Expr { CXXThisExpr(SourceLocation L, QualType Ty, bool IsImplicit, ExprValueKind VK) : Expr(CXXThisExprClass, Ty, VK, OK_Ordinary) { CXXThisExprBits.IsImplicit = IsImplicit; + CXXThisExprBits.CapturedByCopyInLambdaWithExplicitObjectParameter = false; CXXThisExprBits.Loc = L; setDependence(computeDependence(this)); } @@ -1170,6 +1171,15 @@ class CXXThisExpr : public Expr { bool isImplicit() const { return CXXThisExprBits.IsImplicit; } void setImplicit(bool I) { CXXThisExprBits.IsImplicit = I; } + bool isCapturedByCopyInLambdaWithExplicitObjectParameter() const { + return CXXThisExprBits.CapturedByCopyInLambdaWithExplicitObjectParameter; + } + + void setCapturedByCopyInLambdaWithExplicitObjectParameter(bool Set) { + CXXThisExprBits.CapturedByCopyInLambdaWithExplicitObjectParameter = Set; + setDependence(computeDependence(this)); + } + static bool classof(const Stmt *T) { return T->getStmtClass() == CXXThisExprClass; } diff --git a/clang/include/clang/AST/JSONNodeDumper.h b/clang/include/clang/AST/JSONNodeDumper.h index c84c975aab1c3..3cfb7ff09125e 100644 --- a/clang/include/clang/AST/JSONNodeDumper.h +++ b/clang/include/clang/AST/JSONNodeDumper.h @@ -203,6 +203,7 @@ class JSONNodeDumper void Visit(const TemplateArgument &TA, SourceRange R = {}, const Decl *From = nullptr, StringRef Label = {}); void Visit(const CXXCtorInitializer *Init); + void Visit(const OpenACCClause *C); void Visit(const OMPClause *C); void Visit(const BlockDecl::Capture &C); void Visit(const GenericSelectionExpr::ConstAssociation &A); diff --git a/clang/include/clang/AST/OpenACCClause.h b/clang/include/clang/AST/OpenACCClause.h new file mode 100644 index 0000000000000..06a0098bbda4c --- /dev/null +++ b/clang/include/clang/AST/OpenACCClause.h @@ -0,0 +1,135 @@ +//===- OpenACCClause.h - Classes for OpenACC clauses ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// \file +// This file defines OpenACC AST classes for clauses. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_OPENACCCLAUSE_H +#define LLVM_CLANG_AST_OPENACCCLAUSE_H +#include "clang/AST/ASTContext.h" +#include "clang/Basic/OpenACCKinds.h" + +namespace clang { +/// This is the base type for all OpenACC Clauses. 
+class OpenACCClause { + OpenACCClauseKind Kind; + SourceRange Location; + +protected: + OpenACCClause(OpenACCClauseKind K, SourceLocation BeginLoc, + SourceLocation EndLoc) + : Kind(K), Location(BeginLoc, EndLoc) {} + +public: + OpenACCClauseKind getClauseKind() const { return Kind; } + SourceLocation getBeginLoc() const { return Location.getBegin(); } + SourceLocation getEndLoc() const { return Location.getEnd(); } + + static bool classof(const OpenACCClause *) { return true; } + + virtual ~OpenACCClause() = default; +}; + +/// Represents a clause that has a list of parameters. +class OpenACCClauseWithParams : public OpenACCClause { + /// Location of the '('. + SourceLocation LParenLoc; + +protected: + OpenACCClauseWithParams(OpenACCClauseKind K, SourceLocation BeginLoc, + SourceLocation LParenLoc, SourceLocation EndLoc) + : OpenACCClause(K, BeginLoc, EndLoc), LParenLoc(LParenLoc) {} + +public: + SourceLocation getLParenLoc() const { return LParenLoc; } +}; + +template class OpenACCClauseVisitor { + Impl &getDerived() { return static_cast(*this); } + +public: + void VisitClauseList(ArrayRef List) { + for (const OpenACCClause *Clause : List) + Visit(Clause); + } + + void Visit(const OpenACCClause *C) { + if (!C) + return; + + switch (C->getClauseKind()) { + case OpenACCClauseKind::Default: + case OpenACCClauseKind::Finalize: + case OpenACCClauseKind::IfPresent: + case OpenACCClauseKind::Seq: + case OpenACCClauseKind::Independent: + case OpenACCClauseKind::Auto: + case OpenACCClauseKind::Worker: + case OpenACCClauseKind::Vector: + case OpenACCClauseKind::NoHost: + case OpenACCClauseKind::If: + case OpenACCClauseKind::Self: + case OpenACCClauseKind::Copy: + case OpenACCClauseKind::UseDevice: + case OpenACCClauseKind::Attach: + case OpenACCClauseKind::Delete: + case OpenACCClauseKind::Detach: + case OpenACCClauseKind::Device: + case OpenACCClauseKind::DevicePtr: + case OpenACCClauseKind::DeviceResident: + case OpenACCClauseKind::FirstPrivate: + case OpenACCClauseKind::Host: + case OpenACCClauseKind::Link: + case OpenACCClauseKind::NoCreate: + case OpenACCClauseKind::Present: + case OpenACCClauseKind::Private: + case OpenACCClauseKind::CopyOut: + case OpenACCClauseKind::CopyIn: + case OpenACCClauseKind::Create: + case OpenACCClauseKind::Reduction: + case OpenACCClauseKind::Collapse: + case OpenACCClauseKind::Bind: + case OpenACCClauseKind::VectorLength: + case OpenACCClauseKind::NumGangs: + case OpenACCClauseKind::NumWorkers: + case OpenACCClauseKind::DeviceNum: + case OpenACCClauseKind::DefaultAsync: + case OpenACCClauseKind::DeviceType: + case OpenACCClauseKind::DType: + case OpenACCClauseKind::Async: + case OpenACCClauseKind::Tile: + case OpenACCClauseKind::Gang: + case OpenACCClauseKind::Wait: + case OpenACCClauseKind::Invalid: + llvm_unreachable("Clause visitor not yet implemented"); + } + llvm_unreachable("Invalid Clause kind"); + } +}; + +class OpenACCClausePrinter final + : public OpenACCClauseVisitor { + raw_ostream &OS; + +public: + void VisitClauseList(ArrayRef List) { + for (const OpenACCClause *Clause : List) { + Visit(Clause); + + if (Clause != List.back()) + OS << ' '; + } + } + OpenACCClausePrinter(raw_ostream &OS) : OS(OS) {} +}; + +} // namespace clang + +#endif // LLVM_CLANG_AST_OPENACCCLAUSE_H diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index a0c831f6f123c..0959e38919e50 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -509,6 +509,7 @@ template 
class RecursiveASTVisitor { bool TraverseOpenACCConstructStmt(OpenACCConstructStmt *S); bool TraverseOpenACCAssociatedStmtConstruct(OpenACCAssociatedStmtConstruct *S); + bool VisitOpenACCClauseList(ArrayRef<const OpenACCClause *>); }; template <typename Derived> @@ -3951,8 +3952,8 @@ bool RecursiveASTVisitor<Derived>::VisitOMPXBareClause(OMPXBareClause *C) { template <typename Derived> bool RecursiveASTVisitor<Derived>::TraverseOpenACCConstructStmt( - OpenACCConstructStmt *) { - // TODO OpenACC: When we implement clauses, ensure we traverse them here. + OpenACCConstructStmt *C) { + TRY_TO(VisitOpenACCClauseList(C->clauses())); return true; } @@ -3964,6 +3965,14 @@ bool RecursiveASTVisitor<Derived>::TraverseOpenACCAssociatedStmtConstruct( return true; } +template <typename Derived> +bool RecursiveASTVisitor<Derived>::VisitOpenACCClauseList( + ArrayRef<const OpenACCClause *>) { + // TODO OpenACC: When we have Clauses with expressions, we should visit them + // here. + return true; +} + DEF_TRAVERSE_STMT(OpenACCComputeConstruct, { TRY_TO(TraverseOpenACCAssociatedStmtConstruct(S)); }) diff --git a/clang/include/clang/AST/Stmt.h index 8892518d58e85..1b9c923104771 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -784,6 +784,11 @@ class alignas(void *) Stmt { LLVM_PREFERRED_TYPE(bool) unsigned IsImplicit : 1; + /// Whether there is a lambda with an explicit object parameter that + /// captures this "this" by copy. + LLVM_PREFERRED_TYPE(bool) + unsigned CapturedByCopyInLambdaWithExplicitObjectParameter : 1; + /// The location of the "this". SourceLocation Loc; }; diff --git a/clang/include/clang/AST/StmtOpenACC.h index 19da66832c737..419cb6cada0bc 100644 --- a/clang/include/clang/AST/StmtOpenACC.h +++ b/clang/include/clang/AST/StmtOpenACC.h @@ -13,9 +13,11 @@ #ifndef LLVM_CLANG_AST_STMTOPENACC_H #define LLVM_CLANG_AST_STMTOPENACC_H +#include "clang/AST/OpenACCClause.h" #include "clang/AST/Stmt.h" #include "clang/Basic/OpenACCKinds.h" #include "clang/Basic/SourceLocation.h" +#include <memory> namespace clang { /// This is the base class for an OpenACC statement-level construct, other @@ -30,13 +32,23 @@ class OpenACCConstructStmt : public Stmt { /// the directive. SourceRange Range; - // TODO OPENACC: Clauses should probably be collected in this class. + /// The list of clauses. This is stored here as an ArrayRef, as this is the + /// most convenient place to access the list; however, the list itself should + /// be stored in leaf nodes, likely in trailing storage. + MutableArrayRef<const OpenACCClause *> Clauses; protected: OpenACCConstructStmt(StmtClass SC, OpenACCDirectiveKind K, SourceLocation Start, SourceLocation End) : Stmt(SC), Kind(K), Range(Start, End) {} + // Used only for initialization; the leaf class can initialize this to + // trailing storage. + void setClauseList(MutableArrayRef<const OpenACCClause *> NewClauses) { assert(Clauses.empty() && "Cannot change clause list"); Clauses = NewClauses; } + public: OpenACCDirectiveKind getDirectiveKind() const { return Kind; } @@ -47,6 +59,7 @@ class OpenACCConstructStmt : public Stmt { SourceLocation getBeginLoc() const { return Range.getBegin(); } SourceLocation getEndLoc() const { return Range.getEnd(); } + ArrayRef<const OpenACCClause *> clauses() const { return Clauses; } child_range children() { return child_range(child_iterator(), child_iterator()); @@ -101,17 +114,32 @@ class OpenACCAssociatedStmtConstruct : public OpenACCConstructStmt { /// those three, as they are semantically identical, and have only minor /// differences in the permitted list of clauses, which can be differentiated by /// the 'Kind'.
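Aside: the OpenACCComputeConstruct hunk below applies the usual LLVM trailing-objects idiom, where the clause pointers live in the same allocation as the node and the base class only keeps a MutableArrayRef view of them. A minimal self-contained sketch of that idiom, not part of the patch (hypothetical Clause/Construct names, and plain operator new in place of ASTContext allocation):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/TrailingObjects.h"
#include <memory>
#include <new>

struct Clause {};

class Construct final
    : private llvm::TrailingObjects<Construct, const Clause *> {
  friend TrailingObjects;
  unsigned NumClauses;

  explicit Construct(llvm::ArrayRef<const Clause *> Clauses)
      : NumClauses(Clauses.size()) {
    // The clause pointers live immediately behind the node itself.
    std::uninitialized_copy(Clauses.begin(), Clauses.end(),
                            getTrailingObjects<const Clause *>());
  }

public:
  static Construct *Create(llvm::ArrayRef<const Clause *> Clauses) {
    // One allocation sized for the node plus its trailing clause array.
    void *Mem =
        ::operator new(totalSizeToAlloc<const Clause *>(Clauses.size()));
    return new (Mem) Construct(Clauses);
  }

  llvm::ArrayRef<const Clause *> clauses() const {
    return llvm::ArrayRef<const Clause *>(
        getTrailingObjects<const Clause *>(), NumClauses);
  }
};

The patch additionally provides a CreateEmpty path that value-initializes the trailing slots so the AST reader can fill them in during deserialization.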
-class OpenACCComputeConstruct : public OpenACCAssociatedStmtConstruct { +class OpenACCComputeConstruct final + : public OpenACCAssociatedStmtConstruct, + public llvm::TrailingObjects { friend class ASTStmtWriter; friend class ASTStmtReader; friend class ASTContext; - OpenACCComputeConstruct() - : OpenACCAssociatedStmtConstruct( - OpenACCComputeConstructClass, OpenACCDirectiveKind::Invalid, - SourceLocation{}, SourceLocation{}, /*AssociatedStmt=*/nullptr) {} + OpenACCComputeConstruct(unsigned NumClauses) + : OpenACCAssociatedStmtConstruct(OpenACCComputeConstructClass, + OpenACCDirectiveKind::Invalid, + SourceLocation{}, SourceLocation{}, + /*AssociatedStmt=*/nullptr) { + // We cannot send the TrailingObjects storage to the base class (which holds + // a reference to the data) until it is constructed, so we have to set it + // separately here. + std::uninitialized_value_construct( + getTrailingObjects(), + getTrailingObjects() + NumClauses); + setClauseList(MutableArrayRef(getTrailingObjects(), + NumClauses)); + } OpenACCComputeConstruct(OpenACCDirectiveKind K, SourceLocation Start, - SourceLocation End, Stmt *StructuredBlock) + SourceLocation End, + ArrayRef Clauses, + Stmt *StructuredBlock) : OpenACCAssociatedStmtConstruct(OpenACCComputeConstructClass, K, Start, End, StructuredBlock) { assert((K == OpenACCDirectiveKind::Parallel || @@ -119,6 +147,13 @@ class OpenACCComputeConstruct : public OpenACCAssociatedStmtConstruct { K == OpenACCDirectiveKind::Kernels) && "Only parallel, serial, and kernels constructs should be " "represented by this type"); + + // Initialize the trailing storage. + std::uninitialized_copy(Clauses.begin(), Clauses.end(), + getTrailingObjects()); + + setClauseList(MutableArrayRef(getTrailingObjects(), + Clauses.size())); } void setStructuredBlock(Stmt *S) { setAssociatedStmt(S); } @@ -128,10 +163,12 @@ class OpenACCComputeConstruct : public OpenACCAssociatedStmtConstruct { return T->getStmtClass() == OpenACCComputeConstructClass; } - static OpenACCComputeConstruct *CreateEmpty(const ASTContext &C, EmptyShell); + static OpenACCComputeConstruct *CreateEmpty(const ASTContext &C, + unsigned NumClauses); static OpenACCComputeConstruct * Create(const ASTContext &C, OpenACCDirectiveKind K, SourceLocation BeginLoc, - SourceLocation EndLoc, Stmt *StructuredBlock); + SourceLocation EndLoc, ArrayRef Clauses, + Stmt *StructuredBlock); Stmt *getStructuredBlock() { return getAssociatedStmt(); } const Stmt *getStructuredBlock() const { diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index 4d159b7eb3f9c..2b40ae48aa114 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -189,6 +189,8 @@ class TextNodeDumper void Visit(const OMPClause *C); + void Visit(const OpenACCClause *C); + void Visit(const BlockDecl::Capture &C); void Visit(const GenericSelectionExpr::ConstAssociation &A); diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index 2f71053d030f6..8a2bbfff9e9e6 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -4961,6 +4961,8 @@ AST_MATCHER_P(LambdaExpr, hasAnyCapture, internal::Matcher, /// capturesVar(hasName("x")) matches `x` and `x = 1`. 
AST_MATCHER_P(LambdaCapture, capturesVar, internal::Matcher, InnerMatcher) { + if (!Node.capturesVariable()) + return false; auto *capturedVar = Node.getCapturedVar(); return capturedVar && InnerMatcher.matches(*capturedVar, Finder, Builder); } diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 40e5087186201..f17514b7fe8d6 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -328,13 +328,10 @@ class Spelling { } class GNU : Spelling; -class Declspec : Spelling { - bit PrintOnLeft = 1; -} +class Declspec : Spelling; class Microsoft : Spelling; class CXX11 : Spelling { - bit CanPrintOnLeft = 0; string Namespace = namespace; } class C23 @@ -608,12 +605,6 @@ class AttrSubjectMatcherAggregateRule { def SubjectMatcherForNamed : AttrSubjectMatcherAggregateRule; class Attr { - // Specifies that when printed, this attribute is meaningful on the - // 'left side' of the declaration. - bit CanPrintOnLeft = 1; - // Specifies that when printed, this attribute is required to be printed on - // the 'left side' of the declaration. - bit PrintOnLeft = 0; // The various ways in which an attribute can be spelled in source list Spellings; // The things to which an attribute can appertain @@ -978,7 +969,6 @@ def AVRSignal : InheritableAttr, TargetSpecificAttr { } def AsmLabel : InheritableAttr { - let CanPrintOnLeft = 0; let Spellings = [CustomKeyword<"asm">, CustomKeyword<"__asm__">]; let Args = [ // Label specifies the mangled name for the decl. @@ -2175,7 +2165,6 @@ def AllocSize : InheritableAttr { } def EnableIf : InheritableAttr { - let CanPrintOnLeft = 0; // Does not have a [[]] spelling because this attribute requires the ability // to parse function arguments but the attribute is not written in the type // position. @@ -4177,7 +4166,6 @@ def Unavailable : InheritableAttr { } def DiagnoseIf : InheritableAttr { - let CanPrintOnLeft = 0; // Does not have a [[]] spelling because this attribute requires the ability // to parse function arguments but the attribute is not written in the type // position. 
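For context on the guard just added to capturesVar: a LambdaCapture of `this` (or of a VLA type) captures no variable, so calling getCapturedVar() on it asserts. A small hypothetical libTooling program exercising the fixed matcher, not part of the patch (the snippet and the -std flag are illustrative):

#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Frontend/ASTUnit.h"
#include "clang/Tooling/Tooling.h"
#include <iostream>

using namespace clang::ast_matchers;

namespace {
struct Counter : MatchFinder::MatchCallback {
  unsigned Matches = 0;
  void run(const MatchFinder::MatchResult &) override { ++Matches; }
};
} // namespace

int main() {
  const char *Code = R"cpp(
    struct S {
      void f() {
        auto ThisByCopy = [this] {};   // captures `this`, not a variable
        auto InitCapture = [x = 0] {}; // captures the variable `x`
      }
    };
  )cpp";
  auto AST = clang::tooling::buildASTFromCodeWithArgs(Code, {"-std=c++14"});
  Counter C;
  MatchFinder Finder;
  // Before the guard, evaluating capturesVar on the `this` capture reached
  // getCapturedVar() and tripped its assertion; now it simply fails to match.
  Finder.addMatcher(lambdaExpr(hasAnyCapture(capturesVar(hasName("x")))), &C);
  Finder.matchAST(AST->getASTContext());
  std::cout << C.Matches << "\n"; // expected: 1 (only InitCapture matches)
}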
diff --git a/clang/include/clang/Basic/Builtins.td index 25dfa098b131d..7a9a6c6186af2 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4587,6 +4587,12 @@ def GetDeviceSideMangledName : LangBuiltin<"CUDA_LANG"> { } // HLSL +def HLSLAll : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_all"]; + let Attributes = [NoThrow, Const]; + let Prototype = "bool(...)"; +} + def HLSLAny : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_elementwise_any"]; let Attributes = [NoThrow, Const]; diff --git a/clang/include/clang/Basic/CMakeLists.txt index 7d53c751c13ac..2ef6ddc68f4bf 100644 --- a/clang/include/clang/Basic/CMakeLists.txt +++ b/clang/include/clang/Basic/CMakeLists.txt @@ -31,16 +31,6 @@ clang_tablegen(AttrList.inc -gen-clang-attr-list SOURCE Attr.td TARGET ClangAttrList) -clang_tablegen(AttrLeftSideCanPrintList.inc -gen-clang-attr-can-print-left-list - -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ - SOURCE Attr.td - TARGET ClangAttrCanPrintLeftList) - -clang_tablegen(AttrLeftSideMustPrintList.inc -gen-clang-attr-must-print-left-list - -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ - SOURCE Attr.td - TARGET ClangAttrMustPrintLeftList) - clang_tablegen(AttrSubMatchRulesList.inc -gen-clang-attr-subject-match-rule-list -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ SOURCE Attr.td diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td index 98c234f65c267..9587dd3e46506 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -153,6 +153,9 @@ def warn_drv_unsupported_diag_option_for_flang : Warning< def warn_drv_unsupported_option_for_processor : Warning< "ignoring '%0' option as it is not currently supported for processor '%1'">, InGroup; +def warn_drv_unsupported_openmp_library : Warning< + "the library '%0=%1' is not supported; OpenMP will not be enabled">, + InGroup; def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; @@ -732,6 +735,7 @@ def warn_drv_darwin_sdk_invalid_settings : Warning< "SDK settings were ignored as 'SDKSettings.json' could not be parsed">, InGroup>; +def err_missing_sysroot : Error<"no such sysroot directory: '%0'">; def err_drv_darwin_sdk_missing_arclite : Error< "SDK does not contain 'libarclite' at the path '%0'; try increasing the minimum deployment target">; diff --git a/clang/include/clang/Basic/DiagnosticGroups.td index 8dbdea0762da4..0172154200e6c 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1436,6 +1436,9 @@ def MultiGPU: DiagGroup<"multi-gpu">; // libc and the CRT to be skipped. def AVRRtlibLinkingQuirks : DiagGroup<"avr-rtlib-linking-quirks">; +// A warning group related to AArch64 SME function attributes. +def AArch64SMEAttributes : DiagGroup<"aarch64-sme-attributes">; + // A warning group for things that will change semantics in the future.
def FutureCompat : DiagGroup<"future-compat">; diff --git a/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td b/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td index e3263fe9ccb9d..0a477da7186b0 100644 --- a/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td +++ b/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td @@ -19,9 +19,11 @@ def err_no_such_header_file : Error<"no such %select{public|private|project}1 he def warn_no_such_excluded_header_file : Warning<"no such excluded %select{public|private}0 header file: '%1'">, InGroup; def warn_glob_did_not_match: Warning<"glob '%0' did not match any header file">, InGroup; def err_no_such_umbrella_header_file : Error<"%select{public|private|project}1 umbrella header file not found in input: '%0'">; +def err_cannot_find_reexport : Error<"cannot find re-exported %select{framework|library}0: '%1'">; } // end of command line category. let CategoryName = "Verification" in { +// Diagnostics about symbols. def warn_target: Warning<"violations found for %0">, InGroup; def err_library_missing_symbol : Error<"declaration has external linkage, but dynamic library doesn't have symbol '%0'">; def warn_library_missing_symbol : Warning<"declaration has external linkage, but dynamic library doesn't have symbol '%0'">, InGroup; @@ -43,6 +45,25 @@ def err_dylib_symbol_flags_mismatch : Error<"dynamic library symbol '%0' is " "%select{weak defined|thread local}1, but its declaration is not">; def err_header_symbol_flags_mismatch : Error<"declaration '%0' is " "%select{weak defined|thread local}1, but symbol is not in dynamic library">; + +// Diagnostics about load commands. +def err_architecture_mismatch : Error<"architectures do not match: '%0' (provided) vs '%1' (found)">; +def warn_platform_mismatch : Warning<"platform does not match: '%0' (provided) vs '%1' (found)">, InGroup; +def err_platform_mismatch : Error<"platform does not match: '%0' (provided) vs '%1' (found)">; +def err_install_name_mismatch : Error<"install_name does not match: '%0' (provided) vs '%1' (found)">; +def err_current_version_mismatch : Error<"current_version does not match: '%0' (provided) vs '%1' (found)">; +def err_compatibility_version_mismatch : Error<"compatibility_version does not match: '%0' (provided) vs '%1' (found)">; +def err_appextension_safe_mismatch : Error<"ApplicationExtensionSafe flag does not match: '%0' (provided) vs '%1' (found)">; +def err_shared_cache_eligiblity_mismatch : Error<"NotForDyldSharedCache flag does not match: '%0' (provided) vs '%1' (found)">; +def err_no_twolevel_namespace : Error<"flat namespace libraries are not supported">; +def err_parent_umbrella_missing: Error<"parent umbrella missing from %0: '%1'">; +def err_parent_umbrella_mismatch : Error<"parent umbrella does not match: '%0' (provided) vs '%1' (found)">; +def err_reexported_libraries_missing : Error<"re-exported library missing from %0: '%1'">; +def err_reexported_libraries_mismatch : Error<"re-exported libraries do not match: '%0' (provided) vs '%1' (found)">; +def err_allowable_clients_missing : Error<"allowable client missing from %0: '%1'">; +def err_allowable_clients_mismatch : Error<"allowable clients do not match: '%0' (provided) vs '%1' (found)">; +def warn_rpaths_missing : Warning<"runpath search paths missing from %0: '%1'">, InGroup; +def warn_rpaths_mismatch : Warning<"runpath search paths do not match: '%0' (provided) vs '%1' (found)">, InGroup; } // end of Verification category. 
} // end of InstallAPI component diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 876620799acbe..9c47ac6664531 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -230,7 +230,7 @@ def ext_vla_folded_to_constant : ExtWarn< "variable length array folded to constant array as an extension">, InGroup; def err_vla_unsupported : Error< - "variable length arrays are not supported for %select{the current target|'%1'}0">; + "variable length arrays are not supported %select{for the current target|in '%1'}0">; def err_vla_in_coroutine_unsupported : Error< "variable length arrays in a coroutine are not supported">; def note_vla_unsupported : Note< @@ -3835,6 +3835,16 @@ def err_sme_definition_using_za_in_non_sme_target : Error< "function using ZA state requires 'sme'">; def err_sme_definition_using_zt0_in_non_sme2_target : Error< "function using ZT0 state requires 'sme2'">; +def warn_sme_streaming_pass_return_vl_to_non_streaming : Warning< + "passing a VL-dependent argument to/from a function that has a different" + " streaming-mode. The streaming and non-streaming vector lengths may be" + " different">, + InGroup, DefaultIgnore; +def warn_sme_locally_streaming_has_vl_args_returns : Warning< + "passing/returning a VL-dependent argument to/from a __arm_locally_streaming" + " function. The streaming and non-streaming vector" + " lengths may be different">, + InGroup, DefaultIgnore; def err_conflicting_attributes_arm_state : Error< "conflicting attributes for state '%0'">; def err_sme_streaming_cannot_be_multiversioned : Error< @@ -7228,7 +7238,8 @@ def ext_typecheck_decl_incomplete_type : ExtWarn< def err_tentative_def_incomplete_type : Error< "tentative definition has type %0 that is never completed">; def warn_tentative_incomplete_array : Warning< - "tentative array definition assumed to have one element">; + "tentative array definition assumed to have one element">, + InGroup>; def err_typecheck_incomplete_array_needs_initializer : Error< "definition of variable with array type needs an explicit size " "or an initializer">; @@ -12538,6 +12549,8 @@ def warn_acc_clause_unimplemented def err_acc_construct_appertainment : Error<"OpenACC construct '%0' cannot be used here; it can only " "be used in a statement context">; +def err_acc_clause_appertainment + : Error<"OpenACC '%1' clause is not valid on '%0' directive">; def err_acc_branch_in_out_compute_construct : Error<"invalid %select{branch|return|throw}0 %select{out of|into}1 " "OpenACC Compute Construct">; diff --git a/clang/include/clang/Basic/OpenACCKinds.h b/clang/include/clang/Basic/OpenACCKinds.h index 4456f4afd142d..95fc35a5bedb7 100644 --- a/clang/include/clang/Basic/OpenACCKinds.h +++ b/clang/include/clang/Basic/OpenACCKinds.h @@ -67,7 +67,7 @@ enum class OpenACCDirectiveKind { }; template -inline StreamTy &PrintOpenACCDirectiveKind(StreamTy &Out, +inline StreamTy &printOpenACCDirectiveKind(StreamTy &Out, OpenACCDirectiveKind K) { switch (K) { case OpenACCDirectiveKind::Parallel: @@ -138,12 +138,12 @@ inline StreamTy &PrintOpenACCDirectiveKind(StreamTy &Out, inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &Out, OpenACCDirectiveKind K) { - return PrintOpenACCDirectiveKind(Out, K); + return printOpenACCDirectiveKind(Out, K); } inline llvm::raw_ostream &operator<<(llvm::raw_ostream &Out, OpenACCDirectiveKind K) { - return PrintOpenACCDirectiveKind(Out, K); + return 
printOpenACCDirectiveKind(Out, K); } enum class OpenACCAtomicKind { @@ -266,7 +266,7 @@ enum class OpenACCClauseKind { }; template -inline StreamTy &PrintOpenACCClauseKind(StreamTy &Out, OpenACCClauseKind K) { +inline StreamTy &printOpenACCClauseKind(StreamTy &Out, OpenACCClauseKind K) { switch (K) { case OpenACCClauseKind::Finalize: return Out << "finalize"; @@ -402,12 +402,12 @@ inline StreamTy &PrintOpenACCClauseKind(StreamTy &Out, OpenACCClauseKind K) { inline const StreamingDiagnostic &operator<<(const StreamingDiagnostic &Out, OpenACCClauseKind K) { - return PrintOpenACCClauseKind(Out, K); + return printOpenACCClauseKind(Out, K); } inline llvm::raw_ostream &operator<<(llvm::raw_ostream &Out, OpenACCClauseKind K) { - return PrintOpenACCClauseKind(Out, K); + return printOpenACCClauseKind(Out, K); } enum class OpenACCDefaultClauseKind { diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def index c2137e3f61f64..b228ffd07ee74 100644 --- a/clang/include/clang/Basic/Sanitizers.def +++ b/clang/include/clang/Basic/Sanitizers.def @@ -163,24 +163,24 @@ SANITIZER_GROUP("implicit-integer-arithmetic-value-change", ImplicitIntegerArithmeticValueChange, ImplicitIntegerSignChange | ImplicitSignedIntegerTruncation) -SANITIZER("objc-cast", ObjCCast) +SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion, + ImplicitIntegerArithmeticValueChange | + ImplicitUnsignedIntegerTruncation) -// FIXME: -//SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion, -// ImplicitIntegerArithmeticValueChange | -// ImplicitUnsignedIntegerTruncation) -//SANITIZER_GROUP("implicit-conversion", ImplicitConversion, -// ImplicitIntegerConversion) +// Implicit bitfield sanitizers +SANITIZER("implicit-bitfield-conversion", ImplicitBitfieldConversion) SANITIZER_GROUP("implicit-conversion", ImplicitConversion, - ImplicitIntegerArithmeticValueChange | - ImplicitUnsignedIntegerTruncation) + ImplicitIntegerConversion | + ImplicitBitfieldConversion) SANITIZER_GROUP("integer", Integer, - ImplicitConversion | IntegerDivideByZero | Shift | + ImplicitIntegerConversion | IntegerDivideByZero | Shift | SignedIntegerOverflow | UnsignedIntegerOverflow | UnsignedShiftBase) +SANITIZER("objc-cast", ObjCCast) + SANITIZER("local-bounds", LocalBounds) SANITIZER_GROUP("bounds", Bounds, ArrayBounds | LocalBounds) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 8c597e03a62d0..0aef6aaeee58c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -810,9 +810,11 @@ def B : JoinedOrSeparate<["-"], "B">, MetaVarName<"">, HelpText<"Search $prefix$file for executables, libraries, and data files. " "If $prefix is a directory, search $prefix/$file">; def gcc_install_dir_EQ : Joined<["--"], "gcc-install-dir=">, + Visibility<[ClangOption, FlangOption]>, HelpText<"Use GCC installation in the specified directory. The directory ends with path components like 'lib{,32,64}/gcc{,-cross}/$triple/$version'. " "Note: executables (e.g. ld) used by the compiler are not overridden by the selected GCC installation">; def gcc_toolchain : Joined<["--"], "gcc-toolchain=">, Flags<[NoXarchOption]>, + Visibility<[ClangOption, FlangOption]>, HelpText<"Specify a directory where Clang can find 'include' and 'lib{,32,64}/gcc{,-cross}/$triple/$version'. 
" "Clang will use the GCC installation with the largest version">; def gcc_triple_EQ : Joined<["--"], "gcc-triple=">, @@ -1010,7 +1012,8 @@ def : Joined<["-"], "Xclang=">, Group, def Xcuda_fatbinary : Separate<["-"], "Xcuda-fatbinary">, HelpText<"Pass to fatbinary invocation">, MetaVarName<"">; def Xcuda_ptxas : Separate<["-"], "Xcuda-ptxas">, - HelpText<"Pass to the ptxas assembler">, MetaVarName<"">; + HelpText<"Pass to the ptxas assembler">, MetaVarName<"">, + Visibility<[ClangOption, CLOption]>; def Xdevice_post_link : Separate<["-"], "Xdevice-post-link">, HelpText<"Pass to the device post-link tool.">, MetaVarName<"">, Flags<[HelpHidden]>, Visibility<[ClangOption, CLOption, DXCOption]>; @@ -3496,17 +3499,24 @@ def fopenmp : Flag<["-"], "fopenmp">, Group, HelpText<"Parse OpenMP pragmas and generate parallel code.">; def fno_openmp : Flag<["-"], "fno-openmp">, Group, Flags<[NoArgumentUnused]>; +class OpenMPVersionHelp { + string str = !strconcat( + "Set OpenMP version (e.g. 45 for OpenMP 4.5, 51 for OpenMP 5.1). Default value is ", + default, " for ", program); +} def fopenmp_version_EQ : Joined<["-"], "fopenmp-version=">, Group, Flags<[NoArgumentUnused]>, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, - HelpText<"Set OpenMP version (e.g. 45 for OpenMP 4.5, 51 for OpenMP 5.1). Default value is 51 for Clang">; + HelpText.str>, + HelpTextForVariants<[FlangOption, FC1Option], OpenMPVersionHelp<"Flang", "11">.str>; defm openmp_extensions: BoolFOption<"openmp-extensions", LangOpts<"OpenMPExtensions">, DefaultTrue, PosFlag, NegFlag>; -def fopenmp_EQ : Joined<["-"], "fopenmp=">, Group; +def fopenmp_EQ : Joined<["-"], "fopenmp=">, Group, + Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def fopenmp_use_tls : Flag<["-"], "fopenmp-use-tls">, Group, Flags<[NoArgumentUnused, HelpHidden]>; def fnoopenmp_use_tls : Flag<["-"], "fnoopenmp-use-tls">, Group, diff --git a/clang/include/clang/Frontend/FrontendActions.h b/clang/include/clang/Frontend/FrontendActions.h index 0518a8823a03e..a620ddfc40447 100644 --- a/clang/include/clang/Frontend/FrontendActions.h +++ b/clang/include/clang/Frontend/FrontendActions.h @@ -34,18 +34,12 @@ class InitOnlyAction : public FrontendAction { /// Preprocessor-based frontend action that also loads PCH files. 
class ReadPCHAndPreprocessAction : public FrontendAction { - llvm::unique_function AdjustCI; - void ExecuteAction() override; std::unique_ptr CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override; public: - ReadPCHAndPreprocessAction( - llvm::unique_function AdjustCI) - : AdjustCI(std::move(AdjustCI)) {} - bool usesPreprocessorOnly() const override { return false; } }; @@ -327,15 +321,11 @@ class PrintPreprocessedAction : public PreprocessorFrontendAction { class GetDependenciesByModuleNameAction : public PreprocessOnlyAction { StringRef ModuleName; - llvm::unique_function AdjustCI; - void ExecuteAction() override; public: - GetDependenciesByModuleNameAction( - StringRef ModuleName, - llvm::unique_function AdjustCI) - : ModuleName(ModuleName), AdjustCI(std::move(AdjustCI)) {} + GetDependenciesByModuleNameAction(StringRef ModuleName) + : ModuleName(ModuleName) {} }; } // end namespace clang diff --git a/clang/include/clang/InstallAPI/Context.h index 54e517544b8ed..8f88331a2803f 100644 --- a/clang/include/clang/InstallAPI/Context.h +++ b/clang/include/clang/InstallAPI/Context.h @@ -28,6 +28,9 @@ struct InstallAPIContext { /// Library attributes that are typically passed as linker inputs. BinaryAttrs BA; + /// Install names of reexported libraries of a library. + LibAttrs Reexports; + /// All headers that represent a library. HeaderSeq InputHeaders; @@ -80,6 +83,20 @@ struct InstallAPIContext { llvm::DenseMap KnownIncludes; }; +/// Look up the dylib or TextAPI file location for a system library or framework. +/// The search paths provided are searched in order. +/// @rpath based libraries are not supported. +/// +/// \param InstallName The install name for the library. +/// \param FrameworkSearchPaths Search paths to look up frameworks with. +/// \param LibrarySearchPaths Search paths to look up dylibs with. +/// \param SearchPaths Fallback search paths if the library was not found in the earlier +/// paths. +/// \return The full path of the library. +std::string findLibrary(StringRef InstallName, FileManager &FM, + ArrayRef FrameworkSearchPaths, + ArrayRef LibrarySearchPaths, + ArrayRef SearchPaths); } // namespace installapi } // namespace clang diff --git a/clang/include/clang/InstallAPI/DylibVerifier.h index 22cdc234486cf..a3df25f10de4b 100644 --- a/clang/include/clang/InstallAPI/DylibVerifier.h +++ b/clang/include/clang/InstallAPI/DylibVerifier.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_INSTALLAPI_DYLIBVERIFIER_H #include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceManager.h" #include "clang/InstallAPI/MachO.h" namespace clang { @@ -24,6 +25,9 @@ enum class VerificationMode { Pedantic, }; +using LibAttrs = llvm::StringMap; +using ReexportedInterfaces = llvm::SmallVector; + /// Service responsible for tracking the state of verification across the /// lifetime of InstallAPI.
/// As declarations are collected during AST traversal, they are @@ -63,11 +67,12 @@ class DylibVerifier : llvm::MachO::RecordVisitor { DylibVerifier() = default; - DylibVerifier(llvm::MachO::Records &&Dylib, DiagnosticsEngine *Diag, - VerificationMode Mode, bool Demangle, StringRef DSYMPath) - : Dylib(std::move(Dylib)), Mode(Mode), Demangle(Demangle), - DSYMPath(DSYMPath), Exports(std::make_unique()), - Ctx(VerifierContext{Diag}) {} + DylibVerifier(llvm::MachO::Records &&Dylib, ReexportedInterfaces &&Reexports, + DiagnosticsEngine *Diag, VerificationMode Mode, bool Demangle, + StringRef DSYMPath) + : Dylib(std::move(Dylib)), Reexports(std::move(Reexports)), Mode(Mode), + Demangle(Demangle), DSYMPath(DSYMPath), + Exports(std::make_unique()), Ctx(VerifierContext{Diag}) {} Result verify(GlobalRecord *R, const FrontendAttrs *FA); Result verify(ObjCInterfaceRecord *R, const FrontendAttrs *FA); @@ -77,6 +82,14 @@ class DylibVerifier : llvm::MachO::RecordVisitor { // Scan through dylib slices and report any remaining missing exports. Result verifyRemainingSymbols(); + /// Compare and report the attributes represented as + /// load commands in the dylib to the attributes provided via options. + bool verifyBinaryAttrs(const ArrayRef ProvidedTargets, + const BinaryAttrs &ProvidedBA, + const LibAttrs &ProvidedReexports, + const LibAttrs &ProvidedClients, + const LibAttrs &ProvidedRPaths, const FileType &FT); + /// Initialize target for verification. void setTarget(const Target &T); @@ -87,11 +100,7 @@ class DylibVerifier : llvm::MachO::RecordVisitor { Result getState() const { return Ctx.FrontendState; } /// Set different source managers to the same diagnostics engine. - void setSourceManager(SourceManager &SourceMgr) const { - if (!Ctx.Diag) - return; - Ctx.Diag->setSourceManager(&SourceMgr); - } + void setSourceManager(IntrusiveRefCntPtr SourceMgr); private: /// Determine whether to compare declaration to symbol in binary. @@ -105,6 +114,10 @@ class DylibVerifier : llvm::MachO::RecordVisitor { bool shouldIgnoreObsolete(const Record *R, SymbolContext &SymCtx, const Record *DR); + /// Check if a declaration is exported from a reexported library. These + /// symbols should be omitted from the text-api file. + bool shouldIgnoreReexport(const Record *R, SymbolContext &SymCtx) const; + /// Compare the visibility declarations to the linkage of symbol found in /// dylib. Result compareVisibility(const Record *R, SymbolContext &SymCtx, @@ -154,6 +167,9 @@ class DylibVerifier : llvm::MachO::RecordVisitor { // Symbols in dylib. llvm::MachO::Records Dylib; + // Reexported interfaces that are part of the library. + ReexportedInterfaces Reexports; + // Controls what class of violations to report. VerificationMode Mode = VerificationMode::Invalid; @@ -171,6 +187,9 @@ class DylibVerifier : llvm::MachO::RecordVisitor { // Track DWARF provided source location for dylibs. DWARFContext *DWARFCtx = nullptr; + + // Source manager for each unique compiler instance.
+ llvm::SmallVector, 12> SourceManagers; }; } // namespace installapi diff --git a/clang/include/clang/InstallAPI/Frontend.h index 5cccd891c5809..bc4e77de2b725 100644 --- a/clang/include/clang/InstallAPI/Frontend.h +++ b/clang/include/clang/InstallAPI/Frontend.h @@ -36,7 +36,7 @@ class InstallAPIAction : public ASTFrontendAction { std::unique_ptr CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { Ctx.Diags->getClient()->BeginSourceFile(CI.getLangOpts()); - Ctx.Verifier->setSourceManager(CI.getSourceManager()); + Ctx.Verifier->setSourceManager(CI.getSourceManagerPtr()); return std::make_unique( CI.getASTContext(), Ctx, CI.getSourceManager(), CI.getPreprocessor()); } diff --git a/clang/include/clang/InstallAPI/FrontendRecords.h index 59271e81e230c..ef82398addd7a 100644 --- a/clang/include/clang/InstallAPI/FrontendRecords.h +++ b/clang/include/clang/InstallAPI/FrontendRecords.h @@ -21,6 +21,7 @@ namespace installapi { struct FrontendAttrs { const AvailabilityInfo Avail; const Decl *D; + const SourceLocation Loc; const HeaderType Access; }; diff --git a/clang/include/clang/InstallAPI/MachO.h index 827220dbf39fb..854399f54ba6c 100644 --- a/clang/include/clang/InstallAPI/MachO.h +++ b/clang/include/clang/InstallAPI/MachO.h @@ -23,6 +23,8 @@ #include "llvm/TextAPI/TextAPIWriter.h" #include "llvm/TextAPI/Utils.h" +using Architecture = llvm::MachO::Architecture; +using ArchitectureSet = llvm::MachO::ArchitectureSet; using SymbolFlags = llvm::MachO::SymbolFlags; using RecordLinkage = llvm::MachO::RecordLinkage; using Record = llvm::MachO::Record; diff --git a/clang/include/clang/Lex/HeaderSearch.h index 705dcfa8aacc3..855f81f775f8a 100644 --- a/clang/include/clang/Lex/HeaderSearch.h +++ b/clang/include/clang/Lex/HeaderSearch.h @@ -78,11 +78,19 @@ struct HeaderFileInfo { LLVM_PREFERRED_TYPE(bool) unsigned External : 1; - /// Whether this header is part of a module. + /// Whether this header is part of and built with a module, i.e., it is listed + /// in a module map and is not `excluded` or `textual` (same meaning as + /// `ModuleMap::isModular()`). LLVM_PREFERRED_TYPE(bool) unsigned isModuleHeader : 1; - /// Whether this header is part of the module that we are building. + /// Whether this header is a `textual header` in a module. + LLVM_PREFERRED_TYPE(bool) + unsigned isTextualModuleHeader : 1; + + /// Whether this header is part of the module that we are building, even if it + /// doesn't build with the module, i.e., this will include `excluded` and + /// `textual` headers as well as normal headers. LLVM_PREFERRED_TYPE(bool) unsigned isCompilingModuleHeader : 1; @@ -128,13 +136,20 @@ struct HeaderFileInfo { HeaderFileInfo() : isImport(false), isPragmaOnce(false), DirInfo(SrcMgr::C_User), - External(false), isModuleHeader(false), isCompilingModuleHeader(false), - Resolved(false), IndexHeaderMapHeader(false), IsValid(false) {} + External(false), isModuleHeader(false), isTextualModuleHeader(false), + isCompilingModuleHeader(false), Resolved(false), + IndexHeaderMapHeader(false), IsValid(false) {} /// Retrieve the controlling macro for this header file, if /// any. const IdentifierInfo * getControllingMacro(ExternalPreprocessorSource *External); + + /// Update the module membership bits based on the header role. + /// + /// isModuleHeader will potentially be set, but not cleared.
+ /// isTextualModuleHeader will be set or cleared based on the role update. + void mergeModuleMembership(ModuleMap::ModuleHeaderRole Role); }; /// An external source of header file information, which may supply @@ -522,6 +537,9 @@ class HeaderSearch { /// /// \return false if \#including the file will have no effect or true /// if we should include it. + /// + /// \param M The module to which `File` belongs (this should usually be the + /// SuggestedModule returned by LookupFile/LookupSubframeworkHeader) bool ShouldEnterIncludeFile(Preprocessor &PP, FileEntryRef File, bool isImport, bool ModulesEnabled, Module *M, bool &IsFirstIncludeOfFile); diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 24e146a589a75..0836b7d439bb0 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -736,19 +736,6 @@ class Preprocessor { State ConditionalStackState = Off; } PreambleConditionalStack; - /// Function for getting the dependency preprocessor directives of a file. - /// - /// These are directives derived from a special form of lexing where the - /// source input is scanned for the preprocessor directives that might have an - /// effect on the dependencies for a compilation unit. - /// - /// Enables a client to cache the directives for a file and provide them - /// across multiple compiler invocations. - /// FIXME: Allow returning an error. - using DependencyDirectivesFn = llvm::unique_function>(FileEntryRef)>; - DependencyDirectivesFn DependencyDirectivesForFile; - /// The current top of the stack that we're lexing from if /// not expanding a macro and we are lexing directly from source code. /// @@ -1283,11 +1270,6 @@ class Preprocessor { /// false if it is producing tokens to be consumed by Parse and Sema. bool isPreprocessedOutput() const { return PreprocessedOutput; } - /// Set the function used to get dependency directives for a file. - void setDependencyDirectivesFn(DependencyDirectivesFn Fn) { - DependencyDirectivesForFile = std::move(Fn); - } - /// Return true if we are lexing directly from the specified lexer. bool isCurrentLexer(const PreprocessorLexer *L) const { return CurPPLexer == L; diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index 50b5fba0ff773..635971d0ce5ee 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -186,6 +186,19 @@ class PreprocessorOptions { /// with support for lifetime-qualified pointers. ObjCXXARCStandardLibraryKind ObjCXXARCStandardLibrary = ARCXX_nolib; + /// Function for getting the dependency preprocessor directives of a file. + /// + /// These are directives derived from a special form of lexing where the + /// source input is scanned for the preprocessor directives that might have an + /// effect on the dependencies for a compilation unit. + /// + /// Enables a client to cache the directives for a file and provide them + /// across multiple compiler invocations. + /// FIXME: Allow returning an error. + std::function>( + FileEntryRef)> + DependencyDirectivesForFile; + /// Set up preprocessor for RunAnalysis action. bool SetUpStaticAnalyzer = false; diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h index 36ec5ddaa29ad..4f29fb7d11415 100644 --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -58,7 +58,7 @@ class Token { /// Annotations (resolved type names, C++ scopes, etc): isAnnotation(). 
/// This is a pointer to sema-specific data for the annotation token.
/// Eof:
- // This is a pointer to a Decl.
+ /// This is a pointer to a Decl.
/// Other:
/// This is null.
void *PtrData;
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index dfe312216bd7d..6237e430502bf 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -41,6 +41,7 @@ namespace clang {
 class InMessageExpressionRAIIObject;
 class PoisonSEHIdentifiersRAIIObject;
 class OMPClause;
+  class OpenACCClause;
 class ObjCTypeParamList;
 struct OMPTraitProperty;
 struct OMPTraitSelector;
@@ -3595,11 +3596,26 @@ class Parser : public CodeCompletionHandler {
 OpenACCDirectiveKind DirKind;
 SourceLocation StartLoc;
 SourceLocation EndLoc;
-    // TODO OpenACC: Add Clause list here once we have a type for that.
+    SmallVector<OpenACCClause *> Clauses;
 // TODO OpenACC: As we implement support for the Atomic, Routine, Cache, and
 // Wait constructs, we likely want to put that information in here as well.
 };
+  /// Represents the 'error' state of parsing an OpenACC Clause, and stores
+  /// whether we can continue parsing, or should give up on the directive.
+  enum class OpenACCParseCanContinue { Cannot = 0, Can = 1 };
+
+  /// A type to represent the state of parsing an OpenACC Clause. Situations
+  /// that result in an OpenACCClause pointer are a success and can continue
+  /// parsing; however, some other situations can also continue.
+  /// FIXME: This is better represented as a std::expected when we get C++23.
+  using OpenACCClauseParseResult =
+      llvm::PointerIntPair<OpenACCClause *, 1, OpenACCParseCanContinue>;
+
+  OpenACCClauseParseResult OpenACCCanContinue();
+  OpenACCClauseParseResult OpenACCCannotContinue();
+  OpenACCClauseParseResult OpenACCSuccess(OpenACCClause *Clause);
+
 /// Parses the OpenACC directive (the entire pragma) including the clause
 /// list, but does not produce the main AST node.
 OpenACCDirectiveParseInfo ParseOpenACCDirective();
@@ -3614,12 +3630,18 @@ class Parser : public CodeCompletionHandler {
 bool ParseOpenACCClauseVarList(OpenACCClauseKind Kind);
 /// Parses any parameters for an OpenACC Clause, including required/optional
 /// parens.
-  bool ParseOpenACCClauseParams(OpenACCDirectiveKind DirKind,
-                                OpenACCClauseKind Kind);
-  /// Parses a single clause in a clause-list for OpenACC.
-  bool ParseOpenACCClause(OpenACCDirectiveKind DirKind);
+  OpenACCClauseParseResult
+  ParseOpenACCClauseParams(ArrayRef<const OpenACCClause *> ExistingClauses,
+                           OpenACCDirectiveKind DirKind, OpenACCClauseKind Kind,
+                           SourceLocation ClauseLoc);
+  /// Parses a single clause in a clause-list for OpenACC. The result's clause
+  /// pointer is nullptr on error.
+  OpenACCClauseParseResult
+  ParseOpenACCClause(ArrayRef<const OpenACCClause *> ExistingClauses,
+                     OpenACCDirectiveKind DirKind);
 /// Parses the clause-list for an OpenACC directive.
-  void ParseOpenACCClauseList(OpenACCDirectiveKind DirKind);
+  SmallVector<OpenACCClause *>
+  ParseOpenACCClauseList(OpenACCDirectiveKind DirKind);
 bool ParseOpenACCWaitArgument();
 /// Parses the clause of the 'bind' argument, which can be a string literal or
 /// an ID expression.
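A minimal, self-contained sketch of the tri-state parse result used in the Parser.h hunk above (the Clause stand-in and helper names below are hypothetical; the real type is clang::OpenACCClause and the real helpers are the Parser's OpenACCSuccess/OpenACCCanContinue/OpenACCCannotContinue):

#include "llvm/ADT/PointerIntPair.h"

namespace sketch {
enum class CanContinue { Cannot = 0, Can = 1 };

// Stand-in for clang::OpenACCClause. alignas(8) guarantees free low bits in
// a Clause*, which PointerIntPair needs in order to store the flag.
struct alignas(8) Clause {};

// One pointer-sized value carries both the parsed clause (or nullptr on
// error) and whether the caller may keep parsing the clause list.
using ParseResult = llvm::PointerIntPair<Clause *, 1, CanContinue>;

inline ParseResult success(Clause *C) { return {C, CanContinue::Can}; }
inline ParseResult recoverable() { return {nullptr, CanContinue::Can}; }
inline ParseResult fatal() { return {nullptr, CanContinue::Cannot}; }
} // namespace sketch

Because the flag lives in the pointer's low bit, the result stays the size of a single pointer and is cheap to return by value, which is roughly what the FIXME's std::expected would provide in C++23.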
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 6aebfe35887fb..790f41627522d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -56,6 +56,7 @@ #include "clang/Sema/ObjCMethodList.h" #include "clang/Sema/Ownership.h" #include "clang/Sema/Scope.h" +#include "clang/Sema/SemaBase.h" #include "clang/Sema/SemaConcept.h" #include "clang/Sema/TypoCorrection.h" #include "clang/Sema/Weak.h" @@ -182,6 +183,7 @@ class Preprocessor; class PseudoDestructorTypeStorage; class PseudoObjectExpr; class QualType; +class SemaHLSL; class SemaOpenACC; class StandardConversionSequence; class Stmt; @@ -610,7 +612,7 @@ enum class TemplateDeductionResult { /// Sema - This implements semantic analysis and AST building for C. /// \nosubgrouping -class Sema final { +class Sema final : public SemaBase { // Table of Contents // ----------------- // 1. Semantic Analysis (Sema.cpp) @@ -652,9 +654,8 @@ class Sema final { // 36. FixIt Helpers (SemaFixItUtils.cpp) // 37. Name Lookup for RISC-V Vector Intrinsic (SemaRISCVVectorLookup.cpp) // 38. CUDA (SemaCUDA.cpp) - // 39. HLSL Constructs (SemaHLSL.cpp) - // 40. OpenMP Directives and Clauses (SemaOpenMP.cpp) - // 41. SYCL Constructs (SemaSYCL.cpp) + // 39. OpenMP Directives and Clauses (SemaOpenMP.cpp) + // 40. SYCL Constructs (SemaSYCL.cpp) /// \name Semantic Analysis /// Implementations are in Sema.cpp @@ -700,252 +701,6 @@ class Sema final { /// void addExternalSource(ExternalSemaSource *E); - /// Helper class that creates diagnostics with optional - /// template instantiation stacks. - /// - /// This class provides a wrapper around the basic DiagnosticBuilder - /// class that emits diagnostics. ImmediateDiagBuilder is - /// responsible for emitting the diagnostic (as DiagnosticBuilder - /// does) and, if the diagnostic comes from inside a template - /// instantiation, printing the template instantiation stack as - /// well. - class ImmediateDiagBuilder : public DiagnosticBuilder { - Sema &SemaRef; - unsigned DiagID; - - public: - ImmediateDiagBuilder(DiagnosticBuilder &DB, Sema &SemaRef, unsigned DiagID) - : DiagnosticBuilder(DB), SemaRef(SemaRef), DiagID(DiagID) {} - ImmediateDiagBuilder(DiagnosticBuilder &&DB, Sema &SemaRef, unsigned DiagID) - : DiagnosticBuilder(DB), SemaRef(SemaRef), DiagID(DiagID) {} - - // This is a cunning lie. DiagnosticBuilder actually performs move - // construction in its copy constructor (but due to varied uses, it's not - // possible to conveniently express this as actual move construction). So - // the default copy ctor here is fine, because the base class disables the - // source anyway, so the user-defined ~ImmediateDiagBuilder is a safe no-op - // in that case anwyay. - ImmediateDiagBuilder(const ImmediateDiagBuilder &) = default; - - ~ImmediateDiagBuilder() { - // If we aren't active, there is nothing to do. - if (!isActive()) - return; - - // Otherwise, we need to emit the diagnostic. First clear the diagnostic - // builder itself so it won't emit the diagnostic in its own destructor. - // - // This seems wasteful, in that as written the DiagnosticBuilder dtor will - // do its own needless checks to see if the diagnostic needs to be - // emitted. However, because we take care to ensure that the builder - // objects never escape, a sufficiently smart compiler will be able to - // eliminate that code. - Clear(); - - // Dispatch to Sema to emit the diagnostic. 
- SemaRef.EmitCurrentDiagnostic(DiagID); - } - - /// Teach operator<< to produce an object of the correct type. - template - friend const ImmediateDiagBuilder & - operator<<(const ImmediateDiagBuilder &Diag, const T &Value) { - const DiagnosticBuilder &BaseDiag = Diag; - BaseDiag << Value; - return Diag; - } - - // It is necessary to limit this to rvalue reference to avoid calling this - // function with a bitfield lvalue argument since non-const reference to - // bitfield is not allowed. - template ::value>> - const ImmediateDiagBuilder &operator<<(T &&V) const { - const DiagnosticBuilder &BaseDiag = *this; - BaseDiag << std::move(V); - return *this; - } - }; - - /// Bitmask to contain the list of reasons a single diagnostic should be - /// emitted, based on its language. This permits multiple offload systems - /// to coexist in the same translation unit. - enum class DeviceDiagnosticReason { - /// Diagnostic doesn't apply to anything. Included for completeness, but - /// should make this a no-op. - None = 0, - /// OpenMP specific diagnostic. - OmpDevice = 1 << 0, - OmpHost = 1 << 1, - OmpAll = OmpDevice | OmpHost, - /// CUDA specific diagnostics. - CudaDevice = 1 << 2, - CudaHost = 1 << 3, - CudaAll = CudaDevice | CudaHost, - /// SYCL specific diagnostic. - Sycl = 1 << 4, - /// ESIMD specific diagnostic. - Esimd = 1 << 5, - /// A flag representing 'all'. This can be used to avoid the check - /// all-together and make this behave as it did before the - /// DiagnosticReason was added (that is, unconditionally emit). - /// Note: This needs to be updated if any flags above are added. - All = OmpAll | CudaAll | Sycl | Esimd, - - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/All) - }; - -private: - // A collection of a pair of undefined functions and their callers known - // to be reachable from a routine on the device (kernel or device function). - typedef std::pair CallPair; - llvm::SmallVector UndefinedReachableFromSyclDevice; - -public: - // Helper routine to add a pair of Callee-Caller pair of FunctionDecl * - // to UndefinedReachableFromSyclDevice. - void addFDToReachableFromSyclDevice(const FunctionDecl *Callee, - const FunctionDecl *Caller) { - UndefinedReachableFromSyclDevice.push_back(std::make_pair(Callee, Caller)); - } - // Helper routine to check if a pair of Callee-Caller FunctionDecl * - // is in UndefinedReachableFromSyclDevice. - bool isFDReachableFromSyclDevice(const FunctionDecl *Callee, - const FunctionDecl *Caller) { - return llvm::any_of(UndefinedReachableFromSyclDevice, - [Callee, Caller](const CallPair &P) { - return P.first == Callee && P.second == Caller; - }); - } - - /// A generic diagnostic builder for errors which may or may not be deferred. - /// - /// In CUDA, there exist constructs (e.g. variable-length arrays, try/catch) - /// which are not allowed to appear inside __device__ functions and are - /// allowed to appear in __host__ __device__ functions only if the host+device - /// function is never codegen'ed. - /// - /// To handle this, we use the notion of "deferred diagnostics", where we - /// attach a diagnostic to a FunctionDecl that's emitted iff it's codegen'ed. - /// - /// This class lets you emit either a regular diagnostic, a deferred - /// diagnostic, or no diagnostic at all, according to an argument you pass to - /// its constructor, thus simplifying the process of creating these "maybe - /// deferred" diagnostics. - class SemaDiagnosticBuilder { - public: - enum Kind { - /// Emit no diagnostics. 
- K_Nop, - /// Emit the diagnostic immediately (i.e., behave like Sema::Diag()). - K_Immediate, - /// Emit the diagnostic immediately, and, if it's a warning or error, also - /// emit a call stack showing how this function can be reached by an a - /// priori known-emitted function. - K_ImmediateWithCallStack, - /// Create a deferred diagnostic, which is emitted only if the function - /// it's attached to is codegen'ed. Also emit a call stack as with - /// K_ImmediateWithCallStack. - K_Deferred - }; - - SemaDiagnosticBuilder(Kind K, SourceLocation Loc, unsigned DiagID, - const FunctionDecl *Fn, Sema &S, DeviceDiagnosticReason R); - SemaDiagnosticBuilder(SemaDiagnosticBuilder &&D); - SemaDiagnosticBuilder(const SemaDiagnosticBuilder &) = default; - - // The copy and move assignment operator is defined as deleted pending - // further motivation. - SemaDiagnosticBuilder &operator=(const SemaDiagnosticBuilder &) = delete; - SemaDiagnosticBuilder &operator=(SemaDiagnosticBuilder &&) = delete; - - ~SemaDiagnosticBuilder(); - - bool isImmediate() const { return ImmediateDiag.has_value(); } - - /// Convertible to bool: True if we immediately emitted an error, false if - /// we didn't emit an error or we created a deferred error. - /// - /// Example usage: - /// - /// if (SemaDiagnosticBuilder(...) << foo << bar) - /// return ExprError(); - /// - /// But see CUDADiagIfDeviceCode() and CUDADiagIfHostCode() -- you probably - /// want to use these instead of creating a SemaDiagnosticBuilder yourself. - operator bool() const { return isImmediate(); } - - template - friend const SemaDiagnosticBuilder & - operator<<(const SemaDiagnosticBuilder &Diag, const T &Value) { - if (Diag.ImmediateDiag) - *Diag.ImmediateDiag << Value; - else if (Diag.PartialDiagId) - Diag.S.DeviceDeferredDiags[Diag.Fn][*Diag.PartialDiagId] - .getDiag() - .second - << Value; - return Diag; - } - - // It is necessary to limit this to rvalue reference to avoid calling this - // function with a bitfield lvalue argument since non-const reference to - // bitfield is not allowed. - template ::value>> - const SemaDiagnosticBuilder &operator<<(T &&V) const { - if (ImmediateDiag) - *ImmediateDiag << std::move(V); - else if (PartialDiagId) - S.DeviceDeferredDiags[Fn][*PartialDiagId].getDiag().second - << std::move(V); - return *this; - } - - friend const SemaDiagnosticBuilder & - operator<<(const SemaDiagnosticBuilder &Diag, const PartialDiagnostic &PD) { - if (Diag.ImmediateDiag) - PD.Emit(*Diag.ImmediateDiag); - else if (Diag.PartialDiagId) - Diag.S.DeviceDeferredDiags[Diag.Fn][*Diag.PartialDiagId] - .getDiag() - .second = PD; - return Diag; - } - - void AddFixItHint(const FixItHint &Hint) const { - if (ImmediateDiag) - ImmediateDiag->AddFixItHint(Hint); - else if (PartialDiagId) - S.DeviceDeferredDiags[Fn][*PartialDiagId].getDiag().second.AddFixItHint( - Hint); - } - - friend ExprResult ExprError(const SemaDiagnosticBuilder &) { - return ExprError(); - } - friend StmtResult StmtError(const SemaDiagnosticBuilder &) { - return StmtError(); - } - operator ExprResult() const { return ExprError(); } - operator StmtResult() const { return StmtError(); } - operator TypeResult() const { return TypeError(); } - operator DeclResult() const { return DeclResult(true); } - operator MemInitResult() const { return MemInitResult(true); } - - private: - Sema &S; - SourceLocation Loc; - unsigned DiagID; - const FunctionDecl *Fn; - bool ShowCallStack; - - // Invariant: At most one of these Optionals has a value. 
- // FIXME: Switch these to a Variant once that exists. - std::optional ImmediateDiag; - std::optional PartialDiagId; - }; - void PrintStats() const; /// Warn that the stack is nearly exhausted. @@ -987,14 +742,6 @@ class Sema final { void addImplicitTypedef(StringRef Name, QualType T); - /// Emit a diagnostic. - SemaDiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID, - bool DeferHint = false); - - /// Emit a partial diagnostic. - SemaDiagnosticBuilder Diag(SourceLocation Loc, const PartialDiagnostic &PD, - bool DeferHint = false); - /// Whether uncompilable error has occurred. This includes error happens /// in deferred diagnostics. bool hasUncompilableErrorOccurred() const; @@ -1405,6 +1152,11 @@ class Sema final { /// CurContext - This is the current declaration context of parsing. DeclContext *CurContext; + SemaHLSL &HLSL() { + assert(HLSLPtr); + return *HLSLPtr; + } + SemaOpenACC &OpenACC() { assert(OpenACCPtr); return *OpenACCPtr; @@ -1440,6 +1192,7 @@ class Sema final { mutable IdentifierInfo *Ident_super; + std::unique_ptr HLSLPtr; std::unique_ptr OpenACCPtr; ///@} @@ -2272,10 +2025,10 @@ class Sema final { bool IsVariadic, FormatStringInfo *FSI); // Used by C++ template instantiation. - ExprResult SemaBuiltinShuffleVector(CallExpr *TheCall); - ExprResult SemaConvertVectorExpr(Expr *E, TypeSourceInfo *TInfo, - SourceLocation BuiltinLoc, - SourceLocation RParenLoc); + ExprResult BuiltinShuffleVector(CallExpr *TheCall); + ExprResult ConvertVectorExpr(Expr *E, TypeSourceInfo *TInfo, + SourceLocation BuiltinLoc, + SourceLocation RParenLoc); enum FormatStringType { FST_Scanf, @@ -2408,6 +2161,11 @@ class Sema final { bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, const FunctionProtoType *Proto); + bool BuiltinVectorMath(CallExpr *TheCall, QualType &Res); + bool BuiltinVectorToScalarMath(CallExpr *TheCall); + + bool CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + private: void CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, const ArraySubscriptExpr *ASE = nullptr, @@ -2497,62 +2255,59 @@ class Sema final { bool CheckNVPTXBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall); - bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); - bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call); - bool SemaBuiltinUnorderedCompare(CallExpr *TheCall, unsigned BuiltinID); - bool SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs, - unsigned BuiltinID); - bool SemaBuiltinComplex(CallExpr *TheCall); - bool SemaBuiltinVSX(CallExpr *TheCall); - bool SemaBuiltinOSLogFormat(CallExpr *TheCall); - bool SemaValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum); - - bool SemaBuiltinPrefetch(CallExpr *TheCall); - bool SemaBuiltinAllocaWithAlign(CallExpr *TheCall); - bool SemaBuiltinArithmeticFence(CallExpr *TheCall); - bool SemaBuiltinAssume(CallExpr *TheCall); - bool SemaBuiltinAssumeAligned(CallExpr *TheCall); - bool SemaBuiltinLongjmp(CallExpr *TheCall); - bool SemaBuiltinSetjmp(CallExpr *TheCall); - ExprResult SemaBuiltinAtomicOverloaded(ExprResult TheCallResult); - ExprResult SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult); - ExprResult SemaAtomicOpsOverloaded(ExprResult TheCallResult, - AtomicExpr::AtomicOp Op); - bool SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, - llvm::APSInt &Result); - bool SemaBuiltinConstantArgRange(CallExpr *TheCall, int ArgNum, int Low, - int High, bool RangeIsError = true); - bool SemaBuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum, - unsigned 
Multiple); - bool SemaBuiltinConstantArgPower2(CallExpr *TheCall, int ArgNum); - bool SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum, - unsigned ArgBits); - bool SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, int ArgNum, - unsigned ArgBits); - bool SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall, - int ArgNum, unsigned ExpectedFieldNum, - bool AllowName); - bool SemaBuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall); - bool SemaBuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID, - const char *TypeDesc); + bool BuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall); + bool BuiltinVAStartARMMicrosoft(CallExpr *Call); + bool BuiltinUnorderedCompare(CallExpr *TheCall, unsigned BuiltinID); + bool BuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs, + unsigned BuiltinID); + bool BuiltinComplex(CallExpr *TheCall); + bool BuiltinVSX(CallExpr *TheCall); + bool BuiltinOSLogFormat(CallExpr *TheCall); + bool ValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum); + + bool BuiltinPrefetch(CallExpr *TheCall); + bool BuiltinAllocaWithAlign(CallExpr *TheCall); + bool BuiltinArithmeticFence(CallExpr *TheCall); + bool BuiltinAssume(CallExpr *TheCall); + bool BuiltinAssumeAligned(CallExpr *TheCall); + bool BuiltinLongjmp(CallExpr *TheCall); + bool BuiltinSetjmp(CallExpr *TheCall); + ExprResult BuiltinAtomicOverloaded(ExprResult TheCallResult); + ExprResult BuiltinNontemporalOverloaded(ExprResult TheCallResult); + ExprResult AtomicOpsOverloaded(ExprResult TheCallResult, + AtomicExpr::AtomicOp Op); + bool BuiltinConstantArg(CallExpr *TheCall, int ArgNum, llvm::APSInt &Result); + bool BuiltinConstantArgRange(CallExpr *TheCall, int ArgNum, int Low, int High, + bool RangeIsError = true); + bool BuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum, + unsigned Multiple); + bool BuiltinConstantArgPower2(CallExpr *TheCall, int ArgNum); + bool BuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum, + unsigned ArgBits); + bool BuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, int ArgNum, + unsigned ArgBits); + bool BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall, int ArgNum, + unsigned ExpectedFieldNum, bool AllowName); + bool BuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall); + bool BuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID, + const char *TypeDesc); bool CheckPPCMMAType(QualType Type, SourceLocation TypeLoc); - bool SemaBuiltinElementwiseMath(CallExpr *TheCall); - bool SemaBuiltinElementwiseTernaryMath(CallExpr *TheCall, - bool CheckForFloatArgs = true); + bool BuiltinElementwiseMath(CallExpr *TheCall); + bool BuiltinElementwiseTernaryMath(CallExpr *TheCall, + bool CheckForFloatArgs = true); bool PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall); bool PrepareBuiltinReduceMathOneArgCall(CallExpr *TheCall); - bool SemaBuiltinNonDeterministicValue(CallExpr *TheCall); + bool BuiltinNonDeterministicValue(CallExpr *TheCall); // Matrix builtin handling. 
- ExprResult SemaBuiltinMatrixTranspose(CallExpr *TheCall, - ExprResult CallResult); - ExprResult SemaBuiltinMatrixColumnMajorLoad(CallExpr *TheCall, - ExprResult CallResult); - ExprResult SemaBuiltinMatrixColumnMajorStore(CallExpr *TheCall, - ExprResult CallResult); + ExprResult BuiltinMatrixTranspose(CallExpr *TheCall, ExprResult CallResult); + ExprResult BuiltinMatrixColumnMajorLoad(CallExpr *TheCall, + ExprResult CallResult); + ExprResult BuiltinMatrixColumnMajorStore(CallExpr *TheCall, + ExprResult CallResult); // WebAssembly builtin handling. bool BuiltinWasmRefNullExtern(CallExpr *TheCall); @@ -6040,7 +5795,8 @@ class Sema final { ExprResult BuildDeclarationNameExpr(const CXXScopeSpec &SS, LookupResult &R, bool NeedsADL, - bool AcceptInvalidDecl = false); + bool AcceptInvalidDecl = false, + bool NeedUnresolved = false); ExprResult BuildDeclarationNameExpr( const CXXScopeSpec &SS, const DeclarationNameInfo &NameInfo, NamedDecl *D, NamedDecl *FoundD = nullptr, @@ -7200,7 +6956,10 @@ class Sema final { SourceLocation RParenLoc); //// ActOnCXXThis - Parse 'this' pointer. - ExprResult ActOnCXXThis(SourceLocation loc); + ExprResult ActOnCXXThis(SourceLocation Loc); + + /// Check whether the type of 'this' is valid in the current context. + bool CheckCXXThisType(SourceLocation Loc, QualType Type); /// Build a CXXThisExpr and mark it referenced in the current context. Expr *BuildCXXThisExpr(SourceLocation Loc, QualType Type, bool IsImplicit); @@ -7603,8 +7362,8 @@ class Sema final { SourceLocation ClosingBraceLoc); private: - ExprResult SemaBuiltinOperatorNewDeleteOverloaded(ExprResult TheCallResult, - bool IsDelete); + ExprResult BuiltinOperatorNewDeleteOverloaded(ExprResult TheCallResult, + bool IsDelete); void AnalyzeDeleteExprMismatch(const CXXDeleteExpr *DE); void AnalyzeDeleteExprMismatch(FieldDecl *Field, SourceLocation DeleteLoc, @@ -10158,7 +9917,8 @@ class Sema final { /// not already done so. void DeclareImplicitDeductionGuides(TemplateDecl *Template, SourceLocation Loc); - FunctionTemplateDecl *DeclareImplicitDeductionGuideFromInitList( + + FunctionTemplateDecl *DeclareAggregateDeductionGuideFromInitList( TemplateDecl *Template, MutableArrayRef ParamTypes, SourceLocation Loc); @@ -10615,6 +10375,9 @@ class Sema final { /// We are building deduction guides for a class. BuildingDeductionGuides, + + /// We are instantiating a type alias template declaration. + TypeAliasTemplateInstantiation, } Kind; /// Was the enclosing context a non-instantiation SFINAE context? @@ -10704,6 +10467,12 @@ class Sema final { FunctionDecl *Entity, ExceptionSpecification, SourceRange InstantiationRange = SourceRange()); + /// Note that we are instantiating a type alias template declaration. + InstantiatingTemplate(Sema &SemaRef, SourceLocation PointOfInstantiation, + TypeAliasTemplateDecl *Entity, + ArrayRef TemplateArgs, + SourceRange InstantiationRange = SourceRange()); + /// Note that we are instantiating a default argument in a /// template-id. 
InstantiatingTemplate(Sema &SemaRef, SourceLocation PointOfInstantiation, @@ -13546,25 +13315,11 @@ class Sema final { ExprResult ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, MultiExprArg ExecConfig, SourceLocation GGGLoc); - class DeviceDeferredDiagnostic { - public: - DeviceDeferredDiagnostic(SourceLocation SL, const PartialDiagnostic &PD, - DeviceDiagnosticReason R) - : Diagnostic(SL, PD), Reason(R) {} - PartialDiagnosticAt &getDiag() { return Diagnostic; } - DeviceDiagnosticReason getReason() const { return Reason; } - - private: - PartialDiagnosticAt Diagnostic; - DeviceDiagnosticReason Reason; - }; /// Diagnostics that are emitted only if we discover that the given function /// must be codegen'ed. Because handling these correctly adds overhead to /// compilation, this is currently only enabled for CUDA compilations. - llvm::DenseMap, - std::vector> - DeviceDeferredDiags; + SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags; /// A pair of a canonical FunctionDecl and a SourceLocation. When used as the /// key in a hashtable, both the FD and location are hashed. @@ -13793,29 +13548,6 @@ class Sema final { // // - /// \name HLSL Constructs - /// Implementations are in SemaHLSL.cpp - ///@{ - -public: - Decl *ActOnStartHLSLBuffer(Scope *BufferScope, bool CBuffer, - SourceLocation KwLoc, IdentifierInfo *Ident, - SourceLocation IdentLoc, SourceLocation LBrace); - void ActOnFinishHLSLBuffer(Decl *Dcl, SourceLocation RBrace); - - bool CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); - - bool SemaBuiltinVectorMath(CallExpr *TheCall, QualType &Res); - bool SemaBuiltinVectorToScalarMath(CallExpr *TheCall); - - ///@} - - // - // - // ------------------------------------------------------------------------- - // - // - /// \name OpenMP Directives and Clauses /// Implementations are in SemaOpenMP.cpp ///@{ diff --git a/clang/include/clang/Sema/SemaBase.h b/clang/include/clang/Sema/SemaBase.h new file mode 100644 index 0000000000000..b62e39998228a --- /dev/null +++ b/clang/include/clang/Sema/SemaBase.h @@ -0,0 +1,292 @@ +//===--- SemaBase.h - Common utilities for semantic analysis-----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the SemaBase class, which provides utilities for Sema +// and its parts like SemaOpenACC. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SEMA_SEMABASE_H +#define LLVM_CLANG_SEMA_SEMABASE_H + +#include "clang/AST/Decl.h" +#include "clang/AST/Redeclarable.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/PartialDiagnostic.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Sema/Ownership.h" +#include "llvm/ADT/DenseMap.h" +#include +#include +#include +#include + +namespace clang { + +class ASTContext; +class DiagnosticsEngine; +class LangOptions; +class Sema; + +class SemaBase { +public: + SemaBase(Sema &S); + + Sema &SemaRef; + + ASTContext &getASTContext() const; + DiagnosticsEngine &getDiagnostics() const; + const LangOptions &getLangOpts() const; + + /// Helper class that creates diagnostics with optional + /// template instantiation stacks. + /// + /// This class provides a wrapper around the basic DiagnosticBuilder + /// class that emits diagnostics. 
ImmediateDiagBuilder is
+  /// responsible for emitting the diagnostic (as DiagnosticBuilder
+  /// does) and, if the diagnostic comes from inside a template
+  /// instantiation, printing the template instantiation stack as
+  /// well.
+  class ImmediateDiagBuilder : public DiagnosticBuilder {
+    Sema &SemaRef;
+    unsigned DiagID;
+
+  public:
+    ImmediateDiagBuilder(DiagnosticBuilder &DB, Sema &SemaRef, unsigned DiagID)
+        : DiagnosticBuilder(DB), SemaRef(SemaRef), DiagID(DiagID) {}
+    ImmediateDiagBuilder(DiagnosticBuilder &&DB, Sema &SemaRef, unsigned DiagID)
+        : DiagnosticBuilder(DB), SemaRef(SemaRef), DiagID(DiagID) {}
+
+    // This is a cunning lie. DiagnosticBuilder actually performs move
+    // construction in its copy constructor (but due to varied uses, it's not
+    // possible to conveniently express this as actual move construction). So
+    // the default copy ctor here is fine, because the base class disables the
+    // source anyway, so the user-defined ~ImmediateDiagBuilder is a safe no-op
+    // in that case anyway.
+    ImmediateDiagBuilder(const ImmediateDiagBuilder &) = default;
+
+    ~ImmediateDiagBuilder();
+
+    /// Teach operator<< to produce an object of the correct type.
+    template <typename T>
+    friend const ImmediateDiagBuilder &
+    operator<<(const ImmediateDiagBuilder &Diag, const T &Value) {
+      const DiagnosticBuilder &BaseDiag = Diag;
+      BaseDiag << Value;
+      return Diag;
+    }
+
+    // It is necessary to limit this to rvalue reference to avoid calling this
+    // function with a bitfield lvalue argument since non-const reference to
+    // bitfield is not allowed.
+    template <typename T,
+              typename = std::enable_if_t<!std::is_lvalue_reference<T>::value>>
+    const ImmediateDiagBuilder &operator<<(T &&V) const {
+      const DiagnosticBuilder &BaseDiag = *this;
+      BaseDiag << std::move(V);
+      return *this;
+    }
+  };
+
+  /// Bitmask to contain the list of reasons a single diagnostic should be
+  /// emitted, based on its language. This permits multiple offload systems
+  /// to coexist in the same translation unit.
+  enum class DeviceDiagnosticReason {
+    /// Diagnostic doesn't apply to anything. Included for completeness, but
+    /// should make this a no-op.
+    None = 0,
+    /// OpenMP specific diagnostic.
+    OmpDevice = 1 << 0,
+    OmpHost = 1 << 1,
+    OmpAll = OmpDevice | OmpHost,
+    /// CUDA specific diagnostics.
+    CudaDevice = 1 << 2,
+    CudaHost = 1 << 3,
+    CudaAll = CudaDevice | CudaHost,
+    /// SYCL specific diagnostic.
+    Sycl = 1 << 4,
+    /// ESIMD specific diagnostic.
+    Esimd = 1 << 5,
+    /// A flag representing 'all'. This can be used to avoid the check
+    /// altogether and make this behave as it did before the
+    /// DiagnosticReason was added (that is, unconditionally emit).
+    /// Note: This needs to be updated if any flags above are added.
+    All = OmpAll | CudaAll | Sycl | Esimd,
+
+    LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/All)
+  };
+
+private:
+  // A collection of pairs of undefined functions and their callers known
+  // to be reachable from a routine on the device (kernel or device function).
+  typedef std::pair<const FunctionDecl *, const FunctionDecl *> CallPair;
+  llvm::SmallVector UndefinedReachableFromSyclDevice;
+
+public:
+  // Helper routine to add a Callee-Caller pair of FunctionDecl *
+  // to UndefinedReachableFromSyclDevice.
+  void addFDToReachableFromSyclDevice(const FunctionDecl *Callee,
+                                      const FunctionDecl *Caller) {
+    UndefinedReachableFromSyclDevice.push_back(std::make_pair(Callee, Caller));
+  }
+  // Helper routine to check if a pair of Callee-Caller FunctionDecl *
+  // is in UndefinedReachableFromSyclDevice.
+ bool isFDReachableFromSyclDevice(const FunctionDecl *Callee, + const FunctionDecl *Caller) { + return llvm::any_of(UndefinedReachableFromSyclDevice, + [Callee, Caller](const CallPair &P) { + return P.first == Callee && P.second == Caller; + }); + } + + class DeviceDeferredDiagnostic { + public: + DeviceDeferredDiagnostic(SourceLocation SL, const PartialDiagnostic &PD, + DeviceDiagnosticReason R) + : Diagnostic(SL, PD), Reason(R) {} + + PartialDiagnosticAt &getDiag() { return Diagnostic; } + DeviceDiagnosticReason getReason() const { return Reason; } + + private: + PartialDiagnosticAt Diagnostic; + DeviceDiagnosticReason Reason; + }; + + /// A generic diagnostic builder for errors which may or may not be deferred. + /// + /// In CUDA, there exist constructs (e.g. variable-length arrays, try/catch) + /// which are not allowed to appear inside __device__ functions and are + /// allowed to appear in __host__ __device__ functions only if the host+device + /// function is never codegen'ed. + /// + /// To handle this, we use the notion of "deferred diagnostics", where we + /// attach a diagnostic to a FunctionDecl that's emitted iff it's codegen'ed. + /// + /// This class lets you emit either a regular diagnostic, a deferred + /// diagnostic, or no diagnostic at all, according to an argument you pass to + /// its constructor, thus simplifying the process of creating these "maybe + /// deferred" diagnostics. + class SemaDiagnosticBuilder { + public: + enum Kind { + /// Emit no diagnostics. + K_Nop, + /// Emit the diagnostic immediately (i.e., behave like Sema::Diag()). + K_Immediate, + /// Emit the diagnostic immediately, and, if it's a warning or error, also + /// emit a call stack showing how this function can be reached by an a + /// priori known-emitted function. + K_ImmediateWithCallStack, + /// Create a deferred diagnostic, which is emitted only if the function + /// it's attached to is codegen'ed. Also emit a call stack as with + /// K_ImmediateWithCallStack. + K_Deferred + }; + + SemaDiagnosticBuilder(Kind K, SourceLocation Loc, unsigned DiagID, + const FunctionDecl *Fn, Sema &S, DeviceDiagnosticReason R); + SemaDiagnosticBuilder(SemaDiagnosticBuilder &&D); + SemaDiagnosticBuilder(const SemaDiagnosticBuilder &) = default; + + // The copy and move assignment operator is defined as deleted pending + // further motivation. + SemaDiagnosticBuilder &operator=(const SemaDiagnosticBuilder &) = delete; + SemaDiagnosticBuilder &operator=(SemaDiagnosticBuilder &&) = delete; + + ~SemaDiagnosticBuilder(); + + bool isImmediate() const { return ImmediateDiag.has_value(); } + + /// Convertible to bool: True if we immediately emitted an error, false if + /// we didn't emit an error or we created a deferred error. + /// + /// Example usage: + /// + /// if (SemaDiagnosticBuilder(...) << foo << bar) + /// return ExprError(); + /// + /// But see CUDADiagIfDeviceCode() and CUDADiagIfHostCode() -- you probably + /// want to use these instead of creating a SemaDiagnosticBuilder yourself. 
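To make the "maybe deferred" behavior documented above concrete, here is a hedged, self-contained sketch (hypothetical names, not the Sema API): messages are parked on a function in a side table and flushed only if that function is ever codegen'ed.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Fn { std::string Name; };

// Hypothetical stand-in for Sema's per-function deferred-diagnostic table.
static std::map<const Fn *, std::vector<std::string>> Deferred;

// Report now (like K_Immediate) or park the message (like K_Deferred).
void diag(const Fn *F, const std::string &Msg, bool EmitNow) {
  if (EmitNow)
    std::fprintf(stderr, "error: %s\n", Msg.c_str());
  else
    Deferred[F].push_back(Msg);
}

// Invoked once F is known to be codegen'ed: flush anything parked on it.
void onCodegen(const Fn *F) {
  for (const std::string &Msg : Deferred[F])
    std::fprintf(stderr, "error in %s: %s\n", F->Name.c_str(), Msg.c_str());
  Deferred.erase(F);
}

The real builder additionally records a call stack for the K_ImmediateWithCallStack and K_Deferred kinds; that bookkeeping is omitted from this sketch.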
+ operator bool() const { return isImmediate(); } + + template + friend const SemaDiagnosticBuilder & + operator<<(const SemaDiagnosticBuilder &Diag, const T &Value) { + if (Diag.ImmediateDiag) + *Diag.ImmediateDiag << Value; + else if (Diag.PartialDiagId) + Diag.getDeviceDeferredDiags()[Diag.Fn][*Diag.PartialDiagId] + .getDiag() + .second + << Value; + return Diag; + } + + // It is necessary to limit this to rvalue reference to avoid calling this + // function with a bitfield lvalue argument since non-const reference to + // bitfield is not allowed. + template ::value>> + const SemaDiagnosticBuilder &operator<<(T &&V) const { + if (ImmediateDiag) + *ImmediateDiag << std::move(V); + else if (PartialDiagId) + getDeviceDeferredDiags()[Fn][*PartialDiagId].getDiag().second + << std::move(V); + return *this; + } + + friend const SemaDiagnosticBuilder & + operator<<(const SemaDiagnosticBuilder &Diag, const PartialDiagnostic &PD); + + void AddFixItHint(const FixItHint &Hint) const; + + friend ExprResult ExprError(const SemaDiagnosticBuilder &) { + return ExprError(); + } + friend StmtResult StmtError(const SemaDiagnosticBuilder &) { + return StmtError(); + } + operator ExprResult() const { return ExprError(); } + operator StmtResult() const { return StmtError(); } + operator TypeResult() const { return TypeError(); } + operator DeclResult() const { return DeclResult(true); } + operator MemInitResult() const { return MemInitResult(true); } + + using DeferredDiagnosticsType = + llvm::DenseMap, + std::vector>; + + private: + Sema &S; + SourceLocation Loc; + unsigned DiagID; + const FunctionDecl *Fn; + bool ShowCallStack; + + // Invariant: At most one of these Optionals has a value. + // FIXME: Switch these to a Variant once that exists. + std::optional ImmediateDiag; + std::optional PartialDiagId; + + DeferredDiagnosticsType &getDeviceDeferredDiags() const; + }; + + /// Emit a diagnostic. + SemaDiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID, + bool DeferHint = false); + + /// Emit a partial diagnostic. + SemaDiagnosticBuilder Diag(SourceLocation Loc, const PartialDiagnostic &PD, + bool DeferHint = false); +}; + +} // namespace clang + +#endif diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h new file mode 100644 index 0000000000000..acc675963c23a --- /dev/null +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -0,0 +1,37 @@ +//===----- SemaHLSL.h ----- Semantic Analysis for HLSL constructs ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares semantic analysis for HLSL constructs. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SEMA_SEMAHLSL_H +#define LLVM_CLANG_SEMA_SEMAHLSL_H + +#include "clang/AST/DeclBase.h" +#include "clang/AST/Expr.h" +#include "clang/Basic/IdentifierTable.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Sema/Scope.h" +#include "clang/Sema/SemaBase.h" + +namespace clang { + +class SemaHLSL : public SemaBase { +public: + SemaHLSL(Sema &S); + + Decl *ActOnStartHLSLBuffer(Scope *BufferScope, bool CBuffer, + SourceLocation KwLoc, IdentifierInfo *Ident, + SourceLocation IdentLoc, SourceLocation LBrace); + void ActOnFinishHLSLBuffer(Decl *Dcl, SourceLocation RBrace); +}; + +} // namespace clang + +#endif // LLVM_CLANG_SEMA_SEMAHLSL_H diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index 7f50d7889ad79..45929e4a9db3f 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -18,26 +18,49 @@ #include "clang/Basic/OpenACCKinds.h" #include "clang/Basic/SourceLocation.h" #include "clang/Sema/Ownership.h" +#include "clang/Sema/SemaBase.h" namespace clang { +class OpenACCClause; -class ASTContext; -class DiagnosticEngine; -class LangOptions; -class Sema; - -class SemaOpenACC { +class SemaOpenACC : public SemaBase { public: - SemaOpenACC(Sema &S); + /// A type to represent all the data for an OpenACC Clause that has been + /// parsed, but not yet created/semantically analyzed. This is effectively a + /// discriminated union on the 'Clause Kind', with all of the individual + /// clause details stored in a std::variant. + class OpenACCParsedClause { + OpenACCDirectiveKind DirKind; + OpenACCClauseKind ClauseKind; + SourceRange ClauseRange; + SourceLocation LParenLoc; + + // TODO OpenACC: Add variant here to store details of individual clauses. + + public: + OpenACCParsedClause(OpenACCDirectiveKind DirKind, + OpenACCClauseKind ClauseKind, SourceLocation BeginLoc) + : DirKind(DirKind), ClauseKind(ClauseKind), ClauseRange(BeginLoc, {}) {} + + OpenACCDirectiveKind getDirectiveKind() const { return DirKind; } + + OpenACCClauseKind getClauseKind() const { return ClauseKind; } - ASTContext &getASTContext() const; - DiagnosticsEngine &getDiagnostics() const; - const LangOptions &getLangOpts() const; + SourceLocation getBeginLoc() const { return ClauseRange.getBegin(); } - Sema &SemaRef; + SourceLocation getLParenLoc() const { return LParenLoc; } + + SourceLocation getEndLoc() const { return ClauseRange.getEnd(); } + + void setLParenLoc(SourceLocation EndLoc) { LParenLoc = EndLoc; } + void setEndLoc(SourceLocation EndLoc) { ClauseRange.setEnd(EndLoc); } + }; + + SemaOpenACC(Sema &S); /// Called after parsing an OpenACC Clause so that it can be checked. - bool ActOnClause(OpenACCClauseKind ClauseKind, SourceLocation StartLoc); + OpenACCClause *ActOnClause(ArrayRef ExistingClauses, + OpenACCParsedClause &Clause); /// Called after the construct has been parsed, but clauses haven't been /// parsed. This allows us to diagnose not-implemented, as well as set up any @@ -63,7 +86,10 @@ class SemaOpenACC { /// declaration group or associated statement. StmtResult ActOnEndStmtDirective(OpenACCDirectiveKind K, SourceLocation StartLoc, - SourceLocation EndLoc, StmtResult AssocStmt); + SourceLocation EndLoc, + ArrayRef Clauses, + StmtResult AssocStmt); + /// Called after the directive has been completely parsed, including the /// declaration group or associated statement. 
DeclGroupRef ActOnEndDeclDirective(); diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 370d8037a4da1..1911252b34cd1 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1089,27 +1089,13 @@ class ASTReader /// the last time we loaded information about this identifier. llvm::DenseMap IdentifierGeneration; - class InterestingDecl { - Decl *D; - bool DeclHasPendingBody; - - public: - InterestingDecl(Decl *D, bool HasBody) - : D(D), DeclHasPendingBody(HasBody) {} - - Decl *getDecl() { return D; } - - /// Whether the declaration has a pending body. - bool hasPendingBody() { return DeclHasPendingBody; } - }; - /// Contains declarations and definitions that could be /// "interesting" to the ASTConsumer, when we get that AST consumer. /// /// "Interesting" declarations are those that have data that may /// need to be emitted, such as inline function definitions or /// Objective-C protocols. - std::deque PotentiallyInterestingDecls; + std::deque PotentiallyInterestingDecls; /// The list of deduced function types that we have not yet read, because /// they might contain a deduced return type that refers to a local type diff --git a/clang/include/clang/Serialization/ASTRecordReader.h b/clang/include/clang/Serialization/ASTRecordReader.h index 5d3e95cb5d630..7dd1140106e47 100644 --- a/clang/include/clang/Serialization/ASTRecordReader.h +++ b/clang/include/clang/Serialization/ASTRecordReader.h @@ -24,6 +24,7 @@ #include "llvm/ADT/APSInt.h" namespace clang { +class OpenACCClause; class OMPTraitInfo; class OMPChildren; @@ -278,6 +279,12 @@ class ASTRecordReader /// Read an OpenMP children, advancing Idx. void readOMPChildren(OMPChildren *Data); + /// Read an OpenACC clause, advancing Idx. + OpenACCClause *readOpenACCClause(); + + /// Read a list of OpenACC clauses into the passed SmallVector. + void readOpenACCClauseList(MutableArrayRef Clauses); + /// Read a source location, advancing Idx. SourceLocation readSourceLocation(LocSeq *Seq = nullptr) { return Reader->ReadSourceLocation(*F, Record, Idx, Seq); diff --git a/clang/include/clang/Serialization/ASTRecordWriter.h b/clang/include/clang/Serialization/ASTRecordWriter.h index e007d4a70843a..1feb8fcbacf77 100644 --- a/clang/include/clang/Serialization/ASTRecordWriter.h +++ b/clang/include/clang/Serialization/ASTRecordWriter.h @@ -21,6 +21,7 @@ namespace clang { +class OpenACCClause; class TypeLoc; /// An object for streaming information to a record. @@ -292,6 +293,12 @@ class ASTRecordWriter /// Writes data related to the OpenMP directives. void writeOMPChildren(OMPChildren *Data); + /// Writes out a single OpenACC Clause. + void writeOpenACCClause(const OpenACCClause *C); + + /// Writes out a list of OpenACC clauses. + void writeOpenACCClauseList(ArrayRef Clauses); + /// Emit a string. 
void AddString(StringRef Str) {
 return Writer->AddString(Str, *Record);
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 5fe5c9286dabb..9aa1c6ddfe449 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -604,6 +604,15 @@ def PthreadLockChecker : Checker<"PthreadLock">,
def StreamChecker : Checker<"Stream">,
 HelpText<"Check stream handling functions">,
 WeakDependencies<[NonNullParamChecker]>,
+  CheckerOptions<[
+    CmdLineOption
+  ]>,
 Documentation;
def SimpleStreamChecker : Checker<"SimpleStream">,
diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
index d9b3d9352d322..cc3d93aabafda 100644
--- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
+++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
@@ -374,6 +374,7 @@ bool trackExpressionValue(const ExplodedNode *N, const Expr *E,
/// from.
///
/// \param V We're searching for the store where \c R received this value.
+/// It may be either defined or undefined, but should not be unknown.
/// \param R The region we're tracking.
/// \param Opts Tracking options specifying how we want to track the value.
/// \param Origin Only adds notes when the last store happened in a
@@ -383,7 +384,7 @@ bool trackExpressionValue(const ExplodedNode *N, const Expr *E,
/// changes to its value in a nested stackframe could be pruned, and
/// this visitor can prevent that without polluting the bugpath too
/// much.
-void trackStoredValue(KnownSVal V, const MemRegion *R,
+void trackStoredValue(SVal V, const MemRegion *R,
 PathSensitiveBugReport &Report, TrackingOptions Opts = {},
 const StackFrameContext *Origin = nullptr);
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
index b4e1636130ca7..ccfe8d47c290b 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h
@@ -32,19 +32,22 @@ namespace ento {
class CallDescription {
public:
 enum class Mode {
-    /// Match calls to functions from the C standard library. On some platforms
-    /// some functions may be implemented as macros that expand to calls to
-    /// built-in variants of the given functions, so in this mode we use some
-    /// heuristics to recognize these implementation-defined variants:
-    /// - We also accept calls where the name is derived from the specified
-    ///   name by adding "__builtin" or similar prefixes/suffixes.
-    /// - We also accept calls where the number of arguments or parameters is
-    ///   greater than the specified value.
+    /// Match calls to functions from the C standard library. This also
+    /// recognizes builtin variants whose name is derived by adding
+    /// "__builtin", "__inline" or similar prefixes or suffixes, but only
+    /// matches functions that are externally visible and are declared either
+    /// directly within a TU or in the namespace 'std'.
 /// For the exact heuristics, see CheckerContext::isCLibraryFunction().
-    /// (This mode only matches functions that are declared either directly
-    /// within a TU or in the namespace `std`.)
CLibrary, + /// An extended version of the `CLibrary` mode that also matches the + /// hardened variants like __FOO_chk() and __builtin__FOO_chk() that take + /// additional arguments compared to the "regular" function FOO(). + /// This is not the default behavior of `CLibrary` because in this case the + /// checker code must be prepared to handle the different parametrization. + /// For the exact heuristics, see CheckerContext::isHardenedVariantOf(). + CLibraryMaybeHardened, + /// Matches "simple" functions that are not methods. (Static methods are /// methods.) SimpleFunc, @@ -187,6 +190,9 @@ class CallDescription { private: bool matchesImpl(const FunctionDecl *Callee, size_t ArgCount, size_t ParamCount) const; + + bool matchNameOnly(const NamedDecl *ND) const; + bool matchQualifiedNameParts(const Decl *D) const; }; /// An immutable map from CallDescriptions to arbitrary data. Provides a unified diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h index 9923c41e6ad2d..0365f9e41312d 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h @@ -366,19 +366,31 @@ class CheckerContext { return getCalleeName(FunDecl); } - /// Returns true if the callee is an externally-visible function in the - /// top-level namespace, such as \c malloc. + /// Returns true if the given function is an externally-visible function in + /// the top-level namespace, such as \c malloc. /// /// If a name is provided, the function must additionally match the given /// name. /// - /// Note that this deliberately excludes C++ library functions in the \c std - /// namespace, but will include C library functions accessed through the - /// \c std namespace. This also does not check if the function is declared - /// as 'extern "C"', or if it uses C++ name mangling. + /// Note that this also accepts functions from the \c std namespace (because + /// headers like declare them there) and does not check if the + /// function is declared as 'extern "C"' or if it uses C++ name mangling. static bool isCLibraryFunction(const FunctionDecl *FD, StringRef Name = StringRef()); + /// In builds that use source hardening (-D_FORTIFY_SOURCE), many standard + /// functions are implemented as macros that expand to calls of hardened + /// functions that take additional arguments compared to the "usual" + /// variant and perform additional input validation. For example, a `memcpy` + /// call may expand to `__memcpy_chk()` or `__builtin___memcpy_chk()`. + /// + /// This method returns true if `FD` declares a fortified variant of the + /// standard library function `Name`. + /// + /// NOTE: This method relies on heuristics; extend it if you need to handle a + /// hardened variant that's not yet covered by it. + static bool isHardenedVariantOf(const FunctionDecl *FD, StringRef Name); + /// Depending on wither the location corresponds to a macro, return /// either the macro name or the token spelling. 
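As a rough, hedged sketch of the kind of name matching isHardenedVariantOf() describes (the exact heuristics live in CheckerContext; the helper name and the accepted spellings below are illustrative assumptions, not the real implementation):

#include <string>

// Hypothetical helper: does Spelling look like a fortified variant of the
// C standard library function Name, e.g. "__memcpy_chk" or
// "__builtin___memcpy_chk" for "memcpy"?
bool looksHardened(const std::string &Spelling, const std::string &Name) {
  const std::string Candidates[] = {"__" + Name + "_chk",
                                    "__builtin___" + Name + "_chk"};
  for (const std::string &C : Candidates)
    if (Spelling == C)
      return true;
  return false;
}

A checker that opts into CLibraryMaybeHardened must then be prepared for the additional arguments that the _chk variants take compared to the regular function.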
///
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
index c60528b7685fe..3a4b087257149 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h
@@ -232,14 +232,6 @@ class DefinedSVal : public DefinedOrUnknownSVal {
 : DefinedOrUnknownSVal(Kind, Data) {}
};
-/// Represents an SVal that is guaranteed to not be UnknownVal.
-class KnownSVal : public SVal {
-public:
-  /*implicit*/ KnownSVal(DefinedSVal V) : SVal(V) {}
-  /*implicit*/ KnownSVal(UndefinedVal V) : SVal(V) {}
-  static bool classof(SVal V) { return !V.isUnknown(); }
-};
-
class NonLoc : public DefinedSVal {
protected:
 NonLoc(SValKind Kind, const void *Data) : DefinedSVal(Kind, Data) {}
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index a7cf2532fe5d1..b25b9bc9b1140 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -4547,6 +4547,10 @@ ExpectedDecl ASTNodeImporter::VisitVarDecl(VarDecl *D) {
 ToVar->setQualifierInfo(ToQualifierLoc);
 ToVar->setAccess(D->getAccess());
 ToVar->setLexicalDeclContext(LexicalDC);
+  if (D->isInlineSpecified())
+    ToVar->setInlineSpecified();
+  if (D->isInline())
+    ToVar->setImplicitlyInline();
 if (FoundByLookup) {
 auto *Recent = const_cast<VarDecl *>(FoundByLookup->getMostRecentDecl());
diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt
index 3fba052d916c9..3faefb54f599f 100644
--- a/clang/lib/AST/CMakeLists.txt
+++ b/clang/lib/AST/CMakeLists.txt
@@ -98,6 +98,7 @@ add_clang_library(clangAST
 NSAPI.cpp
 ODRDiagsEmitter.cpp
 ODRHash.cpp
+  OpenACCClause.cpp
 OpenMPClause.cpp
 OSLog.cpp
 ParentMap.cpp
diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp
index 1b1dbcb90fb1b..3a94fc7eb9b28 100644
--- a/clang/lib/AST/ComputeDependence.cpp
+++ b/clang/lib/AST/ComputeDependence.cpp
@@ -310,6 +310,16 @@ ExprDependence clang::computeDependence(CXXThisExpr *E) {
 // 'this' is type-dependent if the class type of the enclosing
 // member function is dependent (C++ [temp.dep.expr]p2)
 auto D = toExprDependenceForImpliedType(E->getType()->getDependence());
+
+  // If a lambda with an explicit object parameter captures '*this', then
+  // 'this' now refers to the copy captured by the lambda, and if the lambda
+  // is type-dependent, so is the object and thus 'this'.
+  //
+  // Note: The standard does not mention this case explicitly, but we need
+  // to do this so we can mark NSDM accesses as dependent.
+ if (E->isCapturedByCopyInLambdaWithExplicitObjectParameter()) + D |= ExprDependence::Type; + assert(!(D & ExprDependence::UnexpandedPack)); return D; } diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp index c51fc6ef60466..fedf03830168f 100644 --- a/clang/lib/AST/DeclPrinter.cpp +++ b/clang/lib/AST/DeclPrinter.cpp @@ -21,6 +21,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/PrettyPrinter.h" #include "clang/Basic/Module.h" +#include "clang/Basic/SourceManager.h" #include "llvm/Support/raw_ostream.h" using namespace clang; @@ -49,18 +50,6 @@ namespace { void PrintObjCTypeParams(ObjCTypeParamList *Params); - enum class AttrPrintLoc { - None = 0, - Left = 1, - Right = 2, - Any = Left | Right, - - LLVM_MARK_AS_BITMASK_ENUM(/*DefaultValue=*/Any) - }; - - void prettyPrintAttributes(Decl *D, raw_ostream &out, - AttrPrintLoc loc = AttrPrintLoc::Any); - public: DeclPrinter(raw_ostream &Out, const PrintingPolicy &Policy, const ASTContext &Context, unsigned Indentation = 0, @@ -129,11 +118,10 @@ namespace { const TemplateParameterList *Params); void printTemplateArguments(llvm::ArrayRef Args, const TemplateParameterList *Params); - - inline void prettyPrintAttributes(Decl *D) { - prettyPrintAttributes(D, Out); - } - + enum class AttrPosAsWritten { Default = 0, Left, Right }; + void + prettyPrintAttributes(const Decl *D, + AttrPosAsWritten Pos = AttrPosAsWritten::Default); void prettyPrintPragmas(Decl *D); void printDeclType(QualType T, StringRef DeclName, bool Pack = false); }; @@ -250,87 +238,48 @@ raw_ostream& DeclPrinter::Indent(unsigned Indentation) { return Out; } -// For CLANG_ATTR_LIST_CanPrintOnLeft macro. -#include "clang/Basic/AttrLeftSideCanPrintList.inc" +static DeclPrinter::AttrPosAsWritten getPosAsWritten(const Attr *A, + const Decl *D) { + SourceLocation ALoc = A->getLoc(); + SourceLocation DLoc = D->getLocation(); + const ASTContext &C = D->getASTContext(); + if (ALoc.isInvalid() || DLoc.isInvalid()) + return DeclPrinter::AttrPosAsWritten::Left; -// For CLANG_ATTR_LIST_PrintOnLeft macro. -#include "clang/Basic/AttrLeftSideMustPrintList.inc" + if (C.getSourceManager().isBeforeInTranslationUnit(ALoc, DLoc)) + return DeclPrinter::AttrPosAsWritten::Left; -static bool canPrintOnLeftSide(attr::Kind kind) { -#ifdef CLANG_ATTR_LIST_CanPrintOnLeft - switch (kind) { - CLANG_ATTR_LIST_CanPrintOnLeft - return true; - default: - return false; - } -#else - return false; -#endif -} - -static bool canPrintOnLeftSide(const Attr *A) { - if (A->isStandardAttributeSyntax()) - return false; - - return canPrintOnLeftSide(A->getKind()); + return DeclPrinter::AttrPosAsWritten::Right; } -static bool mustPrintOnLeftSide(attr::Kind kind) { -#ifdef CLANG_ATTR_LIST_PrintOnLeft - switch (kind) { - CLANG_ATTR_LIST_PrintOnLeft - return true; - default: - return false; - } -#else - return false; -#endif -} - -static bool mustPrintOnLeftSide(const Attr *A) { - if (A->isDeclspecAttribute()) - return true; - - return mustPrintOnLeftSide(A->getKind()); -} - -void DeclPrinter::prettyPrintAttributes(Decl *D, llvm::raw_ostream &Out, - AttrPrintLoc Loc) { +void DeclPrinter::prettyPrintAttributes(const Decl *D, + AttrPosAsWritten Pos /*=Default*/) { if (Policy.PolishForDeclaration) return; if (D->hasAttrs()) { - AttrVec &Attrs = D->getAttrs(); + const AttrVec &Attrs = D->getAttrs(); for (auto *A : Attrs) { if (A->isInherited() || A->isImplicit()) continue; - - AttrPrintLoc AttrLoc = AttrPrintLoc::Right; - if (mustPrintOnLeftSide(A)) { - // If we must always print on left side (e.g. 
declspec), then mark as
-        // so.
-        AttrLoc = AttrPrintLoc::Left;
-      } else if (canPrintOnLeftSide(A)) {
-        // For functions with body defined we print the attributes on the left
-        // side so that GCC accept our dumps as well.
-        if (const FunctionDecl *FD = dyn_cast(D);
-            FD && FD->isThisDeclarationADefinition())
-          // In case Decl is a function with a body, then attrs should be print
-          // on the left side.
-          AttrLoc = AttrPrintLoc::Left;
-
-        // In case it is a variable declaration with a ctor, then allow
-        // printing on the left side for readbility.
-        else if (const VarDecl *VD = dyn_cast(D);
-                 VD && VD->getInit() &&
-                 VD->getInitStyle() == VarDecl::CallInit)
-          AttrLoc = AttrPrintLoc::Left;
+      switch (A->getKind()) {
+#define ATTR(X)
+#define PRAGMA_SPELLING_ATTR(X) case attr::X:
+#include "clang/Basic/AttrList.inc"
+        break;
+      default:
+        AttrPosAsWritten APos = getPosAsWritten(A, D);
+        assert(APos != AttrPosAsWritten::Default &&
+               "Default is not a valid attribute location");
+        if (Pos == AttrPosAsWritten::Default || Pos == APos) {
+          if (Pos != AttrPosAsWritten::Left)
+            Out << ' ';
+          A->printPretty(Out, Policy);
+          if (Pos == AttrPosAsWritten::Left)
+            Out << ' ';
+        }
+        break;
 }
-      // Only print the side matches the user requested.
-      if ((Loc & AttrLoc) != AttrPrintLoc::None)
-        A->printPretty(Out, Policy);
 }
 }
}
@@ -697,8 +646,10 @@ static void MaybePrintTagKeywordIfSupressingScopes(PrintingPolicy &Policy,
void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) {
 if (!D->getDescribedFunctionTemplate() &&
-      !D->isFunctionTemplateSpecialization())
+      !D->isFunctionTemplateSpecialization()) {
 prettyPrintPragmas(D);
+    prettyPrintAttributes(D, AttrPosAsWritten::Left);
+  }
 if (D->isFunctionTemplateSpecialization())
 Out << "template<> ";
@@ -708,22 +659,6 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) {
 printTemplateParameters(D->getTemplateParameterList(I));
 }
-  std::string LeftsideAttrs;
-  llvm::raw_string_ostream LSAS(LeftsideAttrs);
-
-  prettyPrintAttributes(D, LSAS, AttrPrintLoc::Left);
-
-  // prettyPrintAttributes print a space on left side of the attribute.
-  if (LeftsideAttrs[0] == ' ') {
-    // Skip the space prettyPrintAttributes generated.
-    LeftsideAttrs.erase(0, LeftsideAttrs.find_first_not_of(' '));
-
-    // Add a single space between the attribute and the Decl name.
-    LSAS << ' ';
-  }
-
-  Out << LeftsideAttrs;
-
 CXXConstructorDecl *CDecl = dyn_cast(D);
 CXXConversionDecl *ConversionDecl = dyn_cast(D);
 CXXDeductionGuideDecl *GuideDecl = dyn_cast(D);
@@ -889,7 +824,7 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) {
 Ty.print(Out, Policy, Proto);
 }
-  prettyPrintAttributes(D, Out, AttrPrintLoc::Right);
+  prettyPrintAttributes(D, AttrPosAsWritten::Right);
 if (D->isPureVirtual())
 Out << " = 0";
@@ -982,27 +917,12 @@ void DeclPrinter::VisitLabelDecl(LabelDecl *D) {
void DeclPrinter::VisitVarDecl(VarDecl *D) {
 prettyPrintPragmas(D);
+  prettyPrintAttributes(D, AttrPosAsWritten::Left);
+
 if (const auto *Param = dyn_cast(D);
 Param && Param->isExplicitObjectParameter())
 Out << "this ";
-  std::string LeftSide;
-  llvm::raw_string_ostream LeftSideStream(LeftSide);
-
-  // Print attributes that should be placed on the left, such as __declspec.
-  prettyPrintAttributes(D, LeftSideStream, AttrPrintLoc::Left);
-
-  // prettyPrintAttributes print a space on left side of the attribute.
-  if (LeftSide[0] == ' ') {
-    // Skip the space prettyPrintAttributes generated.
-    LeftSide.erase(0, LeftSide.find_first_not_of(' '));
-
-    // Add a single space between the attribute and the Decl name.
- LeftSideStream << ' '; - } - - Out << LeftSide; - QualType T = D->getTypeSourceInfo() ? D->getTypeSourceInfo()->getType() : D->getASTContext().getUnqualifiedObjCPointerType(D->getType()); @@ -1035,21 +955,16 @@ void DeclPrinter::VisitVarDecl(VarDecl *D) { } } - StringRef Name; - - Name = (isa(D) && Policy.CleanUglifiedParameters && - D->getIdentifier()) - ? D->getIdentifier()->deuglifiedName() - : D->getName(); - if (!Policy.SuppressTagKeyword && Policy.SuppressScope && !Policy.SuppressUnwrittenScope) MaybePrintTagKeywordIfSupressingScopes(Policy, T, Out); - printDeclType(T, Name); - // Print the attributes that should be placed right before the end of the - // decl. - prettyPrintAttributes(D, Out, AttrPrintLoc::Right); + printDeclType(T, (isa(D) && Policy.CleanUglifiedParameters && + D->getIdentifier()) + ? D->getIdentifier()->deuglifiedName() + : D->getName()); + + prettyPrintAttributes(D, AttrPosAsWritten::Right); Expr *Init = D->getInit(); if (!Policy.SuppressInitializers && Init) { diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp index 46182809810bc..84bacd457c85b 100644 --- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp @@ -173,10 +173,18 @@ bool ByteCodeExprGen::VisitCastExpr(const CastExpr *CE) { return this->emitCastFloatingIntegral(*ToT, CE); } - case CK_NullToPointer: + case CK_NullToPointer: { if (DiscardResult) return true; - return this->emitNull(classifyPrim(CE->getType()), CE); + + const Descriptor *Desc = nullptr; + const QualType PointeeType = CE->getType()->getPointeeType(); + if (!PointeeType.isNull()) { + if (std::optional T = classify(PointeeType)) + Desc = P.createDescriptor(SubExpr, *T); + } + return this->emitNull(classifyPrim(CE->getType()), Desc, CE); + } case CK_PointerToIntegral: { if (DiscardResult) @@ -199,6 +207,41 @@ bool ByteCodeExprGen::VisitCastExpr(const CastExpr *CE) { return true; } + case CK_IntegralToPointer: { + QualType IntType = SubExpr->getType(); + assert(IntType->isIntegralOrEnumerationType()); + if (!this->visit(SubExpr)) + return false; + // FIXME: I think the discard is wrong since the int->ptr cast might cause a + // diagnostic. + PrimType T = classifyPrim(IntType); + if (DiscardResult) + return this->emitPop(T, CE); + + QualType PtrType = CE->getType(); + assert(PtrType->isPointerType()); + + const Descriptor *Desc; + if (std::optional T = classify(PtrType->getPointeeType())) + Desc = P.createDescriptor(SubExpr, *T); + else if (PtrType->getPointeeType()->isVoidType()) + Desc = nullptr; + else + Desc = P.createDescriptor(CE, PtrType->getPointeeType().getTypePtr(), + Descriptor::InlineDescMD, true, false, + /*IsMutable=*/false, nullptr); + + if (!this->emitGetIntPtr(T, Desc, CE)) + return false; + + PrimType DestPtrT = classifyPrim(PtrType); + if (DestPtrT == PT_Ptr) + return true; + + // In case we're converting the integer to a non-Pointer. + return this->emitDecayPtr(PT_Ptr, DestPtrT, CE); + } + case CK_AtomicToNonAtomic: case CK_ConstructorConversion: case CK_FunctionToPointerDecay: @@ -207,13 +250,31 @@ bool ByteCodeExprGen::VisitCastExpr(const CastExpr *CE) { case CK_UserDefinedConversion: return this->delegate(SubExpr); - case CK_BitCast: + case CK_BitCast: { + // Reject bitcasts to atomic types. 
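[Illustrative sketch, not part of the patch: CK_NullToPointer and CK_IntegralToPointer now attach a Descriptor for the pointee type where one can be classified, so the interpreter can model code like the following as an integral pointer instead of giving up; whether the cast is ultimately accepted still depends on the usual constant-evaluation checks:]

    void f() {
      int *p = (int *)0x10; // modeled roughly as IntPointer{Desc = int, Value = 0x10}
    }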
if (CE->getType()->isAtomicType()) { if (!this->discard(SubExpr)) return false; return this->emitInvalidCast(CastKind::Reinterpret, CE); } - return this->delegate(SubExpr); + + if (DiscardResult) + return this->discard(SubExpr); + + std::optional FromT = classify(SubExpr->getType()); + std::optional ToT = classifyPrim(CE->getType()); + if (!FromT || !ToT) + return false; + + assert(isPtrType(*FromT)); + assert(isPtrType(*ToT)); + if (FromT == ToT) + return this->delegate(SubExpr); + + if (!this->visit(SubExpr)) + return false; + return this->emitDecayPtr(*FromT, *ToT, CE); + } case CK_IntegralToBoolean: case CK_IntegralCast: { @@ -245,7 +306,7 @@ bool ByteCodeExprGen::VisitCastExpr(const CastExpr *CE) { if (!this->visit(SubExpr)) return false; - if (!this->emitNull(PtrT, CE)) + if (!this->emitNull(PtrT, nullptr, CE)) return false; return this->emitNE(PtrT, CE); @@ -455,7 +516,7 @@ bool ByteCodeExprGen::VisitBinaryOperator(const BinaryOperator *BO) { // Pointer arithmetic special case. if (BO->getOpcode() == BO_Add || BO->getOpcode() == BO_Sub) { - if (T == PT_Ptr || (LT == PT_Ptr && RT == PT_Ptr)) + if (isPtrType(*T) || (isPtrType(*LT) && isPtrType(*RT))) return this->VisitPointerArithBinOp(BO); } @@ -1033,6 +1094,34 @@ bool ByteCodeExprGen::VisitInitListExpr(const InitListExpr *E) { return true; } + if (const auto *VecT = E->getType()->getAs()) { + unsigned NumVecElements = VecT->getNumElements(); + assert(NumVecElements >= E->getNumInits()); + + QualType ElemQT = VecT->getElementType(); + PrimType ElemT = classifyPrim(ElemQT); + + // All initializer elements. + unsigned InitIndex = 0; + for (const Expr *Init : E->inits()) { + if (!this->visit(Init)) + return false; + + if (!this->emitInitElem(ElemT, InitIndex, E)) + return false; + ++InitIndex; + } + + // Fill the rest with zeroes. + for (; InitIndex != NumVecElements; ++InitIndex) { + if (!this->visitZeroInitializer(ElemT, ElemQT, E)) + return false; + if (!this->emitInitElem(ElemT, InitIndex, E)) + return false; + } + return true; + } + return false; } @@ -1084,6 +1173,9 @@ static CharUnits AlignOfType(QualType T, const ASTContext &ASTCtx, if (const auto *Ref = T->getAs()) T = Ref->getPointeeType(); + if (T.getQualifiers().hasUnaligned()) + return CharUnits::One(); + // __alignof is defined to return the preferred alignment. // Before 8, clang returned the preferred alignment for alignof and // _Alignof as well. @@ -2323,7 +2415,7 @@ bool ByteCodeExprGen::visitBool(const Expr *E) { // Convert pointers to bool. if (T == PT_Ptr || T == PT_FnPtr) { - if (!this->emitNull(*T, E)) + if (!this->emitNull(*T, nullptr, E)) return false; return this->emitNE(*T, E); } @@ -2363,9 +2455,9 @@ bool ByteCodeExprGen::visitZeroInitializer(PrimType T, QualType QT, case PT_IntAPS: return this->emitZeroIntAPS(Ctx.getBitWidth(QT), E); case PT_Ptr: - return this->emitNullPtr(E); + return this->emitNullPtr(nullptr, E); case PT_FnPtr: - return this->emitNullFnPtr(E); + return this->emitNullFnPtr(nullptr, E); case PT_Float: { return this->emitConstFloat(APFloat::getZero(Ctx.getFloatSemantics(QT)), E); } @@ -2511,6 +2603,7 @@ unsigned ByteCodeExprGen::allocateLocalPrimitive(DeclTy &&Src, dyn_cast_if_present(Src.dyn_cast())) { assert(!P.getGlobal(VD)); assert(!Locals.contains(VD)); + (void)VD; } // FIXME: There are cases where Src.is() is wrong, e.g. 
@@ -2948,7 +3041,7 @@ bool ByteCodeExprGen::VisitCXXNullPtrLiteralExpr( if (DiscardResult) return true; - return this->emitNullPtr(E); + return this->emitNullPtr(nullptr, E); } template diff --git a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp index 675063e748988..55a06f37a0c3d 100644 --- a/clang/lib/AST/Interp/ByteCodeStmtGen.cpp +++ b/clang/lib/AST/Interp/ByteCodeStmtGen.cpp @@ -110,7 +110,7 @@ bool ByteCodeStmtGen::emitLambdaStaticInvokerBody( // one here, and we don't need one either because the lambda cannot have // any captures, as verified above. Emit a null pointer. This is then // special-cased when interpreting to not emit any misleading diagnostics. - if (!this->emitNullPtr(MD)) + if (!this->emitNullPtr(nullptr, MD)) return false; // Forward all arguments from the static invoker to the lambda call operator. diff --git a/clang/lib/AST/Interp/Context.cpp b/clang/lib/AST/Interp/Context.cpp index 15a9d46880e95..274178837bf04 100644 --- a/clang/lib/AST/Interp/Context.cpp +++ b/clang/lib/AST/Interp/Context.cpp @@ -120,7 +120,8 @@ std::optional Context::classify(QualType T) const { if (T->isBooleanType()) return PT_Bool; - if (T->isAnyComplexType()) + // We map these to primitive arrays. + if (T->isAnyComplexType() || T->isVectorType()) return std::nullopt; if (T->isSignedIntegerOrEnumerationType()) { diff --git a/clang/lib/AST/Interp/Descriptor.h b/clang/lib/AST/Interp/Descriptor.h index 4e257361ad146..c386fc8ac7b09 100644 --- a/clang/lib/AST/Interp/Descriptor.h +++ b/clang/lib/AST/Interp/Descriptor.h @@ -168,6 +168,7 @@ struct Descriptor final { const Decl *asDecl() const { return Source.dyn_cast(); } const Expr *asExpr() const { return Source.dyn_cast(); } + const DeclTy &getSource() const { return Source; } const ValueDecl *asValueDecl() const { return dyn_cast_if_present(asDecl()); diff --git a/clang/lib/AST/Interp/Disasm.cpp b/clang/lib/AST/Interp/Disasm.cpp index 01ef1c24744a5..022b394e58e64 100644 --- a/clang/lib/AST/Interp/Disasm.cpp +++ b/clang/lib/AST/Interp/Disasm.cpp @@ -233,3 +233,34 @@ LLVM_DUMP_METHOD void InterpFrame::dump(llvm::raw_ostream &OS, F = F->Caller; } } + +LLVM_DUMP_METHOD void Record::dump(llvm::raw_ostream &OS, unsigned Indentation, + unsigned Offset) const { + unsigned Indent = Indentation * 2; + OS.indent(Indent); + { + ColorScope SC(OS, true, {llvm::raw_ostream::BLUE, true}); + OS << getName() << "\n"; + } + + unsigned I = 0; + for (const Record::Base &B : bases()) { + OS.indent(Indent) << "- Base " << I << ". Offset " << (Offset + B.Offset) + << "\n"; + B.R->dump(OS, Indentation + 1, Offset + B.Offset); + ++I; + } + + // FIXME: Virtual bases. + + I = 0; + for (const Record::Field &F : fields()) { + OS.indent(Indent) << "- Field " << I << ": "; + { + ColorScope SC(OS, true, {llvm::raw_ostream::BRIGHT_RED, true}); + OS << F.Decl->getName(); + } + OS << ". 
Offset " << (Offset + F.Offset) << "\n"; + ++I; + } +} diff --git a/clang/lib/AST/Interp/EvalEmitter.cpp b/clang/lib/AST/Interp/EvalEmitter.cpp index caffb69d83e37..d764b4b6f6d17 100644 --- a/clang/lib/AST/Interp/EvalEmitter.cpp +++ b/clang/lib/AST/Interp/EvalEmitter.cpp @@ -51,7 +51,8 @@ EvaluationResult EvalEmitter::interpretDecl(const VarDecl *VD, this->CheckFullyInitialized = CheckFullyInitialized; this->ConvertResultToRValue = VD->getAnyInitializer() && - (VD->getAnyInitializer()->getType()->isAnyComplexType()); + (VD->getAnyInitializer()->getType()->isAnyComplexType() || + VD->getAnyInitializer()->getType()->isVectorType()); EvalResult.setSource(VD); if (!this->visitDecl(VD) && EvalResult.empty()) diff --git a/clang/lib/AST/Interp/FunctionPointer.h b/clang/lib/AST/Interp/FunctionPointer.h index 2ff691b1cd3e1..e7fad8161fd9c 100644 --- a/clang/lib/AST/Interp/FunctionPointer.h +++ b/clang/lib/AST/Interp/FunctionPointer.h @@ -22,7 +22,11 @@ class FunctionPointer final { const Function *Func; public: - FunctionPointer() : Func(nullptr) {} + // FIXME: We might want to track the fact that the Function pointer + // has been created from an integer and is most likely garbage anyway. + FunctionPointer(int IntVal = 0, const Descriptor *Desc = nullptr) + : Func(reinterpret_cast(IntVal)) {} + FunctionPointer(const Function *Func) : Func(Func) { assert(Func); } const Function *getFunction() const { return Func; } @@ -53,6 +57,10 @@ class FunctionPointer final { return toAPValue().getAsString(Ctx, Func->getDecl()->getType()); } + uint64_t getIntegerRepresentation() const { + return static_cast(reinterpret_cast(Func)); + } + ComparisonCategoryResult compare(const FunctionPointer &RHS) const { if (Func == RHS.Func) return ComparisonCategoryResult::Equal; diff --git a/clang/lib/AST/Interp/Interp.cpp b/clang/lib/AST/Interp/Interp.cpp index 0ce64a572c263..e5e2c932f500b 100644 --- a/clang/lib/AST/Interp/Interp.cpp +++ b/clang/lib/AST/Interp/Interp.cpp @@ -282,6 +282,8 @@ bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc) { } static bool CheckConstant(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { + if (Ptr.isIntegralPointer()) + return true; return CheckConstant(S, OpPC, Ptr.getDeclDesc()); } @@ -335,6 +337,9 @@ bool CheckConst(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { return true; } + if (!Ptr.isBlockPointer()) + return false; + const QualType Ty = Ptr.getType(); const SourceInfo &Loc = S.Current->getSource(OpPC); S.FFDiag(Loc, diag::note_constexpr_modify_const_type) << Ty; diff --git a/clang/lib/AST/Interp/Interp.h b/clang/lib/AST/Interp/Interp.h index 405993eb82703..c7012aa4ec680 100644 --- a/clang/lib/AST/Interp/Interp.h +++ b/clang/lib/AST/Interp/Interp.h @@ -801,6 +801,17 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { return true; } + for (const auto &P : {LHS, RHS}) { + if (P.isZero()) + continue; + if (P.isWeak()) { + const SourceInfo &Loc = S.Current->getSource(OpPC); + S.FFDiag(Loc, diag::note_constexpr_pointer_weak_comparison) + << P.toDiagnosticString(S.getCtx()); + return false; + } + } + if (!Pointer::hasSameBase(LHS, RHS)) { S.Stk.push(BoolT::from(Fn(ComparisonCategoryResult::Unordered))); return true; @@ -812,9 +823,9 @@ inline bool CmpHelperEQ(InterpState &S, CodePtr OpPC, CompareFn Fn) { // element in the same array are NOT equal. They have the same Base value, // but a different Offset. This is a pretty rare case, so we fix this here // by comparing pointers to the first elements. 
- if (!LHS.isDummy() && LHS.isArrayRoot()) + if (!LHS.isZero() && !LHS.isDummy() && LHS.isArrayRoot()) VL = LHS.atIndex(0).getByteOffset(); - if (!RHS.isDummy() && RHS.isArrayRoot()) + if (!RHS.isZero() && !RHS.isDummy() && RHS.isArrayRoot()) VR = RHS.atIndex(0).getByteOffset(); S.Stk.push(BoolT::from(Fn(Compare(VL, VR)))); @@ -1333,6 +1344,11 @@ inline bool FinishInit(InterpState &S, CodePtr OpPC) { return true; } +inline bool Dump(InterpState &S, CodePtr OpPC) { + S.Stk.dump(); + return true; +} + inline bool VirtBaseHelper(InterpState &S, CodePtr OpPC, const RecordDecl *Decl, const Pointer &Ptr) { Pointer Base = Ptr; @@ -1370,6 +1386,8 @@ bool Load(InterpState &S, CodePtr OpPC) { const Pointer &Ptr = S.Stk.peek(); if (!CheckLoad(S, OpPC, Ptr)) return false; + if (!Ptr.isBlockPointer()) + return false; S.Stk.push(Ptr.deref()); return true; } @@ -1379,6 +1397,8 @@ bool LoadPop(InterpState &S, CodePtr OpPC) { const Pointer &Ptr = S.Stk.pop(); if (!CheckLoad(S, OpPC, Ptr)) return false; + if (!Ptr.isBlockPointer()) + return false; S.Stk.push(Ptr.deref()); return true; } @@ -1517,8 +1537,12 @@ bool OffsetHelper(InterpState &S, CodePtr OpPC, const T &Offset, return true; } - if (!CheckNull(S, OpPC, Ptr, CSK_ArrayIndex)) - return false; + if (!CheckNull(S, OpPC, Ptr, CSK_ArrayIndex)) { + // The CheckNull will have emitted a note already, but we only + // abort in C++, since this is fine in C. + if (S.getLangOpts().CPlusPlus) + return false; + } // Arrays of unknown bounds cannot have pointers into them. if (!CheckArray(S, OpPC, Ptr)) @@ -1544,23 +1568,25 @@ bool OffsetHelper(InterpState &S, CodePtr OpPC, const T &Offset, Invalid = true; }; - T MaxOffset = T::from(MaxIndex - Index, Offset.bitWidth()); - if constexpr (Op == ArithOp::Add) { - // If the new offset would be negative, bail out. - if (Offset.isNegative() && (Offset.isMin() || -Offset > Index)) - DiagInvalidOffset(); - - // If the new offset would be out of bounds, bail out. - if (Offset.isPositive() && Offset > MaxOffset) - DiagInvalidOffset(); - } else { - // If the new offset would be negative, bail out. - if (Offset.isPositive() && Index < Offset) - DiagInvalidOffset(); - - // If the new offset would be out of bounds, bail out. - if (Offset.isNegative() && (Offset.isMin() || -Offset > MaxOffset)) - DiagInvalidOffset(); + if (Ptr.isBlockPointer()) { + T MaxOffset = T::from(MaxIndex - Index, Offset.bitWidth()); + if constexpr (Op == ArithOp::Add) { + // If the new offset would be negative, bail out. + if (Offset.isNegative() && (Offset.isMin() || -Offset > Index)) + DiagInvalidOffset(); + + // If the new offset would be out of bounds, bail out. + if (Offset.isPositive() && Offset > MaxOffset) + DiagInvalidOffset(); + } else { + // If the new offset would be negative, bail out. + if (Offset.isPositive() && Index < Offset) + DiagInvalidOffset(); + + // If the new offset would be out of bounds, bail out. + if (Offset.isNegative() && (Offset.isMin() || -Offset > MaxOffset)) + DiagInvalidOffset(); + } } if (Invalid && !Ptr.isDummy() && S.getLangOpts().CPlusPlus) @@ -1644,6 +1670,11 @@ inline bool SubPtr(InterpState &S, CodePtr OpPC) { const Pointer &LHS = S.Stk.pop(); const Pointer &RHS = S.Stk.pop(); + if (RHS.isZero()) { + S.Stk.push(T::from(LHS.getIndex())); + return true; + } + if (!Pointer::hasSameBase(LHS, RHS) && S.getLangOpts().CPlusPlus) { // TODO: Diagnose. 
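[Illustrative sketch, not part of the patch: the relaxed CheckNull handling above presumably keeps null-pointer offsetting foldable in C, where pre-offsetof idioms such as the following rely on it, while C++ constant evaluation still aborts:]

    #define OFFSET_OF(T, m) ((unsigned long)&(((T *)0)->m)) // folds in GNU C;
                                                            // rejected in C++
                                                            // constant exprs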
return false; @@ -1822,8 +1853,9 @@ static inline bool ZeroIntAPS(InterpState &S, CodePtr OpPC, uint32_t BitWidth) { } template ::T> -inline bool Null(InterpState &S, CodePtr OpPC) { - S.Stk.push(); +inline bool Null(InterpState &S, CodePtr OpPC, const Descriptor *Desc) { + // Note: Desc can be null. + S.Stk.push(0, Desc); return true; } @@ -1841,6 +1873,15 @@ inline bool This(InterpState &S, CodePtr OpPC) { if (!CheckThis(S, OpPC, This)) return false; + // Ensure the This pointer has been cast to the correct base. + if (!This.isDummy()) { + assert(isa(S.Current->getFunction()->getDecl())); + assert(This.getRecord()); + assert( + This.getRecord()->getDecl() == + cast(S.Current->getFunction()->getDecl())->getParent()); + } + S.Stk.push(This); return true; } @@ -2218,6 +2259,14 @@ inline bool GetFnPtr(InterpState &S, CodePtr OpPC, const Function *Func) { return true; } +template ::T> +inline bool GetIntPtr(InterpState &S, CodePtr OpPC, const Descriptor *Desc) { + const T &IntVal = S.Stk.pop(); + + S.Stk.push(static_cast(IntVal), Desc); + return true; +} + /// Just emit a diagnostic. The expression that caused emission of this /// op is not valid in a constant context. inline bool Invalid(InterpState &S, CodePtr OpPC) { @@ -2274,6 +2323,18 @@ inline bool CheckNonNullArg(InterpState &S, CodePtr OpPC) { return false; } +/// OldPtr -> Integer -> NewPtr. +template +inline bool DecayPtr(InterpState &S, CodePtr OpPC) { + static_assert(isPtrType(TIn) && isPtrType(TOut)); + using FromT = typename PrimConv::T; + using ToT = typename PrimConv::T; + + const FromT &OldPtr = S.Stk.pop(); + S.Stk.push(ToT(OldPtr.getIntegerRepresentation(), nullptr)); + return true; +} + //===----------------------------------------------------------------------===// // Read opcode arguments //===----------------------------------------------------------------------===// diff --git a/clang/lib/AST/Interp/InterpBlock.cpp b/clang/lib/AST/Interp/InterpBlock.cpp index a62128d9cfaed..9b33d1b778fb2 100644 --- a/clang/lib/AST/Interp/InterpBlock.cpp +++ b/clang/lib/AST/Interp/InterpBlock.cpp @@ -73,7 +73,7 @@ void Block::replacePointer(Pointer *Old, Pointer *New) { removePointer(Old); addPointer(New); - Old->Pointee = nullptr; + Old->PointeeStorage.BS.Pointee = nullptr; #ifndef NDEBUG assert(!hasPointer(Old)); @@ -104,7 +104,7 @@ DeadBlock::DeadBlock(DeadBlock *&Root, Block *Blk) // Transfer pointers. 
B.Pointers = Blk->Pointers; for (Pointer *P = Blk->Pointers; P; P = P->Next) - P->Pointee = &B; + P->PointeeStorage.BS.Pointee = &B; } void DeadBlock::free() { diff --git a/clang/lib/AST/Interp/InterpBuiltin.cpp b/clang/lib/AST/Interp/InterpBuiltin.cpp index 1bf5d55314f1f..984ba4f7f2689 100644 --- a/clang/lib/AST/Interp/InterpBuiltin.cpp +++ b/clang/lib/AST/Interp/InterpBuiltin.cpp @@ -16,6 +16,16 @@ namespace clang { namespace interp { +static unsigned callArgSize(const InterpState &S, const CallExpr *C) { + unsigned O = 0; + + for (const Expr *E : C->arguments()) { + O += align(primSize(*S.getContext().classify(E))); + } + + return O; +} + template static T getParam(const InterpFrame *Frame, unsigned Index) { assert(Frame->getFunction()->getNumParams() > Index); @@ -816,9 +826,10 @@ static bool interp__builtin_carryop(InterpState &S, CodePtr OpPC, static bool interp__builtin_clz(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const Function *Func, const CallExpr *Call) { + unsigned CallSize = callArgSize(S, Call); unsigned BuiltinOp = Func->getBuiltinID(); PrimType ValT = *S.getContext().classify(Call->getArg(0)); - const APSInt &Val = peekToAPSInt(S.Stk, ValT); + const APSInt &Val = peekToAPSInt(S.Stk, ValT, CallSize); // When the argument is 0, the result of GCC builtins is undefined, whereas // for Microsoft intrinsics, the result is the bit-width of the argument. @@ -826,8 +837,19 @@ static bool interp__builtin_clz(InterpState &S, CodePtr OpPC, BuiltinOp != Builtin::BI__lzcnt && BuiltinOp != Builtin::BI__lzcnt64; - if (ZeroIsUndefined && Val == 0) - return false; + if (Val == 0) { + if (Func->getBuiltinID() == Builtin::BI__builtin_clzg && + Call->getNumArgs() == 2) { + // We have a fallback parameter. + PrimType FallbackT = *S.getContext().classify(Call->getArg(1)); + const APSInt &Fallback = peekToAPSInt(S.Stk, FallbackT); + pushInteger(S, Fallback, Call->getType()); + return true; + } + + if (ZeroIsUndefined) + return false; + } pushInteger(S, Val.countl_zero(), Call->getType()); return true; @@ -836,11 +858,21 @@ static bool interp__builtin_clz(InterpState &S, CodePtr OpPC, static bool interp__builtin_ctz(InterpState &S, CodePtr OpPC, const InterpFrame *Frame, const Function *Func, const CallExpr *Call) { + unsigned CallSize = callArgSize(S, Call); PrimType ValT = *S.getContext().classify(Call->getArg(0)); - const APSInt &Val = peekToAPSInt(S.Stk, ValT); - - if (Val == 0) + const APSInt &Val = peekToAPSInt(S.Stk, ValT, CallSize); + + if (Val == 0) { + if (Func->getBuiltinID() == Builtin::BI__builtin_ctzg && + Call->getNumArgs() == 2) { + // We have a fallback parameter. 
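[Illustrative sketch, not part of the patch: per the GCC semantics of the new type-generic builtins, the optional second argument is returned when the input is zero, which is what the fallback path above implements for clz and the one just below implements for ctz:]

    static_assert(__builtin_clzg(0u, 32) == 32); // fallback taken
    static_assert(__builtin_ctzg(0u, 32) == 32);
    static_assert(__builtin_clzg(1u) == 31);     // 32-bit unsigned, no fallback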
+ PrimType FallbackT = *S.getContext().classify(Call->getArg(1)); + const APSInt &Fallback = peekToAPSInt(S.Stk, FallbackT); + pushInteger(S, Fallback, Call->getType()); + return true; + } return false; + } pushInteger(S, Val.countr_zero(), Call->getType()); return true; @@ -1223,6 +1255,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, case Builtin::BI__builtin_clzl: case Builtin::BI__builtin_clzll: case Builtin::BI__builtin_clzs: + case Builtin::BI__builtin_clzg: case Builtin::BI__lzcnt16: // Microsoft variants of count leading-zeroes case Builtin::BI__lzcnt: case Builtin::BI__lzcnt64: @@ -1234,6 +1267,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, case Builtin::BI__builtin_ctzl: case Builtin::BI__builtin_ctzll: case Builtin::BI__builtin_ctzs: + case Builtin::BI__builtin_ctzg: if (!interp__builtin_ctz(S, OpPC, Frame, F, Call)) return false; break; diff --git a/clang/lib/AST/Interp/Opcodes.td b/clang/lib/AST/Interp/Opcodes.td index cc1310f4c0d52..e17be3afd2572 100644 --- a/clang/lib/AST/Interp/Opcodes.td +++ b/clang/lib/AST/Interp/Opcodes.td @@ -59,6 +59,7 @@ def ArgCastKind : ArgType { let Name = "CastKind"; } def ArgCallExpr : ArgType { let Name = "const CallExpr *"; } def ArgOffsetOfExpr : ArgType { let Name = "const OffsetOfExpr *"; } def ArgDeclRef : ArgType { let Name = "const DeclRefExpr *"; } +def ArgDesc : ArgType { let Name = "const Descriptor *"; } def ArgCCI : ArgType { let Name = "const ComparisonCategoryInfo *"; } //===----------------------------------------------------------------------===// @@ -272,6 +273,7 @@ def ZeroIntAPS : Opcode { // [] -> [Pointer] def Null : Opcode { let Types = [PtrTypeClass]; + let Args = [ArgDesc]; let HasGroup = 1; } @@ -530,6 +532,11 @@ def GetFnPtr : Opcode { let Args = [ArgFunction]; } +def GetIntPtr : Opcode { + let Types = [AluTypeClass]; + let Args = [ArgDesc]; + let HasGroup = 1; +} //===----------------------------------------------------------------------===// // Binary operators. @@ -662,6 +669,11 @@ def CastPointerIntegral : Opcode { let HasGroup = 1; } +def DecayPtr : Opcode { + let Types = [PtrTypeClass, PtrTypeClass]; + let HasGroup = 1; +} + //===----------------------------------------------------------------------===// // Comparison opcodes. //===----------------------------------------------------------------------===// @@ -723,3 +735,8 @@ def CheckNonNullArg : Opcode { } def Memcpy : Opcode; + +//===----------------------------------------------------------------------===// +// Debugging. 
+//===----------------------------------------------------------------------===// +def Dump : Opcode; diff --git a/clang/lib/AST/Interp/Pointer.cpp b/clang/lib/AST/Interp/Pointer.cpp index af60ced0e10e9..e163e658d462b 100644 --- a/clang/lib/AST/Interp/Pointer.cpp +++ b/clang/lib/AST/Interp/Pointer.cpp @@ -26,60 +26,95 @@ Pointer::Pointer(Block *Pointee) Pointer::Pointer(Block *Pointee, unsigned BaseAndOffset) : Pointer(Pointee, BaseAndOffset, BaseAndOffset) {} -Pointer::Pointer(const Pointer &P) : Pointer(P.Pointee, P.Base, P.Offset) {} +Pointer::Pointer(const Pointer &P) + : Offset(P.Offset), PointeeStorage(P.PointeeStorage), + StorageKind(P.StorageKind) { -Pointer::Pointer(Pointer &&P) - : Pointee(P.Pointee), Base(P.Base), Offset(P.Offset) { - if (Pointee) - Pointee->replacePointer(&P, this); + if (isBlockPointer() && PointeeStorage.BS.Pointee) + PointeeStorage.BS.Pointee->addPointer(this); } Pointer::Pointer(Block *Pointee, unsigned Base, unsigned Offset) - : Pointee(Pointee), Base(Base), Offset(Offset) { + : Offset(Offset), StorageKind(Storage::Block) { assert((Base == RootPtrMark || Base % alignof(void *) == 0) && "wrong base"); + + PointeeStorage.BS = {Pointee, Base}; + if (Pointee) Pointee->addPointer(this); } +Pointer::Pointer(Pointer &&P) + : Offset(P.Offset), PointeeStorage(P.PointeeStorage), + StorageKind(P.StorageKind) { + + if (StorageKind == Storage::Block && PointeeStorage.BS.Pointee) + PointeeStorage.BS.Pointee->replacePointer(&P, this); +} + Pointer::~Pointer() { - if (Pointee) { - Pointee->removePointer(this); - Pointee->cleanup(); + if (isIntegralPointer()) + return; + + if (PointeeStorage.BS.Pointee) { + PointeeStorage.BS.Pointee->removePointer(this); + PointeeStorage.BS.Pointee->cleanup(); } } void Pointer::operator=(const Pointer &P) { - Block *Old = Pointee; - if (Pointee) - Pointee->removePointer(this); + if (!this->isIntegralPointer() || !P.isBlockPointer()) + assert(P.StorageKind == StorageKind); - Offset = P.Offset; - Base = P.Base; + bool WasBlockPointer = isBlockPointer(); + StorageKind = P.StorageKind; + if (StorageKind == Storage::Block) { + Block *Old = PointeeStorage.BS.Pointee; + if (WasBlockPointer && PointeeStorage.BS.Pointee) + PointeeStorage.BS.Pointee->removePointer(this); - Pointee = P.Pointee; - if (Pointee) - Pointee->addPointer(this); + Offset = P.Offset; + PointeeStorage.BS = P.PointeeStorage.BS; + + if (PointeeStorage.BS.Pointee) + PointeeStorage.BS.Pointee->addPointer(this); + + if (WasBlockPointer && Old) + Old->cleanup(); - if (Old) - Old->cleanup(); + } else if (StorageKind == Storage::Int) { + PointeeStorage.Int = P.PointeeStorage.Int; + } else { + assert(false && "Unhandled storage kind"); + } } void Pointer::operator=(Pointer &&P) { - Block *Old = Pointee; + if (!this->isIntegralPointer() || !P.isBlockPointer()) + assert(P.StorageKind == StorageKind); - if (Pointee) - Pointee->removePointer(this); + bool WasBlockPointer = isBlockPointer(); + StorageKind = P.StorageKind; + if (StorageKind == Storage::Block) { + Block *Old = PointeeStorage.BS.Pointee; + if (WasBlockPointer && PointeeStorage.BS.Pointee) + PointeeStorage.BS.Pointee->removePointer(this); - Offset = P.Offset; - Base = P.Base; + Offset = P.Offset; + PointeeStorage.BS = P.PointeeStorage.BS; - Pointee = P.Pointee; - if (Pointee) - Pointee->replacePointer(&P, this); + if (PointeeStorage.BS.Pointee) + PointeeStorage.BS.Pointee->addPointer(this); - if (Old) - Old->cleanup(); + if (WasBlockPointer && Old) + Old->cleanup(); + + } else if (StorageKind == Storage::Int) { + 
PointeeStorage.Int = P.PointeeStorage.Int; + } else { + assert(false && "Unhandled storage kind"); + } } APValue Pointer::toAPValue() const { @@ -88,6 +123,11 @@ APValue Pointer::toAPValue() const { if (isZero()) return APValue(static_cast(nullptr), CharUnits::Zero(), Path, /*IsOnePastEnd=*/false, /*IsNullPtr=*/true); + if (isIntegralPointer()) + return APValue(static_cast(nullptr), + CharUnits::fromQuantity(asIntPointer().Value + this->Offset), + Path, + /*IsOnePastEnd=*/false, /*IsNullPtr=*/false); // Build the lvalue base from the block. const Descriptor *Desc = getDeclDesc(); @@ -137,19 +177,52 @@ APValue Pointer::toAPValue() const { return APValue(Base, Offset, Path, IsOnePastEnd, /*IsNullPtr=*/false); } +void Pointer::print(llvm::raw_ostream &OS) const { + OS << PointeeStorage.BS.Pointee << " ("; + if (isBlockPointer()) { + OS << "Block) {"; + + if (PointeeStorage.BS.Base == RootPtrMark) + OS << "rootptr, "; + else + OS << PointeeStorage.BS.Base << ", "; + + if (Offset == PastEndMark) + OS << "pastend, "; + else + OS << Offset << ", "; + + if (isBlockPointer() && PointeeStorage.BS.Pointee) + OS << PointeeStorage.BS.Pointee->getSize(); + else + OS << "nullptr"; + } else { + OS << "Int) {"; + OS << PointeeStorage.Int.Value << ", " << PointeeStorage.Int.Desc; + } + OS << "}"; +} + std::string Pointer::toDiagnosticString(const ASTContext &Ctx) const { - if (!Pointee) + if (isZero()) return "nullptr"; + if (isIntegralPointer()) + return (Twine("&(") + Twine(asIntPointer().Value + Offset) + ")").str(); + return toAPValue().getAsString(Ctx, getType()); } bool Pointer::isInitialized() const { - assert(Pointee && "Cannot check if null pointer was initialized"); + if (isIntegralPointer()) + return true; + + assert(PointeeStorage.BS.Pointee && + "Cannot check if null pointer was initialized"); const Descriptor *Desc = getFieldDesc(); assert(Desc); if (Desc->isPrimitiveArray()) { - if (isStatic() && Base == 0) + if (isStatic() && PointeeStorage.BS.Base == 0) return true; InitMapPtr &IM = getInitMap(); @@ -164,17 +237,24 @@ bool Pointer::isInitialized() const { } // Field has its bit in an inline descriptor. - return Base == 0 || getInlineDesc()->IsInitialized; + return PointeeStorage.BS.Base == 0 || getInlineDesc()->IsInitialized; } void Pointer::initialize() const { - assert(Pointee && "Cannot initialize null pointer"); + if (isIntegralPointer()) + return; + + assert(PointeeStorage.BS.Pointee && "Cannot initialize null pointer"); const Descriptor *Desc = getFieldDesc(); assert(Desc); if (Desc->isPrimitiveArray()) { // Primitive global arrays don't have an initmap. - if (isStatic() && Base == 0) + if (isStatic() && PointeeStorage.BS.Base == 0) + return; + + // Nothing to do for these. + if (Desc->getNumElems() == 0) return; InitMapPtr &IM = getInitMap(); @@ -196,13 +276,15 @@ void Pointer::initialize() const { } // Field has its bit in an inline descriptor. - assert(Base != 0 && "Only composite fields can be initialised"); + assert(PointeeStorage.BS.Base != 0 && + "Only composite fields can be initialised"); getInlineDesc()->IsInitialized = true; } void Pointer::activate() const { // Field has its bit in an inline descriptor. 
- assert(Base != 0 && "Only composite fields can be initialised"); + assert(PointeeStorage.BS.Base != 0 && + "Only composite fields can be initialised"); getInlineDesc()->IsActive = true; } @@ -211,11 +293,23 @@ void Pointer::deactivate() const { } bool Pointer::hasSameBase(const Pointer &A, const Pointer &B) { - return A.Pointee == B.Pointee; + // Two null pointers always have the same base. + if (A.isZero() && B.isZero()) + return true; + + if (A.isIntegralPointer() && B.isIntegralPointer()) + return true; + + if (A.isIntegralPointer() || B.isIntegralPointer()) + return A.getSource() == B.getSource(); + + return A.asBlockPointer().Pointee == B.asBlockPointer().Pointee; } bool Pointer::hasSameArray(const Pointer &A, const Pointer &B) { - return hasSameBase(A, B) && A.Base == B.Base && A.getFieldDesc()->IsArray; + return hasSameBase(A, B) && + A.PointeeStorage.BS.Base == B.PointeeStorage.BS.Base && + A.getFieldDesc()->IsArray; } std::optional Pointer::toRValue(const Context &Ctx) const { @@ -338,6 +432,25 @@ std::optional Pointer::toRValue(const Context &Ctx) const { return false; } + // Vector types. + if (const auto *VT = Ty->getAs()) { + assert(Ptr.getFieldDesc()->isPrimitiveArray()); + QualType ElemTy = VT->getElementType(); + PrimType ElemT = *Ctx.classify(ElemTy); + + SmallVector Values; + Values.reserve(VT->getNumElements()); + for (unsigned I = 0; I != VT->getNumElements(); ++I) { + TYPE_SWITCH(ElemT, { + Values.push_back(Ptr.atIndex(I).deref().toAPValue()); + }); + } + + assert(Values.size() == VT->getNumElements()); + R = APValue(Values.data(), Values.size()); + return true; + } + llvm_unreachable("invalid value to return"); }; diff --git a/clang/lib/AST/Interp/Pointer.h b/clang/lib/AST/Interp/Pointer.h index fffb4aba492fc..fcd00aac62f93 100644 --- a/clang/lib/AST/Interp/Pointer.h +++ b/clang/lib/AST/Interp/Pointer.h @@ -28,11 +28,26 @@ class Block; class DeadBlock; class Pointer; class Context; +template class Integral; enum PrimType : unsigned; class Pointer; inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Pointer &P); +struct BlockPointer { + /// The block the pointer is pointing to. + Block *Pointee; + /// Start of the current subfield. + unsigned Base; +}; + +struct IntPointer { + const Descriptor *Desc; + uint64_t Value; +}; + +enum class Storage { Block, Int }; + /// A pointer to a memory block, live or dead. /// /// This object can be allocated into interpreter stack frames. If pointing to @@ -68,11 +83,20 @@ class Pointer { static constexpr unsigned RootPtrMark = ~0u; public: - Pointer() {} + Pointer() { + StorageKind = Storage::Int; + PointeeStorage.Int.Value = 0; + PointeeStorage.Int.Desc = nullptr; + } Pointer(Block *B); Pointer(Block *B, unsigned BaseAndOffset); Pointer(const Pointer &P); Pointer(Pointer &&P); + Pointer(uint64_t Address, const Descriptor *Desc, unsigned Offset = 0) + : Offset(Offset), StorageKind(Storage::Int) { + PointeeStorage.Int.Value = Address; + PointeeStorage.Int.Desc = Desc; + } ~Pointer(); void operator=(const Pointer &P); @@ -80,21 +104,30 @@ class Pointer { /// Equality operators are just for tests. 
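[Illustrative sketch, not part of the patch: with vector types classified as primitive arrays elsewhere in this patch and converted back to an APValue here, initializers like these become evaluable; trailing elements are zero-filled by the InitListExpr visitor earlier in the patch:]

    typedef int v4si __attribute__((vector_size(16)));
    constexpr v4si a = {1, 2, 3, 4};
    constexpr v4si b = {1, 2}; // b[2] and b[3] are zero-initialized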
bool operator==(const Pointer &P) const { - return Pointee == P.Pointee && Base == P.Base && Offset == P.Offset; - } + if (P.StorageKind != StorageKind) + return false; + if (isIntegralPointer()) + return P.asIntPointer().Value == asIntPointer().Value && + Offset == P.Offset; - bool operator!=(const Pointer &P) const { - return Pointee != P.Pointee || Base != P.Base || Offset != P.Offset; + assert(isBlockPointer()); + return P.asBlockPointer().Pointee == asBlockPointer().Pointee && + P.asBlockPointer().Base == asBlockPointer().Base && + Offset == P.Offset; } + bool operator!=(const Pointer &P) const { return !(P == *this); } + /// Converts the pointer to an APValue. APValue toAPValue() const; /// Converts the pointer to a string usable in diagnostics. std::string toDiagnosticString(const ASTContext &Ctx) const; - unsigned getIntegerRepresentation() const { - return reinterpret_cast(Pointee) + Offset; + uint64_t getIntegerRepresentation() const { + if (isIntegralPointer()) + return asIntPointer().Value + (Offset * elemSize()); + return reinterpret_cast(asBlockPointer().Pointee) + Offset; } /// Converts the pointer to an APValue that is an rvalue. @@ -102,20 +135,27 @@ class Pointer { /// Offsets a pointer inside an array. [[nodiscard]] Pointer atIndex(unsigned Idx) const { - if (Base == RootPtrMark) - return Pointer(Pointee, RootPtrMark, getDeclDesc()->getSize()); + if (isIntegralPointer()) + return Pointer(asIntPointer().Value, asIntPointer().Desc, Idx); + + if (asBlockPointer().Base == RootPtrMark) + return Pointer(asBlockPointer().Pointee, RootPtrMark, + getDeclDesc()->getSize()); unsigned Off = Idx * elemSize(); if (getFieldDesc()->ElemDesc) Off += sizeof(InlineDescriptor); else Off += sizeof(InitMapPtr); - return Pointer(Pointee, Base, Base + Off); + return Pointer(asBlockPointer().Pointee, asBlockPointer().Base, + asBlockPointer().Base + Off); } /// Creates a pointer to a field. [[nodiscard]] Pointer atField(unsigned Off) const { unsigned Field = Offset + Off; - return Pointer(Pointee, Field, Field); + if (isIntegralPointer()) + return Pointer(asIntPointer().Value + Field, asIntPointer().Desc); + return Pointer(asBlockPointer().Pointee, Field, Field); } /// Subtract the given offset from the current Base and Offset @@ -123,44 +163,49 @@ class Pointer { [[nodiscard]] Pointer atFieldSub(unsigned Off) const { assert(Offset >= Off); unsigned O = Offset - Off; - return Pointer(Pointee, O, O); + return Pointer(asBlockPointer().Pointee, O, O); } /// Restricts the scope of an array element pointer. [[nodiscard]] Pointer narrow() const { + if (!isBlockPointer()) + return *this; + assert(isBlockPointer()); // Null pointers cannot be narrowed. if (isZero() || isUnknownSizeArray()) return *this; // Pointer to an array of base types - enter block. - if (Base == RootPtrMark) - return Pointer(Pointee, sizeof(InlineDescriptor), + if (asBlockPointer().Base == RootPtrMark) + return Pointer(asBlockPointer().Pointee, sizeof(InlineDescriptor), Offset == 0 ? Offset : PastEndMark); // Pointer is one past end - magic offset marks that. if (isOnePastEnd()) - return Pointer(Pointee, Base, PastEndMark); + return Pointer(asBlockPointer().Pointee, asBlockPointer().Base, + PastEndMark); // Primitive arrays are a bit special since they do not have inline // descriptors. If Offset != Base, then the pointer already points to // an element and there is nothing to do. Otherwise, the pointer is // adjusted to the first element of the array. 
if (inPrimitiveArray()) { - if (Offset != Base) + if (Offset != asBlockPointer().Base) return *this; - return Pointer(Pointee, Base, Offset + sizeof(InitMapPtr)); + return Pointer(asBlockPointer().Pointee, asBlockPointer().Base, + Offset + sizeof(InitMapPtr)); } // Pointer is to a field or array element - enter it. - if (Offset != Base) - return Pointer(Pointee, Offset, Offset); + if (Offset != asBlockPointer().Base) + return Pointer(asBlockPointer().Pointee, Offset, Offset); // Enter the first element of an array. if (!getFieldDesc()->isArray()) return *this; - const unsigned NewBase = Base + sizeof(InlineDescriptor); - return Pointer(Pointee, NewBase, NewBase); + const unsigned NewBase = asBlockPointer().Base + sizeof(InlineDescriptor); + return Pointer(asBlockPointer().Pointee, NewBase, NewBase); } /// Expands a pointer to the containing array, undoing narrowing. @@ -172,72 +217,109 @@ class Pointer { Adjust = sizeof(InitMapPtr); else Adjust = sizeof(InlineDescriptor); - return Pointer(Pointee, Base, Base + getSize() + Adjust); + return Pointer(asBlockPointer().Pointee, asBlockPointer().Base, + asBlockPointer().Base + getSize() + Adjust); } // Do not step out of array elements. - if (Base != Offset) + if (asBlockPointer().Base != Offset) return *this; // If at base, point to an array of base types. - if (Base == 0 || Base == sizeof(InlineDescriptor)) - return Pointer(Pointee, RootPtrMark, 0); + if (asBlockPointer().Base == 0 || + asBlockPointer().Base == sizeof(InlineDescriptor)) + return Pointer(asBlockPointer().Pointee, RootPtrMark, 0); // Step into the containing array, if inside one. - unsigned Next = Base - getInlineDesc()->Offset; + unsigned Next = asBlockPointer().Base - getInlineDesc()->Offset; const Descriptor *Desc = Next == 0 ? getDeclDesc() : getDescriptor(Next)->Desc; if (!Desc->IsArray) return *this; - return Pointer(Pointee, Next, Offset); + return Pointer(asBlockPointer().Pointee, Next, Offset); } /// Checks if the pointer is null. - bool isZero() const { return Pointee == nullptr; } + bool isZero() const { + if (Offset != 0) + return false; + + if (isBlockPointer()) + return asBlockPointer().Pointee == nullptr; + assert(isIntegralPointer()); + return asIntPointer().Value == 0; + } /// Checks if the pointer is live. - bool isLive() const { return Pointee && !Pointee->IsDead; } + bool isLive() const { + if (isIntegralPointer()) + return true; + return asBlockPointer().Pointee && !asBlockPointer().Pointee->IsDead; + } /// Checks if the item is a field in an object. bool isField() const { + if (isIntegralPointer()) + return false; + + unsigned Base = asBlockPointer().Base; return Base != 0 && Base != sizeof(InlineDescriptor) && Base != RootPtrMark && getFieldDesc()->asDecl(); } /// Accessor for information about the declaration site. const Descriptor *getDeclDesc() const { - assert(Pointee); - return Pointee->Desc; + if (isIntegralPointer()) + return asIntPointer().Desc; + + assert(isBlockPointer()); + assert(asBlockPointer().Pointee); + return asBlockPointer().Pointee->Desc; } SourceLocation getDeclLoc() const { return getDeclDesc()->getLocation(); } + /// Returns the expression or declaration the pointer has been created for. + DeclTy getSource() const { + if (isBlockPointer()) + return getDeclDesc()->getSource(); + + assert(isIntegralPointer()); + return asIntPointer().Desc ? asIntPointer().Desc->getSource() : DeclTy(); + } + /// Returns a pointer to the object of which this pointer is a field. 
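[Illustrative sketch, not part of the patch: the Base/Offset pair that narrow() and expand() manipulate distinguishes a pointer to an array object from a pointer to its first element, roughly:]

    constexpr int a[3] = {1, 2, 3};
    constexpr const int (*whole)[3] = &a; // array root (Offset == Base)
    constexpr const int *first = a;       // narrowed to element 0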
[[nodiscard]] Pointer getBase() const { - if (Base == RootPtrMark) { + if (asBlockPointer().Base == RootPtrMark) { assert(Offset == PastEndMark && "cannot get base of a block"); - return Pointer(Pointee, Base, 0); + return Pointer(asBlockPointer().Pointee, asBlockPointer().Base, 0); } - unsigned NewBase = Base - getInlineDesc()->Offset; - return Pointer(Pointee, NewBase, NewBase); + unsigned NewBase = asBlockPointer().Base - getInlineDesc()->Offset; + return Pointer(asBlockPointer().Pointee, NewBase, NewBase); } /// Returns the parent array. [[nodiscard]] Pointer getArray() const { - if (Base == RootPtrMark) { + if (asBlockPointer().Base == RootPtrMark) { assert(Offset != 0 && Offset != PastEndMark && "not an array element"); - return Pointer(Pointee, Base, 0); + return Pointer(asBlockPointer().Pointee, asBlockPointer().Base, 0); } - assert(Offset != Base && "not an array element"); - return Pointer(Pointee, Base, Base); + assert(Offset != asBlockPointer().Base && "not an array element"); + return Pointer(asBlockPointer().Pointee, asBlockPointer().Base, + asBlockPointer().Base); } /// Accessors for information about the innermost field. const Descriptor *getFieldDesc() const { - if (Base == 0 || Base == sizeof(InlineDescriptor) || Base == RootPtrMark) + if (isIntegralPointer()) + return asIntPointer().Desc; + if (isBlockPointer() && + (asBlockPointer().Base == 0 || + asBlockPointer().Base == sizeof(InlineDescriptor) || + asBlockPointer().Base == RootPtrMark)) return getDeclDesc(); return getInlineDesc()->Desc; } /// Returns the type of the innermost field. QualType getType() const { - if (inPrimitiveArray() && Offset != Base) { + if (inPrimitiveArray() && Offset != asBlockPointer().Base) { // Unfortunately, complex types are not array types in clang, but they are // for us. if (const auto *AT = getFieldDesc()->getType()->getAsArrayTypeUnsafe()) @@ -248,58 +330,104 @@ class Pointer { return getFieldDesc()->getType(); } - [[nodiscard]] Pointer getDeclPtr() const { return Pointer(Pointee); } + [[nodiscard]] Pointer getDeclPtr() const { + return Pointer(asBlockPointer().Pointee); + } /// Returns the element size of the innermost field. size_t elemSize() const { - if (Base == RootPtrMark) + if (isIntegralPointer()) { + if (!asIntPointer().Desc) + return 1; + return asIntPointer().Desc->getElemSize(); + } + + if (asBlockPointer().Base == RootPtrMark) return getDeclDesc()->getSize(); return getFieldDesc()->getElemSize(); } /// Returns the total size of the innermost field. - size_t getSize() const { return getFieldDesc()->getSize(); } + size_t getSize() const { + assert(isBlockPointer()); + return getFieldDesc()->getSize(); + } /// Returns the offset into an array. unsigned getOffset() const { assert(Offset != PastEndMark && "invalid offset"); - if (Base == RootPtrMark) + if (asBlockPointer().Base == RootPtrMark) return Offset; unsigned Adjust = 0; - if (Offset != Base) { + if (Offset != asBlockPointer().Base) { if (getFieldDesc()->ElemDesc) Adjust = sizeof(InlineDescriptor); else Adjust = sizeof(InitMapPtr); } - return Offset - Base - Adjust; + return Offset - asBlockPointer().Base - Adjust; } /// Whether this array refers to an array, but not /// to the first element. - bool isArrayRoot() const { return inArray() && Offset == Base; } + bool isArrayRoot() const { + return inArray() && Offset == asBlockPointer().Base; + } /// Checks if the innermost field is an array. 
- bool inArray() const { return getFieldDesc()->IsArray; } + bool inArray() const { + if (isBlockPointer()) + return getFieldDesc()->IsArray; + return false; + } /// Checks if the structure is a primitive array. - bool inPrimitiveArray() const { return getFieldDesc()->isPrimitiveArray(); } + bool inPrimitiveArray() const { + if (isBlockPointer()) + return getFieldDesc()->isPrimitiveArray(); + return false; + } /// Checks if the structure is an array of unknown size. bool isUnknownSizeArray() const { + if (!isBlockPointer()) + return false; // If this points inside a dummy block, return true. // FIXME: This might change in the future. If it does, we need // to set the proper Ctor/Dtor functions for dummy Descriptors. - if (Base != 0 && Base != sizeof(InlineDescriptor) && isDummy()) + if (asBlockPointer().Base != 0 && + asBlockPointer().Base != sizeof(InlineDescriptor) && isDummy()) return true; return getFieldDesc()->isUnknownSizeArray(); } /// Checks if the pointer points to an array. - bool isArrayElement() const { return inArray() && Base != Offset; } + bool isArrayElement() const { + if (isBlockPointer()) + return inArray() && asBlockPointer().Base != Offset; + return false; + } /// Pointer points directly to a block. bool isRoot() const { - return (Base == 0 || Base == RootPtrMark) && Offset == 0; + return (asBlockPointer().Base == 0 || + asBlockPointer().Base == RootPtrMark) && + Offset == 0; } /// If this pointer has an InlineDescriptor we can use to initialize. - bool canBeInitialized() const { return Pointee && Base > 0; } + bool canBeInitialized() const { + if (!isBlockPointer()) + return false; + + return asBlockPointer().Pointee && asBlockPointer().Base > 0; + } + + [[nodiscard]] const BlockPointer &asBlockPointer() const { + assert(isBlockPointer()); + return PointeeStorage.BS; + } + [[nodiscard]] const IntPointer &asIntPointer() const { + assert(isIntegralPointer()); + return PointeeStorage.Int; + } + bool isBlockPointer() const { return StorageKind == Storage::Block; } + bool isIntegralPointer() const { return StorageKind == Storage::Int; } /// Returns the record descriptor of a class. const Record *getRecord() const { return getFieldDesc()->ElemRecord; } @@ -315,71 +443,119 @@ class Pointer { bool isUnion() const; /// Checks if the storage is extern. - bool isExtern() const { return Pointee && Pointee->isExtern(); } + bool isExtern() const { + if (isBlockPointer()) + return asBlockPointer().Pointee && asBlockPointer().Pointee->isExtern(); + return false; + } /// Checks if the storage is static. bool isStatic() const { - assert(Pointee); - return Pointee->isStatic(); + if (isIntegralPointer()) + return true; + assert(asBlockPointer().Pointee); + return asBlockPointer().Pointee->isStatic(); } /// Checks if the storage is temporary. bool isTemporary() const { - assert(Pointee); - return Pointee->isTemporary(); + if (isBlockPointer()) { + assert(asBlockPointer().Pointee); + return asBlockPointer().Pointee->isTemporary(); + } + return false; } /// Checks if the storage is a static temporary. bool isStaticTemporary() const { return isStatic() && isTemporary(); } /// Checks if the field is mutable. 
bool isMutable() const { - return Base != 0 && Base != sizeof(InlineDescriptor) && + if (!isBlockPointer()) + return false; + return asBlockPointer().Base != 0 && + asBlockPointer().Base != sizeof(InlineDescriptor) && getInlineDesc()->IsFieldMutable; } + + bool isWeak() const { + if (isIntegralPointer()) + return false; + + assert(isBlockPointer()); + if (const ValueDecl *VD = getDeclDesc()->asValueDecl()) + return VD->isWeak(); + return false; + } /// Checks if an object was initialized. bool isInitialized() const; /// Checks if the object is active. bool isActive() const { - return Base == 0 || Base == sizeof(InlineDescriptor) || + if (!isBlockPointer()) + return true; + return asBlockPointer().Base == 0 || + asBlockPointer().Base == sizeof(InlineDescriptor) || getInlineDesc()->IsActive; } /// Checks if a structure is a base class. bool isBaseClass() const { return isField() && getInlineDesc()->IsBase; } /// Checks if the pointer points to a dummy value. bool isDummy() const { - if (!Pointee) + if (!isBlockPointer()) return false; + + if (!asBlockPointer().Pointee) + return false; + return getDeclDesc()->isDummy(); } /// Checks if an object or a subfield is mutable. bool isConst() const { - return (Base == 0 || Base == sizeof(InlineDescriptor)) + if (isIntegralPointer()) + return true; + return (asBlockPointer().Base == 0 || + asBlockPointer().Base == sizeof(InlineDescriptor)) ? getDeclDesc()->IsConst : getInlineDesc()->IsConst; } /// Returns the declaration ID. std::optional getDeclID() const { - assert(Pointee); - return Pointee->getDeclID(); + if (isBlockPointer()) { + assert(asBlockPointer().Pointee); + return asBlockPointer().Pointee->getDeclID(); + } + return std::nullopt; } /// Returns the byte offset from the start. unsigned getByteOffset() const { + if (isIntegralPointer()) + return asIntPointer().Value + Offset; return Offset; } /// Returns the number of elements. - unsigned getNumElems() const { return getSize() / elemSize(); } + unsigned getNumElems() const { + if (isIntegralPointer()) + return ~unsigned(0); + return getSize() / elemSize(); + } - const Block *block() const { return Pointee; } + const Block *block() const { return asBlockPointer().Pointee; } /// Returns the index into an array. int64_t getIndex() const { + if (!isBlockPointer()) + return 0; + + if (isZero()) + return 0; + if (isElementPastEnd()) return 1; // narrow()ed element in a composite array. - if (Base > sizeof(InlineDescriptor) && Base == Offset) + if (asBlockPointer().Base > sizeof(InlineDescriptor) && + asBlockPointer().Base == Offset) return 0; if (auto ElemSize = elemSize()) @@ -389,7 +565,10 @@ class Pointer { /// Checks if the index is one past end. bool isOnePastEnd() const { - if (!Pointee) + if (isIntegralPointer()) + return false; + + if (!asBlockPointer().Pointee) return false; return isElementPastEnd() || getSize() == getOffset(); } @@ -400,20 +579,25 @@ class Pointer { /// Dereferences the pointer, if it's live. 
template T &deref() const { assert(isLive() && "Invalid pointer"); - assert(Pointee); + assert(isBlockPointer()); + assert(asBlockPointer().Pointee); + assert(Offset + sizeof(T) <= + asBlockPointer().Pointee->getDescriptor()->getAllocSize()); + if (isArrayRoot()) - return *reinterpret_cast(Pointee->rawData() + Base + - sizeof(InitMapPtr)); + return *reinterpret_cast(asBlockPointer().Pointee->rawData() + + asBlockPointer().Base + sizeof(InitMapPtr)); - assert(Offset + sizeof(T) <= Pointee->getDescriptor()->getAllocSize()); - return *reinterpret_cast(Pointee->rawData() + Offset); + return *reinterpret_cast(asBlockPointer().Pointee->rawData() + Offset); } /// Dereferences a primitive element. template T &elem(unsigned I) const { assert(I < getNumElems()); - assert(Pointee); - return reinterpret_cast(Pointee->data() + sizeof(InitMapPtr))[I]; + assert(isBlockPointer()); + assert(asBlockPointer().Pointee); + return reinterpret_cast(asBlockPointer().Pointee->data() + + sizeof(InitMapPtr))[I]; } /// Initializes a field. @@ -442,24 +626,7 @@ class Pointer { static bool hasSameArray(const Pointer &A, const Pointer &B); /// Prints the pointer. - void print(llvm::raw_ostream &OS) const { - OS << Pointee << " {"; - if (Base == RootPtrMark) - OS << "rootptr, "; - else - OS << Base << ", "; - - if (Offset == PastEndMark) - OS << "pastend, "; - else - OS << Offset << ", "; - - if (Pointee) - OS << Pointee->getSize(); - else - OS << "nullptr"; - OS << "}"; - } + void print(llvm::raw_ostream &OS) const; private: friend class Block; @@ -469,33 +636,41 @@ class Pointer { Pointer(Block *Pointee, unsigned Base, unsigned Offset); /// Returns the embedded descriptor preceding a field. - InlineDescriptor *getInlineDesc() const { return getDescriptor(Base); } + InlineDescriptor *getInlineDesc() const { + return getDescriptor(asBlockPointer().Base); + } /// Returns a descriptor at a given offset. InlineDescriptor *getDescriptor(unsigned Offset) const { assert(Offset != 0 && "Not a nested pointer"); - assert(Pointee); - return reinterpret_cast(Pointee->rawData() + Offset) - + assert(isBlockPointer()); + assert(!isZero()); + return reinterpret_cast( + asBlockPointer().Pointee->rawData() + Offset) - 1; } /// Returns a reference to the InitMapPtr which stores the initialization map. InitMapPtr &getInitMap() const { - assert(Pointee); - return *reinterpret_cast(Pointee->rawData() + Base); + assert(isBlockPointer()); + assert(!isZero()); + return *reinterpret_cast(asBlockPointer().Pointee->rawData() + + asBlockPointer().Base); } - /// The block the pointer is pointing to. - Block *Pointee = nullptr; - /// Start of the current subfield. - unsigned Base = 0; - /// Offset into the block. + /// Offset into the storage. unsigned Offset = 0; /// Previous link in the pointer chain. Pointer *Prev = nullptr; /// Next link in the pointer chain. 
Pointer *Next = nullptr; + + union { + BlockPointer BS; + IntPointer Int; + } PointeeStorage; + Storage StorageKind = Storage::Int; }; inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Pointer &P) { diff --git a/clang/lib/AST/Interp/PrimType.h b/clang/lib/AST/Interp/PrimType.h index 2bc83b334643e..05a094d0c5b1f 100644 --- a/clang/lib/AST/Interp/PrimType.h +++ b/clang/lib/AST/Interp/PrimType.h @@ -46,6 +46,10 @@ enum PrimType : unsigned { PT_FnPtr, }; +inline constexpr bool isPtrType(PrimType T) { + return T == PT_Ptr || T == PT_FnPtr; +} + enum class CastKind : uint8_t { Reinterpret, Atomic, diff --git a/clang/lib/AST/Interp/Program.cpp b/clang/lib/AST/Interp/Program.cpp index 25e938e015032..82367164743fc 100644 --- a/clang/lib/AST/Interp/Program.cpp +++ b/clang/lib/AST/Interp/Program.cpp @@ -411,5 +411,12 @@ Descriptor *Program::createDescriptor(const DeclTy &D, const Type *Ty, IsMutable); } + // Same with vector types. + if (const auto *VT = Ty->getAs()) { + PrimType ElemTy = *Ctx.classify(VT->getElementType()); + return allocateDescriptor(D, ElemTy, MDSize, VT->getNumElements(), IsConst, + IsTemporary, IsMutable); + } + return nullptr; } diff --git a/clang/lib/AST/Interp/Record.cpp b/clang/lib/AST/Interp/Record.cpp index 909416e6e1a1a..6a0a28bc9124b 100644 --- a/clang/lib/AST/Interp/Record.cpp +++ b/clang/lib/AST/Interp/Record.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "Record.h" +#include "clang/AST/ASTContext.h" using namespace clang; using namespace clang::interp; @@ -27,6 +28,14 @@ Record::Record(const RecordDecl *Decl, BaseList &&SrcBases, VirtualBaseMap[V.Decl] = &V; } +const std::string Record::getName() const { + std::string Ret; + llvm::raw_string_ostream OS(Ret); + Decl->getNameForDiagnostic(OS, Decl->getASTContext().getPrintingPolicy(), + /*Qualified=*/true); + return Ret; +} + const Record::Field *Record::getField(const FieldDecl *FD) const { auto It = FieldMap.find(FD); assert(It != FieldMap.end() && "Missing field"); diff --git a/clang/lib/AST/Interp/Record.h b/clang/lib/AST/Interp/Record.h index a6bde01062531..cf0480b3f62fa 100644 --- a/clang/lib/AST/Interp/Record.h +++ b/clang/lib/AST/Interp/Record.h @@ -51,7 +51,7 @@ class Record final { /// Returns the underlying declaration. const RecordDecl *getDecl() const { return Decl; } /// Returns the name of the underlying declaration. - const std::string getName() const { return Decl->getNameAsString(); } + const std::string getName() const; /// Checks if the record is a union. bool isUnion() const { return getDecl()->isUnion(); } /// Returns the size of the record. @@ -100,6 +100,10 @@ class Record final { unsigned getNumVirtualBases() const { return VirtualBases.size(); } const Base *getVirtualBase(unsigned I) const { return &VirtualBases[I]; } + void dump(llvm::raw_ostream &OS, unsigned Indentation = 0, + unsigned Offset = 0) const; + void dump() const { dump(llvm::errs()); } + private: /// Constructor used by Program to create record descriptors. 
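[Illustrative sketch, not part of the patch: the Record::getName() change swaps getNameAsString() for getNameForDiagnostic() with qualification, so the new Record::dump() prints names the way diagnostics do. For a hypothetical record:]

    namespace N { template <typename T> struct S {}; }
    // before: "S"; after: "N::S<int>"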
Record(const RecordDecl *, BaseList &&Bases, FieldList &&Fields, diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp index 32c7cde200c69..c4f014d9c7517 100644 --- a/clang/lib/AST/JSONNodeDumper.cpp +++ b/clang/lib/AST/JSONNodeDumper.cpp @@ -187,6 +187,8 @@ void JSONNodeDumper::Visit(const CXXCtorInitializer *Init) { llvm_unreachable("Unknown initializer type"); } +void JSONNodeDumper::Visit(const OpenACCClause *C) {} + void JSONNodeDumper::Visit(const OMPClause *C) {} void JSONNodeDumper::Visit(const BlockDecl::Capture &C) { diff --git a/clang/lib/AST/OpenACCClause.cpp b/clang/lib/AST/OpenACCClause.cpp new file mode 100644 index 0000000000000..e1db872f25c32 --- /dev/null +++ b/clang/lib/AST/OpenACCClause.cpp @@ -0,0 +1,17 @@ +//===---- OpenACCClause.cpp - Classes for OpenACC Clauses ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the subclasses of the OpenACCClause class declared in +// OpenACCClause.h +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/OpenACCClause.h" +#include "clang/AST/ASTContext.h" + +using namespace clang; diff --git a/clang/lib/AST/StmtOpenACC.cpp b/clang/lib/AST/StmtOpenACC.cpp index e6191bc6db708..a381a8dd7b62c 100644 --- a/clang/lib/AST/StmtOpenACC.cpp +++ b/clang/lib/AST/StmtOpenACC.cpp @@ -15,20 +15,23 @@ using namespace clang; OpenACCComputeConstruct * -OpenACCComputeConstruct::CreateEmpty(const ASTContext &C, EmptyShell) { - void *Mem = C.Allocate(sizeof(OpenACCComputeConstruct), - alignof(OpenACCComputeConstruct)); - auto *Inst = new (Mem) OpenACCComputeConstruct; +OpenACCComputeConstruct::CreateEmpty(const ASTContext &C, unsigned NumClauses) { + void *Mem = C.Allocate( + OpenACCComputeConstruct::totalSizeToAlloc( + NumClauses)); + auto *Inst = new (Mem) OpenACCComputeConstruct(NumClauses); return Inst; } OpenACCComputeConstruct * OpenACCComputeConstruct::Create(const ASTContext &C, OpenACCDirectiveKind K, SourceLocation BeginLoc, SourceLocation EndLoc, + ArrayRef Clauses, Stmt *StructuredBlock) { - void *Mem = C.Allocate(sizeof(OpenACCComputeConstruct), - alignof(OpenACCComputeConstruct)); - auto *Inst = - new (Mem) OpenACCComputeConstruct(K, BeginLoc, EndLoc, StructuredBlock); + void *Mem = C.Allocate( + OpenACCComputeConstruct::totalSizeToAlloc( + Clauses.size())); + auto *Inst = new (Mem) + OpenACCComputeConstruct(K, BeginLoc, EndLoc, Clauses, StructuredBlock); return Inst; } diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 08db1878d2845..7bd7f1fb1f4ff 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -292,8 +292,11 @@ void StmtPrinter::VisitLabelStmt(LabelStmt *Node) { } void StmtPrinter::VisitAttributedStmt(AttributedStmt *Node) { - for (const auto *Attr : Node->getAttrs()) { + llvm::ArrayRef Attrs = Node->getAttrs(); + for (const auto *Attr : Attrs) { Attr->printPretty(OS, Policy); + if (Attr != Attrs.back()) + OS << ' '; } PrintStmt(Node->getSubStmt(), 0); @@ -1142,7 +1145,13 @@ void StmtPrinter::VisitOMPTargetParallelGenericLoopDirective( //===----------------------------------------------------------------------===// void StmtPrinter::VisitOpenACCComputeConstruct(OpenACCComputeConstruct *S) { Indent() << 
"#pragma acc " << S->getDirectiveKind(); - // TODO OpenACC: Print Clauses. + + if (!S->clauses().empty()) { + OS << ' '; + OpenACCClausePrinter Printer(OS); + Printer.VisitClauseList(S->clauses()); + } + PrintStmt(S->getStructuredBlock()); } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 1c08616997c44..557626f28e94e 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2038,6 +2038,7 @@ void StmtProfiler::VisitMSPropertySubscriptExpr( void StmtProfiler::VisitCXXThisExpr(const CXXThisExpr *S) { VisitExpr(S); ID.AddBoolean(S->isImplicit()); + ID.AddBoolean(S->isCapturedByCopyInLambdaWithExplicitObjectParameter()); } void StmtProfiler::VisitCXXThrowExpr(const CXXThrowExpr *S) { @@ -2468,11 +2469,30 @@ void StmtProfiler::VisitTemplateArgument(const TemplateArgument &Arg) { } } +namespace { +class OpenACCClauseProfiler + : public OpenACCClauseVisitor { + +public: + OpenACCClauseProfiler() = default; + + void VisitOpenACCClauseList(ArrayRef Clauses) { + for (const OpenACCClause *Clause : Clauses) { + // TODO OpenACC: When we have clauses with expressions, we should + // profile them too. + Visit(Clause); + } + } +}; +} // namespace + void StmtProfiler::VisitOpenACCComputeConstruct( const OpenACCComputeConstruct *S) { // VisitStmt handles children, so the AssociatedStmt is handled. VisitStmt(S); - // TODO OpenACC: Visit Clauses. + + OpenACCClauseProfiler P; + P.VisitOpenACCClauseList(S->clauses()); } void Stmt::Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context, diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index d09cb6d41af2e..2a1767db66f50 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -381,6 +381,20 @@ void TextNodeDumper::Visit(const OMPClause *C) { OS << " "; } +void TextNodeDumper::Visit(const OpenACCClause *C) { + if (!C) { + ColorScope Color(OS, ShowColors, NullColor); + OS << "<<>> OpenACCClause"; + return; + } + { + ColorScope Color(OS, ShowColors, AttrColor); + OS << C->getClauseKind(); + } + dumpPointer(C); + dumpSourceRange(SourceRange(C->getBeginLoc(), C->getEndLoc())); +} + void TextNodeDumper::Visit(const GenericSelectionExpr::ConstAssociation &A) { const TypeSourceInfo *TSI = A.getTypeSourceInfo(); if (TSI) { @@ -1180,8 +1194,11 @@ void TextNodeDumper::VisitDeclRefExpr(const DeclRefExpr *Node) { case NOUR_Constant: OS << " non_odr_use_constant"; break; case NOUR_Discarded: OS << " non_odr_use_discarded"; break; } - if (Node->refersToEnclosingVariableOrCapture()) + if (Node->isCapturedByCopyInLambdaWithExplicitObjectParameter()) + OS << " dependent_capture"; + else if (Node->refersToEnclosingVariableOrCapture()) OS << " refers_to_enclosing_variable_or_capture"; + if (Node->isImmediateEscalating()) OS << " immediate-escalating"; } @@ -1340,6 +1357,8 @@ void TextNodeDumper::VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *Node) { void TextNodeDumper::VisitCXXThisExpr(const CXXThisExpr *Node) { if (Node->isImplicit()) OS << " implicit"; + if (Node->isCapturedByCopyInLambdaWithExplicitObjectParameter()) + OS << " dependent_capture"; OS << " this"; } @@ -2687,5 +2706,4 @@ void TextNodeDumper::VisitHLSLBufferDecl(const HLSLBufferDecl *D) { void TextNodeDumper::VisitOpenACCConstructStmt(const OpenACCConstructStmt *S) { OS << " " << S->getDirectiveKind(); - // TODO OpenACC: Dump clauses as well. 
}

diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 70ac0764476f6..1bfa7ebcfd50c 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -850,6 +850,7 @@ void Environment::setValue(const Expr &E, Value &Val) {
   if (auto *RecordVal = dyn_cast<RecordValue>(&Val)) {
     assert(isOriginalRecordConstructor(CanonE) ||
            &RecordVal->getLoc() == &getResultObjectLocation(CanonE));
+    (void)RecordVal;
   }
 
   assert(CanonE.isPRValue());
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 1569b5e04b770..c8d243a8fb7ae 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -1543,10 +1543,13 @@ WindowsARM64TargetInfo::getBuiltinVaListKind() const {
 TargetInfo::CallingConvCheckResult
 WindowsARM64TargetInfo::checkCallingConvention(CallingConv CC) const {
   switch (CC) {
+  case CC_X86VectorCall:
+    if (getTriple().isWindowsArm64EC())
+      return CCCR_OK;
+    return CCCR_Ignore;
   case CC_X86StdCall:
   case CC_X86ThisCall:
   case CC_X86FastCall:
-  case CC_X86VectorCall:
     return CCCR_Ignore;
   case CC_C:
   case CC_OpenCLKernel:
diff --git a/clang/lib/Basic/Targets/Mips.h b/clang/lib/Basic/Targets/Mips.h
index c9dcf434c93b0..0d6e4b4d08089 100644
--- a/clang/lib/Basic/Targets/Mips.h
+++ b/clang/lib/Basic/Targets/Mips.h
@@ -318,6 +318,7 @@ class LLVM_LIBRARY_VISIBILITY MipsTargetInfo : public TargetInfo {
     FPMode = isFP64Default() ? FP64 : FPXX;
     NoOddSpreg = false;
     bool OddSpregGiven = false;
+    bool StrictAlign = false;
 
     for (const auto &Feature : Features) {
       if (Feature == "+single-float")
@@ -330,6 +331,10 @@ class LLVM_LIBRARY_VISIBILITY MipsTargetInfo : public TargetInfo {
         IsMicromips = true;
       else if (Feature == "+mips32r6" || Feature == "+mips64r6")
         HasUnalignedAccess = true;
+      // We cannot be sure of the order in which +strict-align and +mips32r6
+      // are processed, so record strict-align in an extra variable here and
+      // apply it after the feature loop.
+      else if (Feature == "+strict-align")
+        StrictAlign = true;
       else if (Feature == "+dsp")
         DspRev = std::max(DspRev, DSP1);
       else if (Feature == "+dspr2")
@@ -368,6 +373,9 @@ class LLVM_LIBRARY_VISIBILITY MipsTargetInfo : public TargetInfo {
     if (FPMode == FPXX && !OddSpregGiven)
       NoOddSpreg = true;
 
+    if (StrictAlign)
+      HasUnalignedAccess = false;
+
     setDataLayout();
 
     return true;
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 65d7256806587..8dfb24d06e95c 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -83,11 +83,11 @@
 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/KCFI.h"
+#include "llvm/Transforms/Instrumentation/LowerAllowCheckPass.h"
 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
 #include "llvm/Transforms/Instrumentation/SPIRITTAnnotations.h"
-#include "llvm/Transforms/Instrumentation/RemoveTrapsPass.h"
 #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
 #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
 #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
@@ -96,7 +96,6 @@
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/InferAddressSpaces.h"
 #include "llvm/Transforms/Scalar/JumpThreading.h"
-#include "llvm/Transforms/Scalar/SimplifyCFG.h"
 #include "llvm/Transforms/Utils/Debugify.h"
 #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -112,9 +111,6 @@ using namespace llvm;
 namespace llvm {
 extern cl::opt<bool> PrintPipelinePasses;
 
-static cl::opt<bool> ClRemoveTraps("clang-remove-traps", cl::Optional,
-                                   cl::desc("Insert remove-traps pass."));
-
 // Experiment to move sanitizers earlier.
 static cl::opt<bool> ClSanitizeOnOptimizerEarlyEP(
     "sanitizer-early-opt-ep", cl::Optional,
@@ -787,18 +783,13 @@ static void addSanitizers(const Triple &TargetTriple,
     PB.registerOptimizerLastEPCallback(SanitizersCallback);
   }
 
-  if (ClRemoveTraps) {
+  if (LowerAllowCheckPass::IsRequested()) {
     // We can optimize after the inliner and after PGO profile matching. The
     // hook below is called at the end of `buildFunctionSimplificationPipeline`,
     // which is called from `buildInlinerPipeline`, which is called after
     // profile matching.
     PB.registerScalarOptimizerLateEPCallback(
         [](FunctionPassManager &FPM, OptimizationLevel Level) {
-          // RemoveTrapsPass expects trap blocks preceded by conditional
-          // branches, which usually is not the case without SimplifyCFG.
-          // TODO: Remove `SimplifyCFGPass` after switching to dedicated
-          // intrinsic.
- FPM.addPass(SimplifyCFGPass()); - FPM.addPass(RemoveTrapsPass()); + FPM.addPass(LowerAllowCheckPass()); }); } } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 10f407dc54df2..032b434395f58 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -13,6 +13,7 @@ #include "ABIInfo.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" +#include "CGHLSLRuntime.h" #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGRecordLayout.h" @@ -17380,6 +17381,16 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); Value *Op3 = EmitScalarExpr(E->getArg(3)); + // rldimi is 64-bit instruction, expand the intrinsic before isel to + // leverage peephole and avoid legalization efforts. + if (BuiltinID == PPC::BI__builtin_ppc_rldimi && + !getTarget().getTriple().isPPC64()) { + Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType()); + Op2 = Builder.CreateZExt(Op2, Int64Ty); + Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2}); + return Builder.CreateOr(Builder.CreateAnd(Shift, Op3), + Builder.CreateAnd(Op1, Builder.CreateNot(Op3))); + } return Builder.CreateCall( CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi ? Intrinsic::ppc_rldimi @@ -18264,6 +18275,13 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, return nullptr; switch (BuiltinID) { + case Builtin::BI__builtin_hlsl_elementwise_all: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + return Builder.CreateIntrinsic( + /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()), + CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef{Op0}, nullptr, + "hlsl.all"); + } case Builtin::BI__builtin_hlsl_elementwise_any: { Value *Op0 = EmitScalarExpr(E->getArg(0)); return Builder.CreateIntrinsic( diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index e9a59b70148e9..cd5905b8d748e 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5655,6 +5655,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, /*AttrOnCallSite=*/true, /*IsThunk=*/false); + if (CallingConv == llvm::CallingConv::X86_VectorCall && + getTarget().getTriple().isWindowsArm64EC()) { + CGM.Error(Loc, "__vectorcall calling convention is not currently " + "supported"); + } + if (const FunctionDecl *FD = dyn_cast_or_null(CurFuncDecl)) { if (FD->hasAttr()) // All calls within a strictfp function are marked strictfp diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index e7fd364ed6b6d..69e602008a9a9 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -1397,7 +1397,7 @@ FieldHasTrivialDestructorBody(ASTContext &Context, // The destructor for an implicit anonymous union member is never invoked. 
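The one-line CGClass.cpp fix above flips the answer for fields of an implicit anonymous union: since such a member's destructor is never invoked, it must not make the enclosing destructor look non-trivial. A C++ sketch of the affected shape (hand-written variant-style code; illustrative):

struct NonTrivial {
  ~NonTrivial();
};

struct Holder {
  union {            // implicit anonymous union member
    NonTrivial Val;  // variant member: never destroyed implicitly
  };
  ~Holder() {}       // user-provided; does not run ~NonTrivial()
};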
if (FieldClassDecl->isUnion() && FieldClassDecl->isAnonymousStructOrUnion()) - return false; + return true; return HasTrivialDestructorBody(Context, FieldClassDecl, FieldClassDecl); } diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp index f87caf050eeaa..2fee6d3ebe4f7 100644 --- a/clang/lib/CodeGen/CGCleanup.cpp +++ b/clang/lib/CodeGen/CGCleanup.cpp @@ -694,7 +694,8 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // - whether there's a fallthrough llvm::BasicBlock *FallthroughSource = Builder.GetInsertBlock(); - bool HasFallthrough = (FallthroughSource != nullptr && IsActive); + bool HasFallthrough = + FallthroughSource != nullptr && (IsActive || HasExistingBranches); // Branch-through fall-throughs leave the insertion point set to the // end of the last cleanup, which points to the current scope. The @@ -719,7 +720,11 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { // If we have a prebranched fallthrough into an inactive normal // cleanup, rewrite it so that it leads to the appropriate place. - if (Scope.isNormalCleanup() && HasPrebranchedFallthrough && !IsActive) { + if (Scope.isNormalCleanup() && HasPrebranchedFallthrough && + !RequiresNormalCleanup) { + // FIXME: Come up with a program which would need forwarding prebranched + // fallthrough and add tests. Otherwise delete this and assert against it. + assert(!IsActive); llvm::BasicBlock *prebranchDest; // If the prebranch is semantically branching through the next @@ -792,6 +797,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { EmitSehCppScopeEnd(); } destroyOptimisticNormalEntry(*this, Scope); + Scope.MarkEmitted(); EHStack.popCleanup(); } else { // If we have a fallthrough and no other need for the cleanup, @@ -808,6 +814,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { } destroyOptimisticNormalEntry(*this, Scope); + Scope.MarkEmitted(); EHStack.popCleanup(); EmitCleanup(*this, Fn, cleanupFlags, NormalActiveFlag); @@ -944,6 +951,7 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { } // IV. Pop the cleanup and emit it. + Scope.MarkEmitted(); EHStack.popCleanup(); assert(EHStack.hasNormalCleanups() == HasEnclosingCleanups); diff --git a/clang/lib/CodeGen/CGCleanup.h b/clang/lib/CodeGen/CGCleanup.h index 7a7344c07160d..49364cf58de9a 100644 --- a/clang/lib/CodeGen/CGCleanup.h +++ b/clang/lib/CodeGen/CGCleanup.h @@ -16,8 +16,11 @@ #include "EHScopeStack.h" #include "Address.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Instruction.h" namespace llvm { class BasicBlock; @@ -266,6 +269,51 @@ class alignas(8) EHCleanupScope : public EHScope { }; mutable struct ExtInfo *ExtInfo; + /// Erases auxillary allocas and their usages for an unused cleanup. + /// Cleanups should mark these allocas as 'used' if the cleanup is + /// emitted, otherwise these instructions would be erased. + struct AuxillaryAllocas { + SmallVector AuxAllocas; + bool used = false; + + // Records a potentially unused instruction to be erased later. + void Add(llvm::AllocaInst *Alloca) { AuxAllocas.push_back(Alloca); } + + // Mark all recorded instructions as used. These will not be erased later. 
+ void MarkUsed() { + used = true; + AuxAllocas.clear(); + } + + ~AuxillaryAllocas() { + if (used) + return; + llvm::SetVector Uses; + for (auto *Inst : llvm::reverse(AuxAllocas)) + CollectUses(Inst, Uses); + // Delete uses in the reverse order of insertion. + for (auto *I : llvm::reverse(Uses)) + I->eraseFromParent(); + } + + private: + void CollectUses(llvm::Instruction *I, + llvm::SetVector &Uses) { + if (!I || !Uses.insert(I)) + return; + for (auto *User : I->users()) + CollectUses(cast(User), Uses); + } + }; + mutable struct AuxillaryAllocas *AuxAllocas; + + AuxillaryAllocas &getAuxillaryAllocas() { + if (!AuxAllocas) { + AuxAllocas = new struct AuxillaryAllocas(); + } + return *AuxAllocas; + } + /// The number of fixups required by enclosing scopes (not including /// this one). If this is the top cleanup scope, all the fixups /// from this index onwards belong to this scope. @@ -298,7 +346,7 @@ class alignas(8) EHCleanupScope : public EHScope { EHScopeStack::stable_iterator enclosingEH) : EHScope(EHScope::Cleanup, enclosingEH), EnclosingNormal(enclosingNormal), NormalBlock(nullptr), - ActiveFlag(Address::invalid()), ExtInfo(nullptr), + ActiveFlag(Address::invalid()), ExtInfo(nullptr), AuxAllocas(nullptr), FixupDepth(fixupDepth) { CleanupBits.IsNormalCleanup = isNormal; CleanupBits.IsEHCleanup = isEH; @@ -312,8 +360,15 @@ class alignas(8) EHCleanupScope : public EHScope { } void Destroy() { + if (AuxAllocas) + delete AuxAllocas; delete ExtInfo; } + void AddAuxAllocas(llvm::SmallVector Allocas) { + for (auto *Alloca : Allocas) + getAuxillaryAllocas().Add(Alloca); + } + void MarkEmitted() { getAuxillaryAllocas().MarkUsed(); } // Objects of EHCleanupScope are not destructed. Use Destroy(). ~EHCleanupScope() = delete; diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 16976dc92f017..6a7b54942eaaf 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -20,6 +20,7 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" +#include "EHScopeStack.h" #include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" @@ -2251,6 +2252,24 @@ void CodeGenFunction::pushDestroy(CleanupKind cleanupKind, Address addr, destroyer, useEHCleanupForArray); } +// Pushes a destroy and defers its deactivation until its +// CleanupDeactivationScope is exited. +void CodeGenFunction::pushDestroyAndDeferDeactivation( + QualType::DestructionKind dtorKind, Address addr, QualType type) { + assert(dtorKind && "cannot push destructor for trivial type"); + + CleanupKind cleanupKind = getCleanupKind(dtorKind); + pushDestroyAndDeferDeactivation( + cleanupKind, addr, type, getDestroyer(dtorKind), cleanupKind & EHCleanup); +} + +void CodeGenFunction::pushDestroyAndDeferDeactivation( + CleanupKind cleanupKind, Address addr, QualType type, Destroyer *destroyer, + bool useEHCleanupForArray) { + pushCleanupAndDeferDeactivation( + cleanupKind, addr, type, destroyer, useEHCleanupForArray); +} + void CodeGenFunction::pushStackRestore(CleanupKind Kind, Address SPMem) { EHStack.pushCleanup(Kind, SPMem); } @@ -2267,16 +2286,19 @@ void CodeGenFunction::pushLifetimeExtendedDestroy(CleanupKind cleanupKind, // If we're not in a conditional branch, we don't need to bother generating a // conditional cleanup. if (!isInConditionalBranch()) { - // Push an EH-only cleanup for the object now. // FIXME: When popping normal cleanups, we need to keep this EH cleanup // around in case a temporary's destructor throws an exception. 
-    if (cleanupKind & EHCleanup)
-      EHStack.pushCleanup<DestroyObject>(
-          static_cast<CleanupKind>(cleanupKind & ~NormalCleanup), addr, type,
-          destroyer, useEHCleanupForArray);
+    // Add the cleanup to the EHStack. After the full-expr, it will be
+    // deactivated before being popped from the stack.
+    pushDestroyAndDeferDeactivation(cleanupKind, addr, type, destroyer,
+                                    useEHCleanupForArray);
 
+    // Since this is lifetime-extended, push it once again to the EHStack after
+    // the full expression.
     return pushCleanupAfterFullExprWithActiveFlag<DestroyObject>(
-        cleanupKind, Address::invalid(), addr, type, destroyer, useEHCleanupForArray);
+        cleanupKind, Address::invalid(), addr, type, destroyer,
+        useEHCleanupForArray);
   }
 
   // Otherwise, we should only destroy the object if it's been initialized.
@@ -2291,13 +2313,12 @@ void CodeGenFunction::pushLifetimeExtendedDestroy(CleanupKind cleanupKind,
   Address ActiveFlag = createCleanupActiveFlag();
   SavedType SavedAddr = saveValueInCond(addr);
 
-  if (cleanupKind & EHCleanup) {
-    EHStack.pushCleanup<DestroyObject>(
-        static_cast<CleanupKind>(cleanupKind & ~NormalCleanup), SavedAddr, type,
-        destroyer, useEHCleanupForArray);
-    initFullExprCleanupWithFlag(ActiveFlag);
-  }
+  pushCleanupAndDeferDeactivation<DestroyObject>(
+      cleanupKind, SavedAddr, type, destroyer, useEHCleanupForArray);
+  initFullExprCleanupWithFlag(ActiveFlag);
 
+  // Since this is lifetime-extended, push it once again to the EHStack after
+  // the full expression.
   pushCleanupAfterFullExprWithActiveFlag<DestroyObject>(
       cleanupKind, ActiveFlag, SavedAddr, type, destroyer, useEHCleanupForArray);
 
@@ -2492,9 +2513,9 @@ namespace {
   };
 } // end anonymous namespace
 
-/// pushIrregularPartialArrayCleanup - Push an EH cleanup to destroy
-/// already-constructed elements of the given array. The cleanup
-/// may be popped with DeactivateCleanupBlock or PopCleanupBlock.
+/// pushIrregularPartialArrayCleanup - Push a NormalAndEHCleanup to
+/// destroy already-constructed elements of the given array. The cleanup may be
+/// popped with DeactivateCleanupBlock or PopCleanupBlock.
 ///
 /// \param elementType - the immediate element type of the array;
 ///   possibly still an array type
@@ -2503,10 +2524,9 @@ void CodeGenFunction::pushIrregularPartialArrayCleanup(llvm::Value *arrayBegin,
                                                        QualType elementType,
                                                        CharUnits elementAlign,
                                                        Destroyer *destroyer) {
-  pushFullExprCleanup<IrregularPartialArrayDestroy>(EHCleanup,
-                                                    arrayBegin, arrayEndPointer,
-                                                    elementType, elementAlign,
-                                                    destroyer);
+  pushFullExprCleanup<IrregularPartialArrayDestroy>(
+      NormalAndEHCleanup, arrayBegin, arrayEndPointer, elementType,
+      elementAlign, destroyer);
 }
 
 /// pushRegularPartialArrayCleanup - Push an EH cleanup to destroy
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 9cb4033bebfbb..9ed840b81af0b 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -57,7 +57,13 @@ using namespace CodeGen;
 // Experiment to make sanitizers easier to debug
 static llvm::cl::opt<bool> ClSanitizeDebugDeoptimization(
     "ubsan-unique-traps", llvm::cl::Optional,
-    llvm::cl::desc("Deoptimize traps for UBSAN so there is 1 trap per check"));
+    llvm::cl::desc("Deoptimize traps for UBSAN so there is 1 trap per check."));
+
+// TODO: Introduce frontend options to enable these checks per sanitizer,
+// similar to `-fsanitize-trap`.
+static llvm::cl::opt ClSanitizeGuardChecks( + "ubsan-guard-checks", llvm::cl::Optional, + llvm::cl::desc("Guard UBSAN checks with `llvm.allow.ubsan.check()`.")); //===--------------------------------------------------------------------===// // Miscellaneous Helper Methods @@ -110,10 +116,16 @@ Address CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, CharUnits Align, llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(llvm::Type *Ty, const Twine &Name, llvm::Value *ArraySize) { + llvm::AllocaInst *Alloca; if (ArraySize) - return Builder.CreateAlloca(Ty, ArraySize, Name); - return new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(), - ArraySize, Name, AllocaInsertPt); + Alloca = Builder.CreateAlloca(Ty, ArraySize, Name); + else + Alloca = new llvm::AllocaInst(Ty, CGM.getDataLayout().getAllocaAddrSpace(), + ArraySize, Name, AllocaInsertPt); + if (Allocas) { + Allocas->Add(Alloca); + } + return Alloca; } /// CreateDefaultAlignTempAlloca - This creates an alloca with the @@ -3535,6 +3547,17 @@ void CodeGenFunction::EmitCheck( Cond = Cond ? Builder.CreateAnd(Cond, Check) : Check; } + if (ClSanitizeGuardChecks) { + llvm::Value *Allow = + Builder.CreateCall(CGM.getIntrinsic(llvm::Intrinsic::allow_ubsan_check), + llvm::ConstantInt::get(CGM.Int8Ty, CheckHandler)); + + for (llvm::Value **Cond : {&FatalCond, &RecoverableCond, &TrapCond}) { + if (*Cond) + *Cond = Builder.CreateOr(*Cond, Builder.CreateNot(Allow)); + } + } + if (TrapCond) EmitTrapCheck(TrapCond, CheckHandler); if (!FatalCond && !RecoverableCond) @@ -5616,11 +5639,44 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { break; } - RValue RV = EmitAnyExpr(E->getRHS()); + // TODO: Can we de-duplicate this code with the corresponding code in + // CGExprScalar, similar to the way EmitCompoundAssignmentLValue works? + RValue RV; + llvm::Value *Previous = nullptr; + QualType SrcType = E->getRHS()->getType(); + // Check if LHS is a bitfield, if RHS contains an implicit cast expression + // we want to extract that value and potentially (if the bitfield sanitizer + // is enabled) use it to check for an implicit conversion. + if (E->getLHS()->refersToBitField()) { + llvm::Value *RHS = + EmitWithOriginalRHSBitfieldAssignment(E, &Previous, &SrcType); + RV = RValue::get(RHS); + } else + RV = EmitAnyExpr(E->getRHS()); + LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store); + if (RV.isScalar()) EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc()); - EmitStoreThroughLValue(RV, LV); + + if (LV.isBitField()) { + llvm::Value *Result = nullptr; + // If bitfield sanitizers are enabled we want to use the result + // to check whether a truncation or sign change has occurred. + if (SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) + EmitStoreThroughBitfieldLValue(RV, LV, &Result); + else + EmitStoreThroughBitfieldLValue(RV, LV); + + // If the expression contained an implicit conversion, make sure + // to use the value before the scalar conversion. + llvm::Value *Src = Previous ? 
Previous : RV.getScalarVal();
+    QualType DstType = E->getLHS()->getType();
+    EmitBitfieldConversionCheck(Src, SrcType, Result, DstType,
+                                LV.getBitFieldInfo(), E->getExprLoc());
+  } else
+    EmitStoreThroughLValue(RV, LV);
+
   if (getLangOpts().OpenMP)
     CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this,
                                                               E->getLHS());
diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp
index 24f018f7ee534..0b203b9decd2d 100644
--- a/clang/lib/CodeGen/CGExprAgg.cpp
+++ b/clang/lib/CodeGen/CGExprAgg.cpp
@@ -15,6 +15,7 @@
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
 #include "ConstantEmitter.h"
+#include "EHScopeStack.h"
 #include "TargetInfo.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attr.h"
@@ -24,6 +25,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 using namespace clang;
@@ -554,24 +556,27 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
   // For that, we'll need an EH cleanup.
   QualType::DestructionKind dtorKind = elementType.isDestructedType();
   Address endOfInit = Address::invalid();
-  EHScopeStack::stable_iterator cleanup;
-  llvm::Instruction *cleanupDominator = nullptr;
-  if (CGF.needsEHCleanup(dtorKind)) {
+  CodeGenFunction::CleanupDeactivationScope deactivation(CGF);
+
+  if (dtorKind) {
+    CodeGenFunction::AllocaTrackerRAII allocaTracker(CGF);
     // In principle we could tell the cleanup where we are more
     // directly, but the control flow can get so varied here that it
     // would actually be quite complex.  Therefore we go through an
     // alloca.
+    llvm::Instruction *dominatingIP =
+        Builder.CreateFlagLoad(llvm::ConstantInt::getNullValue(CGF.Int8PtrTy));
     endOfInit = CGF.CreateTempAlloca(begin->getType(), CGF.getPointerAlign(),
                                      "arrayinit.endOfInit");
-    cleanupDominator = Builder.CreateStore(begin, endOfInit);
+    Builder.CreateStore(begin, endOfInit);
     CGF.pushIrregularPartialArrayCleanup(begin, endOfInit, elementType,
                                          elementAlign,
                                          CGF.getDestroyer(dtorKind));
-    cleanup = CGF.EHStack.stable_begin();
+    cast<EHCleanupScope>(*CGF.EHStack.find(CGF.EHStack.stable_begin()))
+        .AddAuxAllocas(allocaTracker.Take());
 
-  // Otherwise, remember that we didn't need a cleanup.
-  } else {
-    dtorKind = QualType::DK_none;
+    CGF.DeferredDeactivationCleanupStack.push_back(
+        {CGF.EHStack.stable_begin(), dominatingIP});
   }
 
   llvm::Value *one = llvm::ConstantInt::get(CGF.SizeTy, 1);
@@ -667,9 +672,6 @@ void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType,
 
     CGF.EmitBlock(endBB);
   }
-
-  // Leave the partial-array cleanup if we entered one.
-  if (dtorKind) CGF.DeactivateCleanupBlock(cleanup, cleanupDominator);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1370,9 +1372,8 @@ AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) {
   LValue SlotLV = CGF.MakeAddrLValue(Slot.getAddress(), E->getType());
 
   // We'll need to enter cleanup scopes in case any of the element
-  // initializers throws an exception.
-  SmallVector<EHScopeStack::stable_iterator, 16> Cleanups;
-  llvm::Instruction *CleanupDominator = nullptr;
+  // initializers throws an exception or contains a branch out of the expression.
+ CodeGenFunction::CleanupDeactivationScope scope(CGF); CXXRecordDecl::field_iterator CurField = E->getLambdaClass()->field_begin(); for (LambdaExpr::const_capture_init_iterator i = E->capture_init_begin(), @@ -1391,28 +1392,12 @@ AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { if (QualType::DestructionKind DtorKind = CurField->getType().isDestructedType()) { assert(LV.isSimple()); - if (CGF.needsEHCleanup(DtorKind)) { - if (!CleanupDominator) - CleanupDominator = CGF.Builder.CreateAlignedLoad( - CGF.Int8Ty, - llvm::Constant::getNullValue(CGF.Int8PtrTy), - CharUnits::One()); // placeholder - - CGF.pushDestroy(EHCleanup, LV.getAddress(CGF), CurField->getType(), - CGF.getDestroyer(DtorKind), false); - Cleanups.push_back(CGF.EHStack.stable_begin()); - } + if (DtorKind) + CGF.pushDestroyAndDeferDeactivation( + NormalAndEHCleanup, LV.getAddress(CGF), CurField->getType(), + CGF.getDestroyer(DtorKind), false); } } - - // Deactivate all the partial cleanups in reverse order, which - // generally means popping them. - for (unsigned i = Cleanups.size(); i != 0; --i) - CGF.DeactivateCleanupBlock(Cleanups[i-1], CleanupDominator); - - // Destroy the placeholder if we made one. - if (CleanupDominator) - CleanupDominator->eraseFromParent(); } void AggExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { @@ -1701,14 +1686,7 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( // We'll need to enter cleanup scopes in case any of the element // initializers throws an exception. SmallVector cleanups; - llvm::Instruction *cleanupDominator = nullptr; - auto addCleanup = [&](const EHScopeStack::stable_iterator &cleanup) { - cleanups.push_back(cleanup); - if (!cleanupDominator) // create placeholder once needed - cleanupDominator = CGF.Builder.CreateAlignedLoad( - CGF.Int8Ty, llvm::Constant::getNullValue(CGF.Int8PtrTy), - CharUnits::One()); - }; + CodeGenFunction::CleanupDeactivationScope DeactivateCleanups(CGF); unsigned curInitIndex = 0; @@ -1731,10 +1709,8 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( CGF.EmitAggExpr(InitExprs[curInitIndex++], AggSlot); if (QualType::DestructionKind dtorKind = - Base.getType().isDestructedType()) { - CGF.pushDestroy(dtorKind, V, Base.getType()); - addCleanup(CGF.EHStack.stable_begin()); - } + Base.getType().isDestructedType()) + CGF.pushDestroyAndDeferDeactivation(dtorKind, V, Base.getType()); } } @@ -1809,10 +1785,10 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( if (QualType::DestructionKind dtorKind = field->getType().isDestructedType()) { assert(LV.isSimple()); - if (CGF.needsEHCleanup(dtorKind)) { - CGF.pushDestroy(EHCleanup, LV.getAddress(CGF), field->getType(), - CGF.getDestroyer(dtorKind), false); - addCleanup(CGF.EHStack.stable_begin()); + if (dtorKind) { + CGF.pushDestroyAndDeferDeactivation( + NormalAndEHCleanup, LV.getAddress(CGF), field->getType(), + CGF.getDestroyer(dtorKind), false); pushedCleanup = true; } } @@ -1825,17 +1801,6 @@ void AggExprEmitter::VisitCXXParenListOrInitListExpr( if (GEP->use_empty()) GEP->eraseFromParent(); } - - // Deactivate all the partial cleanups in reverse order, which - // generally means popping them. - assert((cleanupDominator || cleanups.empty()) && - "Missing cleanupDominator before deactivating cleanup blocks"); - for (unsigned i = cleanups.size(); i != 0; --i) - CGF.DeactivateCleanupBlock(cleanups[i-1], cleanupDominator); - - // Destroy the placeholder if we made one. 
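A C++ sketch of the situation the reworked VisitLambdaExpr cleanups above guard: while the closure object is being built, each already-initialized capture with a non-trivial destructor needs a live cleanup in case a later capture's initialization throws (the copy constructor here is assumed to potentially throw):

struct Payload {
  Payload();
  Payload(const Payload &); // may throw
  ~Payload();
};

auto makeClosure(Payload a, Payload b) {
  // If copying 'b' into the closure throws, the already-copied 'a'
  // subobject must be destroyed by the pushed cleanup.
  return [a, b] {};
}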
- if (cleanupDominator) - cleanupDominator->eraseFromParent(); } void AggExprEmitter::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E, diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index 35da0f1a89bc3..019b828850c70 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -1005,8 +1005,8 @@ void CodeGenFunction::EmitNewArrayInitializer( const Expr *Init = E->getInitializer(); Address EndOfInit = Address::invalid(); QualType::DestructionKind DtorKind = ElementType.isDestructedType(); - EHScopeStack::stable_iterator Cleanup; - llvm::Instruction *CleanupDominator = nullptr; + CleanupDeactivationScope deactivation(*this); + bool pushedCleanup = false; CharUnits ElementSize = getContext().getTypeSizeInChars(ElementType); CharUnits ElementAlign = @@ -1102,18 +1102,24 @@ void CodeGenFunction::EmitNewArrayInitializer( } // Enter a partial-destruction Cleanup if necessary. - if (needsEHCleanup(DtorKind)) { + if (DtorKind) { + AllocaTrackerRAII AllocaTracker(*this); // In principle we could tell the Cleanup where we are more // directly, but the control flow can get so varied here that it // would actually be quite complex. Therefore we go through an // alloca. + llvm::Instruction *DominatingIP = + Builder.CreateFlagLoad(llvm::ConstantInt::getNullValue(Int8PtrTy)); EndOfInit = CreateTempAlloca(BeginPtr.getType(), getPointerAlign(), "array.init.end"); - CleanupDominator = Builder.CreateStore(BeginPtr.getPointer(), EndOfInit); pushIrregularPartialArrayCleanup(BeginPtr.getPointer(), EndOfInit, ElementType, ElementAlign, getDestroyer(DtorKind)); - Cleanup = EHStack.stable_begin(); + cast(*EHStack.find(EHStack.stable_begin())) + .AddAuxAllocas(AllocaTracker.Take()); + DeferredDeactivationCleanupStack.push_back( + {EHStack.stable_begin(), DominatingIP}); + pushedCleanup = true; } CharUnits StartAlign = CurPtr.getAlignment(); @@ -1159,9 +1165,6 @@ void CodeGenFunction::EmitNewArrayInitializer( // initialization. llvm::ConstantInt *ConstNum = dyn_cast(NumElements); if (ConstNum && ConstNum->getZExtValue() <= InitListElements) { - // If there was a Cleanup, deactivate it. - if (CleanupDominator) - DeactivateCleanupBlock(Cleanup, CleanupDominator); return; } @@ -1276,12 +1279,14 @@ void CodeGenFunction::EmitNewArrayInitializer( Builder.CreateStore(CurPtr.getPointer(), EndOfInit); // Enter a partial-destruction Cleanup if necessary. - if (!CleanupDominator && needsEHCleanup(DtorKind)) { + if (!pushedCleanup && needsEHCleanup(DtorKind)) { + llvm::Instruction *DominatingIP = + Builder.CreateFlagLoad(llvm::ConstantInt::getNullValue(Int8PtrTy)); pushRegularPartialArrayCleanup(BeginPtr.getPointer(), CurPtr.getPointer(), ElementType, ElementAlign, getDestroyer(DtorKind)); - Cleanup = EHStack.stable_begin(); - CleanupDominator = Builder.CreateUnreachable(); + DeferredDeactivationCleanupStack.push_back( + {EHStack.stable_begin(), DominatingIP}); } // Emit the initializer into this element. @@ -1289,10 +1294,7 @@ void CodeGenFunction::EmitNewArrayInitializer( AggValueSlot::DoesNotOverlap); // Leave the Cleanup if we entered one. - if (CleanupDominator) { - DeactivateCleanupBlock(Cleanup, CleanupDominator); - CleanupDominator->eraseFromParent(); - } + deactivation.ForceDeactivate(); // Advance to the next element by adjusting the pointer type as necessary. 
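The deferred deactivation used in EmitNewArrayInitializer above exists because an initializer can branch out of the middle of the expression, at which point partially-constructed elements still need their cleanup run as a normal (not EH-only) cleanup. A minimal sketch of such a branch (illustrative; relies on the GNU `({ ... })` statement-expression extension):

struct Guarded {
  Guarded();
  ~Guarded();
};

void consume(Guarded, Guarded);

void demo(bool bail) {
  // The 'return' leaves the call mid-expression; if the other temporary has
  // already been constructed, its cleanup must still run on the early exit.
  consume(Guarded{}, ({ if (bail) return; Guarded{}; }));
}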
llvm::Value *NextPtr = diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index ec55b91b8b5be..bae8c7a266c2e 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -15,6 +15,7 @@ #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" +#include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" @@ -308,6 +309,7 @@ class ScalarExprEmitter llvm::Type *DstTy, SourceLocation Loc); /// Known implicit conversion check kinds. + /// This is used for bitfield conversion checks as well. /// Keep in sync with the enum of the same name in ubsan_handlers.h enum ImplicitConversionCheckKind : unsigned char { ICCK_IntegerTruncation = 0, // Legacy, was only used by clang 7. @@ -1099,11 +1101,28 @@ void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, llvm::Constant *StaticArgs[] = { CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(SrcType), CGF.EmitCheckTypeDescriptor(DstType), - llvm::ConstantInt::get(Builder.getInt8Ty(), Check.first)}; + llvm::ConstantInt::get(Builder.getInt8Ty(), Check.first), + llvm::ConstantInt::get(Builder.getInt32Ty(), 0)}; + CGF.EmitCheck(Check.second, SanitizerHandler::ImplicitConversion, StaticArgs, {Src, Dst}); } +static llvm::Value *EmitIsNegativeTestHelper(Value *V, QualType VType, + const char *Name, + CGBuilderTy &Builder) { + bool VSigned = VType->isSignedIntegerOrEnumerationType(); + llvm::Type *VTy = V->getType(); + if (!VSigned) { + // If the value is unsigned, then it is never negative. + return llvm::ConstantInt::getFalse(VTy->getContext()); + } + llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0); + return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero, + llvm::Twine(Name) + "." + V->getName() + + ".negativitycheck"); +} + // Should be called within CodeGenFunction::SanitizerScope RAII scope. // Returns 'i1 false' when the conversion Src -> Dst changed the sign. static std::pair Value * { - // Is this value a signed type? - bool VSigned = VType->isSignedIntegerOrEnumerationType(); - llvm::Type *VTy = V->getType(); - if (!VSigned) { - // If the value is unsigned, then it is never negative. - // FIXME: can we encounter non-scalar VTy here? - return llvm::ConstantInt::getFalse(VTy->getContext()); - } - // Get the zero of the same type with which we will be comparing. - llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0); - // %V.isnegative = icmp slt %V, 0 - // I.e is %V *strictly* less than zero, does it have negative value? - return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero, - llvm::Twine(Name) + "." + V->getName() + - ".negativitycheck"); - }; - // 1. Was the old Value negative? - llvm::Value *SrcIsNegative = EmitIsNegativeTest(Src, SrcType, "src"); + llvm::Value *SrcIsNegative = + EmitIsNegativeTestHelper(Src, SrcType, "src", Builder); // 2. Is the new Value negative? - llvm::Value *DstIsNegative = EmitIsNegativeTest(Dst, DstType, "dst"); + llvm::Value *DstIsNegative = + EmitIsNegativeTestHelper(Dst, DstType, "dst", Builder); // 3. Now, was the 'negativity status' preserved during the conversion? // NOTE: conversion from negative to zero is considered to change the sign. 
// (We want to get 'false' when the conversion changed the sign) @@ -1240,12 +1241,143 @@ void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, llvm::Constant *StaticArgs[] = { CGF.EmitCheckSourceLocation(Loc), CGF.EmitCheckTypeDescriptor(SrcType), CGF.EmitCheckTypeDescriptor(DstType), - llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind)}; + llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind), + llvm::ConstantInt::get(Builder.getInt32Ty(), 0)}; // EmitCheck() will 'and' all the checks together. CGF.EmitCheck(Checks, SanitizerHandler::ImplicitConversion, StaticArgs, {Src, Dst}); } +// Should be called within CodeGenFunction::SanitizerScope RAII scope. +// Returns 'i1 false' when the truncation Src -> Dst was lossy. +static std::pair> +EmitBitfieldTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst, + QualType DstType, CGBuilderTy &Builder) { + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + + ScalarExprEmitter::ImplicitConversionCheckKind Kind; + if (!SrcSigned && !DstSigned) + Kind = ScalarExprEmitter::ICCK_UnsignedIntegerTruncation; + else + Kind = ScalarExprEmitter::ICCK_SignedIntegerTruncation; + + llvm::Value *Check = nullptr; + // 1. Extend the truncated value back to the same width as the Src. + Check = Builder.CreateIntCast(Dst, Src->getType(), DstSigned, "bf.anyext"); + // 2. Equality-compare with the original source value + Check = Builder.CreateICmpEQ(Check, Src, "bf.truncheck"); + // If the comparison result is 'i1 false', then the truncation was lossy. + + return std::make_pair( + Kind, std::make_pair(Check, SanitizerKind::ImplicitBitfieldConversion)); +} + +// Should be called within CodeGenFunction::SanitizerScope RAII scope. +// Returns 'i1 false' when the conversion Src -> Dst changed the sign. +static std::pair> +EmitBitfieldSignChangeCheckHelper(Value *Src, QualType SrcType, Value *Dst, + QualType DstType, CGBuilderTy &Builder) { + // 1. Was the old Value negative? + llvm::Value *SrcIsNegative = + EmitIsNegativeTestHelper(Src, SrcType, "bf.src", Builder); + // 2. Is the new Value negative? + llvm::Value *DstIsNegative = + EmitIsNegativeTestHelper(Dst, DstType, "bf.dst", Builder); + // 3. Now, was the 'negativity status' preserved during the conversion? + // NOTE: conversion from negative to zero is considered to change the sign. + // (We want to get 'false' when the conversion changed the sign) + // So we should just equality-compare the negativity statuses. + llvm::Value *Check = nullptr; + Check = + Builder.CreateICmpEQ(SrcIsNegative, DstIsNegative, "bf.signchangecheck"); + // If the comparison result is 'false', then the conversion changed the sign. + return std::make_pair( + ScalarExprEmitter::ICCK_IntegerSignChange, + std::make_pair(Check, SanitizerKind::ImplicitBitfieldConversion)); +} + +void CodeGenFunction::EmitBitfieldConversionCheck(Value *Src, QualType SrcType, + Value *Dst, QualType DstType, + const CGBitFieldInfo &Info, + SourceLocation Loc) { + + if (!SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) + return; + + // We only care about int->int conversions here. + // We ignore conversions to/from pointer and/or bool. + if (!PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(SrcType, + DstType)) + return; + + if (DstType->isBooleanType() || SrcType->isBooleanType()) + return; + + // This should be truncation of integral types. 
+  assert(isa<llvm::IntegerType>(Src->getType()) &&
+         isa<llvm::IntegerType>(Dst->getType()) && "non-integer llvm type");
+
+  // TODO: Calculate the src width to avoid emitting code
+  // for unnecessary cases.
+  unsigned SrcBits = ConvertType(SrcType)->getScalarSizeInBits();
+  unsigned DstBits = Info.Size;
+
+  bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType();
+  bool DstSigned = DstType->isSignedIntegerOrEnumerationType();
+
+  CodeGenFunction::SanitizerScope SanScope(this);
+
+  std::pair<ScalarExprEmitter::ImplicitConversionCheckKind,
+            std::pair<llvm::Value *, SanitizerMask>>
+      Check;
+
+  // Truncation
+  bool EmitTruncation = DstBits < SrcBits;
+  // If Dst is signed and Src unsigned, we want to be more specific
+  // about the CheckKind we emit, in this case we want to emit
+  // ICCK_SignedIntegerTruncationOrSignChange.
+  bool EmitTruncationFromUnsignedToSigned =
+      EmitTruncation && DstSigned && !SrcSigned;
+  // Sign change
+  bool SameTypeSameSize = SrcSigned == DstSigned && SrcBits == DstBits;
+  bool BothUnsigned = !SrcSigned && !DstSigned;
+  bool LargerSigned = (DstBits > SrcBits) && DstSigned;
+  // We can avoid emitting sign change checks in some obvious cases:
+  //   1. If Src and Dst have the same signedness and size.
+  //   2. If both are unsigned, a sign check is unnecessary.
+  //   3. If Dst is signed and bigger than Src, either
+  //      sign-extension or zero-extension will make sure
+  //      the sign remains.
+  bool EmitSignChange = !SameTypeSameSize && !BothUnsigned && !LargerSigned;
+
+  if (EmitTruncation)
+    Check =
+        EmitBitfieldTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder);
+  else if (EmitSignChange) {
+    assert(((SrcBits != DstBits) || (SrcSigned != DstSigned)) &&
+           "either the widths should be different, or the signednesses.");
+    Check =
+        EmitBitfieldSignChangeCheckHelper(Src, SrcType, Dst, DstType, Builder);
+  } else
+    return;
+
+  ScalarExprEmitter::ImplicitConversionCheckKind CheckKind = Check.first;
+  if (EmitTruncationFromUnsignedToSigned)
+    CheckKind = ScalarExprEmitter::ICCK_SignedIntegerTruncationOrSignChange;
+
+  llvm::Constant *StaticArgs[] = {
+      EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(SrcType),
+      EmitCheckTypeDescriptor(DstType),
+      llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind),
+      llvm::ConstantInt::get(Builder.getInt32Ty(), Info.Size)};
+
+  EmitCheck(Check.second, SanitizerHandler::ImplicitConversion, StaticArgs,
+            {Src, Dst});
+}
+
 Value *ScalarExprEmitter::EmitScalarCast(Value *Src, QualType SrcType,
                                          QualType DstType, llvm::Type *SrcTy,
                                          llvm::Type *DstTy,
@@ -2644,6 +2776,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
   llvm::PHINode *atomicPHI = nullptr;
   llvm::Value *value;
   llvm::Value *input;
+  llvm::Value *Previous = nullptr;
+  QualType SrcType = E->getType();
 
   int amount = (isInc ? 1 : -1);
   bool isSubtraction = !isInc;
@@ -2732,7 +2866,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
            "base or promoted) will be signed, or the bitwidths will match.");
   }
   if (CGF.SanOpts.hasOneOf(
-          SanitizerKind::ImplicitIntegerArithmeticValueChange) &&
+          SanitizerKind::ImplicitIntegerArithmeticValueChange |
+          SanitizerKind::ImplicitBitfieldConversion) &&
       canPerformLossyDemotionCheck) {
     // While `x += 1` (for `x` with width less than int) is modeled as
     // promotion+arithmetics+demotion, and we can catch lossy demotion with
@@ -2743,13 +2878,26 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
     // the increment/decrement in the wider type, and finally
     // perform the demotion. This will catch lossy demotions.
+ // We have a special case for bitfields defined using all the bits of the + // type. In this case we need to do the same trick as for the integer + // sanitizer checks, i.e., promotion -> increment/decrement -> demotion. + value = EmitScalarConversion(value, type, promotedType, E->getExprLoc()); Value *amt = llvm::ConstantInt::get(value->getType(), amount, true); value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec"); // Do pass non-default ScalarConversionOpts so that sanitizer check is - // emitted. + // emitted if LV is not a bitfield, otherwise the bitfield sanitizer + // checks will take care of the conversion. + ScalarConversionOpts Opts; + if (!LV.isBitField()) + Opts = ScalarConversionOpts(CGF.SanOpts); + else if (CGF.SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) { + Previous = value; + SrcType = promotedType; + } + value = EmitScalarConversion(value, promotedType, type, E->getExprLoc(), - ScalarConversionOpts(CGF.SanOpts)); + Opts); // Note that signed integer inc/dec with width less than int can't // overflow because of promotion rules; we're just eliding a few steps @@ -2934,9 +3082,12 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } // Store the updated result through the lvalue. - if (LV.isBitField()) + if (LV.isBitField()) { + Value *Src = Previous ? Previous : value; CGF.EmitStoreThroughBitfieldLValue(RValue::get(value), LV, &value); - else + CGF.EmitBitfieldConversionCheck(Src, SrcType, value, E->getType(), + LV.getBitFieldInfo(), E->getExprLoc()); + } else CGF.EmitStoreThroughLValue(RValue::get(value), LV); // If this is a postinc, return the value read from memory, otherwise use the @@ -3441,8 +3592,15 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // Convert the result back to the LHS type, // potentially with Implicit Conversion sanitizer check. - Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, - ScalarConversionOpts(CGF.SanOpts)); + // If LHSLV is a bitfield, use default ScalarConversionOpts + // to avoid emit any implicit integer checks. + Value *Previous = nullptr; + if (LHSLV.isBitField()) { + Previous = Result; + Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc); + } else + Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, + ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); @@ -3461,9 +3619,14 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // specially because the result is altered by the store, i.e., [C99 6.5.16p1] // 'An assignment expression has the value of the left operand after the // assignment...'. - if (LHSLV.isBitField()) + if (LHSLV.isBitField()) { + Value *Src = Previous ? Previous : Result; + QualType SrcType = E->getRHS()->getType(); + QualType DstType = E->getLHS()->getType(); CGF.EmitStoreThroughBitfieldLValue(RValue::get(Result), LHSLV, &Result); - else + CGF.EmitBitfieldConversionCheck(Src, SrcType, Result, DstType, + LHSLV.getBitFieldInfo(), E->getExprLoc()); + } else CGF.EmitStoreThroughLValue(RValue::get(Result), LHSLV); if (CGF.getLangOpts().OpenMP) @@ -4575,6 +4738,24 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, E->getExprLoc()); } +llvm::Value *CodeGenFunction::EmitWithOriginalRHSBitfieldAssignment( + const BinaryOperator *E, Value **Previous, QualType *SrcType) { + // In case we have the integer or bitfield sanitizer checks enabled + // we want to get the expression before scalar conversion. 
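To make the new bitfield checks concrete, a few conversions they are meant to flag, assuming the corresponding -fsanitize=implicit-bitfield-conversion driver flag:

struct Packed {
  unsigned U : 3; // can represent 0..7
  int S : 4;      // can represent -8..7
};

void demo(Packed &P) {
  P.U = 9;  // truncation: the stored value becomes 1 (9 & 0b111)
  P.S = 8;  // sign change: bit pattern 1000 reads back as -8
  P.U--;    // inc/dec path: from 0 this wraps to 7 and is flagged
}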
+ if (auto *ICE = dyn_cast(E->getRHS())) { + CastKind Kind = ICE->getCastKind(); + if (Kind == CK_IntegralCast || Kind == CK_LValueToRValue) { + *SrcType = ICE->getSubExpr()->getType(); + *Previous = EmitScalarExpr(ICE->getSubExpr()); + // Pass default ScalarConversionOpts to avoid emitting + // integer sanitizer checks as E refers to bitfield. + return EmitScalarConversion(*Previous, *SrcType, ICE->getType(), + ICE->getExprLoc()); + } + } + return EmitScalarExpr(E->getRHS()); +} + Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { bool Ignore = TestAndClearIgnoreResultAssign(); @@ -4603,7 +4784,16 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { case Qualifiers::OCL_None: // __block variables need to have the rhs evaluated first, plus // this should improve codegen just a little. - RHS = Visit(E->getRHS()); + Value *Previous = nullptr; + QualType SrcType = E->getRHS()->getType(); + // Check if LHS is a bitfield, if RHS contains an implicit cast expression + // we want to extract that value and potentially (if the bitfield sanitizer + // is enabled) use it to check for an implicit conversion. + if (E->getLHS()->refersToBitField()) + RHS = CGF.EmitWithOriginalRHSBitfieldAssignment(E, &Previous, &SrcType); + else + RHS = Visit(E->getRHS()); + LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); // Store the value into the LHS. Bit-fields are handled specially @@ -4612,6 +4802,12 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { // the assignment...'. if (LHS.isBitField()) { CGF.EmitStoreThroughBitfieldLValue(RValue::get(RHS), LHS, &RHS); + // If the expression contained an implicit conversion, make sure + // to use the value before the scalar conversion. + Value *Src = Previous ? Previous : RHS; + QualType DstType = E->getLHS()->getType(); + CGF.EmitBitfieldConversionCheck(Src, SrcType, RHS, DstType, + LHS.getBitFieldInfo(), E->getExprLoc()); } else { CGF.EmitNullabilityCheck(LHS, RHS, E->getExprLoc()); CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 794d93358b0a4..5e6a3dd4878f4 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -17,8 +17,6 @@ #include "CodeGenModule.h" #include "clang/AST/Decl.h" #include "clang/Basic/TargetOptions.h" -#include "llvm/IR/IntrinsicsDirectX.h" -#include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Support/FormatVariadic.h" @@ -117,6 +115,10 @@ GlobalVariable *replaceBuffer(CGHLSLRuntime::Buffer &Buf) { } // namespace +llvm::Triple::ArchType CGHLSLRuntime::getArch() { + return CGM.getTarget().getTriple().getArch(); +} + void CGHLSLRuntime::addConstant(VarDecl *D, Buffer &CB) { if (D->getStorageClass() == SC_Static) { // For static inside cbuffer, take as global static. 
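For reference, the GENERATE_HLSL_INTRINSIC_FUNCTION macro introduced in CGHLSLRuntime.h below generates one getter per HLSL intrinsic; for the `all` case the expansion is equivalent to:

llvm::Intrinsic::ID getAllIntrinsic() {
  llvm::Triple::ArchType Arch = getArch();
  switch (Arch) {
  case llvm::Triple::dxil:
    return llvm::Intrinsic::dx_all;
  case llvm::Triple::spirv:
    return llvm::Intrinsic::spv_all;
  default:
    llvm_unreachable("Intrinsic all not supported by target architecture");
  }
}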
@@ -343,18 +345,8 @@ llvm::Value *CGHLSLRuntime::emitInputSemantic(IRBuilder<> &B, return B.CreateCall(FunctionCallee(DxGroupIndex)); } if (D.hasAttr()) { - llvm::Function *ThreadIDIntrinsic; - switch (CGM.getTarget().getTriple().getArch()) { - case llvm::Triple::dxil: - ThreadIDIntrinsic = CGM.getIntrinsic(Intrinsic::dx_thread_id); - break; - case llvm::Triple::spirv: - ThreadIDIntrinsic = CGM.getIntrinsic(Intrinsic::spv_thread_id); - break; - default: - llvm_unreachable("Input semantic not supported by target"); - break; - } + llvm::Function *ThreadIDIntrinsic = + CGM.getIntrinsic(getThreadIdIntrinsic()); return buildVectorInput(B, ThreadIDIntrinsic, Ty); } assert(false && "Unhandled parameter attribute"); diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index bffefb66740a0..2b8073aef973f 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -16,7 +16,11 @@ #define LLVM_CLANG_LIB_CODEGEN_CGHLSLRUNTIME_H #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsDirectX.h" +#include "llvm/IR/IntrinsicsSPIRV.h" +#include "clang/Basic/Builtins.h" #include "clang/Basic/HLSLRuntime.h" #include "llvm/ADT/SmallVector.h" @@ -26,6 +30,22 @@ #include #include +// A function generator macro for picking the right intrinsic +// for the target backend +#define GENERATE_HLSL_INTRINSIC_FUNCTION(FunctionName, IntrinsicPostfix) \ + llvm::Intrinsic::ID get##FunctionName##Intrinsic() { \ + llvm::Triple::ArchType Arch = getArch(); \ + switch (Arch) { \ + case llvm::Triple::dxil: \ + return llvm::Intrinsic::dx_##IntrinsicPostfix; \ + case llvm::Triple::spirv: \ + return llvm::Intrinsic::spv_##IntrinsicPostfix; \ + default: \ + llvm_unreachable("Intrinsic " #IntrinsicPostfix \ + " not supported by target architecture"); \ + } \ + } + namespace llvm { class GlobalVariable; class Function; @@ -48,6 +68,17 @@ class CodeGenModule; class CGHLSLRuntime { public: + //===----------------------------------------------------------------------===// + // Start of reserved area for HLSL intrinsic getters. + //===----------------------------------------------------------------------===// + + GENERATE_HLSL_INTRINSIC_FUNCTION(All, all) + GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id) + + //===----------------------------------------------------------------------===// + // End of reserved area for HLSL intrinsic getters. + //===----------------------------------------------------------------------===// + struct BufferResBinding { // The ID like 2 in register(b2, space1). 
std::optional<unsigned> Reg;
@@ -96,6 +127,7 @@ class CGHLSLRuntime {
                            BufferResBinding &Binding);
   void addConstant(VarDecl *D, Buffer &CB);
   void addBufferDecls(const DeclContext *DC, Buffer &CB);
+  llvm::Triple::ArchType getArch();
 
   llvm::SmallVector<Buffer> Buffers;
 };
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 1abbcaa3680ad..e299a2d5e4718 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2647,9 +2647,9 @@ void CGOpenMPRuntime::emitDistributeStaticInit(
 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           OpenMPDirectiveKind DKind) {
-  assert(DKind == OMPD_distribute || DKind == OMPD_for ||
-         DKind == OMPD_sections &&
-             "Expected distribute, for, or sections directive kind");
+  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
+          DKind == OMPD_sections) &&
+         "Expected distribute, for, or sections directive kind");
   if (!CGF.HaveInsertPoint())
     return;
   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 2547d58ea7745..6d4b6680ae8f6 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -93,6 +93,8 @@ CodeGenFunction::CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext)
 CodeGenFunction::~CodeGenFunction() {
   assert(LifetimeExtendedCleanupStack.empty() && "failed to emit a cleanup");
+  assert(DeferredDeactivationCleanupStack.empty() &&
+         "failed to deactivate a cleanup");
 
   if (getLangOpts().OpenMP && CurFn)
     CGM.getOpenMPRuntime().functionFinished(*this);
@@ -339,6 +341,10 @@ static void EmitIfUsed(CodeGenFunction &CGF, llvm::BasicBlock *BB) {
 void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
   assert(BreakContinueStack.empty() &&
         "mismatched push/pop in break/continue stack!");
+  assert(LifetimeExtendedCleanupStack.empty() &&
+         "mismatched push/pop of cleanups in EHStack!");
+  assert(DeferredDeactivationCleanupStack.empty() &&
+         "mismatched activate/deactivate of cleanups!");
 
   bool OnlySimpleReturnStmts = NumSimpleReturnExprs > 0
                                && NumSimpleReturnExprs == NumReturnExprs
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 592e0f557fc37..a65f384b38425 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -39,6 +39,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Utils/SanitizerStats.h"
@@ -648,6 +649,51 @@ class CodeGenFunction : public CodeGenTypeCache {
   EHScopeStack EHStack;
   llvm::SmallVector<char, 8> LifetimeExtendedCleanupStack;
+
+  // A stack of cleanups which were added to the EHStack but have to be
+  // deactivated later, before being popped or emitted. These are usually
+  // deactivated on exiting a `CleanupDeactivationScope`, for instance after a
+  // full-expression.
+  //
+  // They are especially useful for correctly emitting cleanups while
+  // encountering branches out of an expression (through stmt-exprs or
+  // coroutine suspensions).
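A sketch of the intended usage pattern for this machinery together with the CleanupDeactivationScope RAII helper defined just below (simplified; names are from this patch):

void emitInitWithDeferredCleanup(CodeGenFunction &CGF, Address Addr,
                                 QualType Ty,
                                 QualType::DestructionKind DtorKind) {
  CodeGenFunction::CleanupDeactivationScope Scope(CGF);
  // Stays active on the EHStack for the whole scope, so a branch out of
  // the initializer still runs it as a normal cleanup.
  CGF.pushDestroyAndDeferDeactivation(NormalAndEHCleanup, Addr, Ty,
                                      CGF.getDestroyer(DtorKind),
                                      /*useEHCleanupForArray=*/false);
  // ... emit the element/field initializers here ...
} // ~CleanupDeactivationScope() deactivates the deferred cleanup.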
+ struct DeferredDeactivateCleanup { + EHScopeStack::stable_iterator Cleanup; + llvm::Instruction *DominatingIP; + }; + llvm::SmallVector DeferredDeactivationCleanupStack; + + // Enters a new scope for capturing cleanups which are deferred to be + // deactivated, all of which will be deactivated once the scope is exited. + struct CleanupDeactivationScope { + CodeGenFunction &CGF; + size_t OldDeactivateCleanupStackSize; + bool Deactivated; + CleanupDeactivationScope(CodeGenFunction &CGF) + : CGF(CGF), OldDeactivateCleanupStackSize( + CGF.DeferredDeactivationCleanupStack.size()), + Deactivated(false) {} + + void ForceDeactivate() { + assert(!Deactivated && "Deactivating already deactivated scope"); + auto &Stack = CGF.DeferredDeactivationCleanupStack; + for (size_t I = Stack.size(); I > OldDeactivateCleanupStackSize; I--) { + CGF.DeactivateCleanupBlock(Stack[I - 1].Cleanup, + Stack[I - 1].DominatingIP); + Stack[I - 1].DominatingIP->eraseFromParent(); + } + Stack.resize(OldDeactivateCleanupStackSize); + Deactivated = true; + } + + ~CleanupDeactivationScope() { + if (Deactivated) + return; + ForceDeactivate(); + } + }; + llvm::SmallVector SEHTryEpilogueStack; llvm::Instruction *CurrentFuncletPad = nullptr; @@ -853,6 +899,19 @@ class CodeGenFunction : public CodeGenTypeCache { new (Buffer + sizeof(Header) + sizeof(T)) Address(ActiveFlag); } + // Push a cleanup onto EHStack and deactivate it later. It is usually + // deactivated when exiting a `CleanupDeactivationScope` (for example: after a + // full expression). + template + void pushCleanupAndDeferDeactivation(CleanupKind Kind, As... A) { + // Placeholder dominating IP for this cleanup. + llvm::Instruction *DominatingIP = + Builder.CreateFlagLoad(llvm::Constant::getNullValue(Int8PtrTy)); + EHStack.pushCleanup(Kind, A...); + DeferredDeactivationCleanupStack.push_back( + {EHStack.stable_begin(), DominatingIP}); + } + /// Set up the last cleanup that was pushed as a conditional /// full-expression cleanup. void initFullExprCleanup() { @@ -904,6 +963,7 @@ class CodeGenFunction : public CodeGenTypeCache { class RunCleanupsScope { EHScopeStack::stable_iterator CleanupStackDepth, OldCleanupScopeDepth; size_t LifetimeExtendedCleanupStackSize; + CleanupDeactivationScope DeactivateCleanups; bool OldDidCallStackSave; protected: bool PerformCleanup; @@ -918,8 +978,7 @@ class CodeGenFunction : public CodeGenTypeCache { public: /// Enter a new cleanup scope. 
@@ -904,6 +963,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   class RunCleanupsScope {
     EHScopeStack::stable_iterator CleanupStackDepth, OldCleanupScopeDepth;
     size_t LifetimeExtendedCleanupStackSize;
+    CleanupDeactivationScope DeactivateCleanups;
     bool OldDidCallStackSave;
   protected:
     bool PerformCleanup;
@@ -918,8 +978,7 @@
   public:
     /// Enter a new cleanup scope.
     explicit RunCleanupsScope(CodeGenFunction &CGF)
-        : PerformCleanup(true), CGF(CGF)
-    {
+        : DeactivateCleanups(CGF), PerformCleanup(true), CGF(CGF) {
       CleanupStackDepth = CGF.EHStack.stable_begin();
       LifetimeExtendedCleanupStackSize =
           CGF.LifetimeExtendedCleanupStack.size();
@@ -949,6 +1008,7 @@
     void ForceCleanup(std::initializer_list<llvm::Value **> ValuesToReload = {}) {
       assert(PerformCleanup && "Already forced cleanup");
       CGF.DidCallStackSave = OldDidCallStackSave;
+      DeactivateCleanups.ForceDeactivate();
       CGF.PopCleanupBlocks(CleanupStackDepth, LifetimeExtendedCleanupStackSize,
                            ValuesToReload);
       PerformCleanup = false;
@@ -2158,6 +2218,11 @@
                                   Address addr, QualType type);
   void pushDestroy(CleanupKind kind, Address addr, QualType type,
                    Destroyer *destroyer, bool useEHCleanupForArray);
+  void pushDestroyAndDeferDeactivation(QualType::DestructionKind dtorKind,
+                                       Address addr, QualType type);
+  void pushDestroyAndDeferDeactivation(CleanupKind cleanupKind, Address addr,
+                                       QualType type, Destroyer *destroyer,
+                                       bool useEHCleanupForArray);
   void pushLifetimeExtendedDestroy(CleanupKind kind, Address addr,
                                    QualType type, Destroyer *destroyer,
                                    bool useEHCleanupForArray);
@@ -2649,6 +2714,33 @@
                                      TBAAAccessInfo *TBAAInfo = nullptr);
   LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy);
 
+private:
+  struct AllocaTracker {
+    void Add(llvm::AllocaInst *I) { Allocas.push_back(I); }
+    llvm::SmallVector<llvm::AllocaInst *> Take() { return std::move(Allocas); }
+
+  private:
+    llvm::SmallVector<llvm::AllocaInst *> Allocas;
+  };
+  AllocaTracker *Allocas = nullptr;
+
+public:
+  // Captures all the allocas created during the scope of its RAII object.
+  struct AllocaTrackerRAII {
+    AllocaTrackerRAII(CodeGenFunction &CGF)
+        : CGF(CGF), OldTracker(CGF.Allocas) {
+      CGF.Allocas = &Tracker;
+    }
+    ~AllocaTrackerRAII() { CGF.Allocas = OldTracker; }
+
+    llvm::SmallVector<llvm::AllocaInst *> Take() { return Tracker.Take(); }
+
+  private:
+    CodeGenFunction &CGF;
+    AllocaTracker *OldTracker;
+    AllocaTracker Tracker;
+  };
+
   /// CreateTempAlloca - This creates an alloca and inserts it into the entry
   /// block if \p ArraySize is nullptr, otherwise inserts it at the current
   /// insertion point of the builder. The caller is responsible for setting an
@@ -2738,6 +2830,21 @@
   /// expression and compare the result against zero, returning an Int1Ty value.
   llvm::Value *EvaluateExprAsBool(const Expr *E);
 
+  /// Retrieve the implicit cast expression of the RHS in a binary operator
+  /// expression by passing pointers to Value and QualType.
+  /// This is used for implicit bitfield conversion checks, which
+  /// must compare with the value before potential truncation.
+  llvm::Value *EmitWithOriginalRHSBitfieldAssignment(const BinaryOperator *E,
+                                                     llvm::Value **Previous,
+                                                     QualType *SrcType);
+
+  /// Emit a check for an [implicit] conversion of a bitfield. Such a
+  /// conversion is not UB, so the check uses the value after the conversion.
+  void EmitBitfieldConversionCheck(llvm::Value *Src, QualType SrcType,
+                                   llvm::Value *Dst, QualType DstType,
+                                   const CGBitFieldInfo &Info,
+                                   SourceLocation Loc);
+
   /// EmitIgnoredExpr - Emit an expression in a context which ignores the result.
   void EmitIgnoredExpr(const Expr *E);
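Why that check must use the value after the conversion: assigning to a bitfield truncates, and the truncation is well-defined behavior, so the only way to detect a changed value is to compare the original RHS against what the bitfield actually holds afterwards. A rough standalone model (hypothetical helper, not the clang codegen itself):

#include <cstdio>

struct S {
  unsigned v : 3; // holds 0..7
};

// Returns true when the implicit bitfield conversion changed the value.
// The comparison deliberately happens after the (well-defined) truncation.
bool bitfieldAssignmentChangedValue(S &s, unsigned rhs) {
  s.v = rhs;         // implicit truncation to 3 bits
  return s.v != rhs; // post-conversion value vs. original RHS
}

int main() {
  S s{};
  std::printf("%d\n", bitfieldAssignmentChangedValue(s, 5));  // 0: 5 fits
  std::printf("%d\n", bitfieldAssignmentChangedValue(s, 12)); // 1: 12 -> 4
  return 0;
}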
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 9a352247c3f27..5c4feb4994431 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2195,6 +2195,14 @@ void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD,
   llvm::AttributeList PAL;
   ConstructAttributeList(F->getName(), Info, GD, PAL, CallingConv,
                          /*AttrOnCallSite=*/false, IsThunk);
+  if (CallingConv == llvm::CallingConv::X86_VectorCall &&
+      getTarget().getTriple().isWindowsArm64EC()) {
+    SourceLocation Loc;
+    if (const Decl *D = GD.getDecl())
+      Loc = D->getLocation();
+
+    Error(Loc, "__vectorcall calling convention is not currently supported");
+  }
   F->setAttributes(PAL);
   F->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));
 }
diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp
index a1e14c5f0a8c7..da689ee6a13d7 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.cpp
+++ b/clang/lib/CodeGen/CodeGenTBAA.cpp
@@ -22,6 +22,7 @@
 #include "clang/AST/Mangle.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/Basic/CodeGenOptions.h"
+#include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/LLVMContext.h"
@@ -97,8 +98,6 @@ static bool TypeHasMayAlias(QualType QTy) {
 
 /// Check if the given type is a valid base type to be used in access tags.
 static bool isValidBaseType(QualType QTy) {
-  if (QTy->isReferenceType())
-    return false;
   if (const RecordType *TTy = QTy->getAs<RecordType>()) {
     const RecordDecl *RD = TTy->getDecl()->getDefinition();
     // Incomplete types are not valid base access types.
@@ -242,9 +241,10 @@ llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) {
   // aggregate will result into the may-alias access descriptor, meaning all
   // subsequent accesses to direct and indirect members of that aggregate will
   // be considered may-alias too.
-  // TODO: Combine getTypeInfo() and getBaseTypeInfo() into a single function.
+  // TODO: Combine getTypeInfo() and getValidBaseTypeInfo() into a single
+  // function.
   if (isValidBaseType(QTy))
-    return getBaseTypeInfo(QTy);
+    return getValidBaseTypeInfo(QTy);
 
   const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
   if (llvm::MDNode *N = MetadataCache[Ty])
@@ -319,7 +319,13 @@ CodeGenTBAA::CollectFields(uint64_t BaseOffset,
       // base type.
       if ((*i)->isBitField()) {
         const CGBitFieldInfo &Info = CGRL.getBitFieldInfo(*i);
-        if (Info.Offset != 0)
+        // For big-endian targets the first bitfield in the consecutive run is
+        // at the most-significant end; see CGRecordLowering::setBitFieldInfo
+        // for more information.
+        bool IsBE = Context.getTargetInfo().isBigEndian();
+        bool IsFirst = IsBE ? Info.StorageSize - (Info.Offset + Info.Size) == 0
+                            : Info.Offset == 0;
+        if (!IsFirst)
           continue;
         unsigned CurrentBitFieldSize = Info.StorageSize;
         uint64_t Size =
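The endian-aware test above is easy to sanity-check with concrete numbers. Take a 32-bit storage unit whose first declared bitfield is 4 bits wide: on a little-endian target it sits at bit offset 0, while on a big-endian target it sits at the most-significant end (offset 28), where `StorageSize - (Offset + Size) == 0`. A self-contained replica of the predicate (our helper name, not clang's):

#include <cstdio>

// Mirrors the new check in CollectFields: within a bitfield storage unit,
// the first declared bitfield is at bit offset 0 on little-endian targets,
// but at the most-significant end on big-endian targets.
bool isFirstInRun(bool IsBigEndian, unsigned StorageSize, unsigned Offset,
                  unsigned Size) {
  return IsBigEndian ? StorageSize - (Offset + Size) == 0 : Offset == 0;
}

int main() {
  // A 32-bit unit beginning with `unsigned a : 4;`:
  //   LE layout: a occupies bits [0, 4)   -> Offset == 0
  //   BE layout: a occupies bits [28, 32) -> StorageSize - (Offset+Size) == 0
  std::printf("LE first: %d\n", isFirstInRun(false, 32, 0, 4));    // 1
  std::printf("BE first: %d\n", isFirstInRun(true, 32, 28, 4));    // 1
  std::printf("BE not first: %d\n", isFirstInRun(true, 32, 0, 4)); // 0
  return 0;
}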
@@ -387,7 +393,7 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
       if (BaseRD->isEmpty())
         continue;
       llvm::MDNode *TypeNode = isValidBaseType(BaseQTy)
-                                   ? getBaseTypeInfo(BaseQTy)
+                                   ? getValidBaseTypeInfo(BaseQTy)
                                    : getTypeInfo(BaseQTy);
       if (!TypeNode)
         return nullptr;
@@ -411,8 +417,9 @@
       if (Field->isZeroSize(Context) || Field->isUnnamedBitfield())
         continue;
       QualType FieldQTy = Field->getType();
-      llvm::MDNode *TypeNode = isValidBaseType(FieldQTy) ?
-          getBaseTypeInfo(FieldQTy) : getTypeInfo(FieldQTy);
+      llvm::MDNode *TypeNode = isValidBaseType(FieldQTy)
+                                   ? getValidBaseTypeInfo(FieldQTy)
+                                   : getTypeInfo(FieldQTy);
       if (!TypeNode)
         return nullptr;
@@ -449,9 +456,8 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
   return nullptr;
 }
 
-llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) {
-  if (!isValidBaseType(QTy))
-    return nullptr;
+llvm::MDNode *CodeGenTBAA::getValidBaseTypeInfo(QualType QTy) {
+  assert(isValidBaseType(QTy) && "Must be a valid base type");
 
   const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
 
@@ -470,6 +476,10 @@ llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) {
   return TypeNode;
 }
 
+llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) {
+  return isValidBaseType(QTy) ? getValidBaseTypeInfo(QTy) : nullptr;
+}
+
 llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) {
   assert(!Info.isIncomplete() && "Access to an object of an incomplete type!");
diff --git a/clang/lib/CodeGen/CodeGenTBAA.h b/clang/lib/CodeGen/CodeGenTBAA.h
index aa6da2731a416..5d9ecec3ff0fe 100644
--- a/clang/lib/CodeGen/CodeGenTBAA.h
+++ b/clang/lib/CodeGen/CodeGenTBAA.h
@@ -168,6 +168,10 @@ class CodeGenTBAA {
   /// used to describe accesses to objects of the given base type.
   llvm::MDNode *getBaseTypeInfoHelper(const Type *Ty);
 
+  /// getValidBaseTypeInfo - Return metadata that describes the given base
+  /// access type. The type must be suitable.
+  llvm::MDNode *getValidBaseTypeInfo(QualType QTy);
+
 public:
   CodeGenTBAA(ASTContext &Ctx, CodeGenTypes &CGTypes, llvm::Module &M,
               const CodeGenOptions &CGO, const LangOptions &Features,
@@ -190,8 +194,9 @@ class CodeGenTBAA {
   /// the given type.
   llvm::MDNode *getTBAAStructInfo(QualType QTy);
 
-  /// getBaseTypeInfo - Get metadata that describes the given base access type.
-  /// Return null if the type is not suitable for use in TBAA access tags.
+  /// getBaseTypeInfo - Get metadata that describes the given base access
+  /// type. Return null if the type is not suitable for use in TBAA access
+  /// tags.
   llvm::MDNode *getBaseTypeInfo(QualType QTy);
 
   /// getAccessTagInfo - Get TBAA tag for a given memory access.
diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index 1f7181f373e52..abad6920c7872 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -2146,8 +2146,11 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
         postMerge(Size, Lo, Hi);
         return;
       }
+
+      bool IsInMemory =
+          Offset % getContext().getTypeAlign(i->getType().getCanonicalType());
       // Note, skip this test for bit-fields, see below.
-      if (!BitField && Offset % getContext().getTypeAlign(i->getType())) {
+      if (!BitField && IsInMemory) {
         Lo = Memory;
         postMerge(Size, Lo, Hi);
         return;
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index ea9f9146739fe..8206c609e5805 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2484,6 +2484,12 @@ void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const {
   // Print out the install directory.
   OS << "InstalledDir: " << Dir << '\n';
 
+  // Print the build config if it's non-default.
+  // Intended to help LLVM developers understand the configs of compilers
+  // they're investigating.
+  if (!llvm::cl::getCompilerBuildConfig().empty())
+    llvm::cl::printBuildConfig(OS);
+
   // If configuration files were used, print their paths.
for (auto ConfigFile : ConfigFiles) OS << "Configuration file: " << ConfigFile << '\n'; diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index f3f957c99fb4d..5948bc34d61d1 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -874,7 +874,13 @@ ToolChain::getTargetSubDirPath(StringRef BaseDir) const { std::optional ToolChain::getRuntimePath() const { SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "lib"); - return getTargetSubDirPath(P); + if (auto Ret = getTargetSubDirPath(P)) + return Ret; + // Darwin does not use per-target runtime directory. + if (Triple.isOSDarwin()) + return {}; + llvm::sys::path::append(P, Triple.str()); + return std::string(P); } std::optional ToolChain::getStdlibPath() const { diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index 7a62b0f9aec41..3f10888596a29 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -17,6 +17,8 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Path.h" +#include + using AIX = clang::driver::toolchains::AIX; using namespace clang::driver; using namespace clang::driver::tools; diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index 3e6e29584df3a..2cd2b35ee51bc 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -402,9 +402,6 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, if (Args.hasArg(options::OPT_ffixed_x28)) Features.push_back("+reserve-x28"); - if (Args.hasArg(options::OPT_ffixed_x30)) - Features.push_back("+reserve-x30"); - if (Args.hasArg(options::OPT_fcall_saved_x8)) Features.push_back("+call-saved-x8"); diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 2746a308b7575..a0c5dd5ed9b7b 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -1153,7 +1153,10 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, } for (Arg *A : Args) { - DAL->append(A); + // Make sure flags are not duplicated. 
+    if (!llvm::is_contained(*DAL, A)) {
+      DAL->append(A);
+    }
   }
 
   if (!BoundArch.empty()) {
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 70daa699e3a94..2c83f70eb7887 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -35,27 +35,18 @@ static void addDashXForInput(const ArgList &Args, const InputInfo &Input,
 
 void Flang::addFortranDialectOptions(const ArgList &Args,
                                      ArgStringList &CmdArgs) const {
-  Args.addAllArgs(CmdArgs, {options::OPT_ffixed_form,
-                            options::OPT_ffree_form,
-                            options::OPT_ffixed_line_length_EQ,
-                            options::OPT_fopenmp,
-                            options::OPT_fopenmp_version_EQ,
-                            options::OPT_fopenacc,
-                            options::OPT_finput_charset_EQ,
-                            options::OPT_fimplicit_none,
-                            options::OPT_fno_implicit_none,
-                            options::OPT_fbackslash,
-                            options::OPT_fno_backslash,
-                            options::OPT_flogical_abbreviations,
-                            options::OPT_fno_logical_abbreviations,
-                            options::OPT_fxor_operator,
-                            options::OPT_fno_xor_operator,
-                            options::OPT_falternative_parameter_statement,
-                            options::OPT_fdefault_real_8,
-                            options::OPT_fdefault_integer_8,
-                            options::OPT_fdefault_double_8,
-                            options::OPT_flarge_sizes,
-                            options::OPT_fno_automatic});
+  Args.addAllArgs(
+      CmdArgs, {options::OPT_ffixed_form, options::OPT_ffree_form,
+                options::OPT_ffixed_line_length_EQ, options::OPT_fopenacc,
+                options::OPT_finput_charset_EQ, options::OPT_fimplicit_none,
+                options::OPT_fno_implicit_none, options::OPT_fbackslash,
+                options::OPT_fno_backslash, options::OPT_flogical_abbreviations,
+                options::OPT_fno_logical_abbreviations,
+                options::OPT_fxor_operator, options::OPT_fno_xor_operator,
+                options::OPT_falternative_parameter_statement,
+                options::OPT_fdefault_real_8, options::OPT_fdefault_integer_8,
+                options::OPT_fdefault_double_8, options::OPT_flarge_sizes,
+                options::OPT_fno_automatic});
 }
 
 void Flang::addPreprocessingOptions(const ArgList &Args,
@@ -763,6 +754,35 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
   // Add other compile options
   addOtherOptions(Args, CmdArgs);
 
+  // Forward flags for OpenMP. We don't do this if the current action is a
+  // device offloading action other than OpenMP.
+  if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
+                   options::OPT_fno_openmp, false) &&
+      (JA.isDeviceOffloading(Action::OFK_None) ||
+       JA.isDeviceOffloading(Action::OFK_OpenMP))) {
+    switch (D.getOpenMPRuntime(Args)) {
+    case Driver::OMPRT_OMP:
+    case Driver::OMPRT_IOMP5:
+      // Clang can generate useful OpenMP code for these two runtime libraries.
+      CmdArgs.push_back("-fopenmp");
+      Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ);
+
+      // FIXME: Clang supports a whole bunch more flags here.
+      break;
+    default:
+      // By default, if Clang doesn't know how to generate useful OpenMP code
+      // for a specific runtime library, we just don't pass the '-fopenmp' flag
+      // down to the actual compilation.
+      // FIXME: It would be better to have a mode which *only* omits IR
+      // generation based on the OpenMP support so that we get consistent
+      // semantic analysis, etc.
+ const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ); + D.Diag(diag::warn_drv_unsupported_openmp_library) + << A->getSpelling() << A->getValue(); + break; + } + } + // Offloading related options addOffloadOptions(C, Inputs, JA, Args, CmdArgs); diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 036f7e6a4efc1..f430d3764babe 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -404,7 +404,7 @@ bool FormatTokenLexer::tryMergeNullishCoalescingEqual() { return false; auto &NullishCoalescing = *(Tokens.end() - 2); auto &Equal = *(Tokens.end() - 1); - if (NullishCoalescing->getType() != TT_NullCoalescingOperator || + if (NullishCoalescing->isNot(TT_NullCoalescingOperator) || Equal->isNot(tok::equal)) { return false; } diff --git a/clang/lib/Format/FormatTokenSource.h b/clang/lib/Format/FormatTokenSource.h index cce19f527a923..2b93f302d3603 100644 --- a/clang/lib/Format/FormatTokenSource.h +++ b/clang/lib/Format/FormatTokenSource.h @@ -72,6 +72,15 @@ class FormatTokenSource { // getNextToken() -> a1 // getNextToken() -> a2 virtual FormatToken *insertTokens(ArrayRef Tokens) = 0; + + [[nodiscard]] FormatToken *getNextNonComment() { + FormatToken *Tok; + do { + Tok = getNextToken(); + assert(Tok); + } while (Tok->is(tok::comment)); + return Tok; + } }; class IndexedTokenSource : public FormatTokenSource { diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 3e9988d509455..628f70417866c 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -825,8 +825,7 @@ class AnnotatingParser { Parent->overwriteFixedType(TT_BinaryOperator); } // An arrow after an ObjC method expression is not a lambda arrow. - if (CurrentToken->getType() == TT_ObjCMethodExpr && - CurrentToken->Next && + if (CurrentToken->is(TT_ObjCMethodExpr) && CurrentToken->Next && CurrentToken->Next->is(TT_TrailingReturnArrow)) { CurrentToken->Next->overwriteFixedType(TT_Unknown); } @@ -1563,7 +1562,7 @@ class AnnotatingParser { case tok::l_brace: if (Style.Language == FormatStyle::LK_TextProto) { FormatToken *Previous = Tok->getPreviousNonComment(); - if (Previous && Previous->getType() != TT_DictLiteral) + if (Previous && Previous->isNot(TT_DictLiteral)) Previous->setType(TT_SelectorName); } Scopes.push_back(getScopeType(*Tok)); @@ -1583,7 +1582,7 @@ class AnnotatingParser { Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) { Tok->setType(TT_DictLiteral); FormatToken *Previous = Tok->getPreviousNonComment(); - if (Previous && Previous->getType() != TT_DictLiteral) + if (Previous && Previous->isNot(TT_DictLiteral)) Previous->setType(TT_SelectorName); } if (Style.isTableGen()) @@ -2355,7 +2354,8 @@ class AnnotatingParser { // Line.MightBeFunctionDecl can only be true after the parentheses of a // function declaration have been found. In this case, 'Current' is a // trailing token of this declaration and thus cannot be a name. - if (Current.is(Keywords.kw_instanceof)) { + if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && + Current.is(Keywords.kw_instanceof)) { Current.setType(TT_BinaryOperator); } else if (isStartOfName(Current) && (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) { @@ -4754,8 +4754,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, // Objective-C dictionary literal -> no space before closing brace. 
return false; } - if (Right.getType() == TT_TrailingAnnotation && - Right.isOneOf(tok::amp, tok::ampamp) && + if (Right.is(TT_TrailingAnnotation) && Right.isOneOf(tok::amp, tok::ampamp) && Left.isOneOf(tok::kw_const, tok::kw_volatile) && (!Right.Next || Right.Next->is(tok::semi))) { // Match const and volatile ref-qualifiers without any additional diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 98ae1c8f62bbc..c1f7e2874beb2 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -365,11 +365,11 @@ bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, nextToken(); continue; } - tok::TokenKind kind = FormatTok->Tok.getKind(); - if (FormatTok->getType() == TT_MacroBlockBegin) - kind = tok::l_brace; - else if (FormatTok->getType() == TT_MacroBlockEnd) - kind = tok::r_brace; + tok::TokenKind Kind = FormatTok->Tok.getKind(); + if (FormatTok->is(TT_MacroBlockBegin)) + Kind = tok::l_brace; + else if (FormatTok->is(TT_MacroBlockEnd)) + Kind = tok::r_brace; auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] { @@ -380,7 +380,7 @@ bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, assert(StatementCount > 0 && "StatementCount overflow!"); }; - switch (kind) { + switch (Kind) { case tok::comment: nextToken(); addUnwrappedLine(); @@ -427,11 +427,7 @@ bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, break; case tok::kw_default: { unsigned StoredPosition = Tokens->getPosition(); - FormatToken *Next; - do { - Next = Tokens->getNextToken(); - assert(Next); - } while (Next->is(tok::comment)); + auto *Next = Tokens->getNextNonComment(); FormatTok = Tokens->setPosition(StoredPosition); if (Next->isNot(tok::colon)) { // default not followed by ':' is not a case label; treat it like @@ -495,20 +491,19 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { }; SmallVector LBraceStack; assert(Tok->is(tok::l_brace)); + do { - // Get next non-comment, non-preprocessor token. - FormatToken *NextTok; - do { - NextTok = Tokens->getNextToken(); - } while (NextTok->is(tok::comment)); - if (!Style.isTableGen()) { - // InTableGen, '#' is like binary operator. Not a preprocessor directive. - while (NextTok->is(tok::hash) && !Line->InMacroBody) { - NextTok = Tokens->getNextToken(); + auto *NextTok = Tokens->getNextNonComment(); + + if (!Line->InMacroBody && !Style.isTableGen()) { + // Skip PPDirective lines and comments. + while (NextTok->is(tok::hash)) { do { NextTok = Tokens->getNextToken(); - } while (NextTok->is(tok::comment) || - (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof))); + } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); + + while (NextTok->is(tok::comment)) + NextTok = Tokens->getNextToken(); } } @@ -543,16 +538,6 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { if (Style.Language == FormatStyle::LK_Proto) { ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); } else { - // Skip NextTok over preprocessor lines, otherwise we may not - // properly diagnose the block as a braced intializer - // if the comma separator appears after the pp directive. - while (NextTok->is(tok::hash)) { - ScopedMacroState MacroState(*Line, Tokens, NextTok); - do { - NextTok = Tokens->getNextToken(); - } while (NextTok->isNot(tok::eof)); - } - // Using OriginalColumn to distinguish between ObjC methods and // binary operators is a bit hacky. 
bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && @@ -611,6 +596,16 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { NextTok = Tokens->getNextToken(); ProbablyBracedList = NextTok->isNot(tok::l_square); } + + // Cpp macro definition body that is a nonempty braced list or block: + if (IsCpp && Line->InMacroBody && PrevTok != FormatTok && + !FormatTok->Previous && NextTok->is(tok::eof) && + // A statement can end with only `;` (simple statement), a block + // closing brace (compound statement), or `:` (label statement). + // If PrevTok is a block opening brace, Tok ends an empty block. + !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) { + ProbablyBracedList = true; + } } if (ProbablyBracedList) { Tok->setBlockKind(BK_BracedInit); @@ -640,6 +635,7 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { default: break; } + PrevTok = Tok; Tok = NextTok; } while (Tok->isNot(tok::eof) && !LBraceStack.empty()); @@ -3277,8 +3273,8 @@ void UnwrappedLineParser::parseSwitch() { } // Operators that can follow a C variable. -static bool isCOperatorFollowingVar(tok::TokenKind kind) { - switch (kind) { +static bool isCOperatorFollowingVar(tok::TokenKind Kind) { + switch (Kind) { case tok::ampamp: case tok::ampequal: case tok::arrow: @@ -4706,14 +4702,13 @@ void UnwrappedLineParser::readToken(int LevelDifference) { do { FormatTok = Tokens->getNextToken(); assert(FormatTok); - while (FormatTok->getType() == TT_ConflictStart || - FormatTok->getType() == TT_ConflictEnd || - FormatTok->getType() == TT_ConflictAlternative) { - if (FormatTok->getType() == TT_ConflictStart) + while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd, + TT_ConflictAlternative)) { + if (FormatTok->is(TT_ConflictStart)) conditionalCompilationStart(/*Unreachable=*/false); - else if (FormatTok->getType() == TT_ConflictAlternative) + else if (FormatTok->is(TT_ConflictAlternative)) conditionalCompilationAlternative(); - else if (FormatTok->getType() == TT_ConflictEnd) + else if (FormatTok->is(TT_ConflictEnd)) conditionalCompilationEnd(); FormatTok = Tokens->getNextToken(); FormatTok->MustBreakBefore = true; diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index d06c42d5f4c5c..4f822807dd987 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -473,8 +473,7 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, Style.ReferenceAlignment != FormatStyle::RAS_Right && Style.ReferenceAlignment != FormatStyle::RAS_Pointer; for (int Previous = i - 1; - Previous >= 0 && - Changes[Previous].Tok->getType() == TT_PointerOrReference; + Previous >= 0 && Changes[Previous].Tok->is(TT_PointerOrReference); --Previous) { assert(Changes[Previous].Tok->isPointerOrReference()); if (Changes[Previous].Tok->isNot(tok::star)) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 4cfc8db4af3f1..de830c2d2a085 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -533,10 +533,10 @@ static T extractMaskValue(T KeyPath) { #define PARSE_OPTION_WITH_MARSHALLING( \ ARGS, DIAGS, PREFIX_TYPE, SPELLING, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES, SHOULD_PARSE, \ - ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, \ - NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ - if ((VISIBILITY)&options::CC1Option) { \ + FLAGS, 
VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ + SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \ + IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ + if ((VISIBILITY) & options::CC1Option) { \ KEYPATH = MERGER(KEYPATH, DEFAULT_VALUE); \ if (IMPLIED_CHECK) \ KEYPATH = MERGER(KEYPATH, IMPLIED_VALUE); \ @@ -550,10 +550,10 @@ static T extractMaskValue(T KeyPath) { // with lifetime extension of the reference. #define GENERATE_OPTION_WITH_MARSHALLING( \ CONSUMER, PREFIX_TYPE, SPELLING, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES, SHOULD_PARSE, ALWAYS_EMIT, \ - KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, \ - DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ - if ((VISIBILITY)&options::CC1Option) { \ + VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ + SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \ + IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ + if ((VISIBILITY) & options::CC1Option) { \ [&](const auto &Extracted) { \ if (ALWAYS_EMIT || \ (Extracted != \ diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index 0bc26b694cfc8..642b14d8b09d9 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -69,10 +69,7 @@ void InitOnlyAction::ExecuteAction() { // Basically PreprocessOnlyAction::ExecuteAction. void ReadPCHAndPreprocessAction::ExecuteAction() { - CompilerInstance &CI = getCompilerInstance(); - AdjustCI(CI); - - Preprocessor &PP = CI.getPreprocessor(); + Preprocessor &PP = getCompilerInstance().getPreprocessor(); // Ignore unknown pragmas. PP.IgnorePragmas(); @@ -453,6 +450,8 @@ class DefaultTemplateInstCallback : public TemplateInstantiationCallback { return "BuildingBuiltinDumpStructCall"; case CodeSynthesisContext::BuildingDeductionGuides: return "BuildingDeductionGuides"; + case CodeSynthesisContext::TypeAliasTemplateInstantiation: + return "TypeAliasTemplateInstantiation"; } return ""; } @@ -1191,8 +1190,6 @@ void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() { void GetDependenciesByModuleNameAction::ExecuteAction() { CompilerInstance &CI = getCompilerInstance(); - AdjustCI(CI); - Preprocessor &PP = CI.getPreprocessor(); SourceManager &SM = PP.getSourceManager(); FileID MainFileID = SM.getMainFileID(); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index f8b4154ea63db..207077139aa30 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -753,10 +753,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, if (LangOpts.CPlusPlus20) { Builder.defineMacro("__cpp_aggregate_paren_init", "201902L"); - // P0848 is implemented, but we're still waiting for other concepts - // issues to be addressed before bumping __cpp_concepts up to 202002L. - // Refer to the discussion of this at https://reviews.llvm.org/D128619. 
-    Builder.defineMacro("__cpp_concepts", "201907L");
+    Builder.defineMacro("__cpp_concepts", "202002L");
     Builder.defineMacro("__cpp_conditional_explicit", "201806L");
     Builder.defineMacro("__cpp_consteval", "202211L");
     Builder.defineMacro("__cpp_constexpr_dynamic_alloc", "201907L");
diff --git a/clang/lib/Headers/__stddef_unreachable.h b/clang/lib/Headers/__stddef_unreachable.h
index 518580c92d3f5..61df43e9732f8 100644
--- a/clang/lib/Headers/__stddef_unreachable.h
+++ b/clang/lib/Headers/__stddef_unreachable.h
@@ -7,6 +7,8 @@
  *===-----------------------------------------------------------------------===
  */
 
+#ifndef __cplusplus
+
 /*
  * When -fbuiltin-headers-in-system-modules is set this is a non-modular header
  * and needs to behave as if it was textual.
@@ -15,3 +17,5 @@
     (__has_feature(modules) && !__building_module(_Builtin_stddef))
 #define unreachable() __builtin_unreachable()
 #endif
+
+#endif
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 9fb6204f90c9a..06409c6fc7741 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -100,6 +100,118 @@ double3 abs(double3);
 _HLSL_BUILTIN_ALIAS(__builtin_elementwise_abs)
 double4 abs(double4);
 
+//===----------------------------------------------------------------------===//
+// all builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn bool all(T x)
+/// \brief Returns True if all components of the \a x parameter are non-zero;
+/// otherwise, false.
+/// \param x The input value.
+
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(int16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(int16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(int16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(int16_t4);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(uint16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(uint16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(uint16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(uint16_t4);
+#endif
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(bool);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(bool2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(bool3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(bool4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(int);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(int2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all)
+bool all(int3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(float); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(float2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(float3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(float4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(int64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint64_t); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint64_t2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint64_t3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(uint64_t4); + +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(double); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(double2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(double3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_all) +bool all(double4); + //===----------------------------------------------------------------------===// // any builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index fd27955fbe002..7eb6dceaabfae 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -18,7 +18,7 @@ #include /* First include the standard intrinsics. */ -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__)) #include #endif @@ -26,7 +26,7 @@ #include #endif -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(__arm64ec__) #include #endif @@ -166,7 +166,7 @@ unsigned __int32 xbegin(void); void _xend(void); /* These additional intrinsics are turned on in x64/amd64/x86_64 mode. 
*/ -#ifdef __x86_64__ +#if defined(__x86_64__) && !defined(__arm64ec__) void __addgsbyte(unsigned long, unsigned char); void __addgsdword(unsigned long, unsigned long); void __addgsqword(unsigned long, unsigned __int64); @@ -236,7 +236,8 @@ __int64 _mul128(__int64, __int64, __int64 *); /*----------------------------------------------------------------------------*\ |* movs, stos \*----------------------------------------------------------------------------*/ -#if defined(__i386__) || defined(__x86_64__) + +#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__)) static __inline__ void __DEFAULT_FN_ATTRS __movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) { @@ -305,7 +306,7 @@ static __inline__ void __DEFAULT_FN_ATTRS __stosw(unsigned short *__dst, : "memory"); } #endif -#ifdef __x86_64__ +#if defined(__x86_64__) && !defined(__arm64ec__) static __inline__ void __DEFAULT_FN_ATTRS __movsq( unsigned long long *__dst, unsigned long long const *__src, size_t __n) { __asm__ __volatile__("rep movsq" @@ -324,7 +325,7 @@ static __inline__ void __DEFAULT_FN_ATTRS __stosq(unsigned __int64 *__dst, /*----------------------------------------------------------------------------*\ |* Misc \*----------------------------------------------------------------------------*/ -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__)) static __inline__ void __DEFAULT_FN_ATTRS __halt(void) { __asm__ volatile("hlt"); } @@ -339,7 +340,7 @@ static __inline__ void __DEFAULT_FN_ATTRS __nop(void) { /*----------------------------------------------------------------------------*\ |* MS AArch64 specific \*----------------------------------------------------------------------------*/ -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(__arm64ec__) unsigned __int64 __getReg(int); long _InterlockedAdd(long volatile *Addend, long Value); __int64 _InterlockedAdd64(__int64 volatile *Addend, __int64 Value); @@ -383,7 +384,7 @@ void __cdecl __prefetch(void *); /*----------------------------------------------------------------------------*\ |* Privileged intrinsics \*----------------------------------------------------------------------------*/ -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__)) static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS __readmsr(unsigned long __register) { // Loads the contents of a 64-bit model specific register (MSR) specified in @@ -397,7 +398,6 @@ __readmsr(unsigned long __register) { __asm__ ("rdmsr" : "=d"(__edx), "=a"(__eax) : "c"(__register)); return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax; } -#endif static __inline__ unsigned __LPTRINT_TYPE__ __DEFAULT_FN_ATTRS __readcr3(void) { unsigned __LPTRINT_TYPE__ __cr3_val; @@ -413,6 +413,7 @@ static __inline__ void __DEFAULT_FN_ATTRS __writecr3(unsigned __INTPTR_TYPE__ __cr3_val) { __asm__ ("mov {%0, %%cr3|cr3, %0}" : : "r"(__cr3_val) : "memory"); } +#endif #ifdef __cplusplus } diff --git a/clang/lib/Headers/intrin0.h b/clang/lib/Headers/intrin0.h index 31f362ec84d5c..866c8896617d2 100644 --- a/clang/lib/Headers/intrin0.h +++ b/clang/lib/Headers/intrin0.h @@ -15,7 +15,7 @@ #ifndef __INTRIN0_H #define __INTRIN0_H -#ifdef __x86_64__ +#if defined(__x86_64__) && !defined(__arm64ec__) #include #endif @@ -27,7 +27,7 @@ unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); void 
_ReadWriteBarrier(void);
 
-#if defined(__aarch64__)
+#if defined(__aarch64__) || defined(__arm64ec__)
 unsigned int _CountLeadingZeros(unsigned long);
 unsigned int _CountLeadingZeros64(unsigned __int64);
 unsigned char _InterlockedCompareExchange128_acq(__int64 volatile *_Destination,
@@ -44,7 +44,7 @@ unsigned char _InterlockedCompareExchange128_rel(__int64 volatile *_Destination,
                                                  __int64 *_ComparandResult);
 #endif
 
-#ifdef __x86_64__
+#if defined(__x86_64__) && !defined(__arm64ec__)
 unsigned __int64 _umul128(unsigned __int64, unsigned __int64,
                           unsigned __int64 *);
 unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,
@@ -55,7 +55,7 @@ unsigned __int64 __shiftright128(unsigned __int64 _LowPart,
                                  unsigned char _Shift);
 #endif
 
-#if defined(__x86_64__) || defined(__i386__)
+#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
 void _mm_pause(void);
 #endif
 
@@ -83,7 +83,7 @@ __int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask);
 __int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);
 #endif
 
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__) || defined(__arm64ec__)
 /*----------------------------------------------------------------------------*\
 |* Interlocked Exchange Add
 \*----------------------------------------------------------------------------*/
diff --git a/clang/lib/InstallAPI/CMakeLists.txt b/clang/lib/InstallAPI/CMakeLists.txt
index e0bc8d969ecb3..b36493942300b 100644
--- a/clang/lib/InstallAPI/CMakeLists.txt
+++ b/clang/lib/InstallAPI/CMakeLists.txt
@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
 )
 
 add_clang_library(clangInstallAPI
+  DiagnosticBuilderWrappers.cpp
   DylibVerifier.cpp
   FileList.cpp
   Frontend.cpp
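A note on the guard style used throughout these headers: combining conditions requires `#if defined(...)`, because `#ifdef` accepts exactly one macro name, so a `&& !defined(...)` clause appended to an `#ifdef` line is ill-formed and never takes effect. A small demonstration with hypothetical macros:

#include <cstdio>

#define TARGET_X86_64 1

int main(void) {
  // Correct: the preprocessor evaluates the whole constant expression.
#if defined(TARGET_X86_64) && !defined(TARGET_ARM64EC)
  std::puts("x86-64 without Arm64EC");
#endif
  // Ill-formed alternative (do not write this):
  //   #ifdef TARGET_X86_64 && !defined(TARGET_ARM64EC)
  // #ifdef takes a single identifier; the trailing tokens are invalid.
  return 0;
}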
diff --git a/clang/lib/InstallAPI/DiagnosticBuilderWrappers.cpp b/clang/lib/InstallAPI/DiagnosticBuilderWrappers.cpp
new file mode 100644
index 0000000000000..cc252d51e3b67
--- /dev/null
+++ b/clang/lib/InstallAPI/DiagnosticBuilderWrappers.cpp
@@ -0,0 +1,110 @@
+//===- DiagnosticBuilderWrappers.cpp ----------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DiagnosticBuilderWrappers.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TextAPI/Platform.h"
+
+using clang::DiagnosticBuilder;
+
+namespace llvm {
+namespace MachO {
+const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
+                                    const Architecture &Arch) {
+  DB.AddString(getArchitectureName(Arch));
+  return DB;
+}
+
+const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
+                                    const ArchitectureSet &ArchSet) {
+  DB.AddString(std::string(ArchSet));
+  return DB;
+}
+
+const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
+                                    const PlatformType &Platform) {
+  DB.AddString(getPlatformName(Platform));
+  return DB;
+}
+
+const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
+                                    const PlatformVersionSet &Platforms) {
+  std::string PlatformAsString;
+  raw_string_ostream Stream(PlatformAsString);
+
+  Stream << "[ ";
+  llvm::interleaveComma(
+      Platforms, Stream,
+      [&Stream](const std::pair<PlatformType, VersionTuple> &PV) {
+        Stream << getPlatformName(PV.first);
+        if (!PV.second.empty())
+          Stream << PV.second.getAsString();
+      });
+  Stream << " ]";
+  DB.AddString(Stream.str());
+  return DB;
+}
+
+const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
+                                    const FileType &Type) {
+  switch (Type) {
+  case FileType::MachO_Bundle:
+    DB.AddString("mach-o bundle");
+    return DB;
+  case FileType::MachO_DynamicLibrary:
+    DB.AddString("mach-o dynamic library");
+    return DB;
+  case FileType::MachO_DynamicLibrary_Stub:
+    DB.AddString("mach-o dynamic library stub");
+    return DB;
+  case FileType::TBD_V1:
+    DB.AddString("tbd-v1");
+    return DB;
+  case FileType::TBD_V2:
+    DB.AddString("tbd-v2");
+    return DB;
+  case FileType::TBD_V3:
+    DB.AddString("tbd-v3");
+    return DB;
+  case FileType::TBD_V4:
+    DB.AddString("tbd-v4");
+    return DB;
+  case FileType::TBD_V5:
+    DB.AddString("tbd-v5");
+    return DB;
+  case FileType::Invalid:
+  case FileType::All:
+    break;
+  }
+  llvm_unreachable("Unexpected file type for diagnostics.");
+}
+
+const DiagnosticBuilder &operator<<(const DiagnosticBuilder &DB,
+                                    const PackedVersion &Version) {
+  std::string VersionString;
+  raw_string_ostream OS(VersionString);
+  OS << Version;
+  DB.AddString(OS.str());
+  return DB;
+}
+
+const clang::DiagnosticBuilder &
+operator<<(const clang::DiagnosticBuilder &DB,
+           const StringMapEntry<ArchitectureSet> &LibAttr) {
+  std::string IFAsString;
+  raw_string_ostream OS(IFAsString);
+
+  OS << LibAttr.getKey() << " [ " << LibAttr.getValue() << " ]";
+  DB.AddString(OS.str());
+  return DB;
+}
+
+} // namespace MachO
+} // namespace llvm
diff --git a/clang/lib/InstallAPI/DiagnosticBuilderWrappers.h b/clang/lib/InstallAPI/DiagnosticBuilderWrappers.h
new file mode 100644
index 0000000000000..48cfefbf65e6b
--- /dev/null
+++ b/clang/lib/InstallAPI/DiagnosticBuilderWrappers.h
@@ -0,0 +1,49 @@
+//===- DiagnosticBuilderWrappers.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Diagnostic wrappers for TextAPI types for error reporting.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_INSTALLAPI_DIAGNOSTICBUILDER_WRAPPER_H +#define LLVM_CLANG_INSTALLAPI_DIAGNOSTICBUILDER_WRAPPER_H + +#include "clang/Basic/Diagnostic.h" +#include "llvm/TextAPI/Architecture.h" +#include "llvm/TextAPI/ArchitectureSet.h" +#include "llvm/TextAPI/InterfaceFile.h" +#include "llvm/TextAPI/Platform.h" + +namespace llvm { +namespace MachO { + +const clang::DiagnosticBuilder &operator<<(const clang::DiagnosticBuilder &DB, + const PlatformType &Platform); + +const clang::DiagnosticBuilder &operator<<(const clang::DiagnosticBuilder &DB, + const PlatformVersionSet &Platforms); + +const clang::DiagnosticBuilder &operator<<(const clang::DiagnosticBuilder &DB, + const Architecture &Arch); + +const clang::DiagnosticBuilder &operator<<(const clang::DiagnosticBuilder &DB, + const ArchitectureSet &ArchSet); + +const clang::DiagnosticBuilder &operator<<(const clang::DiagnosticBuilder &DB, + const FileType &Type); + +const clang::DiagnosticBuilder &operator<<(const clang::DiagnosticBuilder &DB, + const PackedVersion &Version); + +const clang::DiagnosticBuilder & +operator<<(const clang::DiagnosticBuilder &DB, + const StringMapEntry &LibAttr); + +} // namespace MachO +} // namespace llvm +#endif // LLVM_CLANG_INSTALLAPI_DIAGNOSTICBUILDER_WRAPPER_H diff --git a/clang/lib/InstallAPI/DylibVerifier.cpp b/clang/lib/InstallAPI/DylibVerifier.cpp index c0eda1d81b9b9..4fa2d4e9292c7 100644 --- a/clang/lib/InstallAPI/DylibVerifier.cpp +++ b/clang/lib/InstallAPI/DylibVerifier.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "clang/InstallAPI/DylibVerifier.h" +#include "DiagnosticBuilderWrappers.h" #include "clang/InstallAPI/FrontendRecords.h" #include "clang/InstallAPI/InstallAPIDiagnostic.h" #include "llvm/Demangle/Demangle.h" @@ -178,6 +179,22 @@ bool DylibVerifier::shouldIgnoreObsolete(const Record *R, SymbolContext &SymCtx, return SymCtx.FA->Avail.isObsoleted(); } +bool DylibVerifier::shouldIgnoreReexport(const Record *R, + SymbolContext &SymCtx) const { + if (Reexports.empty()) + return false; + + for (const InterfaceFile &Lib : Reexports) { + if (!Lib.hasTarget(Ctx.Target)) + continue; + if (auto Sym = + Lib.getSymbol(SymCtx.Kind, SymCtx.SymbolName, SymCtx.ObjCIFKind)) + if ((*Sym)->hasTarget(Ctx.Target)) + return true; + } + return false; +} + bool DylibVerifier::compareObjCInterfaceSymbols(const Record *R, SymbolContext &SymCtx, const ObjCInterfaceRecord *DR) { @@ -197,16 +214,16 @@ bool DylibVerifier::compareObjCInterfaceSymbols(const Record *R, StringRef SymName, bool PrintAsWarning = false) { if (SymLinkage == RecordLinkage::Unknown) Ctx.emitDiag([&]() { - Ctx.Diag->Report(SymCtx.FA->D->getLocation(), - PrintAsWarning ? diag::warn_library_missing_symbol - : diag::err_library_missing_symbol) + Ctx.Diag->Report(SymCtx.FA->Loc, PrintAsWarning + ? diag::warn_library_missing_symbol + : diag::err_library_missing_symbol) << SymName; }); else Ctx.emitDiag([&]() { - Ctx.Diag->Report(SymCtx.FA->D->getLocation(), - PrintAsWarning ? diag::warn_library_hidden_symbol - : diag::err_library_hidden_symbol) + Ctx.Diag->Report(SymCtx.FA->Loc, PrintAsWarning + ? 
diag::warn_library_hidden_symbol + : diag::err_library_hidden_symbol) << SymName; }); }; @@ -253,16 +270,14 @@ DylibVerifier::Result DylibVerifier::compareVisibility(const Record *R, if (R->isExported()) { if (!DR) { Ctx.emitDiag([&]() { - Ctx.Diag->Report(SymCtx.FA->D->getLocation(), - diag::err_library_missing_symbol) + Ctx.Diag->Report(SymCtx.FA->Loc, diag::err_library_missing_symbol) << getAnnotatedName(R, SymCtx); }); return Result::Invalid; } if (DR->isInternal()) { Ctx.emitDiag([&]() { - Ctx.Diag->Report(SymCtx.FA->D->getLocation(), - diag::err_library_hidden_symbol) + Ctx.Diag->Report(SymCtx.FA->Loc, diag::err_library_hidden_symbol) << getAnnotatedName(R, SymCtx); }); return Result::Invalid; @@ -289,8 +304,7 @@ DylibVerifier::Result DylibVerifier::compareVisibility(const Record *R, Outcome = Result::Invalid; } Ctx.emitDiag([&]() { - Ctx.Diag->Report(SymCtx.FA->D->getLocation(), ID) - << getAnnotatedName(R, SymCtx); + Ctx.Diag->Report(SymCtx.FA->Loc, ID) << getAnnotatedName(R, SymCtx); }); return Outcome; } @@ -312,15 +326,13 @@ DylibVerifier::Result DylibVerifier::compareAvailability(const Record *R, switch (Mode) { case VerificationMode::ErrorsAndWarnings: Ctx.emitDiag([&]() { - Ctx.Diag->Report(SymCtx.FA->D->getLocation(), - diag::warn_header_availability_mismatch) + Ctx.Diag->Report(SymCtx.FA->Loc, diag::warn_header_availability_mismatch) << getAnnotatedName(R, SymCtx) << IsDeclAvailable << IsDeclAvailable; }); return Result::Ignore; case VerificationMode::Pedantic: Ctx.emitDiag([&]() { - Ctx.Diag->Report(SymCtx.FA->D->getLocation(), - diag::err_header_availability_mismatch) + Ctx.Diag->Report(SymCtx.FA->Loc, diag::err_header_availability_mismatch) << getAnnotatedName(R, SymCtx) << IsDeclAvailable << IsDeclAvailable; }); return Result::Invalid; @@ -336,16 +348,14 @@ bool DylibVerifier::compareSymbolFlags(const Record *R, SymbolContext &SymCtx, const Record *DR) { if (DR->isThreadLocalValue() && !R->isThreadLocalValue()) { Ctx.emitDiag([&]() { - Ctx.Diag->Report(SymCtx.FA->D->getLocation(), - diag::err_dylib_symbol_flags_mismatch) + Ctx.Diag->Report(SymCtx.FA->Loc, diag::err_dylib_symbol_flags_mismatch) << getAnnotatedName(DR, SymCtx) << DR->isThreadLocalValue(); }); return false; } if (!DR->isThreadLocalValue() && R->isThreadLocalValue()) { Ctx.emitDiag([&]() { - Ctx.Diag->Report(SymCtx.FA->D->getLocation(), - diag::err_header_symbol_flags_mismatch) + Ctx.Diag->Report(SymCtx.FA->Loc, diag::err_header_symbol_flags_mismatch) << getAnnotatedName(R, SymCtx) << R->isThreadLocalValue(); }); return false; @@ -353,16 +363,14 @@ bool DylibVerifier::compareSymbolFlags(const Record *R, SymbolContext &SymCtx, if (DR->isWeakDefined() && !R->isWeakDefined()) { Ctx.emitDiag([&]() { - Ctx.Diag->Report(SymCtx.FA->D->getLocation(), - diag::err_dylib_symbol_flags_mismatch) + Ctx.Diag->Report(SymCtx.FA->Loc, diag::err_dylib_symbol_flags_mismatch) << getAnnotatedName(DR, SymCtx) << R->isWeakDefined(); }); return false; } if (!DR->isWeakDefined() && R->isWeakDefined()) { Ctx.emitDiag([&]() { - Ctx.Diag->Report(SymCtx.FA->D->getLocation(), - diag::err_header_symbol_flags_mismatch) + Ctx.Diag->Report(SymCtx.FA->Loc, diag::err_header_symbol_flags_mismatch) << getAnnotatedName(R, SymCtx) << R->isWeakDefined(); }); return false; @@ -383,6 +391,11 @@ DylibVerifier::Result DylibVerifier::verifyImpl(Record *R, return Ctx.FrontendState; } + if (shouldIgnoreReexport(R, SymCtx)) { + updateState(Result::Ignore); + return Ctx.FrontendState; + } + Record *DR = findRecordFromSlice(Ctx.DylibSlice, SymCtx.SymbolName, 
SymCtx.Kind); if (DR) @@ -465,6 +478,14 @@ void DylibVerifier::setTarget(const Target &T) { assignSlice(T); } +void DylibVerifier::setSourceManager( + IntrusiveRefCntPtr SourceMgr) { + if (!Ctx.Diag) + return; + SourceManagers.push_back(std::move(SourceMgr)); + Ctx.Diag->setSourceManager(SourceManagers.back().get()); +} + DylibVerifier::Result DylibVerifier::verify(ObjCIVarRecord *R, const FrontendAttrs *FA, const StringRef SuperClass) { @@ -702,5 +723,179 @@ DylibVerifier::Result DylibVerifier::verifyRemainingSymbols() { return getState(); } +bool DylibVerifier::verifyBinaryAttrs(const ArrayRef ProvidedTargets, + const BinaryAttrs &ProvidedBA, + const LibAttrs &ProvidedReexports, + const LibAttrs &ProvidedClients, + const LibAttrs &ProvidedRPaths, + const FileType &FT) { + assert(!Dylib.empty() && "Need dylib to verify."); + + // Pickup any load commands that can differ per slice to compare. + TargetList DylibTargets; + LibAttrs DylibReexports; + LibAttrs DylibClients; + LibAttrs DylibRPaths; + for (const std::shared_ptr &RS : Dylib) { + DylibTargets.push_back(RS->getTarget()); + const BinaryAttrs &BinInfo = RS->getBinaryAttrs(); + for (const StringRef LibName : BinInfo.RexportedLibraries) + DylibReexports[LibName].set(DylibTargets.back().Arch); + for (const StringRef LibName : BinInfo.AllowableClients) + DylibClients[LibName].set(DylibTargets.back().Arch); + // Compare attributes that are only representable in >= TBD_V5. + if (FT >= FileType::TBD_V5) + for (const StringRef Name : BinInfo.RPaths) + DylibRPaths[Name].set(DylibTargets.back().Arch); + } + + // Check targets first. + ArchitectureSet ProvidedArchs = mapToArchitectureSet(ProvidedTargets); + ArchitectureSet DylibArchs = mapToArchitectureSet(DylibTargets); + if (ProvidedArchs != DylibArchs) { + Ctx.Diag->Report(diag::err_architecture_mismatch) + << ProvidedArchs << DylibArchs; + return false; + } + auto ProvidedPlatforms = mapToPlatformVersionSet(ProvidedTargets); + auto DylibPlatforms = mapToPlatformVersionSet(DylibTargets); + if (ProvidedPlatforms != DylibPlatforms) { + const bool DiffMinOS = + mapToPlatformSet(ProvidedTargets) == mapToPlatformSet(DylibTargets); + if (DiffMinOS) + Ctx.Diag->Report(diag::warn_platform_mismatch) + << ProvidedPlatforms << DylibPlatforms; + else { + Ctx.Diag->Report(diag::err_platform_mismatch) + << ProvidedPlatforms << DylibPlatforms; + return false; + } + } + + // Because InstallAPI requires certain attributes to match across architecture + // slices, take the first one to compare those with. + const BinaryAttrs &DylibBA = (*Dylib.begin())->getBinaryAttrs(); + + if (ProvidedBA.InstallName != DylibBA.InstallName) { + Ctx.Diag->Report(diag::err_install_name_mismatch) + << ProvidedBA.InstallName << DylibBA.InstallName; + return false; + } + + if (ProvidedBA.CurrentVersion != DylibBA.CurrentVersion) { + Ctx.Diag->Report(diag::err_current_version_mismatch) + << ProvidedBA.CurrentVersion << DylibBA.CurrentVersion; + return false; + } + + if (ProvidedBA.CompatVersion != DylibBA.CompatVersion) { + Ctx.Diag->Report(diag::err_compatibility_version_mismatch) + << ProvidedBA.CompatVersion << DylibBA.CompatVersion; + return false; + } + + if (ProvidedBA.AppExtensionSafe != DylibBA.AppExtensionSafe) { + Ctx.Diag->Report(diag::err_appextension_safe_mismatch) + << (ProvidedBA.AppExtensionSafe ? "true" : "false") + << (DylibBA.AppExtensionSafe ? 
"true" : "false"); + return false; + } + + if (!DylibBA.TwoLevelNamespace) { + Ctx.Diag->Report(diag::err_no_twolevel_namespace); + return false; + } + + if (ProvidedBA.OSLibNotForSharedCache != DylibBA.OSLibNotForSharedCache) { + Ctx.Diag->Report(diag::err_shared_cache_eligiblity_mismatch) + << (ProvidedBA.OSLibNotForSharedCache ? "true" : "false") + << (DylibBA.OSLibNotForSharedCache ? "true" : "false"); + return false; + } + + if (ProvidedBA.ParentUmbrella.empty() && !DylibBA.ParentUmbrella.empty()) { + Ctx.Diag->Report(diag::err_parent_umbrella_missing) + << "installAPI option" << DylibBA.ParentUmbrella; + return false; + } + + if (!ProvidedBA.ParentUmbrella.empty() && DylibBA.ParentUmbrella.empty()) { + Ctx.Diag->Report(diag::err_parent_umbrella_missing) + << "binary file" << ProvidedBA.ParentUmbrella; + return false; + } + + if ((!ProvidedBA.ParentUmbrella.empty()) && + (ProvidedBA.ParentUmbrella != DylibBA.ParentUmbrella)) { + Ctx.Diag->Report(diag::err_parent_umbrella_mismatch) + << ProvidedBA.ParentUmbrella << DylibBA.ParentUmbrella; + return false; + } + + auto CompareLibraries = [&](const LibAttrs &Provided, const LibAttrs &Dylib, + unsigned DiagID_missing, unsigned DiagID_mismatch, + bool Fatal = true) { + if (Provided == Dylib) + return true; + + for (const llvm::StringMapEntry &PAttr : Provided) { + const auto DAttrIt = Dylib.find(PAttr.getKey()); + if (DAttrIt == Dylib.end()) { + Ctx.Diag->Report(DiagID_missing) << "binary file" << PAttr; + if (Fatal) + return false; + } + + if (PAttr.getValue() != DAttrIt->getValue()) { + Ctx.Diag->Report(DiagID_mismatch) << PAttr << *DAttrIt; + if (Fatal) + return false; + } + } + + for (const llvm::StringMapEntry &DAttr : Dylib) { + const auto PAttrIt = Provided.find(DAttr.getKey()); + if (PAttrIt == Provided.end()) { + Ctx.Diag->Report(DiagID_missing) << "installAPI option" << DAttr; + if (!Fatal) + continue; + return false; + } + + if (PAttrIt->getValue() != DAttr.getValue()) { + if (Fatal) + llvm_unreachable("this case was already covered above."); + } + } + return true; + }; + + if (!CompareLibraries(ProvidedReexports, DylibReexports, + diag::err_reexported_libraries_missing, + diag::err_reexported_libraries_mismatch)) + return false; + + if (!CompareLibraries(ProvidedClients, DylibClients, + diag::err_allowable_clients_missing, + diag::err_allowable_clients_mismatch)) + return false; + + if (FT >= FileType::TBD_V5) { + // Ignore rpath differences if building an asan variant, since the + // compiler injects additional paths. + // FIXME: Building with sanitizers does not always change the install + // name, so this is not a foolproof solution. 
+ if (!ProvidedBA.InstallName.ends_with("_asan")) { + if (!CompareLibraries(ProvidedRPaths, DylibRPaths, + diag::warn_rpaths_missing, + diag::warn_rpaths_mismatch, + /*Fatal=*/false)) + return true; + } + } + + return true; +} + } // namespace installapi } // namespace clang diff --git a/clang/lib/InstallAPI/Frontend.cpp b/clang/lib/InstallAPI/Frontend.cpp index e07ccb14e0b80..04d06f46d2652 100644 --- a/clang/lib/InstallAPI/Frontend.cpp +++ b/clang/lib/InstallAPI/Frontend.cpp @@ -23,7 +23,8 @@ std::pair<GlobalRecord *, FrontendAttrs *> FrontendRecordsSlice::addGlobal( GlobalRecord *GR = llvm::MachO::RecordsSlice::addGlobal(Name, Linkage, GV, Flags, Inlined); - auto Result = FrontendRecords.insert({GR, FrontendAttrs{Avail, D, Access}}); + auto Result = FrontendRecords.insert( + {GR, FrontendAttrs{Avail, D, D->getLocation(), Access}}); return {GR, &(Result.first->second)}; } @@ -39,8 +40,8 @@ FrontendRecordsSlice::addObjCInterface(StringRef Name, RecordLinkage Linkage, ObjCInterfaceRecord *ObjCR = llvm::MachO::RecordsSlice::addObjCInterface(Name, Linkage, SymType); - auto Result = - FrontendRecords.insert({ObjCR, FrontendAttrs{Avail, D, Access}}); + auto Result = FrontendRecords.insert( + {ObjCR, FrontendAttrs{Avail, D, D->getLocation(), Access}}); return {ObjCR, &(Result.first->second)}; } @@ -51,8 +52,8 @@ FrontendRecordsSlice::addObjCCategory(StringRef ClassToExtend, const Decl *D, HeaderType Access) { ObjCCategoryRecord *ObjCR = llvm::MachO::RecordsSlice::addObjCCategory(ClassToExtend, CategoryName); - auto Result = - FrontendRecords.insert({ObjCR, FrontendAttrs{Avail, D, Access}}); + auto Result = FrontendRecords.insert( + {ObjCR, FrontendAttrs{Avail, D, D->getLocation(), Access}}); return {ObjCR, &(Result.first->second)}; } @@ -67,8 +68,8 @@ std::pair<ObjCIVarRecord *, FrontendAttrs *> FrontendRecordsSlice::addObjCIVar( Linkage = RecordLinkage::Internal; ObjCIVarRecord *ObjCR = llvm::MachO::RecordsSlice::addObjCIVar(Container, IvarName, Linkage); - auto Result = - FrontendRecords.insert({ObjCR, FrontendAttrs{Avail, D, Access}}); + auto Result = FrontendRecords.insert( + {ObjCR, FrontendAttrs{Avail, D, D->getLocation(), Access}}); return {ObjCR, &(Result.first->second)}; } @@ -162,4 +163,58 @@ std::unique_ptr<llvm::MemoryBuffer> createInputBuffer(InstallAPIContext &Ctx) { return llvm::MemoryBuffer::getMemBufferCopy(Contents, BufferName); } +std::string findLibrary(StringRef InstallName, FileManager &FM, + ArrayRef<std::string> FrameworkSearchPaths, + ArrayRef<std::string> LibrarySearchPaths, + ArrayRef<std::string> SearchPaths) { + auto getLibrary = + [&](const StringRef FullPath) -> std::optional<std::string> { + // Prefer TextAPI files when possible. + SmallString<PATH_MAX> TextAPIFilePath = FullPath; + replace_extension(TextAPIFilePath, ".tbd"); + + if (FM.getOptionalFileRef(TextAPIFilePath)) + return std::string(TextAPIFilePath); + + if (FM.getOptionalFileRef(FullPath)) + return std::string(FullPath); + + return std::nullopt; + }; + + const StringRef Filename = sys::path::filename(InstallName); + const bool IsFramework = sys::path::parent_path(InstallName) + .ends_with((Filename + ".framework").str()); + if (IsFramework) { + for (const StringRef Path : FrameworkSearchPaths) { + SmallString<PATH_MAX> FullPath(Path); + sys::path::append(FullPath, Filename + StringRef(".framework"), Filename); + if (auto LibOrNull = getLibrary(FullPath)) + return *LibOrNull; + } + } else { + // Copy Apple's linker behavior: If this is a .dylib inside a framework, do + // not search -L paths. 
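// (Editor's aside, hypothetical install name: a path such as
// "/System/Library/PrivateFrameworks/Foo.framework/libHelper.dylib" ends in
// ".dylib" yet contains ".framework/", so IsEmbeddedDylib is true, the
// -L LibrarySearchPaths loop below is skipped, and only the final
// SearchPaths probe can locate it.)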
+ bool IsEmbeddedDylib = (sys::path::extension(InstallName) == ".dylib") && + InstallName.contains(".framework/"); + if (!IsEmbeddedDylib) { + for (const StringRef Path : LibrarySearchPaths) { + SmallString<PATH_MAX> FullPath(Path); + sys::path::append(FullPath, Filename); + if (auto LibOrNull = getLibrary(FullPath)) + return *LibOrNull; + } + } + } + + for (const StringRef Path : SearchPaths) { + SmallString<PATH_MAX> FullPath(Path); + sys::path::append(FullPath, InstallName); + if (auto LibOrNull = getLibrary(FullPath)) + return *LibOrNull; + } + + return {}; +} + } // namespace clang::installapi diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index fcc2b56df166b..7dffcf0e941e0 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -1307,6 +1307,23 @@ OptionalFileEntryRef HeaderSearch::LookupSubframeworkHeader( // File Info Management. //===----------------------------------------------------------------------===// +static void mergeHeaderFileInfoModuleBits(HeaderFileInfo &HFI, + bool isModuleHeader, + bool isTextualModuleHeader) { + assert((!isModuleHeader || !isTextualModuleHeader) && + "A header can't build with a module and be textual at the same time"); + HFI.isModuleHeader |= isModuleHeader; + if (HFI.isModuleHeader) + HFI.isTextualModuleHeader = false; + else + HFI.isTextualModuleHeader |= isTextualModuleHeader; +} + +void HeaderFileInfo::mergeModuleMembership(ModuleMap::ModuleHeaderRole Role) { + mergeHeaderFileInfoModuleBits(*this, ModuleMap::isModular(Role), + (Role & ModuleMap::TextualHeader)); +} + /// Merge the header file info provided by \p OtherHFI into the current /// header file info (\p HFI) static void mergeHeaderFileInfo(HeaderFileInfo &HFI, @@ -1315,7 +1332,8 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI, HFI.isImport |= OtherHFI.isImport; HFI.isPragmaOnce |= OtherHFI.isPragmaOnce; - HFI.isModuleHeader |= OtherHFI.isModuleHeader; + mergeHeaderFileInfoModuleBits(HFI, OtherHFI.isModuleHeader, + OtherHFI.isTextualModuleHeader); if (!HFI.ControllingMacro && !HFI.ControllingMacroID) { HFI.ControllingMacro = OtherHFI.ControllingMacro; @@ -1403,11 +1421,9 @@ bool HeaderSearch::isFileMultipleIncludeGuarded(FileEntryRef File) const { void HeaderSearch::MarkFileModuleHeader(FileEntryRef FE, ModuleMap::ModuleHeaderRole Role, bool isCompilingModuleHeader) { - bool isModularHeader = ModuleMap::isModular(Role); - // Don't mark the file info as non-external if there's nothing to change. if (!isCompilingModuleHeader) { - if (!isModularHeader) + if ((Role & ModuleMap::ExcludedHeader)) return; auto *HFI = getExistingFileInfo(FE); if (HFI && HFI->isModuleHeader) @@ -1415,7 +1431,7 @@ void HeaderSearch::MarkFileModuleHeader(FileEntryRef FE, } auto &HFI = getFileInfo(FE); - HFI.isModuleHeader |= isModularHeader; + HFI.mergeModuleMembership(Role); HFI.isCompilingModuleHeader |= isCompilingModuleHeader; } @@ -1423,74 +1439,128 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP, FileEntryRef File, bool isImport, bool ModulesEnabled, Module *M, bool &IsFirstIncludeOfFile) { - ++NumIncluded; // Count # of attempted #includes. - + // An include file should be entered if either: + // 1. This is the first include of the file. + // 2. This file can be included multiple times, that is, it's not an + // "include-once" file. + // + // Include-once is controlled by these preprocessor directives. + // + // #pragma once + // This directive is in the include file, and marks it as an include-once + // file. 
+ // + // #import + // This directive is in the includer, and indicates that the include file + // should only be entered if this is the first include. ++NumIncluded; IsFirstIncludeOfFile = false; - - // Get information about this file. HeaderFileInfo &FileInfo = getFileInfo(File); - // FIXME: this is a workaround for the lack of proper modules-aware support - // for #import / #pragma once - auto TryEnterImported = [&]() -> bool { - if (!ModulesEnabled) + auto MaybeReenterImportedFile = [&]() -> bool { + // Modules add a wrinkle though: what's included isn't necessarily visible. + // Consider this module. + // module Example { + // module A { header "a.h" export * } + // module B { header "b.h" export * } + // } + // b.h includes c.h. The main file includes a.h, which will trigger a module + // build of Example, and c.h will be included. However, c.h isn't visible to + // the main file. Normally this is fine, the main file can just include c.h + // if it needs it. If c.h is in a module, the include will translate into a + // module import, this function will be skipped, and everything will work as + // expected. However, if c.h is not in a module (or is `textual`), then this + // function will run. If c.h is include-once, it will not be entered from + // the main file and it will still not be visible. + + // If modules aren't enabled then there's no visibility issue. Always + // respect `#pragma once`. + if (!ModulesEnabled || FileInfo.isPragmaOnce) return false; + // Ensure FileInfo bits are up to date. ModMap.resolveHeaderDirectives(File); - // Modules with builtins are special; multiple modules use builtins as - // modular headers, example: - // - // module stddef { header "stddef.h" export * } - // - // After module map parsing, this expands to: - // - // module stddef { - // header "/path_to_builtin_dirs/stddef.h" - // textual "stddef.h" - // } + + // This brings up a subtlety of #import - it's not a very good indicator of + // include-once. Developers are often unaware of the difference between + // #include and #import, and tend to use one or the other indiscriminately. + // In order to support #include on include-once headers that lack macro + // guards and `#pragma once` (which is the vast majority of Objective-C + // headers), if a file is ever included with #import, it's marked as + // isImport in the HeaderFileInfo and treated as include-once. This allows + // #include to work in Objective-C. + // #include <Foundation/Foundation.h> + // #include <Foundation/NSString.h> + // Foundation.h has an #import of NSString.h, and so the second #include is + // skipped even though NSString.h has no `#pragma once` and no macro guard. // - // It's common that libc++ and system modules will both define such - // submodules. Make sure cached results for a builtin header won't - // prevent other builtin modules from potentially entering the builtin - // header. Note that builtins are header guarded and the decision to - // actually enter them is postponed to the controlling macros logic below. - bool TryEnterHdr = false; - if (FileInfo.isCompilingModuleHeader && FileInfo.isModuleHeader) - TryEnterHdr = ModMap.isBuiltinHeader(File); - - // Textual headers can be #imported from different modules. Since ObjC - // headers found in the wild might rely only on #import and do not contain - // controlling macros, be conservative and only try to enter textual headers - // if such macro is present. 
- if (!FileInfo.isModuleHeader && - FileInfo.getControllingMacro(ExternalLookup)) - TryEnterHdr = true; - return TryEnterHdr; + // However, this helpfulness causes problems with modules. If c.h is not an + // include-once file, but something included it with #import anyway (as is + // typical in Objective-C code), this include will be skipped and c.h will + // not be visible. Consider it not include-once if it is a `textual` header + // in a module. + if (FileInfo.isTextualModuleHeader) + return true; + + if (FileInfo.isCompilingModuleHeader) { + // It's safer to re-enter a file whose module is being built because its + // declarations will still be scoped to a single module. + if (FileInfo.isModuleHeader) { + // Headers marked as "builtin" are covered by the system module maps + // rather than the builtin ones. Some versions of the Darwin module fail + // to mark stdarg.h and stddef.h as textual. Attempt to re-enter these + // files while building their module to allow them to function properly. + if (ModMap.isBuiltinHeader(File)) + return true; + } else { + // Files that are excluded from their module can potentially be + // re-entered from their own module. This might cause redeclaration + // errors if another module saw this file first, but there's a + // reasonable chance that its module will build first. However if + // there's no controlling macro, then trust the #import and assume this + // really is an include-once file. + if (FileInfo.getControllingMacro(ExternalLookup)) + return true; + } + } + // If the include file has a macro guard, then it might still not be + // re-entered if the controlling macro is visibly defined. e.g. another + // header in the module being built included this file and local submodule + // visibility is not enabled. + + // It might be tempting to re-enter the include-once file if it's not + // visible in an attempt to make it visible. However this will still cause + // redeclaration errors against the known-but-not-visible declarations. The + // include file not being visible will most likely cause "undefined x" + // errors, but at least there's a slim chance of compilation succeeding. + return false; }; - // If this is a #import directive, check that we have not already imported - // this header. if (isImport) { - // If this has already been imported, don't import it again. + // As discussed above, record that this file was ever `#import`ed, and treat + // it as an include-once file from here out. FileInfo.isImport = true; - - // Has this already been #import'ed or #include'd? - if (PP.alreadyIncluded(File) && !TryEnterImported()) + if (PP.alreadyIncluded(File) && !MaybeReenterImportedFile()) return false; } else { - // Otherwise, if this is a #include of a file that was previously #import'd - // or if this is the second #include of a #pragma once file, ignore it. - if ((FileInfo.isPragmaOnce || FileInfo.isImport) && !TryEnterImported()) + // isPragmaOnce and isImport are only set after the file has been included + // at least once. If either are set then this is a repeat #include of an + // include-once file. + if (FileInfo.isPragmaOnce || + (FileInfo.isImport && !MaybeReenterImportedFile())) return false; } - // Next, check to see if the file is wrapped with #ifndef guards. If so, and - // if the macro that guards it is defined, we know the #include has no effect. 
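// (Editor's illustration of the include-once rules above, with a
// hypothetical unguarded header NSThing.h:
//   #import "NSThing.h"    // entered; FileInfo.isImport is set
//   #include "NSThing.h"   // skipped: isImport marks it include-once,
//                          // unless MaybeReenterImportedFile() allows it.)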
- if (const IdentifierInfo *ControllingMacro = FileInfo.getControllingMacro(ExternalLookup)) { + // As a final optimization, check for a macro guard and skip entering the file + // if the controlling macro is defined. The macro guard will effectively erase + // the file's contents, and the include would have no effect other than to + // waste time opening and reading a file. + if (const IdentifierInfo *ControllingMacro = + FileInfo.getControllingMacro(ExternalLookup)) { // If the header corresponds to a module, check whether the macro is already - // defined in that module rather than checking in the current set of visible - // modules. + // defined in that module rather than checking all visible modules. This is + // mainly to cover corner cases where the same controlling macro is used in + // different files in multiple modules. if (M ? PP.isMacroDefinedInLocalModule(ControllingMacro, M) : PP.isMacroDefined(ControllingMacro)) { ++NumMultiIncludeFileOptzn; @@ -1499,7 +1569,6 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP, } IsFirstIncludeOfFile = PP.markIncluded(File); - return true; } diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index a0cc2b516574c..3b1b6df1dbae4 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -93,10 +93,16 @@ bool Preprocessor::EnterSourceFile(FileID FID, ConstSearchDirIterator CurDir, } Lexer *TheLexer = new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile); - if (DependencyDirectivesForFile && FID != PredefinesFileID) - if (OptionalFileEntryRef File = SourceMgr.getFileEntryRefForID(FID)) - if (auto DepDirectives = DependencyDirectivesForFile(*File)) + if (getPreprocessorOpts().DependencyDirectivesForFile && + FID != PredefinesFileID) { + if (OptionalFileEntryRef File = SourceMgr.getFileEntryRefForID(FID)) { + if (std::optional<ArrayRef<dependency_directives_scan::Directive>> + DepDirectives = + getPreprocessorOpts().DependencyDirectivesForFile(*File)) { TheLexer->DepDirectives = *DepDirectives; + } + } + } EnterSourceFileWithLexer(TheLexer, CurDir); return false; diff --git a/clang/lib/Parse/ParseHLSL.cpp b/clang/lib/Parse/ParseHLSL.cpp index 4fc6a2203cec3..5afc958600fa5 100644 --- a/clang/lib/Parse/ParseHLSL.cpp +++ b/clang/lib/Parse/ParseHLSL.cpp @@ -15,6 +15,7 @@ #include "clang/Parse/ParseDiagnostic.h" #include "clang/Parse/Parser.h" #include "clang/Parse/RAIIObjectsForParser.h" +#include "clang/Sema/SemaHLSL.h" using namespace clang; @@ -71,9 +72,9 @@ Decl *Parser::ParseHLSLBuffer(SourceLocation &DeclEnd) { return nullptr; } - Decl *D = Actions.ActOnStartHLSLBuffer(getCurScope(), IsCBuffer, BufferLoc, - Identifier, IdentifierLoc, - T.getOpenLocation()); + Decl *D = Actions.HLSL().ActOnStartHLSLBuffer( + getCurScope(), IsCBuffer, BufferLoc, Identifier, IdentifierLoc, + T.getOpenLocation()); while (Tok.isNot(tok::r_brace) && Tok.isNot(tok::eof)) { // FIXME: support attribute on constants inside cbuffer/tbuffer. 
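// (Editor's aside: the construct being parsed here looks like the HLSL
// source
//   cbuffer Constants : register(b0) { float4 Color; }
// ActOnStartHLSLBuffer is invoked once the brace is open and
// ActOnFinishHLSLBuffer on close or on error recovery, both now routed
// through the new SemaHLSL component.)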
@@ -87,7 +88,7 @@ Decl *Parser::ParseHLSLBuffer(SourceLocation &DeclEnd) { T.skipToEnd(); DeclEnd = T.getCloseLocation(); BufferScope.Exit(); - Actions.ActOnFinishHLSLBuffer(D, DeclEnd); + Actions.HLSL().ActOnFinishHLSLBuffer(D, DeclEnd); return nullptr; } } @@ -95,7 +96,7 @@ Decl *Parser::ParseHLSLBuffer(SourceLocation &DeclEnd) { T.consumeClose(); DeclEnd = T.getCloseLocation(); BufferScope.Exit(); - Actions.ActOnFinishHLSLBuffer(D, DeclEnd); + Actions.HLSL().ActOnFinishHLSLBuffer(D, DeclEnd); Actions.ProcessDeclAttributeList(Actions.CurScope, D, Attrs); return D; diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index 07dd2ba0106a4..f434e1542c801 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "clang/AST/OpenACCClause.h" #include "clang/Basic/OpenACCKinds.h" #include "clang/Parse/ParseDiagnostic.h" #include "clang/Parse/Parser.h" @@ -582,12 +583,26 @@ unsigned getOpenACCScopeFlags(OpenACCDirectiveKind DirKind) { } // namespace +Parser::OpenACCClauseParseResult Parser::OpenACCCanContinue() { + return {nullptr, OpenACCParseCanContinue::Can}; +} + +Parser::OpenACCClauseParseResult Parser::OpenACCCannotContinue() { + return {nullptr, OpenACCParseCanContinue::Cannot}; +} + +Parser::OpenACCClauseParseResult Parser::OpenACCSuccess(OpenACCClause *Clause) { + return {Clause, OpenACCParseCanContinue::Can}; +} + // OpenACC 3.3, section 1.7: // To simplify the specification and convey appropriate constraint information, // a pqr-list is a comma-separated list of pqr items. The one exception is a // clause-list, which is a list of one or more clauses optionally separated by // commas. -void Parser::ParseOpenACCClauseList(OpenACCDirectiveKind DirKind) { +SmallVector<OpenACCClause *> +Parser::ParseOpenACCClauseList(OpenACCDirectiveKind DirKind) { + SmallVector<OpenACCClause *> Clauses; bool FirstClause = true; while (getCurToken().isNot(tok::annot_pragma_openacc_end)) { // Comma is optional in a clause-list. @@ -595,13 +610,17 @@ void Parser::ParseOpenACCClauseList(OpenACCDirectiveKind DirKind) { ConsumeToken(); FirstClause = false; - // Recovering from a bad clause is really difficult, so we just give up on - // error. - if (ParseOpenACCClause(DirKind)) { + OpenACCClauseParseResult Result = ParseOpenACCClause(Clauses, DirKind); + if (OpenACCClause *Clause = Result.getPointer()) { + Clauses.push_back(Clause); + } else if (Result.getInt() == OpenACCParseCanContinue::Cannot) { + // Recovering from a bad clause is really difficult, so we just give up on + // error. SkipUntilEndOfDirective(*this); - return; + return Clauses; } } + return Clauses; } ExprResult Parser::ParseOpenACCIntExpr() { @@ -762,42 +781,48 @@ bool Parser::ParseOpenACCGangArgList() { // really have its own grammar and each individual one has its own definition. // However, they all are named with a single-identifier (or auto/default!) // token, followed in some cases by either braces or parens. -bool Parser::ParseOpenACCClause(OpenACCDirectiveKind DirKind) { +Parser::OpenACCClauseParseResult +Parser::ParseOpenACCClause(ArrayRef<const OpenACCClause *> ExistingClauses, + OpenACCDirectiveKind DirKind) { // A number of clause names are actually keywords, so accept a keyword that // can be converted to a name. 
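// (Editor's aside: 'if', 'default', 'private', and 'auto' are OpenACC
// clause names that lex as C/C++ keywords rather than identifiers, e.g.
//   #pragma acc kernels if(cond) default(none)
// so a plain identifier check would reject valid clause names.)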
if (expectIdentifierOrKeyword(*this)) - return true; + return OpenACCCannotContinue(); OpenACCClauseKind Kind = getOpenACCClauseKind(getCurToken()); - if (Kind == OpenACCClauseKind::Invalid) - return Diag(getCurToken(), diag::err_acc_invalid_clause) - << getCurToken().getIdentifierInfo(); + if (Kind == OpenACCClauseKind::Invalid) { + Diag(getCurToken(), diag::err_acc_invalid_clause) + << getCurToken().getIdentifierInfo(); + return OpenACCCannotContinue(); + } // Consume the clause name. SourceLocation ClauseLoc = ConsumeToken(); - bool Result = ParseOpenACCClauseParams(DirKind, Kind); - getActions().OpenACC().ActOnClause(Kind, ClauseLoc); - return Result; + return ParseOpenACCClauseParams(ExistingClauses, DirKind, Kind, ClauseLoc); } -bool Parser::ParseOpenACCClauseParams(OpenACCDirectiveKind DirKind, - OpenACCClauseKind Kind) { +Parser::OpenACCClauseParseResult Parser::ParseOpenACCClauseParams( + ArrayRef<const OpenACCClause *> ExistingClauses, + OpenACCDirectiveKind DirKind, OpenACCClauseKind ClauseKind, + SourceLocation ClauseLoc) { BalancedDelimiterTracker Parens(*this, tok::l_paren, tok::annot_pragma_openacc_end); + SemaOpenACC::OpenACCParsedClause ParsedClause(DirKind, ClauseKind, ClauseLoc); - if (ClauseHasRequiredParens(DirKind, Kind)) { + if (ClauseHasRequiredParens(DirKind, ClauseKind)) { + ParsedClause.setLParenLoc(getCurToken().getLocation()); if (Parens.expectAndConsume()) { // We are missing a paren, so assume that the person just forgot the // parameter. Return 'false' so we try to continue on and parse the next // clause. SkipUntil(tok::comma, tok::r_paren, tok::annot_pragma_openacc_end, Parser::StopBeforeMatch); - return false; + return OpenACCCanContinue(); } - switch (Kind) { + switch (ClauseKind) { case OpenACCClauseKind::Default: { Token DefKindTok = getCurToken(); @@ -818,34 +843,34 @@ bool Parser::ParseOpenACCClauseParams(OpenACCDirectiveKind DirKind, // this clause list. if (CondExpr.isInvalid()) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; } case OpenACCClauseKind::CopyIn: tryParseAndConsumeSpecialTokenKind( - *this, OpenACCSpecialTokenKind::ReadOnly, Kind); - if (ParseOpenACCClauseVarList(Kind)) { + *this, OpenACCSpecialTokenKind::ReadOnly, ClauseKind); + if (ParseOpenACCClauseVarList(ClauseKind)) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; case OpenACCClauseKind::Create: case OpenACCClauseKind::CopyOut: tryParseAndConsumeSpecialTokenKind(*this, OpenACCSpecialTokenKind::Zero, - Kind); - if (ParseOpenACCClauseVarList(Kind)) { + ClauseKind); + if (ParseOpenACCClauseVarList(ClauseKind)) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; case OpenACCClauseKind::Reduction: // If we're missing a clause-kind (or it is invalid), see if we can parse // the var-list anyway. 
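// (Editor's aside: the shape being recovered here is e.g.
//   #pragma acc parallel reduction(+:sum)
// where '+' is the reduction operator and 'sum' the var-list; even with a
// missing or invalid operator, the var-list parse below is still attempted.)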
ParseReductionOperator(*this); - if (ParseOpenACCClauseVarList(Kind)) { + if (ParseOpenACCClauseVarList(ClauseKind)) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; case OpenACCClauseKind::Self: @@ -868,19 +893,19 @@ bool Parser::ParseOpenACCClauseParams(OpenACCDirectiveKind DirKind, case OpenACCClauseKind::Present: case OpenACCClauseKind::Private: case OpenACCClauseKind::UseDevice: - if (ParseOpenACCClauseVarList(Kind)) { + if (ParseOpenACCClauseVarList(ClauseKind)) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; case OpenACCClauseKind::Collapse: { tryParseAndConsumeSpecialTokenKind(*this, OpenACCSpecialTokenKind::Force, - Kind); + ClauseKind); ExprResult NumLoops = getActions().CorrectDelayedTyposInExpr(ParseConstantExpression()); if (NumLoops.isInvalid()) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; } @@ -888,7 +913,7 @@ bool Parser::ParseOpenACCClauseParams(OpenACCDirectiveKind DirKind, ExprResult BindArg = ParseOpenACCBindClauseArgument(); if (BindArg.isInvalid()) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; } @@ -900,7 +925,7 @@ bool Parser::ParseOpenACCClauseParams(OpenACCDirectiveKind DirKind, ExprResult IntExpr = ParseOpenACCIntExpr(); if (IntExpr.isInvalid()) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; } @@ -912,23 +937,28 @@ bool Parser::ParseOpenACCClauseParams(OpenACCDirectiveKind DirKind, ConsumeToken(); } else if (ParseOpenACCDeviceTypeList()) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; case OpenACCClauseKind::Tile: if (ParseOpenACCSizeExprList()) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; default: llvm_unreachable("Not a required parens type?"); } - return Parens.consumeClose(); - } else if (ClauseHasOptionalParens(DirKind, Kind)) { + ParsedClause.setEndLoc(getCurToken().getLocation()); + + if (Parens.consumeClose()) + return OpenACCCannotContinue(); + + } else if (ClauseHasOptionalParens(DirKind, ClauseKind)) { + ParsedClause.setLParenLoc(getCurToken().getLocation()); if (!Parens.consumeOpen()) { - switch (Kind) { + switch (ClauseKind) { case OpenACCClauseKind::Self: { assert(DirKind != OpenACCDirectiveKind::Update); ExprResult CondExpr = ParseOpenACCConditionalExpr(*this); @@ -936,21 +966,22 @@ bool Parser::ParseOpenACCClauseParams(OpenACCDirectiveKind DirKind, // this clause list. if (CondExpr.isInvalid()) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; } case OpenACCClauseKind::Vector: case OpenACCClauseKind::Worker: { tryParseAndConsumeSpecialTokenKind(*this, - Kind == OpenACCClauseKind::Vector + ClauseKind == + OpenACCClauseKind::Vector ? 
OpenACCSpecialTokenKind::Length : OpenACCSpecialTokenKind::Num, - Kind); + ClauseKind); ExprResult IntExpr = ParseOpenACCIntExpr(); if (IntExpr.isInvalid()) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; } @@ -958,29 +989,32 @@ bool Parser::ParseOpenACCClauseParams(OpenACCDirectiveKind DirKind, ExprResult AsyncArg = ParseOpenACCAsyncArgument(); if (AsyncArg.isInvalid()) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; } case OpenACCClauseKind::Gang: if (ParseOpenACCGangArgList()) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; case OpenACCClauseKind::Wait: if (ParseOpenACCWaitArgument()) { Parens.skipToEnd(); - return false; + return OpenACCCanContinue(); } break; default: llvm_unreachable("Not an optional parens type?"); } - Parens.consumeClose(); + ParsedClause.setEndLoc(getCurToken().getLocation()); + if (Parens.consumeClose()) + return OpenACCCannotContinue(); } } - return false; + return OpenACCSuccess( + Actions.OpenACC().ActOnClause(ExistingClauses, ParsedClause)); } /// OpenACC 3.3 section 2.16: @@ -1204,15 +1238,17 @@ Parser::OpenACCDirectiveParseInfo Parser::ParseOpenACCDirective() { Diag(Tok, diag::err_expected) << tok::l_paren; } - // Parses the list of clauses, if present. - ParseOpenACCClauseList(DirKind); + // Parses the list of clauses, if present, plus set up return value. + OpenACCDirectiveParseInfo ParseInfo{DirKind, StartLoc, SourceLocation{}, + ParseOpenACCClauseList(DirKind)}; assert(Tok.is(tok::annot_pragma_openacc_end) && "Didn't parse all OpenACC Clauses"); - SourceLocation EndLoc = ConsumeAnnotationToken(); - assert(EndLoc.isValid()); + ParseInfo.EndLoc = ConsumeAnnotationToken(); + assert(ParseInfo.EndLoc.isValid() && + "Terminating annotation token not present"); - return OpenACCDirectiveParseInfo{DirKind, StartLoc, EndLoc}; + return ParseInfo; } // Parse OpenACC directive on a declaration. 
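// (Editor's aside: with this change, a directive such as
//   #pragma acc parallel copyin(a[0:n]) num_gangs(8)
// produces an OpenACCDirectiveParseInfo whose Clauses vector holds one
// semantic OpenACCClause node per clause, rather than the clauses being
// parsed and then dropped.)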
@@ -1255,5 +1291,6 @@ StmtResult Parser::ParseOpenACCDirectiveStmt() { } return getActions().OpenACC().ActOnEndStmtDirective( - DirInfo.DirKind, DirInfo.StartLoc, DirInfo.EndLoc, AssocStmt); + DirInfo.DirKind, DirInfo.StartLoc, DirInfo.EndLoc, DirInfo.Clauses, + AssocStmt); } diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index 3759e32c01364..7db3c2f1e2bd7 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -38,6 +38,7 @@ add_clang_library(clangSema SemaAttr.cpp SemaAPINotes.cpp SemaAvailability.cpp + SemaBase.cpp SemaCXXScopeSpec.cpp SemaCast.cpp SemaChecking.cpp diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 066e262bb9d49..09b56fddea882 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -42,6 +42,7 @@ #include "clang/Sema/Scope.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaConsumer.h" +#include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/SemaOpenACC.h" #include "clang/Sema/TemplateDeduction.h" @@ -190,14 +191,16 @@ const uint64_t Sema::MaximumAlignment; Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer, TranslationUnitKind TUKind, CodeCompleteConsumer *CodeCompleter) - : CollectStats(false), TUKind(TUKind), CurFPFeatures(pp.getLangOpts()), - LangOpts(pp.getLangOpts()), PP(pp), Context(ctxt), Consumer(consumer), - Diags(PP.getDiagnostics()), SourceMgr(PP.getSourceManager()), - APINotes(SourceMgr, LangOpts), AnalysisWarnings(*this), - ThreadSafetyDeclCache(nullptr), LateTemplateParser(nullptr), - LateTemplateParserCleanup(nullptr), OpaqueParser(nullptr), - CurContext(nullptr), ExternalSource(nullptr), CurScope(nullptr), - Ident_super(nullptr), OpenACCPtr(std::make_unique<SemaOpenACC>(*this)), + : SemaBase(*this), CollectStats(false), TUKind(TUKind), + CurFPFeatures(pp.getLangOpts()), LangOpts(pp.getLangOpts()), PP(pp), + Context(ctxt), Consumer(consumer), Diags(PP.getDiagnostics()), + SourceMgr(PP.getSourceManager()), APINotes(SourceMgr, LangOpts), + AnalysisWarnings(*this), ThreadSafetyDeclCache(nullptr), + LateTemplateParser(nullptr), LateTemplateParserCleanup(nullptr), + OpaqueParser(nullptr), CurContext(nullptr), ExternalSource(nullptr), + CurScope(nullptr), Ident_super(nullptr), + HLSLPtr(std::make_unique<SemaHLSL>(*this)), + OpenACCPtr(std::make_unique<SemaOpenACC>(*this)), MSPointerToMemberRepresentationMethod( LangOpts.getMSPointerToMemberRepresentationMethod()), MSStructPragmaOn(false), VtorDispStack(LangOpts.getVtorDispMode()), @@ -1655,11 +1658,6 @@ void Sema::EmitCurrentDiagnostic(unsigned DiagID) { PrintContextStack(); } -Sema::SemaDiagnosticBuilder -Sema::Diag(SourceLocation Loc, const PartialDiagnostic &PD, bool DeferHint) { - return Diag(Loc, PD.getDiagID(), DeferHint) << PD; - } - bool Sema::hasUncompilableErrorOccurred() const { if (getDiagnostics().hasUncompilableErrorOccurred()) return true; @@ -2006,30 +2004,6 @@ Sema::targetDiag(SourceLocation Loc, unsigned DiagID, const FunctionDecl *FD) { FD, *this, DeviceDiagnosticReason::All); } -Sema::SemaDiagnosticBuilder Sema::Diag(SourceLocation Loc, unsigned DiagID, - bool DeferHint) { - bool IsError = Diags.getDiagnosticIDs()->isDefaultMappingAsError(DiagID); - bool ShouldDefer = getLangOpts().CUDA && LangOpts.GPUDeferDiag && - DiagnosticIDs::isDeferrable(DiagID) && - (DeferHint || DeferDiags || !IsError); - auto SetIsLastErrorImmediate = [&](bool Flag) { - if (IsError) - IsLastErrorImmediate = Flag; - }; - if (!ShouldDefer) { - SetIsLastErrorImmediate(true); - return 
SemaDiagnosticBuilder(SemaDiagnosticBuilder::K_Immediate, Loc, - DiagID, getCurFunctionDecl(), *this, - DeviceDiagnosticReason::All); - } - - SemaDiagnosticBuilder DB = getLangOpts().CUDAIsDevice - ? CUDADiagIfDeviceCode(Loc, DiagID) - : CUDADiagIfHostCode(Loc, DiagID); - SetIsLastErrorImmediate(DB.isImmediate()); - return DB; -} - void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { if (isUnevaluatedContext() || Ty.isNull()) return; diff --git a/clang/lib/Sema/SemaBase.cpp b/clang/lib/Sema/SemaBase.cpp new file mode 100644 index 0000000000000..3a2f54e8699c4 --- /dev/null +++ b/clang/lib/Sema/SemaBase.cpp @@ -0,0 +1,87 @@ +#include "clang/Sema/SemaBase.h" +#include "clang/Sema/Sema.h" + +namespace clang { + +SemaBase::SemaBase(Sema &S) : SemaRef(S) {} + +ASTContext &SemaBase::getASTContext() const { return SemaRef.Context; } +DiagnosticsEngine &SemaBase::getDiagnostics() const { return SemaRef.Diags; } +const LangOptions &SemaBase::getLangOpts() const { return SemaRef.LangOpts; } + +SemaBase::ImmediateDiagBuilder::~ImmediateDiagBuilder() { + // If we aren't active, there is nothing to do. + if (!isActive()) + return; + + // Otherwise, we need to emit the diagnostic. First clear the diagnostic + // builder itself so it won't emit the diagnostic in its own destructor. + // + // This seems wasteful, in that as written the DiagnosticBuilder dtor will + // do its own needless checks to see if the diagnostic needs to be + // emitted. However, because we take care to ensure that the builder + // objects never escape, a sufficiently smart compiler will be able to + // eliminate that code. + Clear(); + + // Dispatch to Sema to emit the diagnostic. + SemaRef.EmitCurrentDiagnostic(DiagID); +} + +const SemaBase::SemaDiagnosticBuilder & +operator<<(const SemaBase::SemaDiagnosticBuilder &Diag, + const PartialDiagnostic &PD) { + if (Diag.ImmediateDiag) + PD.Emit(*Diag.ImmediateDiag); + else if (Diag.PartialDiagId) + Diag.S.DeviceDeferredDiags[Diag.Fn][*Diag.PartialDiagId].getDiag().second = + PD; + return Diag; +} + +void SemaBase::SemaDiagnosticBuilder::AddFixItHint( + const FixItHint &Hint) const { + if (ImmediateDiag) + ImmediateDiag->AddFixItHint(Hint); + else if (PartialDiagId) + S.DeviceDeferredDiags[Fn][*PartialDiagId].getDiag().second.AddFixItHint( + Hint); +} + +SemaBase::SemaDiagnosticBuilder::DeferredDiagnosticsType & +SemaBase::SemaDiagnosticBuilder::getDeviceDeferredDiags() const { + return S.DeviceDeferredDiags; +} + +Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc, unsigned DiagID, + bool DeferHint) { + bool IsError = + getDiagnostics().getDiagnosticIDs()->isDefaultMappingAsError(DiagID); + bool ShouldDefer = getLangOpts().CUDA && getLangOpts().GPUDeferDiag && + DiagnosticIDs::isDeferrable(DiagID) && + (DeferHint || SemaRef.DeferDiags || !IsError); + auto SetIsLastErrorImmediate = [&](bool Flag) { + if (IsError) + SemaRef.IsLastErrorImmediate = Flag; + }; + if (!ShouldDefer) { + SetIsLastErrorImmediate(true); + return SemaDiagnosticBuilder(SemaDiagnosticBuilder::K_Immediate, Loc, + DiagID, SemaRef.getCurFunctionDecl(), SemaRef, + DeviceDiagnosticReason::All); + } + + SemaDiagnosticBuilder DB = getLangOpts().CUDAIsDevice + ? 
SemaRef.CUDADiagIfDeviceCode(Loc, DiagID) + : SemaRef.CUDADiagIfHostCode(Loc, DiagID); + SetIsLastErrorImmediate(DB.isImmediate()); + return DB; +} + +Sema::SemaDiagnosticBuilder SemaBase::Diag(SourceLocation Loc, + const PartialDiagnostic &PD, + bool DeferHint) { + return Diag(Loc, PD.getDiagID(), DeferHint) << PD; +} + +} // namespace clang diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index b92486b3bf3fb..8268d7b8d6c80 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -191,7 +191,7 @@ static bool convertArgumentToType(Sema &S, Expr *&Value, QualType Ty) { /// Check that the first argument to __builtin_annotation is an integer /// and the second argument is a non-wide string literal. -static bool SemaBuiltinAnnotation(Sema &S, CallExpr *TheCall) { +static bool BuiltinAnnotation(Sema &S, CallExpr *TheCall) { if (checkArgCount(S, TheCall, 2)) return true; @@ -217,7 +217,7 @@ static bool SemaBuiltinAnnotation(Sema &S, CallExpr *TheCall) { return false; } -static bool SemaBuiltinMSVCAnnotation(Sema &S, CallExpr *TheCall) { +static bool BuiltinMSVCAnnotation(Sema &S, CallExpr *TheCall) { // We need at least one argument. if (TheCall->getNumArgs() < 1) { S.Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args_at_least) @@ -241,7 +241,7 @@ static bool SemaBuiltinMSVCAnnotation(Sema &S, CallExpr *TheCall) { /// Check that the argument to __builtin_addressof is a glvalue, and set the /// result type to the corresponding pointer type. -static bool SemaBuiltinAddressof(Sema &S, CallExpr *TheCall) { +static bool BuiltinAddressof(Sema &S, CallExpr *TheCall) { if (checkArgCount(S, TheCall, 1)) return true; @@ -256,7 +256,7 @@ static bool SemaBuiltinAddressof(Sema &S, CallExpr *TheCall) { } /// Check that the argument to __builtin_function_start is a function. -static bool SemaBuiltinFunctionStart(Sema &S, CallExpr *TheCall) { +static bool BuiltinFunctionStart(Sema &S, CallExpr *TheCall) { if (checkArgCount(S, TheCall, 1)) return true; @@ -280,7 +280,7 @@ static bool SemaBuiltinFunctionStart(Sema &S, CallExpr *TheCall) { /// Check the number of arguments and set the result type to /// the argument type. -static bool SemaBuiltinPreserveAI(Sema &S, CallExpr *TheCall) { +static bool BuiltinPreserveAI(Sema &S, CallExpr *TheCall) { if (checkArgCount(S, TheCall, 1)) return true; @@ -291,7 +291,7 @@ static bool SemaBuiltinPreserveAI(Sema &S, CallExpr *TheCall) { /// Check that the value argument for __builtin_is_aligned(value, alignment) and /// __builtin_aligned_{up,down}(value, alignment) is an integer or a pointer /// type (but not a function pointer) and that the alignment is a power-of-two. 
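// (Editor's aside: usage the check above is meant to accept, assuming the
// documented builtin semantics:
//   bool ok = __builtin_is_aligned(p, 16);   // pointer or integer value
//   void *q = __builtin_align_up(p, 64);     // alignment: constant power of 2
// A function-pointer first argument or a non-power-of-two alignment is
// diagnosed.)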
-static bool SemaBuiltinAlignment(Sema &S, CallExpr *TheCall, unsigned ID) { +static bool BuiltinAlignment(Sema &S, CallExpr *TheCall, unsigned ID) { if (checkArgCount(S, TheCall, 2)) return true; @@ -369,8 +369,7 @@ static bool SemaBuiltinAlignment(Sema &S, CallExpr *TheCall, unsigned ID) { return false; } -static bool SemaBuiltinOverflow(Sema &S, CallExpr *TheCall, - unsigned BuiltinID) { +static bool BuiltinOverflow(Sema &S, CallExpr *TheCall, unsigned BuiltinID) { if (checkArgCount(S, TheCall, 3)) return true; @@ -698,7 +697,7 @@ struct BuiltinDumpStructGenerator { }; } // namespace -static ExprResult SemaBuiltinDumpStruct(Sema &S, CallExpr *TheCall) { +static ExprResult BuiltinDumpStruct(Sema &S, CallExpr *TheCall) { if (checkArgCountAtLeast(S, TheCall, 2)) return ExprError(); @@ -764,7 +763,7 @@ static ExprResult SemaBuiltinDumpStruct(Sema &S, CallExpr *TheCall) { return Generator.buildWrapper(); } -static bool SemaBuiltinCallWithStaticChain(Sema &S, CallExpr *BuiltinCall) { +static bool BuiltinCallWithStaticChain(Sema &S, CallExpr *BuiltinCall) { if (checkArgCount(S, BuiltinCall, 2)) return true; @@ -1430,9 +1429,9 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, << FunctionName << DestinationStr << SourceStr); } -static bool SemaBuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall, - Scope::ScopeFlags NeededScopeFlags, - unsigned DiagID) { +static bool BuiltinSEHScopeCheck(Sema &SemaRef, CallExpr *TheCall, + Scope::ScopeFlags NeededScopeFlags, + unsigned DiagID) { // Scopes aren't available during instantiation. Fortunately, builtin // functions cannot be template args so they cannot be formed through template // instantiation. Therefore checking once during the parse is sufficient. @@ -1506,7 +1505,7 @@ static bool checkOpenCLSubgroupExt(Sema &S, CallExpr *Call) { return false; } -static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) { +static bool OpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) { if (checkArgCount(S, TheCall, 2)) return true; @@ -1533,7 +1532,7 @@ static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) { /// OpenCL C v2.0, s6.13.17.6 - Check the argument to the /// get_kernel_work_group_size /// and get_kernel_preferred_work_group_size_multiple builtin functions. -static bool SemaOpenCLBuiltinKernelWorkGroupSize(Sema &S, CallExpr *TheCall) { +static bool OpenCLBuiltinKernelWorkGroupSize(Sema &S, CallExpr *TheCall) { if (checkArgCount(S, TheCall, 1)) return true; @@ -1609,7 +1608,7 @@ static bool checkOpenCLEnqueueVariadicArgs(Sema &S, CallExpr *TheCall, /// clk_event_t *event_ret, /// void (^block)(local void*, ...), /// uint size0, ...) -static bool SemaOpenCLBuiltinEnqueueKernel(Sema &S, CallExpr *TheCall) { +static bool OpenCLBuiltinEnqueueKernel(Sema &S, CallExpr *TheCall) { unsigned NumArgs = TheCall->getNumArgs(); if (NumArgs < 4) { @@ -1808,7 +1807,7 @@ static bool checkOpenCLPipePacketType(Sema &S, CallExpr *Call, unsigned Idx) { // \param S Reference to the semantic analyzer. // \param Call A pointer to the builtin call. // \return True if a semantic error has been found, false otherwise. -static bool SemaBuiltinRWPipe(Sema &S, CallExpr *Call) { +static bool BuiltinRWPipe(Sema &S, CallExpr *Call) { // OpenCL v2.0 s6.13.16.2 - The built-in read/write // functions have two forms. switch (Call->getNumArgs()) { @@ -1863,7 +1862,7 @@ static bool SemaBuiltinRWPipe(Sema &S, CallExpr *Call) { // \param S Reference to the semantic analyzer. 
// \param Call The call to the builtin function to be analyzed. // \return True if a semantic error was found, false otherwise. -static bool SemaBuiltinReserveRWPipe(Sema &S, CallExpr *Call) { +static bool BuiltinReserveRWPipe(Sema &S, CallExpr *Call) { if (checkArgCount(S, Call, 2)) return true; @@ -1892,7 +1891,7 @@ static bool SemaBuiltinReserveRWPipe(Sema &S, CallExpr *Call) { // \param S Reference to the semantic analyzer. // \param Call The call to the builtin function to be analyzed. // \return True if a semantic error was found, false otherwise. -static bool SemaBuiltinCommitRWPipe(Sema &S, CallExpr *Call) { +static bool BuiltinCommitRWPipe(Sema &S, CallExpr *Call) { if (checkArgCount(S, Call, 2)) return true; @@ -1915,7 +1914,7 @@ static bool SemaBuiltinCommitRWPipe(Sema &S, CallExpr *Call) { // \param S Reference to the semantic analyzer. // \param Call The call to the builtin function to be analyzed. // \return True if a semantic error was found, false otherwise. -static bool SemaBuiltinPipePackets(Sema &S, CallExpr *Call) { +static bool BuiltinPipePackets(Sema &S, CallExpr *Call) { if (checkArgCount(S, Call, 1)) return true; @@ -1934,8 +1933,7 @@ static bool SemaBuiltinPipePackets(Sema &S, CallExpr *Call) { // \param BuiltinID ID of the builtin function. // \param Call A pointer to the builtin call. // \return True if a semantic error has been found, false otherwise. -static bool SemaOpenCLBuiltinToAddr(Sema &S, unsigned BuiltinID, - CallExpr *Call) { +static bool OpenCLBuiltinToAddr(Sema &S, unsigned BuiltinID, CallExpr *Call) { if (checkArgCount(S, Call, 1)) return true; @@ -2090,7 +2088,7 @@ static bool checkPointerAuthValue(Sema &S, Expr *&Arg, return false; } -static ExprResult SemaPointerAuthStrip(Sema &S, CallExpr *Call) { +static ExprResult PointerAuthStrip(Sema &S, CallExpr *Call) { if (checkArgCount(S, Call, 2)) return ExprError(); if (checkPointerAuthEnabled(S, Call)) @@ -2103,7 +2101,7 @@ static ExprResult SemaPointerAuthStrip(Sema &S, CallExpr *Call) { return Call; } -static ExprResult SemaPointerAuthBlendDiscriminator(Sema &S, CallExpr *Call) { +static ExprResult PointerAuthBlendDiscriminator(Sema &S, CallExpr *Call) { if (checkArgCount(S, Call, 2)) return ExprError(); if (checkPointerAuthEnabled(S, Call)) @@ -2116,7 +2114,7 @@ static ExprResult SemaPointerAuthBlendDiscriminator(Sema &S, CallExpr *Call) { return Call; } -static ExprResult SemaPointerAuthSignGenericData(Sema &S, CallExpr *Call) { +static ExprResult PointerAuthSignGenericData(Sema &S, CallExpr *Call) { if (checkArgCount(S, Call, 2)) return ExprError(); if (checkPointerAuthEnabled(S, Call)) @@ -2129,8 +2127,8 @@ static ExprResult SemaPointerAuthSignGenericData(Sema &S, CallExpr *Call) { return Call; } -static ExprResult SemaPointerAuthSignOrAuth(Sema &S, CallExpr *Call, - PointerAuthOpKind OpKind) { +static ExprResult PointerAuthSignOrAuth(Sema &S, CallExpr *Call, + PointerAuthOpKind OpKind) { if (checkArgCount(S, Call, 3)) return ExprError(); if (checkPointerAuthEnabled(S, Call)) @@ -2144,7 +2142,7 @@ static ExprResult SemaPointerAuthSignOrAuth(Sema &S, CallExpr *Call, return Call; } -static ExprResult SemaPointerAuthAuthAndResign(Sema &S, CallExpr *Call) { +static ExprResult PointerAuthAuthAndResign(Sema &S, CallExpr *Call) { if (checkArgCount(S, Call, 5)) return ExprError(); if (checkPointerAuthEnabled(S, Call)) @@ -2160,7 +2158,7 @@ static ExprResult SemaPointerAuthAuthAndResign(Sema &S, CallExpr *Call) { return Call; } -static ExprResult SemaBuiltinLaunder(Sema &S, CallExpr *TheCall) { +static 
ExprResult BuiltinLaunder(Sema &S, CallExpr *TheCall) { if (checkArgCount(S, TheCall, 1)) return ExprError(); @@ -2333,11 +2331,11 @@ static bool checkFPMathBuiltinElementType(Sema &S, SourceLocation Loc, return false; } -/// SemaBuiltinCpu{Supports|Is} - Handle __builtin_cpu_{supports|is}(char *). +/// BuiltinCpu{Supports|Is} - Handle __builtin_cpu_{supports|is}(char *). /// This checks that the target supports the builtin and that the string /// argument is constant and valid. -static bool SemaBuiltinCpu(Sema &S, const TargetInfo &TI, CallExpr *TheCall, - const TargetInfo *AuxTI, unsigned BuiltinID) { +static bool BuiltinCpu(Sema &S, const TargetInfo &TI, CallExpr *TheCall, + const TargetInfo *AuxTI, unsigned BuiltinID) { assert((BuiltinID == Builtin::BI__builtin_cpu_supports || BuiltinID == Builtin::BI__builtin_cpu_is) && "Expecting __builtin_cpu_..."); @@ -2382,7 +2380,7 @@ static bool SemaBuiltinCpu(Sema &S, const TargetInfo &TI, CallExpr *TheCall, /// Checks that __builtin_popcountg was called with a single argument, which is /// an unsigned integer. -static bool SemaBuiltinPopcountg(Sema &S, CallExpr *TheCall) { +static bool BuiltinPopcountg(Sema &S, CallExpr *TheCall) { if (checkArgCount(S, TheCall, 1)) return true; @@ -2406,7 +2404,7 @@ static bool SemaBuiltinPopcountg(Sema &S, CallExpr *TheCall) { /// Checks that __builtin_{clzg,ctzg} was called with a first argument, which is /// an unsigned integer, and an optional second argument, which is promoted to /// an 'int'. -static bool SemaBuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) { +static bool BuiltinCountZeroBitsGeneric(Sema &S, CallExpr *TheCall) { if (checkArgCountRange(S, TheCall, 1, 2)) return true; @@ -2466,7 +2464,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, // If we don't have enough arguments, continue so we can issue better // diagnostic in checkArgCount(...) 
if (ArgNo < TheCall->getNumArgs() && - SemaBuiltinConstantArg(TheCall, ArgNo, Result)) + BuiltinConstantArg(TheCall, ArgNo, Result)) return true; ICEArguments &= ~(1 << ArgNo); } @@ -2475,8 +2473,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, switch (BuiltinID) { case Builtin::BI__builtin_cpu_supports: case Builtin::BI__builtin_cpu_is: - if (SemaBuiltinCpu(*this, Context.getTargetInfo(), TheCall, - Context.getAuxTargetInfo(), BuiltinID)) + if (BuiltinCpu(*this, Context.getTargetInfo(), TheCall, + Context.getAuxTargetInfo(), BuiltinID)) return ExprError(); break; case Builtin::BI__builtin_cpu_init: @@ -2501,7 +2499,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__builtin_ms_va_start: case Builtin::BI__builtin_stdarg_start: case Builtin::BI__builtin_va_start: - if (SemaBuiltinVAStart(BuiltinID, TheCall)) + if (BuiltinVAStart(BuiltinID, TheCall)) return ExprError(); break; case Builtin::BI__va_start: { @@ -2509,11 +2507,11 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case llvm::Triple::aarch64: case llvm::Triple::arm: case llvm::Triple::thumb: - if (SemaBuiltinVAStartARMMicrosoft(TheCall)) + if (BuiltinVAStartARMMicrosoft(TheCall)) return ExprError(); break; default: - if (SemaBuiltinVAStart(BuiltinID, TheCall)) + if (BuiltinVAStart(BuiltinID, TheCall)) return ExprError(); break; } @@ -2561,15 +2559,15 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__builtin_islessequal: case Builtin::BI__builtin_islessgreater: case Builtin::BI__builtin_isunordered: - if (SemaBuiltinUnorderedCompare(TheCall, BuiltinID)) + if (BuiltinUnorderedCompare(TheCall, BuiltinID)) return ExprError(); break; case Builtin::BI__builtin_fpclassify: - if (SemaBuiltinFPClassification(TheCall, 6, BuiltinID)) + if (BuiltinFPClassification(TheCall, 6, BuiltinID)) return ExprError(); break; case Builtin::BI__builtin_isfpclass: - if (SemaBuiltinFPClassification(TheCall, 2, BuiltinID)) + if (BuiltinFPClassification(TheCall, 2, BuiltinID)) return ExprError(); break; case Builtin::BI__builtin_isfinite: @@ -2583,20 +2581,20 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__builtin_signbit: case Builtin::BI__builtin_signbitf: case Builtin::BI__builtin_signbitl: - if (SemaBuiltinFPClassification(TheCall, 1, BuiltinID)) + if (BuiltinFPClassification(TheCall, 1, BuiltinID)) return ExprError(); break; case Builtin::BI__builtin_shufflevector: - return SemaBuiltinShuffleVector(TheCall); + return BuiltinShuffleVector(TheCall); // TheCall will be freed by the smart pointer here, but that's fine, since - // SemaBuiltinShuffleVector guts it, but then doesn't release it. + // BuiltinShuffleVector guts it, but then doesn't release it. 
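// (Editor's aside: an example of the builtin handled above; the constant
// indices select lanes from the concatenation of the two vector operands:
//   typedef float v4f __attribute__((vector_size(16)));
//   v4f lo(v4f a, v4f b) { return __builtin_shufflevector(a, b, 0, 4, 1, 5); }
// )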
case Builtin::BI__builtin_prefetch: - if (SemaBuiltinPrefetch(TheCall)) + if (BuiltinPrefetch(TheCall)) return ExprError(); break; case Builtin::BI__builtin_alloca_with_align: case Builtin::BI__builtin_alloca_with_align_uninitialized: - if (SemaBuiltinAllocaWithAlign(TheCall)) + if (BuiltinAllocaWithAlign(TheCall)) return ExprError(); [[fallthrough]]; case Builtin::BI__builtin_alloca: @@ -2605,29 +2603,29 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, << TheCall->getDirectCallee(); break; case Builtin::BI__arithmetic_fence: - if (SemaBuiltinArithmeticFence(TheCall)) + if (BuiltinArithmeticFence(TheCall)) return ExprError(); break; case Builtin::BI__assume: case Builtin::BI__builtin_assume: - if (SemaBuiltinAssume(TheCall)) + if (BuiltinAssume(TheCall)) return ExprError(); break; case Builtin::BI__builtin_assume_aligned: - if (SemaBuiltinAssumeAligned(TheCall)) + if (BuiltinAssumeAligned(TheCall)) return ExprError(); break; case Builtin::BI__builtin_dynamic_object_size: case Builtin::BI__builtin_object_size: - if (SemaBuiltinConstantArgRange(TheCall, 1, 0, 3)) + if (BuiltinConstantArgRange(TheCall, 1, 0, 3)) return ExprError(); break; case Builtin::BI__builtin_longjmp: - if (SemaBuiltinLongjmp(TheCall)) + if (BuiltinLongjmp(TheCall)) return ExprError(); break; case Builtin::BI__builtin_setjmp: - if (SemaBuiltinSetjmp(TheCall)) + if (BuiltinSetjmp(TheCall)) return ExprError(); break; case Builtin::BI__builtin_classify_type: @@ -2635,7 +2633,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, TheCall->setType(Context.IntTy); break; case Builtin::BI__builtin_complex: - if (SemaBuiltinComplex(TheCall)) + if (BuiltinComplex(TheCall)) return ExprError(); break; case Builtin::BI__builtin_constant_p: { @@ -2647,7 +2645,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, break; } case Builtin::BI__builtin_launder: - return SemaBuiltinLaunder(*this, TheCall); + return BuiltinLaunder(*this, TheCall); case Builtin::BI__sync_fetch_and_add: case Builtin::BI__sync_fetch_and_add_1: case Builtin::BI__sync_fetch_and_add_2: @@ -2750,14 +2748,14 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__sync_swap_4: case Builtin::BI__sync_swap_8: case Builtin::BI__sync_swap_16: - return SemaBuiltinAtomicOverloaded(TheCallResult); + return BuiltinAtomicOverloaded(TheCallResult); case Builtin::BI__sync_synchronize: Diag(TheCall->getBeginLoc(), diag::warn_atomic_implicit_seq_cst) << TheCall->getCallee()->getSourceRange(); break; case Builtin::BI__builtin_nontemporal_load: case Builtin::BI__builtin_nontemporal_store: - return SemaBuiltinNontemporalOverloaded(TheCallResult); + return BuiltinNontemporalOverloaded(TheCallResult); case Builtin::BI__builtin_memcpy_inline: { clang::Expr *SizeOp = TheCall->getArg(2); // We warn about copying to or from `nullptr` pointers when `size` is @@ -2783,49 +2781,49 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, break; } #define BUILTIN(ID, TYPE, ATTRS) -#define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \ - case Builtin::BI##ID: \ - return SemaAtomicOpsOverloaded(TheCallResult, AtomicExpr::AO##ID); +#define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \ + case Builtin::BI##ID: \ + return AtomicOpsOverloaded(TheCallResult, AtomicExpr::AO##ID); #include "clang/Basic/Builtins.inc" case Builtin::BI__annotation: - if (SemaBuiltinMSVCAnnotation(*this, TheCall)) + if (BuiltinMSVCAnnotation(*this, TheCall)) return ExprError(); break; case Builtin::BI__builtin_annotation: - 
if (SemaBuiltinAnnotation(*this, TheCall)) + if (BuiltinAnnotation(*this, TheCall)) return ExprError(); break; case Builtin::BI__builtin_addressof: - if (SemaBuiltinAddressof(*this, TheCall)) + if (BuiltinAddressof(*this, TheCall)) return ExprError(); break; case Builtin::BI__builtin_function_start: - if (SemaBuiltinFunctionStart(*this, TheCall)) + if (BuiltinFunctionStart(*this, TheCall)) return ExprError(); break; case Builtin::BI__builtin_is_aligned: case Builtin::BI__builtin_align_up: case Builtin::BI__builtin_align_down: - if (SemaBuiltinAlignment(*this, TheCall, BuiltinID)) + if (BuiltinAlignment(*this, TheCall, BuiltinID)) return ExprError(); break; case Builtin::BI__builtin_add_overflow: case Builtin::BI__builtin_sub_overflow: case Builtin::BI__builtin_mul_overflow: - if (SemaBuiltinOverflow(*this, TheCall, BuiltinID)) + if (BuiltinOverflow(*this, TheCall, BuiltinID)) return ExprError(); break; case Builtin::BI__builtin_operator_new: case Builtin::BI__builtin_operator_delete: { bool IsDelete = BuiltinID == Builtin::BI__builtin_operator_delete; ExprResult Res = - SemaBuiltinOperatorNewDeleteOverloaded(TheCallResult, IsDelete); + BuiltinOperatorNewDeleteOverloaded(TheCallResult, IsDelete); if (Res.isInvalid()) CorrectDelayedTyposInExpr(TheCallResult.get()); return Res; } case Builtin::BI__builtin_dump_struct: - return SemaBuiltinDumpStruct(*this, TheCall); + return BuiltinDumpStruct(*this, TheCall); case Builtin::BI__builtin_expect_with_probability: { // We first want to ensure we are called with 3 arguments if (checkArgCount(*this, TheCall, 3)) @@ -2856,23 +2854,23 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, break; } case Builtin::BI__builtin_preserve_access_index: - if (SemaBuiltinPreserveAI(*this, TheCall)) + if (BuiltinPreserveAI(*this, TheCall)) return ExprError(); break; case Builtin::BI__builtin_call_with_static_chain: - if (SemaBuiltinCallWithStaticChain(*this, TheCall)) + if (BuiltinCallWithStaticChain(*this, TheCall)) return ExprError(); break; case Builtin::BI__exception_code: case Builtin::BI_exception_code: - if (SemaBuiltinSEHScopeCheck(*this, TheCall, Scope::SEHExceptScope, - diag::err_seh___except_block)) + if (BuiltinSEHScopeCheck(*this, TheCall, Scope::SEHExceptScope, + diag::err_seh___except_block)) return ExprError(); break; case Builtin::BI__exception_info: case Builtin::BI_exception_info: - if (SemaBuiltinSEHScopeCheck(*this, TheCall, Scope::SEHFilterScope, - diag::err_seh___except_filter)) + if (BuiltinSEHScopeCheck(*this, TheCall, Scope::SEHFilterScope, + diag::err_seh___except_filter)) return ExprError(); break; case Builtin::BI__GetExceptionInfo: @@ -2915,82 +2913,82 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, break; } case Builtin::BI__builtin_ptrauth_strip: - return SemaPointerAuthStrip(*this, TheCall); + return PointerAuthStrip(*this, TheCall); case Builtin::BI__builtin_ptrauth_blend_discriminator: - return SemaPointerAuthBlendDiscriminator(*this, TheCall); + return PointerAuthBlendDiscriminator(*this, TheCall); case Builtin::BI__builtin_ptrauth_sign_unauthenticated: - return SemaPointerAuthSignOrAuth(*this, TheCall, PAO_Sign); + return PointerAuthSignOrAuth(*this, TheCall, PAO_Sign); case Builtin::BI__builtin_ptrauth_auth: - return SemaPointerAuthSignOrAuth(*this, TheCall, PAO_Auth); + return PointerAuthSignOrAuth(*this, TheCall, PAO_Auth); case Builtin::BI__builtin_ptrauth_sign_generic_data: - return SemaPointerAuthSignGenericData(*this, TheCall); + return PointerAuthSignGenericData(*this, 
TheCall); case Builtin::BI__builtin_ptrauth_auth_and_resign: - return SemaPointerAuthAuthAndResign(*this, TheCall); + return PointerAuthAuthAndResign(*this, TheCall); // OpenCL v2.0, s6.13.16 - Pipe functions case Builtin::BIread_pipe: case Builtin::BIwrite_pipe: // Since those two functions are declared with var args, we need a semantic // check for the argument. - if (SemaBuiltinRWPipe(*this, TheCall)) + if (BuiltinRWPipe(*this, TheCall)) return ExprError(); break; case Builtin::BIreserve_read_pipe: case Builtin::BIreserve_write_pipe: case Builtin::BIwork_group_reserve_read_pipe: case Builtin::BIwork_group_reserve_write_pipe: - if (SemaBuiltinReserveRWPipe(*this, TheCall)) + if (BuiltinReserveRWPipe(*this, TheCall)) return ExprError(); break; case Builtin::BIsub_group_reserve_read_pipe: case Builtin::BIsub_group_reserve_write_pipe: if (checkOpenCLSubgroupExt(*this, TheCall) || - SemaBuiltinReserveRWPipe(*this, TheCall)) + BuiltinReserveRWPipe(*this, TheCall)) return ExprError(); break; case Builtin::BIcommit_read_pipe: case Builtin::BIcommit_write_pipe: case Builtin::BIwork_group_commit_read_pipe: case Builtin::BIwork_group_commit_write_pipe: - if (SemaBuiltinCommitRWPipe(*this, TheCall)) + if (BuiltinCommitRWPipe(*this, TheCall)) return ExprError(); break; case Builtin::BIsub_group_commit_read_pipe: case Builtin::BIsub_group_commit_write_pipe: if (checkOpenCLSubgroupExt(*this, TheCall) || - SemaBuiltinCommitRWPipe(*this, TheCall)) + BuiltinCommitRWPipe(*this, TheCall)) return ExprError(); break; case Builtin::BIget_pipe_num_packets: case Builtin::BIget_pipe_max_packets: - if (SemaBuiltinPipePackets(*this, TheCall)) + if (BuiltinPipePackets(*this, TheCall)) return ExprError(); break; case Builtin::BIto_global: case Builtin::BIto_local: case Builtin::BIto_private: - if (SemaOpenCLBuiltinToAddr(*this, BuiltinID, TheCall)) + if (OpenCLBuiltinToAddr(*this, BuiltinID, TheCall)) return ExprError(); break; // OpenCL v2.0, s6.13.17 - Enqueue kernel functions. 
case Builtin::BIenqueue_kernel: - if (SemaOpenCLBuiltinEnqueueKernel(*this, TheCall)) + if (OpenCLBuiltinEnqueueKernel(*this, TheCall)) return ExprError(); break; case Builtin::BIget_kernel_work_group_size: case Builtin::BIget_kernel_preferred_work_group_size_multiple: - if (SemaOpenCLBuiltinKernelWorkGroupSize(*this, TheCall)) + if (OpenCLBuiltinKernelWorkGroupSize(*this, TheCall)) return ExprError(); break; case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: case Builtin::BIget_kernel_sub_group_count_for_ndrange: - if (SemaOpenCLBuiltinNDRangeAndBlock(*this, TheCall)) + if (OpenCLBuiltinNDRangeAndBlock(*this, TheCall)) return ExprError(); break; case Builtin::BI__builtin_os_log_format: Cleanup.setExprNeedsCleanups(true); [[fallthrough]]; case Builtin::BI__builtin_os_log_format_buffer_size: - if (SemaBuiltinOSLogFormat(TheCall)) + if (BuiltinOSLogFormat(TheCall)) return ExprError(); break; case Builtin::BI__builtin_intel_fpga_reg: @@ -3035,7 +3033,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, break; case Builtin::BI__builtin_frame_address: case Builtin::BI__builtin_return_address: { - if (SemaBuiltinConstantArgRange(TheCall, 0, 0, 0xFFFF)) + if (BuiltinConstantArgRange(TheCall, 0, 0, 0xFFFF)) return ExprError(); // -Wframe-address warning if non-zero passed to builtin @@ -3053,7 +3051,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, } case Builtin::BI__builtin_nondeterministic_value: { - if (SemaBuiltinNonDeterministicValue(TheCall)) + if (BuiltinNonDeterministicValue(TheCall)) return ExprError(); break; } @@ -3106,7 +3104,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, break; } case Builtin::BI__builtin_elementwise_fma: { - if (SemaBuiltinElementwiseTernaryMath(TheCall)) + if (BuiltinElementwiseTernaryMath(TheCall)) return ExprError(); break; } @@ -3114,7 +3112,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, // These builtins restrict the element type to floating point // types only, and take in two arguments. case Builtin::BI__builtin_elementwise_pow: { - if (SemaBuiltinElementwiseMath(TheCall)) + if (BuiltinElementwiseMath(TheCall)) return ExprError(); QualType ArgTy = TheCall->getArg(0)->getType(); @@ -3130,7 +3128,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, // types only. 
case Builtin::BI__builtin_elementwise_add_sat: case Builtin::BI__builtin_elementwise_sub_sat: { - if (SemaBuiltinElementwiseMath(TheCall)) + if (BuiltinElementwiseMath(TheCall)) return ExprError(); const Expr *Arg = TheCall->getArg(0); @@ -3150,7 +3148,7 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__builtin_elementwise_min: case Builtin::BI__builtin_elementwise_max: - if (SemaBuiltinElementwiseMath(TheCall)) + if (BuiltinElementwiseMath(TheCall)) return ExprError(); break; @@ -3241,13 +3239,13 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, } case Builtin::BI__builtin_matrix_transpose: - return SemaBuiltinMatrixTranspose(TheCall, TheCallResult); + return BuiltinMatrixTranspose(TheCall, TheCallResult); case Builtin::BI__builtin_matrix_column_major_load: - return SemaBuiltinMatrixColumnMajorLoad(TheCall, TheCallResult); + return BuiltinMatrixColumnMajorLoad(TheCall, TheCallResult); case Builtin::BI__builtin_matrix_column_major_store: - return SemaBuiltinMatrixColumnMajorStore(TheCall, TheCallResult); + return BuiltinMatrixColumnMajorStore(TheCall, TheCallResult); case Builtin::BI__builtin_get_device_side_mangled_name: { auto Check = [](CallExpr *TheCall) { @@ -3270,12 +3268,12 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, break; } case Builtin::BI__builtin_popcountg: - if (SemaBuiltinPopcountg(*this, TheCall)) + if (BuiltinPopcountg(*this, TheCall)) return ExprError(); break; case Builtin::BI__builtin_clzg: case Builtin::BI__builtin_ctzg: - if (SemaBuiltinCountZeroBitsGeneric(*this, TheCall)) + if (BuiltinCountZeroBitsGeneric(*this, TheCall)) return ExprError(); break; } @@ -3426,7 +3424,7 @@ bool Sema::ParseSVEImmChecks( // Check constant-ness first. 
llvm::APSInt Imm; - if (SemaBuiltinConstantArg(TheCall, ArgNum, Imm)) + if (BuiltinConstantArg(TheCall, ArgNum, Imm)) return true; if (!CheckImm(Imm.getSExtValue())) @@ -3436,65 +3434,63 @@ bool Sema::ParseSVEImmChecks( switch ((SVETypeFlags::ImmCheckType)CheckTy) { case SVETypeFlags::ImmCheck0_31: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 31)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, 31)) HasError = true; break; case SVETypeFlags::ImmCheck0_13: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 13)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, 13)) HasError = true; break; case SVETypeFlags::ImmCheck1_16: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 16)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 1, 16)) HasError = true; break; case SVETypeFlags::ImmCheck0_7: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, 7)) HasError = true; break; case SVETypeFlags::ImmCheck1_1: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 1)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 1, 1)) HasError = true; break; case SVETypeFlags::ImmCheck1_3: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 3)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 1, 3)) HasError = true; break; case SVETypeFlags::ImmCheck1_7: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 7)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 1, 7)) HasError = true; break; case SVETypeFlags::ImmCheckExtract: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, - (2048 / ElementSizeInBits) - 1)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, + (2048 / ElementSizeInBits) - 1)) HasError = true; break; case SVETypeFlags::ImmCheckShiftRight: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, ElementSizeInBits)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 1, ElementSizeInBits)) HasError = true; break; case SVETypeFlags::ImmCheckShiftRightNarrow: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, - ElementSizeInBits / 2)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 1, ElementSizeInBits / 2)) HasError = true; break; case SVETypeFlags::ImmCheckShiftLeft: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, - ElementSizeInBits - 1)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, ElementSizeInBits - 1)) HasError = true; break; case SVETypeFlags::ImmCheckLaneIndex: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, - (128 / (1 * ElementSizeInBits)) - 1)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, + (128 / (1 * ElementSizeInBits)) - 1)) HasError = true; break; case SVETypeFlags::ImmCheckLaneIndexCompRotate: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, - (128 / (2 * ElementSizeInBits)) - 1)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, + (128 / (2 * ElementSizeInBits)) - 1)) HasError = true; break; case SVETypeFlags::ImmCheckLaneIndexDot: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, - (128 / (4 * ElementSizeInBits)) - 1)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, + (128 / (4 * ElementSizeInBits)) - 1)) HasError = true; break; case SVETypeFlags::ImmCheckComplexRot90_270: @@ -3511,32 +3507,32 @@ bool Sema::ParseSVEImmChecks( HasError = true; break; case SVETypeFlags::ImmCheck0_1: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 1)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, 1)) HasError = true; break; case SVETypeFlags::ImmCheck0_2: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 2)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, 2)) HasError = true; break; case 
SVETypeFlags::ImmCheck0_3: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 3)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, 3)) HasError = true; break; case SVETypeFlags::ImmCheck0_0: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 0)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, 0)) HasError = true; break; case SVETypeFlags::ImmCheck0_15: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 15)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, 15)) HasError = true; break; case SVETypeFlags::ImmCheck0_255: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 0, 255)) HasError = true; break; case SVETypeFlags::ImmCheck2_4_Mul2: - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) || - SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2)) + if (BuiltinConstantArgRange(TheCall, ArgNum, 2, 4) || + BuiltinConstantArgMultiple(TheCall, ArgNum, 2)) HasError = true; break; } @@ -3712,7 +3708,7 @@ bool Sema::CheckNeonBuiltinFunctionCall(const TargetInfo &TI, // the immediate which specifies which variant to emit. unsigned ImmArg = TheCall->getNumArgs()-1; if (mask) { - if (SemaBuiltinConstantArg(TheCall, ImmArg, Result)) + if (BuiltinConstantArg(TheCall, ImmArg, Result)) return true; TV = Result.getLimitedValue(64); @@ -3760,7 +3756,7 @@ bool Sema::CheckNeonBuiltinFunctionCall(const TargetInfo &TI, #undef GET_NEON_IMMEDIATE_CHECK } - return SemaBuiltinConstantArgRange(TheCall, i, l, u + l); + return BuiltinConstantArgRange(TheCall, i, l, u + l); } bool Sema::CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { @@ -3934,19 +3930,19 @@ bool Sema::CheckARMBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, } if (BuiltinID == ARM::BI__builtin_arm_prefetch) { - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1) || - SemaBuiltinConstantArgRange(TheCall, 2, 0, 1); + return BuiltinConstantArgRange(TheCall, 1, 0, 1) || + BuiltinConstantArgRange(TheCall, 2, 0, 1); } if (BuiltinID == ARM::BI__builtin_arm_rsr64 || BuiltinID == ARM::BI__builtin_arm_wsr64) - return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 3, false); + return BuiltinARMSpecialReg(BuiltinID, TheCall, 0, 3, false); if (BuiltinID == ARM::BI__builtin_arm_rsr || BuiltinID == ARM::BI__builtin_arm_rsrp || BuiltinID == ARM::BI__builtin_arm_wsr || BuiltinID == ARM::BI__builtin_arm_wsrp) - return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true); + return BuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true); if (CheckNeonBuiltinFunctionCall(TI, BuiltinID, TheCall)) return true; @@ -3961,21 +3957,21 @@ bool Sema::CheckARMBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, switch (BuiltinID) { default: return false; case ARM::BI__builtin_arm_ssat: - return SemaBuiltinConstantArgRange(TheCall, 1, 1, 32); + return BuiltinConstantArgRange(TheCall, 1, 1, 32); case ARM::BI__builtin_arm_usat: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + return BuiltinConstantArgRange(TheCall, 1, 0, 31); case ARM::BI__builtin_arm_ssat16: - return SemaBuiltinConstantArgRange(TheCall, 1, 1, 16); + return BuiltinConstantArgRange(TheCall, 1, 1, 16); case ARM::BI__builtin_arm_usat16: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + return BuiltinConstantArgRange(TheCall, 1, 0, 15); case ARM::BI__builtin_arm_vcvtr_f: case ARM::BI__builtin_arm_vcvtr_d: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + return BuiltinConstantArgRange(TheCall, 1, 0, 1); case ARM::BI__builtin_arm_dmb: case ARM::BI__builtin_arm_dsb: case 
ARM::BI__builtin_arm_isb: case ARM::BI__builtin_arm_dbg: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 15); + return BuiltinConstantArgRange(TheCall, 0, 0, 15); case ARM::BI__builtin_arm_cdp: case ARM::BI__builtin_arm_cdp2: case ARM::BI__builtin_arm_mcr: @@ -3994,7 +3990,7 @@ bool Sema::CheckARMBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case ARM::BI__builtin_arm_stcl: case ARM::BI__builtin_arm_stc2: case ARM::BI__builtin_arm_stc2l: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 15) || + return BuiltinConstantArgRange(TheCall, 0, 0, 15) || CheckARMCoprocessorImmediate(TI, TheCall->getArg(0), /*WantCDE*/ false); } @@ -4011,17 +4007,17 @@ bool Sema::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, } if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1) || - SemaBuiltinConstantArgRange(TheCall, 2, 0, 3) || - SemaBuiltinConstantArgRange(TheCall, 3, 0, 1) || - SemaBuiltinConstantArgRange(TheCall, 4, 0, 1); + return BuiltinConstantArgRange(TheCall, 1, 0, 1) || + BuiltinConstantArgRange(TheCall, 2, 0, 3) || + BuiltinConstantArgRange(TheCall, 3, 0, 1) || + BuiltinConstantArgRange(TheCall, 4, 0, 1); } if (BuiltinID == AArch64::BI__builtin_arm_rsr64 || BuiltinID == AArch64::BI__builtin_arm_wsr64 || BuiltinID == AArch64::BI__builtin_arm_rsr128 || BuiltinID == AArch64::BI__builtin_arm_wsr128) - return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true); + return BuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true); // Memory Tagging Extensions (MTE) Intrinsics if (BuiltinID == AArch64::BI__builtin_arm_irg || @@ -4030,27 +4026,27 @@ bool Sema::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, BuiltinID == AArch64::BI__builtin_arm_ldg || BuiltinID == AArch64::BI__builtin_arm_stg || BuiltinID == AArch64::BI__builtin_arm_subp) { - return SemaBuiltinARMMemoryTaggingCall(BuiltinID, TheCall); + return BuiltinARMMemoryTaggingCall(BuiltinID, TheCall); } if (BuiltinID == AArch64::BI__builtin_arm_rsr || BuiltinID == AArch64::BI__builtin_arm_rsrp || BuiltinID == AArch64::BI__builtin_arm_wsr || BuiltinID == AArch64::BI__builtin_arm_wsrp) - return SemaBuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true); + return BuiltinARMSpecialReg(BuiltinID, TheCall, 0, 5, true); // Only check the valid encoding range. Any constant in this range would be // converted to a register of the form S1_2_C3_C4_5. Let the hardware throw // an exception for incorrect registers. This matches MSVC behavior. if (BuiltinID == AArch64::BI_ReadStatusReg || BuiltinID == AArch64::BI_WriteStatusReg) - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 0x7fff); + return BuiltinConstantArgRange(TheCall, 0, 0, 0x7fff); if (BuiltinID == AArch64::BI__getReg) - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31); + return BuiltinConstantArgRange(TheCall, 0, 0, 31); if (BuiltinID == AArch64::BI__break) - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 0xffff); + return BuiltinConstantArgRange(TheCall, 0, 0, 0xffff); if (CheckNeonBuiltinFunctionCall(TI, BuiltinID, TheCall)) return true; @@ -4072,7 +4068,7 @@ bool Sema::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI, case AArch64::BI__builtin_arm_tcancel: l = 0; u = 65535; break; } - return SemaBuiltinConstantArgRange(TheCall, i, l, u + l); + return BuiltinConstantArgRange(TheCall, i, l, u + l); } static bool isValidBPFPreserveFieldInfoArg(Expr *Arg) { @@ -4471,13 +4467,13 @@ bool Sema::CheckHexagonBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall) { int32_t Min = A.IsSigned ? 
-(1 << (A.BitWidth - 1)) : 0; int32_t Max = (1 << (A.IsSigned ? A.BitWidth - 1 : A.BitWidth)) - 1; if (!A.Align) { - Error |= SemaBuiltinConstantArgRange(TheCall, A.OpNum, Min, Max); + Error |= BuiltinConstantArgRange(TheCall, A.OpNum, Min, Max); } else { unsigned M = 1 << A.Align; Min *= M; Max *= M; - Error |= SemaBuiltinConstantArgRange(TheCall, A.OpNum, Min, Max); - Error |= SemaBuiltinConstantArgMultiple(TheCall, A.OpNum, M); + Error |= BuiltinConstantArgRange(TheCall, A.OpNum, Min, Max); + Error |= BuiltinConstantArgMultiple(TheCall, A.OpNum, M); } } return Error; @@ -4497,9 +4493,8 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, // Basic intrinsics. case LoongArch::BI__builtin_loongarch_cacop_d: case LoongArch::BI__builtin_loongarch_cacop_w: { - SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); - SemaBuiltinConstantArgRange(TheCall, 2, llvm::minIntN(12), - llvm::maxIntN(12)); + BuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); + BuiltinConstantArgRange(TheCall, 2, llvm::minIntN(12), llvm::maxIntN(12)); break; } case LoongArch::BI__builtin_loongarch_break: @@ -4507,22 +4502,22 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_loongarch_ibar: case LoongArch::BI__builtin_loongarch_syscall: // Check if immediate is in [0, 32767]. - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 32767); + return BuiltinConstantArgRange(TheCall, 0, 0, 32767); case LoongArch::BI__builtin_loongarch_csrrd_w: case LoongArch::BI__builtin_loongarch_csrrd_d: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); + return BuiltinConstantArgRange(TheCall, 0, 0, 16383); case LoongArch::BI__builtin_loongarch_csrwr_w: case LoongArch::BI__builtin_loongarch_csrwr_d: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); + return BuiltinConstantArgRange(TheCall, 1, 0, 16383); case LoongArch::BI__builtin_loongarch_csrxchg_w: case LoongArch::BI__builtin_loongarch_csrxchg_d: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); + return BuiltinConstantArgRange(TheCall, 2, 0, 16383); case LoongArch::BI__builtin_loongarch_lddir_d: case LoongArch::BI__builtin_loongarch_ldpte_d: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + return BuiltinConstantArgRange(TheCall, 1, 0, 31); case LoongArch::BI__builtin_loongarch_movfcsr2gr: case LoongArch::BI__builtin_loongarch_movgr2fcsr: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(2)); + return BuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(2)); // LSX intrinsics. 
case LoongArch::BI__builtin_lsx_vbitclri_b: @@ -4538,7 +4533,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: case LoongArch::BI__builtin_lsx_vrotri_b: case LoongArch::BI__builtin_lsx_vsrlri_b: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + return BuiltinConstantArgRange(TheCall, 1, 0, 7); case LoongArch::BI__builtin_lsx_vbitclri_h: case LoongArch::BI__builtin_lsx_vbitrevi_h: case LoongArch::BI__builtin_lsx_vbitseti_h: @@ -4552,7 +4547,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: case LoongArch::BI__builtin_lsx_vrotri_h: case LoongArch::BI__builtin_lsx_vsrlri_h: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + return BuiltinConstantArgRange(TheCall, 1, 0, 15); case LoongArch::BI__builtin_lsx_vssrarni_b_h: case LoongArch::BI__builtin_lsx_vssrarni_bu_h: case LoongArch::BI__builtin_lsx_vssrani_b_h: @@ -4565,7 +4560,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lsx_vssrlrni_b_h: case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: case LoongArch::BI__builtin_lsx_vsrani_b_h: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + return BuiltinConstantArgRange(TheCall, 2, 0, 15); case LoongArch::BI__builtin_lsx_vslei_bu: case LoongArch::BI__builtin_lsx_vslei_hu: case LoongArch::BI__builtin_lsx_vslei_wu: @@ -4605,7 +4600,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lsx_vbsll_v: case LoongArch::BI__builtin_lsx_vsubi_wu: case LoongArch::BI__builtin_lsx_vsubi_du: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + return BuiltinConstantArgRange(TheCall, 1, 0, 31); case LoongArch::BI__builtin_lsx_vssrarni_h_w: case LoongArch::BI__builtin_lsx_vssrarni_hu_w: case LoongArch::BI__builtin_lsx_vssrani_h_w: @@ -4620,7 +4615,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lsx_vssrlni_hu_w: case LoongArch::BI__builtin_lsx_vssrlrni_h_w: case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); + return BuiltinConstantArgRange(TheCall, 2, 0, 31); case LoongArch::BI__builtin_lsx_vbitclri_d: case LoongArch::BI__builtin_lsx_vbitrevi_d: case LoongArch::BI__builtin_lsx_vbitseti_d: @@ -4632,7 +4627,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lsx_vsrari_d: case LoongArch::BI__builtin_lsx_vrotri_d: case LoongArch::BI__builtin_lsx_vsrlri_d: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); + return BuiltinConstantArgRange(TheCall, 1, 0, 63); case LoongArch::BI__builtin_lsx_vssrarni_w_d: case LoongArch::BI__builtin_lsx_vssrarni_wu_d: case LoongArch::BI__builtin_lsx_vssrani_w_d: @@ -4645,7 +4640,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lsx_vssrlrni_w_d: case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: case LoongArch::BI__builtin_lsx_vsrani_w_d: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); + return BuiltinConstantArgRange(TheCall, 2, 0, 63); case LoongArch::BI__builtin_lsx_vssrarni_d_q: case LoongArch::BI__builtin_lsx_vssrarni_du_q: case LoongArch::BI__builtin_lsx_vssrani_d_q: @@ -4658,7 +4653,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lsx_vsrani_d_q: case LoongArch::BI__builtin_lsx_vsrlrni_d_q: case LoongArch::BI__builtin_lsx_vsrlni_d_q: - return 
SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); + return BuiltinConstantArgRange(TheCall, 2, 0, 127); case LoongArch::BI__builtin_lsx_vseqi_b: case LoongArch::BI__builtin_lsx_vseqi_h: case LoongArch::BI__builtin_lsx_vseqi_w: @@ -4679,7 +4674,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lsx_vmini_h: case LoongArch::BI__builtin_lsx_vmini_w: case LoongArch::BI__builtin_lsx_vmini_d: - return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); + return BuiltinConstantArgRange(TheCall, 1, -16, 15); case LoongArch::BI__builtin_lsx_vandi_b: case LoongArch::BI__builtin_lsx_vnori_b: case LoongArch::BI__builtin_lsx_vori_b: @@ -4687,7 +4682,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lsx_vshuf4i_h: case LoongArch::BI__builtin_lsx_vshuf4i_w: case LoongArch::BI__builtin_lsx_vxori_b: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); + return BuiltinConstantArgRange(TheCall, 1, 0, 255); case LoongArch::BI__builtin_lsx_vbitseli_b: case LoongArch::BI__builtin_lsx_vshuf4i_d: case LoongArch::BI__builtin_lsx_vextrins_b: @@ -4695,61 +4690,61 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lsx_vextrins_w: case LoongArch::BI__builtin_lsx_vextrins_d: case LoongArch::BI__builtin_lsx_vpermi_w: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); + return BuiltinConstantArgRange(TheCall, 2, 0, 255); case LoongArch::BI__builtin_lsx_vpickve2gr_b: case LoongArch::BI__builtin_lsx_vpickve2gr_bu: case LoongArch::BI__builtin_lsx_vreplvei_b: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + return BuiltinConstantArgRange(TheCall, 1, 0, 15); case LoongArch::BI__builtin_lsx_vinsgr2vr_b: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + return BuiltinConstantArgRange(TheCall, 2, 0, 15); case LoongArch::BI__builtin_lsx_vpickve2gr_h: case LoongArch::BI__builtin_lsx_vpickve2gr_hu: case LoongArch::BI__builtin_lsx_vreplvei_h: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + return BuiltinConstantArgRange(TheCall, 1, 0, 7); case LoongArch::BI__builtin_lsx_vinsgr2vr_h: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); + return BuiltinConstantArgRange(TheCall, 2, 0, 7); case LoongArch::BI__builtin_lsx_vpickve2gr_w: case LoongArch::BI__builtin_lsx_vpickve2gr_wu: case LoongArch::BI__builtin_lsx_vreplvei_w: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); + return BuiltinConstantArgRange(TheCall, 1, 0, 3); case LoongArch::BI__builtin_lsx_vinsgr2vr_w: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); + return BuiltinConstantArgRange(TheCall, 2, 0, 3); case LoongArch::BI__builtin_lsx_vpickve2gr_d: case LoongArch::BI__builtin_lsx_vpickve2gr_du: case LoongArch::BI__builtin_lsx_vreplvei_d: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + return BuiltinConstantArgRange(TheCall, 1, 0, 1); case LoongArch::BI__builtin_lsx_vinsgr2vr_d: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 1); + return BuiltinConstantArgRange(TheCall, 2, 0, 1); case LoongArch::BI__builtin_lsx_vstelm_b: - return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || - SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); + return BuiltinConstantArgRange(TheCall, 2, -128, 127) || + BuiltinConstantArgRange(TheCall, 3, 0, 15); case LoongArch::BI__builtin_lsx_vstelm_h: - return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || - SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); + return BuiltinConstantArgRange(TheCall, 2, -256, 254) || + 
BuiltinConstantArgRange(TheCall, 3, 0, 7); case LoongArch::BI__builtin_lsx_vstelm_w: - return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || - SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); + return BuiltinConstantArgRange(TheCall, 2, -512, 508) || + BuiltinConstantArgRange(TheCall, 3, 0, 3); case LoongArch::BI__builtin_lsx_vstelm_d: - return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || - SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); + return BuiltinConstantArgRange(TheCall, 2, -1024, 1016) || + BuiltinConstantArgRange(TheCall, 3, 0, 1); case LoongArch::BI__builtin_lsx_vldrepl_b: case LoongArch::BI__builtin_lsx_vld: - return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); + return BuiltinConstantArgRange(TheCall, 1, -2048, 2047); case LoongArch::BI__builtin_lsx_vldrepl_h: - return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); + return BuiltinConstantArgRange(TheCall, 1, -2048, 2046); case LoongArch::BI__builtin_lsx_vldrepl_w: - return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); + return BuiltinConstantArgRange(TheCall, 1, -2048, 2044); case LoongArch::BI__builtin_lsx_vldrepl_d: - return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); + return BuiltinConstantArgRange(TheCall, 1, -2048, 2040); case LoongArch::BI__builtin_lsx_vst: - return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); + return BuiltinConstantArgRange(TheCall, 2, -2048, 2047); case LoongArch::BI__builtin_lsx_vldi: - return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); + return BuiltinConstantArgRange(TheCall, 0, -4096, 4095); case LoongArch::BI__builtin_lsx_vrepli_b: case LoongArch::BI__builtin_lsx_vrepli_h: case LoongArch::BI__builtin_lsx_vrepli_w: case LoongArch::BI__builtin_lsx_vrepli_d: - return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); + return BuiltinConstantArgRange(TheCall, 0, -512, 511); // LASX intrinsics. 
case LoongArch::BI__builtin_lasx_xvbitclri_b: @@ -4765,7 +4760,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: case LoongArch::BI__builtin_lasx_xvrotri_b: case LoongArch::BI__builtin_lasx_xvsrlri_b: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + return BuiltinConstantArgRange(TheCall, 1, 0, 7); case LoongArch::BI__builtin_lasx_xvbitclri_h: case LoongArch::BI__builtin_lasx_xvbitrevi_h: case LoongArch::BI__builtin_lasx_xvbitseti_h: @@ -4779,7 +4774,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: case LoongArch::BI__builtin_lasx_xvrotri_h: case LoongArch::BI__builtin_lasx_xvsrlri_h: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + return BuiltinConstantArgRange(TheCall, 1, 0, 15); case LoongArch::BI__builtin_lasx_xvssrarni_b_h: case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: case LoongArch::BI__builtin_lasx_xvssrani_b_h: @@ -4792,7 +4787,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: case LoongArch::BI__builtin_lasx_xvsrani_b_h: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + return BuiltinConstantArgRange(TheCall, 2, 0, 15); case LoongArch::BI__builtin_lasx_xvslei_bu: case LoongArch::BI__builtin_lasx_xvslei_hu: case LoongArch::BI__builtin_lasx_xvslei_wu: @@ -4832,7 +4827,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lasx_xvsubi_du: case LoongArch::BI__builtin_lasx_xvbsrl_v: case LoongArch::BI__builtin_lasx_xvbsll_v: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + return BuiltinConstantArgRange(TheCall, 1, 0, 31); case LoongArch::BI__builtin_lasx_xvssrarni_h_w: case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: case LoongArch::BI__builtin_lasx_xvssrani_h_w: @@ -4847,7 +4842,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); + return BuiltinConstantArgRange(TheCall, 2, 0, 31); case LoongArch::BI__builtin_lasx_xvbitclri_d: case LoongArch::BI__builtin_lasx_xvbitrevi_d: case LoongArch::BI__builtin_lasx_xvbitseti_d: @@ -4859,7 +4854,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lasx_xvsrari_d: case LoongArch::BI__builtin_lasx_xvrotri_d: case LoongArch::BI__builtin_lasx_xvsrlri_d: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); + return BuiltinConstantArgRange(TheCall, 1, 0, 63); case LoongArch::BI__builtin_lasx_xvssrarni_w_d: case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: case LoongArch::BI__builtin_lasx_xvssrani_w_d: @@ -4872,7 +4867,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: case LoongArch::BI__builtin_lasx_xvsrani_w_d: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); + return BuiltinConstantArgRange(TheCall, 2, 0, 63); case LoongArch::BI__builtin_lasx_xvssrarni_d_q: case LoongArch::BI__builtin_lasx_xvssrarni_du_q: case LoongArch::BI__builtin_lasx_xvssrani_d_q: @@ -4885,7 +4880,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lasx_xvsrani_d_q: case 
LoongArch::BI__builtin_lasx_xvsrlni_d_q: case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); + return BuiltinConstantArgRange(TheCall, 2, 0, 127); case LoongArch::BI__builtin_lasx_xvseqi_b: case LoongArch::BI__builtin_lasx_xvseqi_h: case LoongArch::BI__builtin_lasx_xvseqi_w: @@ -4906,7 +4901,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lasx_xvmini_h: case LoongArch::BI__builtin_lasx_xvmini_w: case LoongArch::BI__builtin_lasx_xvmini_d: - return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); + return BuiltinConstantArgRange(TheCall, 1, -16, 15); case LoongArch::BI__builtin_lasx_xvandi_b: case LoongArch::BI__builtin_lasx_xvnori_b: case LoongArch::BI__builtin_lasx_xvori_b: @@ -4915,7 +4910,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lasx_xvshuf4i_w: case LoongArch::BI__builtin_lasx_xvxori_b: case LoongArch::BI__builtin_lasx_xvpermi_d: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); + return BuiltinConstantArgRange(TheCall, 1, 0, 255); case LoongArch::BI__builtin_lasx_xvbitseli_b: case LoongArch::BI__builtin_lasx_xvshuf4i_d: case LoongArch::BI__builtin_lasx_xvextrins_b: @@ -4924,59 +4919,59 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, case LoongArch::BI__builtin_lasx_xvextrins_d: case LoongArch::BI__builtin_lasx_xvpermi_q: case LoongArch::BI__builtin_lasx_xvpermi_w: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); + return BuiltinConstantArgRange(TheCall, 2, 0, 255); case LoongArch::BI__builtin_lasx_xvrepl128vei_b: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + return BuiltinConstantArgRange(TheCall, 1, 0, 15); case LoongArch::BI__builtin_lasx_xvrepl128vei_h: case LoongArch::BI__builtin_lasx_xvpickve2gr_w: case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: case LoongArch::BI__builtin_lasx_xvpickve_w_f: case LoongArch::BI__builtin_lasx_xvpickve_w: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + return BuiltinConstantArgRange(TheCall, 1, 0, 7); case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: case LoongArch::BI__builtin_lasx_xvinsve0_w: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); + return BuiltinConstantArgRange(TheCall, 2, 0, 7); case LoongArch::BI__builtin_lasx_xvrepl128vei_w: case LoongArch::BI__builtin_lasx_xvpickve2gr_d: case LoongArch::BI__builtin_lasx_xvpickve2gr_du: case LoongArch::BI__builtin_lasx_xvpickve_d_f: case LoongArch::BI__builtin_lasx_xvpickve_d: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); + return BuiltinConstantArgRange(TheCall, 1, 0, 3); case LoongArch::BI__builtin_lasx_xvinsve0_d: case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); + return BuiltinConstantArgRange(TheCall, 2, 0, 3); case LoongArch::BI__builtin_lasx_xvstelm_b: - return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || - SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); + return BuiltinConstantArgRange(TheCall, 2, -128, 127) || + BuiltinConstantArgRange(TheCall, 3, 0, 31); case LoongArch::BI__builtin_lasx_xvstelm_h: - return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || - SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); + return BuiltinConstantArgRange(TheCall, 2, -256, 254) || + BuiltinConstantArgRange(TheCall, 3, 0, 15); case LoongArch::BI__builtin_lasx_xvstelm_w: - return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || - SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); + return 
BuiltinConstantArgRange(TheCall, 2, -512, 508) || + BuiltinConstantArgRange(TheCall, 3, 0, 7); case LoongArch::BI__builtin_lasx_xvstelm_d: - return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || - SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); + return BuiltinConstantArgRange(TheCall, 2, -1024, 1016) || + BuiltinConstantArgRange(TheCall, 3, 0, 3); case LoongArch::BI__builtin_lasx_xvrepl128vei_d: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + return BuiltinConstantArgRange(TheCall, 1, 0, 1); case LoongArch::BI__builtin_lasx_xvldrepl_b: case LoongArch::BI__builtin_lasx_xvld: - return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); + return BuiltinConstantArgRange(TheCall, 1, -2048, 2047); case LoongArch::BI__builtin_lasx_xvldrepl_h: - return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); + return BuiltinConstantArgRange(TheCall, 1, -2048, 2046); case LoongArch::BI__builtin_lasx_xvldrepl_w: - return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); + return BuiltinConstantArgRange(TheCall, 1, -2048, 2044); case LoongArch::BI__builtin_lasx_xvldrepl_d: - return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); + return BuiltinConstantArgRange(TheCall, 1, -2048, 2040); case LoongArch::BI__builtin_lasx_xvst: - return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); + return BuiltinConstantArgRange(TheCall, 2, -2048, 2047); case LoongArch::BI__builtin_lasx_xvldi: - return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); + return BuiltinConstantArgRange(TheCall, 0, -4096, 4095); case LoongArch::BI__builtin_lasx_xvrepli_b: case LoongArch::BI__builtin_lasx_xvrepli_h: case LoongArch::BI__builtin_lasx_xvrepli_w: case LoongArch::BI__builtin_lasx_xvrepli_d: - return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); + return BuiltinConstantArgRange(TheCall, 0, -512, 511); } return false; } @@ -5192,10 +5187,10 @@ bool Sema::CheckMipsBuiltinArgument(unsigned BuiltinID, CallExpr *TheCall) { } if (!m) - return SemaBuiltinConstantArgRange(TheCall, i, l, u); + return BuiltinConstantArgRange(TheCall, i, l, u); - return SemaBuiltinConstantArgRange(TheCall, i, l, u) || - SemaBuiltinConstantArgMultiple(TheCall, i, m); + return BuiltinConstantArgRange(TheCall, i, l, u) || + BuiltinConstantArgMultiple(TheCall, i, m); } /// DecodePPCMMATypeFromStr - This decodes one PPC MMA type descriptor from Str, @@ -5285,7 +5280,6 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { case PPC::BI__builtin_ppc_fetch_and_andlp: case PPC::BI__builtin_ppc_fetch_and_orlp: case PPC::BI__builtin_ppc_fetch_and_swaplp: - case PPC::BI__builtin_ppc_rldimi: return true; } return false; @@ -5295,7 +5289,7 @@ static bool isPPC_64Builtin(unsigned BuiltinID) { /// number of 0s on either side. The 1s are allowed to wrap from LSB to MSB, so /// 0x000FFF0, 0x0000FFFF, 0xFF0000FF, 0x0 are all runs. 0x0F0F0000 is not, /// since all 1s are not contiguous. -bool Sema::SemaValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum) { +bool Sema::ValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum) { llvm::APSInt Result; // We can't check the value of a dependent argument. Expr *Arg = TheCall->getArg(ArgNum); @@ -5303,7 +5297,7 @@ bool Sema::SemaValueIsRunOfOnes(CallExpr *TheCall, unsigned ArgNum) { return false; // Check constant-ness first. - if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + if (BuiltinConstantArg(TheCall, ArgNum, Result)) return true; // Check contiguous run of 1s, 0xFF0000FF is also a run of 1s. 
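As an aside on the run-of-ones predicate above: here is a minimal standalone sketch of the same test, assuming plain 32-bit integers rather than the llvm::APSInt the in-tree code operates on (the helper name isRunOfOnes and the use of __builtin_ctz are illustrative, not taken from this patch). A non-wrapping run means the value itself is a shifted mask; a wrapping run such as 0xFF0000FF means the zeros form the shifted mask, so the complement is one.

#include <cstdint>

static bool isRunOfOnes(uint32_t V) { // illustrative sketch, not the in-tree code
  auto IsShiftedMask = [](uint32_t X) {
    if (X == 0)
      return false;
    uint32_t Stripped = X >> __builtin_ctz(X); // drop trailing zeros
    return (Stripped & (Stripped + 1)) == 0;   // remainder must be all ones
  };
  // 0x000FFFF0 and 0x0000FFFF satisfy the first branch; 0xFF0000FF and 0x0
  // satisfy the second (wrap-around); 0x0F0F0000 fails both.
  return IsShiftedMask(V) || IsShiftedMask(~V);
}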
@@ -5329,27 +5323,27 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, default: return false; case PPC::BI__builtin_altivec_crypto_vshasigmaw: case PPC::BI__builtin_altivec_crypto_vshasigmad: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1) || - SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + return BuiltinConstantArgRange(TheCall, 1, 0, 1) || + BuiltinConstantArgRange(TheCall, 2, 0, 15); case PPC::BI__builtin_altivec_dss: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3); + return BuiltinConstantArgRange(TheCall, 0, 0, 3); case PPC::BI__builtin_tbegin: case PPC::BI__builtin_tend: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 1); + return BuiltinConstantArgRange(TheCall, 0, 0, 1); case PPC::BI__builtin_tsr: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 7); + return BuiltinConstantArgRange(TheCall, 0, 0, 7); case PPC::BI__builtin_tabortwc: case PPC::BI__builtin_tabortdc: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31); + return BuiltinConstantArgRange(TheCall, 0, 0, 31); case PPC::BI__builtin_tabortwci: case PPC::BI__builtin_tabortdci: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) || - SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); + return BuiltinConstantArgRange(TheCall, 0, 0, 31) || + BuiltinConstantArgRange(TheCall, 2, 0, 31); // According to GCC 'Basic PowerPC Built-in Functions Available on ISA 2.05', // __builtin_(un)pack_longdouble are available only if long double uses IBM // extended double representation. case PPC::BI__builtin_unpack_longdouble: - if (SemaBuiltinConstantArgRange(TheCall, 1, 0, 1)) + if (BuiltinConstantArgRange(TheCall, 1, 0, 1)) return true; [[fallthrough]]; case PPC::BI__builtin_pack_longdouble: @@ -5361,39 +5355,39 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case PPC::BI__builtin_altivec_dstt: case PPC::BI__builtin_altivec_dstst: case PPC::BI__builtin_altivec_dststt: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); + return BuiltinConstantArgRange(TheCall, 2, 0, 3); case PPC::BI__builtin_vsx_xxpermdi: case PPC::BI__builtin_vsx_xxsldwi: - return SemaBuiltinVSX(TheCall); + return BuiltinVSX(TheCall); case PPC::BI__builtin_unpack_vector_int128: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + return BuiltinConstantArgRange(TheCall, 1, 0, 1); case PPC::BI__builtin_altivec_vgnb: - return SemaBuiltinConstantArgRange(TheCall, 1, 2, 7); + return BuiltinConstantArgRange(TheCall, 1, 2, 7); case PPC::BI__builtin_vsx_xxeval: - return SemaBuiltinConstantArgRange(TheCall, 3, 0, 255); + return BuiltinConstantArgRange(TheCall, 3, 0, 255); case PPC::BI__builtin_altivec_vsldbi: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); + return BuiltinConstantArgRange(TheCall, 2, 0, 7); case PPC::BI__builtin_altivec_vsrdbi: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); + return BuiltinConstantArgRange(TheCall, 2, 0, 7); case PPC::BI__builtin_vsx_xxpermx: - return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); + return BuiltinConstantArgRange(TheCall, 3, 0, 7); case PPC::BI__builtin_ppc_tw: case PPC::BI__builtin_ppc_tdw: - return SemaBuiltinConstantArgRange(TheCall, 2, 1, 31); + return BuiltinConstantArgRange(TheCall, 2, 1, 31); case PPC::BI__builtin_ppc_cmprb: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 1); + return BuiltinConstantArgRange(TheCall, 0, 0, 1); // For __rlwnm, __rlwimi and __rldimi, the last parameter mask must // be a constant that represents a contiguous bit field. 
case PPC::BI__builtin_ppc_rlwnm: - return SemaValueIsRunOfOnes(TheCall, 2); + return ValueIsRunOfOnes(TheCall, 2); case PPC::BI__builtin_ppc_rlwimi: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31) || - SemaValueIsRunOfOnes(TheCall, 3); + return BuiltinConstantArgRange(TheCall, 2, 0, 31) || + ValueIsRunOfOnes(TheCall, 3); case PPC::BI__builtin_ppc_rldimi: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63) || - SemaValueIsRunOfOnes(TheCall, 3); + return BuiltinConstantArgRange(TheCall, 2, 0, 63) || + ValueIsRunOfOnes(TheCall, 3); case PPC::BI__builtin_ppc_addex: { - if (SemaBuiltinConstantArgRange(TheCall, 2, 0, 3)) + if (BuiltinConstantArgRange(TheCall, 2, 0, 3)) return true; // Output warning for reserved values 1 to 3. int ArgValue = @@ -5405,29 +5399,29 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, } case PPC::BI__builtin_ppc_mtfsb0: case PPC::BI__builtin_ppc_mtfsb1: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31); + return BuiltinConstantArgRange(TheCall, 0, 0, 31); case PPC::BI__builtin_ppc_mtfsf: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 255); + return BuiltinConstantArgRange(TheCall, 0, 0, 255); case PPC::BI__builtin_ppc_mtfsfi: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 7) || - SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + return BuiltinConstantArgRange(TheCall, 0, 0, 7) || + BuiltinConstantArgRange(TheCall, 1, 0, 15); case PPC::BI__builtin_ppc_alignx: - return SemaBuiltinConstantArgPower2(TheCall, 0); + return BuiltinConstantArgPower2(TheCall, 0); case PPC::BI__builtin_ppc_rdlam: - return SemaValueIsRunOfOnes(TheCall, 2); + return ValueIsRunOfOnes(TheCall, 2); case PPC::BI__builtin_vsx_ldrmb: case PPC::BI__builtin_vsx_strmb: - return SemaBuiltinConstantArgRange(TheCall, 1, 1, 16); + return BuiltinConstantArgRange(TheCall, 1, 1, 16); case PPC::BI__builtin_altivec_vcntmbb: case PPC::BI__builtin_altivec_vcntmbh: case PPC::BI__builtin_altivec_vcntmbw: case PPC::BI__builtin_altivec_vcntmbd: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + return BuiltinConstantArgRange(TheCall, 1, 0, 1); case PPC::BI__builtin_vsx_xxgenpcvbm: case PPC::BI__builtin_vsx_xxgenpcvhm: case PPC::BI__builtin_vsx_xxgenpcvwm: case PPC::BI__builtin_vsx_xxgenpcvdm: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); + return BuiltinConstantArgRange(TheCall, 1, 0, 3); case PPC::BI__builtin_ppc_test_data_class: { // Check if the first argument of the __builtin_ppc_test_data_class call is // valid. The argument must be 'float' or 'double' or '__float128'. 
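To make the rotate-and-insert constraints concrete, a hedged call-site example (a PowerPC target is assumed and the wrapper function is invented for illustration, but the builtin and argument positions match the checks above: the shift at index 2 must be in [0, 31] and the mask at index 3 must be a contiguous run of ones):

unsigned rotate_insert(unsigned RS, unsigned RA) {
  // Accepted: shift 8 lies in [0, 31] and 0x00FFFF00 is a contiguous
  // run of ones, so both Sema checks pass.
  unsigned Good = __builtin_ppc_rlwimi(RS, RA, 8, 0x00FFFF00);
  // Rejected: 0x0F0F0000 is not a contiguous run, so the
  // ValueIsRunOfOnes check on the mask argument diagnoses it.
  // unsigned Bad = __builtin_ppc_rlwimi(RS, RA, 8, 0x0F0F0000);
  return Good;
}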
@@ -5437,7 +5431,7 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, ArgType != QualType(Context.Float128Ty)) return Diag(TheCall->getBeginLoc(), diag::err_ppc_invalid_test_data_class_type); - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 127); + return BuiltinConstantArgRange(TheCall, 1, 0, 127); } case PPC::BI__builtin_ppc_maxfe: case PPC::BI__builtin_ppc_minfe: @@ -5466,12 +5460,12 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, << TheCall->getArg(I)->getType() << ArgType << 1 << 0 << 0; return false; } -#define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \ +#define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \ case PPC::BI__builtin_##Name: \ - return SemaBuiltinPPCMMACall(TheCall, BuiltinID, Types); + return BuiltinPPCMMACall(TheCall, BuiltinID, Types); #include "clang/Basic/BuiltinsPPC.def" } - return SemaBuiltinConstantArgRange(TheCall, i, l, u); + return BuiltinConstantArgRange(TheCall, i, l, u); } // Check if the given type is a non-pointer PPC MMA type. This function is used @@ -5611,6 +5605,7 @@ void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall, // returning an ExprError bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { switch (BuiltinID) { + case Builtin::BI__builtin_hlsl_elementwise_all: case Builtin::BI__builtin_hlsl_elementwise_any: { if (checkArgCount(*this, TheCall, 1)) return true; @@ -5621,7 +5616,7 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; if (CheckVectorElementCallArgs(this, TheCall)) return true; - if (SemaBuiltinElementwiseTernaryMath( + if (BuiltinElementwiseTernaryMath( TheCall, /*CheckForFloatArgs*/ TheCall->getArg(0)->getType()->hasFloatingRepresentation())) return true; @@ -5632,7 +5627,7 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; if (CheckVectorElementCallArgs(this, TheCall)) return true; - if (SemaBuiltinVectorToScalarMath(TheCall)) + if (BuiltinVectorToScalarMath(TheCall)) return true; if (CheckNoDoubleVectors(this, TheCall)) return true; @@ -5666,7 +5661,7 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; if (CheckVectorElementCallArgs(this, TheCall)) return true; - if (SemaBuiltinElementwiseTernaryMath(TheCall)) + if (BuiltinElementwiseTernaryMath(TheCall)) return true; if (CheckFloatOrHalfRepresentations(this, TheCall)) return true; @@ -5677,7 +5672,7 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; if (CheckVectorElementCallArgs(this, TheCall)) return true; - if (SemaBuiltinElementwiseTernaryMath( + if (BuiltinElementwiseTernaryMath( TheCall, /*CheckForFloatArgs*/ TheCall->getArg(0)->getType()->hasFloatingRepresentation())) return true; @@ -5784,7 +5779,7 @@ bool Sema::CheckRISCVLMUL(CallExpr *TheCall, unsigned ArgNum) { return false; // Check constant-ness first. 
- if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + if (BuiltinConstantArg(TheCall, ArgNum, Result)) return true; int64_t Val = Result.getSExtValue(); @@ -5894,10 +5889,10 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, switch (BuiltinID) { case RISCVVector::BI__builtin_rvv_vsetvli: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3) || + return BuiltinConstantArgRange(TheCall, 1, 0, 3) || CheckRISCVLMUL(TheCall, 2); case RISCVVector::BI__builtin_rvv_vsetvlimax: - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3) || + return BuiltinConstantArgRange(TheCall, 0, 0, 3) || CheckRISCVLMUL(TheCall, 1); case RISCVVector::BI__builtin_rvv_vget_v: { ASTContext::BuiltinVectorTypeInfo ResVecInfo = @@ -5912,7 +5907,7 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, else // vget for non-tuple type MaxIndex = (VecInfo.EC.getKnownMinValue() * VecInfo.NumVectors) / (ResVecInfo.EC.getKnownMinValue() * ResVecInfo.NumVectors); - return SemaBuiltinConstantArgRange(TheCall, 1, 0, MaxIndex - 1); + return BuiltinConstantArgRange(TheCall, 1, 0, MaxIndex - 1); } case RISCVVector::BI__builtin_rvv_vset_v: { ASTContext::BuiltinVectorTypeInfo ResVecInfo = @@ -5927,7 +5922,7 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, else // vset fo non-tuple type MaxIndex = (ResVecInfo.EC.getKnownMinValue() * ResVecInfo.NumVectors) / (VecInfo.EC.getKnownMinValue() * VecInfo.NumVectors); - return SemaBuiltinConstantArgRange(TheCall, 1, 0, MaxIndex - 1); + return BuiltinConstantArgRange(TheCall, 1, 0, MaxIndex - 1); } // Vector Crypto case RISCVVector::BI__builtin_rvv_vaeskf1_vi_tu: @@ -5938,19 +5933,19 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, QualType Op2Type = TheCall->getArg(1)->getType(); return CheckInvalidVLENandLMUL(TI, TheCall, *this, Op1Type, 128) || CheckInvalidVLENandLMUL(TI, TheCall, *this, Op2Type, 128) || - SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); + BuiltinConstantArgRange(TheCall, 2, 0, 31); } case RISCVVector::BI__builtin_rvv_vsm3c_vi_tu: case RISCVVector::BI__builtin_rvv_vsm3c_vi: { QualType Op1Type = TheCall->getArg(0)->getType(); return CheckInvalidVLENandLMUL(TI, TheCall, *this, Op1Type, 256) || - SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); + BuiltinConstantArgRange(TheCall, 2, 0, 31); } case RISCVVector::BI__builtin_rvv_vaeskf1_vi: case RISCVVector::BI__builtin_rvv_vsm4k_vi: { QualType Op1Type = TheCall->getArg(0)->getType(); return CheckInvalidVLENandLMUL(TI, TheCall, *this, Op1Type, 128) || - SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + BuiltinConstantArgRange(TheCall, 1, 0, 31); } case RISCVVector::BI__builtin_rvv_vaesdf_vv: case RISCVVector::BI__builtin_rvv_vaesdf_vs: @@ -6003,27 +5998,27 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCVVector::BI__builtin_rvv_sf_vc_i_se: // bit_27_26, bit_24_20, bit_11_7, simm5, sew, log2lmul - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3) || - SemaBuiltinConstantArgRange(TheCall, 1, 0, 31) || - SemaBuiltinConstantArgRange(TheCall, 2, 0, 31) || - SemaBuiltinConstantArgRange(TheCall, 3, -16, 15) || + return BuiltinConstantArgRange(TheCall, 0, 0, 3) || + BuiltinConstantArgRange(TheCall, 1, 0, 31) || + BuiltinConstantArgRange(TheCall, 2, 0, 31) || + BuiltinConstantArgRange(TheCall, 3, -16, 15) || CheckRISCVLMUL(TheCall, 5); case RISCVVector::BI__builtin_rvv_sf_vc_iv_se: // bit_27_26, bit_11_7, vs2, simm5 - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3) || - SemaBuiltinConstantArgRange(TheCall, 1, 0, 31) || - 
SemaBuiltinConstantArgRange(TheCall, 3, -16, 15); + return BuiltinConstantArgRange(TheCall, 0, 0, 3) || + BuiltinConstantArgRange(TheCall, 1, 0, 31) || + BuiltinConstantArgRange(TheCall, 3, -16, 15); case RISCVVector::BI__builtin_rvv_sf_vc_v_i: case RISCVVector::BI__builtin_rvv_sf_vc_v_i_se: // bit_27_26, bit_24_20, simm5 - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3) || - SemaBuiltinConstantArgRange(TheCall, 1, 0, 31) || - SemaBuiltinConstantArgRange(TheCall, 2, -16, 15); + return BuiltinConstantArgRange(TheCall, 0, 0, 3) || + BuiltinConstantArgRange(TheCall, 1, 0, 31) || + BuiltinConstantArgRange(TheCall, 2, -16, 15); case RISCVVector::BI__builtin_rvv_sf_vc_v_iv: case RISCVVector::BI__builtin_rvv_sf_vc_v_iv_se: // bit_27_26, vs2, simm5 - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3) || - SemaBuiltinConstantArgRange(TheCall, 2, -16, 15); + return BuiltinConstantArgRange(TheCall, 0, 0, 3) || + BuiltinConstantArgRange(TheCall, 2, -16, 15); case RISCVVector::BI__builtin_rvv_sf_vc_ivv_se: case RISCVVector::BI__builtin_rvv_sf_vc_ivw_se: case RISCVVector::BI__builtin_rvv_sf_vc_v_ivv: @@ -6031,13 +6026,13 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCVVector::BI__builtin_rvv_sf_vc_v_ivv_se: case RISCVVector::BI__builtin_rvv_sf_vc_v_ivw_se: // bit_27_26, vd, vs2, simm5 - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3) || - SemaBuiltinConstantArgRange(TheCall, 3, -16, 15); + return BuiltinConstantArgRange(TheCall, 0, 0, 3) || + BuiltinConstantArgRange(TheCall, 3, -16, 15); case RISCVVector::BI__builtin_rvv_sf_vc_x_se: // bit_27_26, bit_24_20, bit_11_7, xs1, sew, log2lmul - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3) || - SemaBuiltinConstantArgRange(TheCall, 1, 0, 31) || - SemaBuiltinConstantArgRange(TheCall, 2, 0, 31) || + return BuiltinConstantArgRange(TheCall, 0, 0, 3) || + BuiltinConstantArgRange(TheCall, 1, 0, 31) || + BuiltinConstantArgRange(TheCall, 2, 0, 31) || CheckRISCVLMUL(TheCall, 5); case RISCVVector::BI__builtin_rvv_sf_vc_xv_se: case RISCVVector::BI__builtin_rvv_sf_vc_vv_se: @@ -6045,8 +6040,8 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCVVector::BI__builtin_rvv_sf_vc_v_x: case RISCVVector::BI__builtin_rvv_sf_vc_v_x_se: // bit_27_26, bit_24-20, xs1 - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3) || - SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + return BuiltinConstantArgRange(TheCall, 0, 0, 3) || + BuiltinConstantArgRange(TheCall, 1, 0, 31); case RISCVVector::BI__builtin_rvv_sf_vc_vvv_se: case RISCVVector::BI__builtin_rvv_sf_vc_xvv_se: case RISCVVector::BI__builtin_rvv_sf_vc_vvw_se: @@ -6066,11 +6061,11 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCVVector::BI__builtin_rvv_sf_vc_v_xvw_se: case RISCVVector::BI__builtin_rvv_sf_vc_v_vvw_se: // bit_27_26, vd, vs2, xs1/vs1 - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3); + return BuiltinConstantArgRange(TheCall, 0, 0, 3); case RISCVVector::BI__builtin_rvv_sf_vc_fv_se: // bit_26, bit_11_7, vs2, fs1 - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 1) || - SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + return BuiltinConstantArgRange(TheCall, 0, 0, 1) || + BuiltinConstantArgRange(TheCall, 1, 0, 31); case RISCVVector::BI__builtin_rvv_sf_vc_fvv_se: case RISCVVector::BI__builtin_rvv_sf_vc_fvw_se: case RISCVVector::BI__builtin_rvv_sf_vc_v_fvv: @@ -6081,7 +6076,7 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCVVector::BI__builtin_rvv_sf_vc_v_fv: case 
RISCVVector::BI__builtin_rvv_sf_vc_v_fv_se: // bit_26, vs2, fs1 - return SemaBuiltinConstantArgRange(TheCall, 0, 0, 1); + return BuiltinConstantArgRange(TheCall, 0, 0, 1); // Check if byteselect is in [0, 3] case RISCV::BI__builtin_riscv_aes32dsi: case RISCV::BI__builtin_riscv_aes32dsmi: @@ -6089,10 +6084,10 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCV::BI__builtin_riscv_aes32esmi: case RISCV::BI__builtin_riscv_sm4ks: case RISCV::BI__builtin_riscv_sm4ed: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); + return BuiltinConstantArgRange(TheCall, 2, 0, 3); // Check if rnum is in [0, 10] case RISCV::BI__builtin_riscv_aes64ks1i: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 10); + return BuiltinConstantArgRange(TheCall, 1, 0, 10); // Check if value range for vxrm is in [0, 3] case RISCVVector::BI__builtin_rvv_vaaddu_vv: case RISCVVector::BI__builtin_rvv_vaaddu_vx: @@ -6112,7 +6107,7 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCVVector::BI__builtin_rvv_vnclip_wx: case RISCVVector::BI__builtin_rvv_vnclipu_wv: case RISCVVector::BI__builtin_rvv_vnclipu_wx: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); + return BuiltinConstantArgRange(TheCall, 2, 0, 3); case RISCVVector::BI__builtin_rvv_vaaddu_vv_tu: case RISCVVector::BI__builtin_rvv_vaaddu_vx_tu: case RISCVVector::BI__builtin_rvv_vaadd_vv_tu: @@ -6149,7 +6144,7 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCVVector::BI__builtin_rvv_vnclip_wx_m: case RISCVVector::BI__builtin_rvv_vnclipu_wv_m: case RISCVVector::BI__builtin_rvv_vnclipu_wx_m: - return SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); + return BuiltinConstantArgRange(TheCall, 3, 0, 3); case RISCVVector::BI__builtin_rvv_vaaddu_vv_tum: case RISCVVector::BI__builtin_rvv_vaaddu_vv_tumu: case RISCVVector::BI__builtin_rvv_vaaddu_vv_mu: @@ -6204,7 +6199,7 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCVVector::BI__builtin_rvv_vnclip_wx_tumu: case RISCVVector::BI__builtin_rvv_vnclipu_wv_tumu: case RISCVVector::BI__builtin_rvv_vnclipu_wx_tumu: - return SemaBuiltinConstantArgRange(TheCall, 4, 0, 3); + return BuiltinConstantArgRange(TheCall, 4, 0, 3); case RISCVVector::BI__builtin_rvv_vfsqrt_v_rm: case RISCVVector::BI__builtin_rvv_vfrec7_v_rm: case RISCVVector::BI__builtin_rvv_vfcvt_x_f_v_rm: @@ -6218,7 +6213,7 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCVVector::BI__builtin_rvv_vfncvt_f_x_w_rm: case RISCVVector::BI__builtin_rvv_vfncvt_f_xu_w_rm: case RISCVVector::BI__builtin_rvv_vfncvt_f_f_w_rm: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 4); + return BuiltinConstantArgRange(TheCall, 1, 0, 4); case RISCVVector::BI__builtin_rvv_vfadd_vv_rm: case RISCVVector::BI__builtin_rvv_vfadd_vf_rm: case RISCVVector::BI__builtin_rvv_vfsub_vv_rm: @@ -6269,7 +6264,7 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCVVector::BI__builtin_rvv_vfncvt_f_x_w_rm_m: case RISCVVector::BI__builtin_rvv_vfncvt_f_xu_w_rm_m: case RISCVVector::BI__builtin_rvv_vfncvt_f_f_w_rm_m: - return SemaBuiltinConstantArgRange(TheCall, 2, 0, 4); + return BuiltinConstantArgRange(TheCall, 2, 0, 4); case RISCVVector::BI__builtin_rvv_vfadd_vv_rm_tu: case RISCVVector::BI__builtin_rvv_vfadd_vf_rm_tu: case RISCVVector::BI__builtin_rvv_vfsub_vv_rm_tu: @@ -6405,7 +6400,7 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCVVector::BI__builtin_rvv_vfncvt_f_x_w_rm_mu: case RISCVVector::BI__builtin_rvv_vfncvt_f_xu_w_rm_mu: case 
RISCVVector::BI__builtin_rvv_vfncvt_f_f_w_rm_mu: - return SemaBuiltinConstantArgRange(TheCall, 3, 0, 4); + return BuiltinConstantArgRange(TheCall, 3, 0, 4); case RISCVVector::BI__builtin_rvv_vfmacc_vv_rm_m: case RISCVVector::BI__builtin_rvv_vfmacc_vf_rm_m: case RISCVVector::BI__builtin_rvv_vfnmacc_vv_rm_m: @@ -6566,7 +6561,7 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCVVector::BI__builtin_rvv_vfwmsac_vf_rm_mu: case RISCVVector::BI__builtin_rvv_vfwnmsac_vv_rm_mu: case RISCVVector::BI__builtin_rvv_vfwnmsac_vf_rm_mu: - return SemaBuiltinConstantArgRange(TheCall, 4, 0, 4); + return BuiltinConstantArgRange(TheCall, 4, 0, 4); case RISCV::BI__builtin_riscv_ntl_load: case RISCV::BI__builtin_riscv_ntl_store: DeclRefExpr *DRE = @@ -6586,7 +6581,7 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, // Domain value should be compile-time constant. // 2 <= domain <= 5 if (TheCall->getNumArgs() == NumArgs && - SemaBuiltinConstantArgRange(TheCall, NumArgs - 1, 2, 5)) + BuiltinConstantArgRange(TheCall, NumArgs - 1, 2, 5)) return true; Expr *PointerArg = TheCall->getArg(0); @@ -6670,8 +6665,8 @@ bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, case SystemZ::BI__builtin_s390_vfaezfs: i = 2; l = 0; u = 15; break; case SystemZ::BI__builtin_s390_vfisb: case SystemZ::BI__builtin_s390_vfidb: - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15) || - SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + return BuiltinConstantArgRange(TheCall, 1, 0, 15) || + BuiltinConstantArgRange(TheCall, 2, 0, 15); case SystemZ::BI__builtin_s390_vftcisb: case SystemZ::BI__builtin_s390_vftcidb: i = 1; l = 0; u = 4095; break; case SystemZ::BI__builtin_s390_vlbb: i = 1; l = 0; u = 15; break; @@ -6702,7 +6697,7 @@ bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, case SystemZ::BI__builtin_s390_vcnf: i = 1; l = 0; u = 15; break; case SystemZ::BI__builtin_s390_vcrnfs: i = 2; l = 0; u = 15; break; } - return SemaBuiltinConstantArgRange(TheCall, i, l, u); + return BuiltinConstantArgRange(TheCall, i, l, u); } bool Sema::CheckWebAssemblyBuiltinFunctionCall(const TargetInfo &TI, @@ -7055,7 +7050,7 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { return false; // Check constant-ness first. - if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + if (BuiltinConstantArg(TheCall, ArgNum, Result)) return true; // Make sure rounding mode is either ROUND_CUR_DIRECTION or ROUND_NO_EXC bit @@ -7165,7 +7160,7 @@ bool Sema::CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, return false; // Check constant-ness first. 
- if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + if (BuiltinConstantArg(TheCall, ArgNum, Result)) return true; if (Result == 1 || Result == 2 || Result == 4 || Result == 8) @@ -7180,7 +7175,7 @@ enum { TileRegLow = 0, TileRegHigh = 7 }; bool Sema::CheckX86BuiltinTileArgumentsRange(CallExpr *TheCall, ArrayRef ArgNums) { for (int ArgNum : ArgNums) { - if (SemaBuiltinConstantArgRange(TheCall, ArgNum, TileRegLow, TileRegHigh)) + if (BuiltinConstantArgRange(TheCall, ArgNum, TileRegLow, TileRegHigh)) return true; } return false; @@ -7197,7 +7192,7 @@ bool Sema::CheckX86BuiltinTileDuplicate(CallExpr *TheCall, continue; llvm::APSInt Result; - if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + if (BuiltinConstantArg(TheCall, ArgNum, Result)) return true; int ArgExtValue = Result.getExtValue(); assert((ArgExtValue >= TileRegLow && ArgExtValue <= TileRegHigh) && @@ -7623,7 +7618,7 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, // template-generated or macro-generated dead code to potentially have out-of- // range values. These need to code generate, but don't need to necessarily // make any sense. We use a warning that defaults to an error. - return SemaBuiltinConstantArgRange(TheCall, i, l, u, /*RangeIsError*/ false); + return BuiltinConstantArgRange(TheCall, i, l, u, /*RangeIsError*/ false); } static bool checkIntelFPGARegArgument(Sema &S, QualType ArgType, @@ -7723,11 +7718,11 @@ bool Sema::CheckIntelFPGAMemBuiltinFunctionCall(CallExpr *TheCall) { // Second argument must be a constant integer llvm::APSInt Result; - if (SemaBuiltinConstantArg(TheCall, 1, Result)) + if (BuiltinConstantArg(TheCall, 1, Result)) return true; // Third argument (CacheSize) must be a non-negative constant integer - if (SemaBuiltinConstantArg(TheCall, 2, Result)) + if (BuiltinConstantArg(TheCall, 2, Result)) return true; if (Result < 0) return Diag(TheCall->getArg(2)->getBeginLoc(), @@ -7735,7 +7730,7 @@ bool Sema::CheckIntelFPGAMemBuiltinFunctionCall(CallExpr *TheCall) { // The last four optional arguments must be signed constant integers. for (unsigned I = MinNumArgs; I != NumArgs; ++I) { - if (SemaBuiltinConstantArg(TheCall, I, Result)) + if (BuiltinConstantArg(TheCall, I, Result)) return true; } @@ -7786,7 +7781,7 @@ bool Sema::CheckIntelSYCLPtrAnnotationBuiltinFunctionCall(unsigned BuiltinID, llvm::APSInt Result; for (; I != NumArgs; ++I) { // must be integer - if (SemaBuiltinConstantArg(TheCall, I, Result)) + if (BuiltinConstantArg(TheCall, I, Result)) return true; } @@ -8251,6 +8246,7 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto, // For variadic functions, we may have more args than parameters. // For some K&R functions, we may have less args than parameters. const auto N = std::min(Proto->getNumParams(), Args.size()); + bool AnyScalableArgsOrRet = Proto->getReturnType()->isSizelessVectorType(); for (unsigned ArgIdx = 0; ArgIdx < N; ++ArgIdx) { // Args[ArgIdx] can be null in malformed code. 
if (const Expr *Arg = Args[ArgIdx]) { @@ -8264,6 +8260,8 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto, checkAIXMemberAlignment((Arg->getExprLoc()), Arg); QualType ParamTy = Proto->getParamType(ArgIdx); + if (ParamTy->isSizelessVectorType()) + AnyScalableArgsOrRet = true; QualType ArgTy = Arg->getType(); CheckArgAlignment(Arg->getExprLoc(), FDecl, std::to_string(ArgIdx + 1), ArgTy, ParamTy); @@ -8284,6 +8282,23 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto, } } + // If the call requires a streaming-mode change and has scalable vector + // arguments or return values, then warn the user that the streaming and + // non-streaming vector lengths may be different. + const auto *CallerFD = dyn_cast(CurContext); + if (CallerFD && (!FD || !FD->getBuiltinID()) && AnyScalableArgsOrRet) { + bool IsCalleeStreaming = + ExtInfo.AArch64SMEAttributes & FunctionType::SME_PStateSMEnabledMask; + bool IsCalleeStreamingCompatible = + ExtInfo.AArch64SMEAttributes & + FunctionType::SME_PStateSMCompatibleMask; + ArmStreamingType CallerFnType = getArmStreamingFnType(CallerFD); + if (!IsCalleeStreamingCompatible && + (CallerFnType == ArmStreamingCompatible || + ((CallerFnType == ArmStreaming) ^ IsCalleeStreaming))) + Diag(Loc, diag::warn_sme_streaming_pass_return_vl_to_non_streaming); + } + FunctionType::ArmStateValue CalleeArmZAState = FunctionType::getArmZAState(ExtInfo.AArch64SMEAttributes); FunctionType::ArmStateValue CalleeArmZT0State = @@ -8292,7 +8307,7 @@ void Sema::checkCall(NamedDecl *FDecl, const FunctionProtoType *Proto, CalleeArmZT0State != FunctionType::ARM_None) { bool CallerHasZAState = false; bool CallerHasZT0State = false; - if (const auto *CallerFD = dyn_cast(CurContext)) { + if (CallerFD) { auto *Attr = CallerFD->getAttr(); if (Attr && Attr->isNewZA()) CallerHasZAState = true; @@ -8553,8 +8568,8 @@ static bool isValidOrderingForOp(int64_t Ordering, AtomicExpr::AtomicOp Op) { } } -ExprResult Sema::SemaAtomicOpsOverloaded(ExprResult TheCallResult, - AtomicExpr::AtomicOp Op) { +ExprResult Sema::AtomicOpsOverloaded(ExprResult TheCallResult, + AtomicExpr::AtomicOp Op) { CallExpr *TheCall = cast(TheCallResult.get()); DeclRefExpr *DRE =cast(TheCall->getCallee()->IgnoreParenCasts()); MultiExprArg Args{TheCall->getArgs(), TheCall->getNumArgs()}; @@ -9183,8 +9198,7 @@ bool Sema::BuiltinWasmRefNullFunc(CallExpr *TheCall) { /// /// This function goes through and does final semantic checking for these /// builtins, as well as generating any warnings. -ExprResult -Sema::SemaBuiltinAtomicOverloaded(ExprResult TheCallResult) { +ExprResult Sema::BuiltinAtomicOverloaded(ExprResult TheCallResult) { CallExpr *TheCall = static_cast(TheCallResult.get()); Expr *Callee = TheCall->getCallee(); DeclRefExpr *DRE = cast(Callee->IgnoreParenCasts()); @@ -9555,13 +9569,13 @@ Sema::SemaBuiltinAtomicOverloaded(ExprResult TheCallResult) { return TheCallResult; } -/// SemaBuiltinNontemporalOverloaded - We have a call to +/// BuiltinNontemporalOverloaded - We have a call to /// __builtin_nontemporal_store or __builtin_nontemporal_load, which is an /// overloaded function based on the pointer type of its last argument. /// /// This function goes through and does final semantic checking for these /// builtins. 
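For reference, what the overload machinery described above does from the user's side: the nontemporal builtins take their element type from the pointee of the pointer argument. A minimal illustrative sketch (not part of this patch):

```cpp
// __builtin_nontemporal_load/store are standard clang builtins; the checker
// renamed below only validates the call shape and derives the element type
// from the last pointer argument.
void copy_stream(float *dst, const float *src, int n) {
  for (int i = 0; i < n; ++i) {
    float v = __builtin_nontemporal_load(&src[i]); // element type: float
    __builtin_nontemporal_store(v, &dst[i]);       // value must match pointee
  }
}
```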
-ExprResult Sema::SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult) {
+ExprResult Sema::BuiltinNontemporalOverloaded(ExprResult TheCallResult) {
   CallExpr *TheCall = (CallExpr *)TheCallResult.get();
   DeclRefExpr *DRE =
       cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
@@ -9762,7 +9776,7 @@ static bool checkVAStartIsInVariadicFunction(Sema &S, Expr *Fn,
 /// Check the arguments to '__builtin_va_start' or '__builtin_ms_va_start'
 /// for validity. Emit an error and return true on failure; return false
 /// on success.
-bool Sema::SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall) {
+bool Sema::BuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall) {
   Expr *Fn = TheCall->getCallee();
 
   if (checkVAStartABI(*this, BuiltinID, Fn))
@@ -9836,7 +9850,7 @@ bool Sema::SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall) {
   return false;
 }
 
-bool Sema::SemaBuiltinVAStartARMMicrosoft(CallExpr *Call) {
+bool Sema::BuiltinVAStartARMMicrosoft(CallExpr *Call) {
   auto IsSuitablyTypedFormatArgument = [this](const Expr *Arg) -> bool {
     const LangOptions &LO = getLangOpts();
 
@@ -9899,9 +9913,9 @@ bool Sema::SemaBuiltinVAStartARMMicrosoft(CallExpr *Call) {
   return false;
 }
 
-/// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
+/// BuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
 /// friends. This is declared to take (...), so we have to check everything.
-bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall, unsigned BuiltinID) {
+bool Sema::BuiltinUnorderedCompare(CallExpr *TheCall, unsigned BuiltinID) {
   if (checkArgCount(*this, TheCall, 2))
     return true;
 
@@ -9941,11 +9955,11 @@ bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall, unsigned BuiltinID) {
   return false;
 }
 
-/// SemaBuiltinSemaBuiltinFPClassification - Handle functions like
+/// BuiltinFPClassification - Handle functions like
 /// __builtin_isnan and friends. This is declared to take (...), so we have
 /// to check everything.
-bool Sema::SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs,
-                                       unsigned BuiltinID) {
+bool Sema::BuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs,
+                                   unsigned BuiltinID) {
   if (checkArgCount(*this, TheCall, NumArgs))
     return true;
 
@@ -10013,7 +10027,7 @@ bool Sema::SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs,
   // __builtin_isfpclass has integer parameter that specify test mask. It is
   // passed in (...), so it should be analyzed completely here.
   if (IsFPClass)
-    if (SemaBuiltinConstantArgRange(TheCall, 1, 0, llvm::fcAllFlags))
+    if (BuiltinConstantArgRange(TheCall, 1, 0, llvm::fcAllFlags))
      return true;
 
   // TODO: enable this code to all classification functions.
@@ -10030,7 +10044,7 @@ bool Sema::SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs,
 }
 
 /// Perform semantic analysis for a call to __builtin_complex.
-bool Sema::SemaBuiltinComplex(CallExpr *TheCall) { +bool Sema::BuiltinComplex(CallExpr *TheCall) { if (checkArgCount(*this, TheCall, 2)) return true; @@ -10091,7 +10105,7 @@ bool Sema::SemaBuiltinComplex(CallExpr *TheCall) { // Example builtins are : // vector double vec_xxpermdi(vector double, vector double, int); // vector short vec_xxsldwi(vector short, vector short, int); -bool Sema::SemaBuiltinVSX(CallExpr *TheCall) { +bool Sema::BuiltinVSX(CallExpr *TheCall) { unsigned ExpectedNumArgs = 3; if (checkArgCount(*this, TheCall, ExpectedNumArgs)) return true; @@ -10133,9 +10147,9 @@ bool Sema::SemaBuiltinVSX(CallExpr *TheCall) { return false; } -/// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. +/// BuiltinShuffleVector - Handle __builtin_shufflevector. // This is declared to take (...), so we have to check everything. -ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { +ExprResult Sema::BuiltinShuffleVector(CallExpr *TheCall) { if (TheCall->getNumArgs() < 2) return ExprError(Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args_at_least) @@ -10223,10 +10237,10 @@ ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { TheCall->getRParenLoc()); } -/// SemaConvertVectorExpr - Handle __builtin_convertvector -ExprResult Sema::SemaConvertVectorExpr(Expr *E, TypeSourceInfo *TInfo, - SourceLocation BuiltinLoc, - SourceLocation RParenLoc) { +/// ConvertVectorExpr - Handle __builtin_convertvector +ExprResult Sema::ConvertVectorExpr(Expr *E, TypeSourceInfo *TInfo, + SourceLocation BuiltinLoc, + SourceLocation RParenLoc) { ExprValueKind VK = VK_PRValue; ExprObjectKind OK = OK_Ordinary; QualType DstTy = TInfo->getType(); @@ -10250,14 +10264,14 @@ ExprResult Sema::SemaConvertVectorExpr(Expr *E, TypeSourceInfo *TInfo, << E->getSourceRange()); } - return new (Context) - ConvertVectorExpr(E, TInfo, DstTy, VK, OK, BuiltinLoc, RParenLoc); + return new (Context) class ConvertVectorExpr(E, TInfo, DstTy, VK, OK, + BuiltinLoc, RParenLoc); } -/// SemaBuiltinPrefetch - Handle __builtin_prefetch. +/// BuiltinPrefetch - Handle __builtin_prefetch. // This is declared to take (const void*, ...) and can take two // optional constant int args. -bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { +bool Sema::BuiltinPrefetch(CallExpr *TheCall) { unsigned NumArgs = TheCall->getNumArgs(); if (checkArgCountAtMost(*this, TheCall, 3)) @@ -10266,14 +10280,14 @@ bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) { // Argument 0 is checked for us and the remaining arguments must be // constant integers. for (unsigned i = 1; i != NumArgs; ++i) - if (SemaBuiltinConstantArgRange(TheCall, i, 0, i == 1 ? 1 : 3)) + if (BuiltinConstantArgRange(TheCall, i, 0, i == 1 ? 1 : 3)) return true; return false; } -/// SemaBuiltinArithmeticFence - Handle __arithmetic_fence. -bool Sema::SemaBuiltinArithmeticFence(CallExpr *TheCall) { +/// BuiltinArithmeticFence - Handle __arithmetic_fence. +bool Sema::BuiltinArithmeticFence(CallExpr *TheCall) { if (!Context.getTargetInfo().checkArithmeticFenceSupported()) return Diag(TheCall->getBeginLoc(), diag::err_builtin_target_unsupported) << SourceRange(TheCall->getBeginLoc(), TheCall->getEndLoc()); @@ -10295,10 +10309,10 @@ bool Sema::SemaBuiltinArithmeticFence(CallExpr *TheCall) { return false; } -/// SemaBuiltinAssume - Handle __assume (MS Extension). +/// BuiltinAssume - Handle __assume (MS Extension). // __assume does not evaluate its arguments, and should warn if its argument // has side effects. 
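A quick illustration of the behavior the comment above describes (requires -fms-extensions; the diagnostic wording is paraphrased):

```cpp
// __assume's operand is never evaluated, so an operand with side effects
// would silently lose them; BuiltinAssume warns about exactly this case.
int f(int x) {
  __assume(x > 0);    // OK: no side effects
  __assume(++x > 0);  // warning: the argument has side effects that are
                      // discarded because __assume does not evaluate it
  return x;
}
```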
-bool Sema::SemaBuiltinAssume(CallExpr *TheCall) { +bool Sema::BuiltinAssume(CallExpr *TheCall) { Expr *Arg = TheCall->getArg(0); if (Arg->isInstantiationDependent()) return false; @@ -10313,7 +10327,7 @@ bool Sema::SemaBuiltinAssume(CallExpr *TheCall) { /// Handle __builtin_alloca_with_align. This is declared /// as (size_t, size_t) where the second size_t must be a power of 2 greater /// than 8. -bool Sema::SemaBuiltinAllocaWithAlign(CallExpr *TheCall) { +bool Sema::BuiltinAllocaWithAlign(CallExpr *TheCall) { // The alignment must be a constant integer. Expr *Arg = TheCall->getArg(1); @@ -10346,7 +10360,7 @@ bool Sema::SemaBuiltinAllocaWithAlign(CallExpr *TheCall) { /// Handle __builtin_assume_aligned. This is declared /// as (const void*, size_t, ...) and can take one optional constant int arg. -bool Sema::SemaBuiltinAssumeAligned(CallExpr *TheCall) { +bool Sema::BuiltinAssumeAligned(CallExpr *TheCall) { if (checkArgCountRange(*this, TheCall, 2, 3)) return true; @@ -10368,7 +10382,7 @@ bool Sema::SemaBuiltinAssumeAligned(CallExpr *TheCall) { // We can't check the value of a dependent argument. if (!SecondArg->isValueDependent()) { llvm::APSInt Result; - if (SemaBuiltinConstantArg(TheCall, 1, Result)) + if (BuiltinConstantArg(TheCall, 1, Result)) return true; if (!Result.isPowerOf2()) @@ -10390,7 +10404,7 @@ bool Sema::SemaBuiltinAssumeAligned(CallExpr *TheCall) { return false; } -bool Sema::SemaBuiltinOSLogFormat(CallExpr *TheCall) { +bool Sema::BuiltinOSLogFormat(CallExpr *TheCall) { unsigned BuiltinID = cast(TheCall->getCalleeDecl())->getBuiltinID(); bool IsSizeCall = BuiltinID == Builtin::BI__builtin_os_log_format_buffer_size; @@ -10466,10 +10480,10 @@ bool Sema::SemaBuiltinOSLogFormat(CallExpr *TheCall) { return false; } -/// SemaBuiltinConstantArg - Handle a check if argument ArgNum of CallExpr +/// BuiltinConstantArg - Handle a check if argument ArgNum of CallExpr /// TheCall is a constant expression. -bool Sema::SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, - llvm::APSInt &Result) { +bool Sema::BuiltinConstantArg(CallExpr *TheCall, int ArgNum, + llvm::APSInt &Result) { Expr *Arg = TheCall->getArg(ArgNum); DeclRefExpr *DRE =cast(TheCall->getCallee()->IgnoreParenCasts()); FunctionDecl *FDecl = cast(DRE->getDecl()); @@ -10484,10 +10498,10 @@ bool Sema::SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, return false; } -/// SemaBuiltinConstantArgRange - Handle a check if argument ArgNum of CallExpr +/// BuiltinConstantArgRange - Handle a check if argument ArgNum of CallExpr /// TheCall is a constant expression in the range [Low, High]. -bool Sema::SemaBuiltinConstantArgRange(CallExpr *TheCall, int ArgNum, - int Low, int High, bool RangeIsError) { +bool Sema::BuiltinConstantArgRange(CallExpr *TheCall, int ArgNum, int Low, + int High, bool RangeIsError) { if (isConstantEvaluatedContext()) return false; llvm::APSInt Result; @@ -10498,7 +10512,7 @@ bool Sema::SemaBuiltinConstantArgRange(CallExpr *TheCall, int ArgNum, return false; // Check constant-ness first. - if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + if (BuiltinConstantArg(TheCall, ArgNum, Result)) return true; if (Result.getSExtValue() < Low || Result.getSExtValue() > High) { @@ -10517,10 +10531,10 @@ bool Sema::SemaBuiltinConstantArgRange(CallExpr *TheCall, int ArgNum, return false; } -/// SemaBuiltinConstantArgMultiple - Handle a check if argument ArgNum of CallExpr +/// BuiltinConstantArgMultiple - Handle a check if argument ArgNum of CallExpr /// TheCall is a constant expression is a multiple of Num.. 
-bool Sema::SemaBuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum, - unsigned Num) { +bool Sema::BuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum, + unsigned Num) { llvm::APSInt Result; // We can't check the value of a dependent argument. @@ -10529,7 +10543,7 @@ bool Sema::SemaBuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum, return false; // Check constant-ness first. - if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + if (BuiltinConstantArg(TheCall, ArgNum, Result)) return true; if (Result.getSExtValue() % Num != 0) @@ -10539,9 +10553,9 @@ bool Sema::SemaBuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum, return false; } -/// SemaBuiltinConstantArgPower2 - Check if argument ArgNum of TheCall is a +/// BuiltinConstantArgPower2 - Check if argument ArgNum of TheCall is a /// constant expression representing a power of 2. -bool Sema::SemaBuiltinConstantArgPower2(CallExpr *TheCall, int ArgNum) { +bool Sema::BuiltinConstantArgPower2(CallExpr *TheCall, int ArgNum) { llvm::APSInt Result; // We can't check the value of a dependent argument. @@ -10550,7 +10564,7 @@ bool Sema::SemaBuiltinConstantArgPower2(CallExpr *TheCall, int ArgNum) { return false; // Check constant-ness first. - if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + if (BuiltinConstantArg(TheCall, ArgNum, Result)) return true; // Bit-twiddling to test for a power of 2: for x > 0, x & (x-1) is zero if @@ -10584,11 +10598,11 @@ static bool IsShiftedByte(llvm::APSInt Value) { } } -/// SemaBuiltinConstantArgShiftedByte - Check if argument ArgNum of TheCall is +/// BuiltinConstantArgShiftedByte - Check if argument ArgNum of TheCall is /// a constant expression representing an arbitrary byte value shifted left by /// a multiple of 8 bits. -bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum, - unsigned ArgBits) { +bool Sema::BuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum, + unsigned ArgBits) { llvm::APSInt Result; // We can't check the value of a dependent argument. @@ -10597,7 +10611,7 @@ bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum, return false; // Check constant-ness first. - if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + if (BuiltinConstantArg(TheCall, ArgNum, Result)) return true; // Truncate to the given size. @@ -10611,14 +10625,13 @@ bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum, << Arg->getSourceRange(); } -/// SemaBuiltinConstantArgShiftedByteOr0xFF - Check if argument ArgNum of +/// BuiltinConstantArgShiftedByteOr0xFF - Check if argument ArgNum of /// TheCall is a constant expression representing either a shifted byte value, /// or a value of the form 0x??FF (i.e. a member of the arithmetic progression /// 0x00FF, 0x01FF, ..., 0xFFFF). This strange range check is needed for some /// Arm MVE intrinsics. -bool Sema::SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, - int ArgNum, - unsigned ArgBits) { +bool Sema::BuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, int ArgNum, + unsigned ArgBits) { llvm::APSInt Result; // We can't check the value of a dependent argument. @@ -10627,7 +10640,7 @@ bool Sema::SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, return false; // Check constant-ness first. - if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + if (BuiltinConstantArg(TheCall, ArgNum, Result)) return true; // Truncate to the given size. 
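These renamed BuiltinConstantArg* helpers are what produce the familiar "argument value is outside the valid range" diagnostics. A small sketch using __builtin_prefetch, whose rw argument must be a constant in [0, 1] and locality in [0, 3] (the exact ranges BuiltinPrefetch passes to BuiltinConstantArgRange earlier in this patch):

```cpp
void warm(const void *p, int rw) {
  __builtin_prefetch(p, 1, 3);     // accepted: both constants in range
  // __builtin_prefetch(p, 2, 3);  // rejected: rw outside [0, 1]
  // __builtin_prefetch(p, rw, 3); // rejected: rw is not a constant
}
```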
@@ -10644,8 +10657,8 @@ bool Sema::SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, << Arg->getSourceRange(); } -/// SemaBuiltinARMMemoryTaggingCall - Handle calls of memory tagging extensions -bool Sema::SemaBuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall) { +/// BuiltinARMMemoryTaggingCall - Handle calls of memory tagging extensions +bool Sema::BuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall) { if (BuiltinID == AArch64::BI__builtin_arm_irg) { if (checkArgCount(*this, TheCall, 2)) return true; @@ -10692,7 +10705,7 @@ bool Sema::SemaBuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall TheCall->setType(FirstArgType); // Second arg must be an constant in range [0,15] - return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + return BuiltinConstantArgRange(TheCall, 1, 0, 15); } if (BuiltinID == AArch64::BI__builtin_arm_gmi) { @@ -10798,11 +10811,11 @@ bool Sema::SemaBuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall return true; } -/// SemaBuiltinARMSpecialReg - Handle a check if argument ArgNum of CallExpr +/// BuiltinARMSpecialReg - Handle a check if argument ArgNum of CallExpr /// TheCall is an ARM/AArch64 special register string literal. -bool Sema::SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall, - int ArgNum, unsigned ExpectedFieldNum, - bool AllowName) { +bool Sema::BuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall, + int ArgNum, unsigned ExpectedFieldNum, + bool AllowName) { bool IsARMBuiltin = BuiltinID == ARM::BI__builtin_arm_rsr64 || BuiltinID == ARM::BI__builtin_arm_wsr64 || BuiltinID == ARM::BI__builtin_arm_rsr || @@ -10924,18 +10937,18 @@ bool Sema::SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall, // If a programmer wants to codegen the MSR (register) form of `msr tco, // xN`, they can still do so by specifying the register using five // colon-separated numbers in a string. - return SemaBuiltinConstantArgRange(TheCall, 1, 0, *MaxLimit); + return BuiltinConstantArgRange(TheCall, 1, 0, *MaxLimit); } return false; } -/// SemaBuiltinPPCMMACall - Check the call to a PPC MMA builtin for validity. +/// BuiltinPPCMMACall - Check the call to a PPC MMA builtin for validity. /// Emit an error and return true on failure; return false on success. /// TypeStr is a string containing the type descriptor of the value returned by /// the builtin and the descriptors of the expected type of the arguments. -bool Sema::SemaBuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID, - const char *TypeStr) { +bool Sema::BuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID, + const char *TypeStr) { assert((TypeStr[0] != '\0') && "Invalid types in PPC MMA builtin declaration"); @@ -10978,8 +10991,7 @@ bool Sema::SemaBuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID, // If the value of the Mask is not 0, we have a constraint in the size of // the integer argument so here we ensure the argument is a constant that // is in the valid range. - if (Mask != 0 && - SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, Mask, true)) + if (Mask != 0 && BuiltinConstantArgRange(TheCall, ArgNum, 0, Mask, true)) return true; ArgNum++; @@ -10999,10 +11011,10 @@ bool Sema::SemaBuiltinPPCMMACall(CallExpr *TheCall, unsigned BuiltinID, return false; } -/// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val). +/// BuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val). /// This checks that the target supports __builtin_longjmp and /// that val is a constant 1. 
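For context, the constraint stated in the comment above, as user code (only accepted on targets with SjLj lowering; jb is an illustrative buffer name):

```cpp
void *jb[5];
int enter(void) {
  if (__builtin_setjmp(jb))    // also requires SjLj lowering support
    return 1;                  // resumed here after the longjmp
  __builtin_longjmp(jb, 1);    // OK: the second argument is the constant 1
  // __builtin_longjmp(jb, 2); // error: the value must be exactly 1
}
```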
-bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { +bool Sema::BuiltinLongjmp(CallExpr *TheCall) { if (!Context.getTargetInfo().hasSjLjLowering()) return Diag(TheCall->getBeginLoc(), diag::err_builtin_longjmp_unsupported) << SourceRange(TheCall->getBeginLoc(), TheCall->getEndLoc()); @@ -11011,7 +11023,7 @@ bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { llvm::APSInt Result; // TODO: This is less than ideal. Overload this to take a value. - if (SemaBuiltinConstantArg(TheCall, 1, Result)) + if (BuiltinConstantArg(TheCall, 1, Result)) return true; if (Result != 1) @@ -11021,9 +11033,9 @@ bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) { return false; } -/// SemaBuiltinSetjmp - Handle __builtin_setjmp(void *env[5]). +/// BuiltinSetjmp - Handle __builtin_setjmp(void *env[5]). /// This checks that the target supports __builtin_setjmp. -bool Sema::SemaBuiltinSetjmp(CallExpr *TheCall) { +bool Sema::BuiltinSetjmp(CallExpr *TheCall) { if (!Context.getTargetInfo().hasSjLjLowering()) return Diag(TheCall->getBeginLoc(), diag::err_builtin_setjmp_unsupported) << SourceRange(TheCall->getBeginLoc(), TheCall->getEndLoc()); @@ -20456,17 +20468,17 @@ bool Sema::PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall) { return false; } -bool Sema::SemaBuiltinElementwiseMath(CallExpr *TheCall) { +bool Sema::BuiltinElementwiseMath(CallExpr *TheCall) { QualType Res; - if (SemaBuiltinVectorMath(TheCall, Res)) + if (BuiltinVectorMath(TheCall, Res)) return true; TheCall->setType(Res); return false; } -bool Sema::SemaBuiltinVectorToScalarMath(CallExpr *TheCall) { +bool Sema::BuiltinVectorToScalarMath(CallExpr *TheCall) { QualType Res; - if (SemaBuiltinVectorMath(TheCall, Res)) + if (BuiltinVectorMath(TheCall, Res)) return true; if (auto *VecTy0 = Res->getAs()) @@ -20477,7 +20489,7 @@ bool Sema::SemaBuiltinVectorToScalarMath(CallExpr *TheCall) { return false; } -bool Sema::SemaBuiltinVectorMath(CallExpr *TheCall, QualType &Res) { +bool Sema::BuiltinVectorMath(CallExpr *TheCall, QualType &Res) { if (checkArgCount(*this, TheCall, 2)) return true; @@ -20505,8 +20517,8 @@ bool Sema::SemaBuiltinVectorMath(CallExpr *TheCall, QualType &Res) { return false; } -bool Sema::SemaBuiltinElementwiseTernaryMath(CallExpr *TheCall, - bool CheckForFloatArgs) { +bool Sema::BuiltinElementwiseTernaryMath(CallExpr *TheCall, + bool CheckForFloatArgs) { if (checkArgCount(*this, TheCall, 3)) return true; @@ -20561,7 +20573,7 @@ bool Sema::PrepareBuiltinReduceMathOneArgCall(CallExpr *TheCall) { return false; } -bool Sema::SemaBuiltinNonDeterministicValue(CallExpr *TheCall) { +bool Sema::BuiltinNonDeterministicValue(CallExpr *TheCall) { if (checkArgCount(*this, TheCall, 1)) return true; @@ -20576,8 +20588,8 @@ bool Sema::SemaBuiltinNonDeterministicValue(CallExpr *TheCall) { return false; } -ExprResult Sema::SemaBuiltinMatrixTranspose(CallExpr *TheCall, - ExprResult CallResult) { +ExprResult Sema::BuiltinMatrixTranspose(CallExpr *TheCall, + ExprResult CallResult) { if (checkArgCount(*this, TheCall, 1)) return ExprError(); @@ -20626,8 +20638,8 @@ getAndVerifyMatrixDimension(Expr *Expr, StringRef Name, Sema &S) { return Dim; } -ExprResult Sema::SemaBuiltinMatrixColumnMajorLoad(CallExpr *TheCall, - ExprResult CallResult) { +ExprResult Sema::BuiltinMatrixColumnMajorLoad(CallExpr *TheCall, + ExprResult CallResult) { if (!getLangOpts().MatrixTypes) { Diag(TheCall->getBeginLoc(), diag::err_builtin_matrix_disabled); return ExprError(); @@ -20742,8 +20754,8 @@ ExprResult Sema::SemaBuiltinMatrixColumnMajorLoad(CallExpr *TheCall, return CallResult; 
 }
 
-ExprResult Sema::SemaBuiltinMatrixColumnMajorStore(CallExpr *TheCall,
-                                                   ExprResult CallResult) {
+ExprResult Sema::BuiltinMatrixColumnMajorStore(CallExpr *TheCall,
+                                               ExprResult CallResult) {
   if (checkArgCount(*this, TheCall, 3))
     return ExprError();
 
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index b2986c5012ea2..e00c972602829 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -615,10 +615,12 @@ bool Sema::SetupConstraintScope(
   // reference the original primary template.
   // We walk up the instantiated template chain so that nested lambdas get
   // handled properly.
-  for (FunctionTemplateDecl *FromMemTempl =
-           PrimaryTemplate->getInstantiatedFromMemberTemplate();
-       FromMemTempl;
-       FromMemTempl = FromMemTempl->getInstantiatedFromMemberTemplate()) {
+  // We should only collect instantiated parameters from the primary template.
+  // Otherwise, we may end up with a mismatched template parameter depth!
+  if (FunctionTemplateDecl *FromMemTempl =
+          PrimaryTemplate->getInstantiatedFromMemberTemplate()) {
+    while (FromMemTempl->getInstantiatedFromMemberTemplate())
+      FromMemTempl = FromMemTempl->getInstantiatedFromMemberTemplate();
     if (addInstantiatedParametersToScope(FD, FromMemTempl->getTemplatedDecl(),
                                          Scope, MLTAL))
       return true;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index f31972c3ece4c..a5b0ebff72482 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -1537,6 +1537,10 @@ void Sema::PushOnScopeChains(NamedDecl *D, Scope *S, bool AddToContext) {
       cast<FunctionDecl>(D)->isFunctionTemplateSpecialization())
     return;
 
+  if (isa(D) && D->getDeclName().isEmpty()) {
+    S->AddDecl(D);
+    return;
+  }
   // If this replaces anything in the current scope,
   IdentifierResolver::iterator I = IdResolver.begin(D->getDeclName()),
                                IEnd = IdResolver.end();
@@ -2188,8 +2192,21 @@ void Sema::DiagnoseUnusedButSetDecl(const VarDecl *VD,
   assert(iter->getSecond() >= 0 &&
          "Found a negative number of references to a VarDecl");
 
-  if (iter->getSecond() != 0)
-    return;
+  if (int RefCnt = iter->getSecond(); RefCnt > 0) {
+    // Assume the given VarDecl is "used" if its ref count stored in
+    // `RefMinusAssignments` is positive, with one exception.
+    //
+    // For a C++ variable whose declaration (with initializer) entirely
+    // constitutes the condition expression of an if/while/for construct,
+    // Clang creates a DeclRefExpr for the condition expression rather than a
+    // BinaryOperator of AssignmentOp. Thus, the C++ variable's ref
+    // count stored in `RefMinusAssignments` equals 1 when the variable is
+    // never used in the body of the if/while/for construct.
+    bool UnusedCXXCondDecl = VD->isCXXCondDecl() && (RefCnt == 1);
+    if (!UnusedCXXCondDecl)
+      return;
+  }
+
   unsigned DiagID = isa<ParmVarDecl>(VD) ? diag::warn_unused_but_set_parameter
                                          : diag::warn_unused_but_set_variable;
   DiagReceiver(VD->getLocation(), PDiag(DiagID) << VD);
@@ -12476,12 +12493,22 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD,
   }
 
   // Check if the function definition uses any AArch64 SME features without
-  // having the '+sme' feature enabled.
+  // having the '+sme' feature enabled, and warn the user if an SME locally
+  // streaming function returns or takes arguments with VL-based types.
   if (DeclIsDefn) {
     const auto *Attr = NewFD->getAttr<ArmNewAttr>();
     bool UsesSM = NewFD->hasAttr<ArmLocallyStreamingAttr>();
     bool UsesZA = Attr && Attr->isNewZA();
     bool UsesZT0 = Attr && Attr->isNewZT0();
+
+    if (NewFD->hasAttr<ArmLocallyStreamingAttr>()) {
+      if (NewFD->getReturnType()->isSizelessVectorType() ||
+          llvm::any_of(NewFD->parameters(), [](ParmVarDecl *P) {
+            return P->getOriginalType()->isSizelessVectorType();
+          }))
+        Diag(NewFD->getLocation(),
+             diag::warn_sme_locally_streaming_has_vl_args_returns);
+    }
     if (const auto *FPT = NewFD->getType()->getAs<FunctionProtoType>()) {
       FunctionProtoType::ExtProtoInfo EPI = FPT->getExtProtoInfo();
       UsesSM |=
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index ca792ff85480e..9c51bd128eaff 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -18591,6 +18591,9 @@ DeclResult Sema::ActOnCXXConditionDeclaration(Scope *S, Declarator &D) {
     return true;
   }
 
+  if (auto *VD = dyn_cast<VarDecl>(Dcl))
+    VD->setCXXCondDecl();
+
   return Dcl;
 }
 
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 79b28a69b3378..e37dda77a9db4 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -3510,10 +3510,11 @@ static bool ShouldLookupResultBeMultiVersionOverload(const LookupResult &R) {
 
 ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS,
                                           LookupResult &R, bool NeedsADL,
-                                          bool AcceptInvalidDecl) {
+                                          bool AcceptInvalidDecl,
+                                          bool NeedUnresolved) {
   // If this is a single, fully-resolved result and we don't need ADL,
   // just build an ordinary singleton decl ref.
-  if (!NeedsADL && R.isSingleResult() &&
+  if (!NeedUnresolved && !NeedsADL && R.isSingleResult() &&
       !R.getAsSingle<FunctionTemplateDecl>() &&
       !ShouldLookupResultBeMultiVersionOverload(R))
     return BuildDeclarationNameExpr(SS, R.getLookupNameInfo(), R.getFoundDecl(),
@@ -4222,7 +4223,8 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
   } else if (Literal.isFloatingLiteral()) {
     QualType Ty;
     if (Literal.isHalf){
-      if (getOpenCLOptions().isAvailableOption("cl_khr_fp16", getLangOpts()))
+      if (getLangOpts().HLSL ||
+          getOpenCLOptions().isAvailableOption("cl_khr_fp16", getLangOpts()))
         Ty = Context.HalfTy;
       else {
         Diag(Tok.getLocation(), diag::err_half_const_requires_fp16);
@@ -4231,7 +4233,7 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
     } else if (Literal.isFloat)
       Ty = Context.FloatTy;
     else if (Literal.isLong)
-      Ty = Context.LongDoubleTy;
+      Ty = !getLangOpts().HLSL ? Context.LongDoubleTy : Context.DoubleTy;
     else if (Literal.isFloat16)
       Ty = Context.Float16Ty;
     else if (Literal.isFloat128)
@@ -7588,7 +7590,7 @@ ExprResult Sema::ActOnConvertVectorExpr(Expr *E, ParsedType ParsedDestTy,
                                         SourceLocation RParenLoc) {
   TypeSourceInfo *TInfo;
   GetTypeFromParser(ParsedDestTy, &TInfo);
-  return SemaConvertVectorExpr(E, TInfo, BuiltinLoc, RParenLoc);
+  return ConvertVectorExpr(E, TInfo, BuiltinLoc, RParenLoc);
 }
 
 /// BuildResolvedCallExpr - Build a call to a resolved expression,
@@ -19090,8 +19092,10 @@ void Sema::MarkFunctionReferenced(SourceLocation Loc, FunctionDecl *Func,
   // Note that we skip the implicit instantiation of templates that are only
   // used in unused default arguments or by recursive calls to themselves.
   // This is formally non-conforming, but seems reasonable in practice.
- bool NeedDefinition = !IsRecursiveCall && (OdrUse == OdrUseContext::Used || - NeededForConstantEvaluation); + bool NeedDefinition = + !IsRecursiveCall && + (OdrUse == OdrUseContext::Used || + (NeededForConstantEvaluation && !Func->isPureVirtual())); // C++14 [temp.expl.spec]p6: // If a template [...] is explicitly specialized then that specialization @@ -20828,20 +20832,42 @@ void Sema::MarkVariableReferenced(SourceLocation Loc, VarDecl *Var) { static void FixDependencyOfIdExpressionsInLambdaWithDependentObjectParameter( Sema &SemaRef, ValueDecl *D, Expr *E) { auto *ID = dyn_cast(E); - if (!ID || ID->isTypeDependent()) + if (!ID || ID->isTypeDependent() || !ID->refersToEnclosingVariableOrCapture()) return; + // If any enclosing lambda with a dependent explicit object parameter either + // explicitly captures the variable by value, or has a capture default of '=' + // and does not capture the variable by reference, then the type of the DRE + // is dependent on the type of that lambda's explicit object parameter. auto IsDependent = [&]() { - const LambdaScopeInfo *LSI = SemaRef.getCurLambda(); - if (!LSI) - return false; - if (!LSI->ExplicitObjectParameter || - !LSI->ExplicitObjectParameter->getType()->isDependentType()) - return false; - if (!LSI->CaptureMap.count(D)) - return false; - const Capture &Cap = LSI->getCapture(D); - return !Cap.isCopyCapture(); + for (auto *Scope : llvm::reverse(SemaRef.FunctionScopes)) { + auto *LSI = dyn_cast(Scope); + if (!LSI) + continue; + + if (LSI->Lambda && !LSI->Lambda->Encloses(SemaRef.CurContext) && + LSI->AfterParameterList) + return false; + + const auto *MD = LSI->CallOperator; + if (MD->getType().isNull()) + continue; + + const auto *Ty = MD->getType()->getAs(); + if (!Ty || !MD->isExplicitObjectMemberFunction() || + !Ty->getParamType(0)->isDependentType()) + continue; + + if (auto *C = LSI->CaptureMap.count(D) ? &LSI->getCapture(D) : nullptr) { + if (C->isCopyCapture()) + return true; + continue; + } + + if (LSI->ImpCaptureStyle == LambdaScopeInfo::ImpCap_LambdaByval) + return true; + } + return false; }(); ID->setCapturedByCopyInLambdaWithExplicitObjectParameter( diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 70518211fa831..cc0b86716447b 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1420,26 +1420,42 @@ bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit, } ExprResult Sema::ActOnCXXThis(SourceLocation Loc) { - /// C++ 9.3.2: In the body of a non-static member function, the keyword this - /// is a non-lvalue expression whose value is the address of the object for - /// which the function is called. + // C++20 [expr.prim.this]p1: + // The keyword this names a pointer to the object for which an + // implicit object member function is invoked or a non-static + // data member's initializer is evaluated. 
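The standard wording quoted above is what the refactored check diagnoses. A minimal example of the rejected case, assuming C++23 explicit object parameters:

```cpp
struct Widget {
  int n;
  void f(this Widget &self) {
    // this->n = 0;  // error: 'this' cannot be used in an explicit object
                     // member function; refer to the object via 'self'
    self.n = 0;      // OK
  }
};
```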
QualType ThisTy = getCurrentThisType(); - if (ThisTy.isNull()) { - DeclContext *DC = getFunctionLevelDeclContext(); + if (CheckCXXThisType(Loc, ThisTy)) + return ExprError(); - if (const auto *Method = dyn_cast(DC); - Method && Method->isExplicitObjectMemberFunction()) { - return Diag(Loc, diag::err_invalid_this_use) << 1; - } + return BuildCXXThisExpr(Loc, ThisTy, /*IsImplicit=*/false); +} - if (isLambdaCallWithExplicitObjectParameter(CurContext)) - return Diag(Loc, diag::err_invalid_this_use) << 1; +bool Sema::CheckCXXThisType(SourceLocation Loc, QualType Type) { + if (!Type.isNull()) + return false; - return Diag(Loc, diag::err_invalid_this_use) << 0; + // C++20 [expr.prim.this]p3: + // If a declaration declares a member function or member function template + // of a class X, the expression this is a prvalue of type + // "pointer to cv-qualifier-seq X" wherever X is the current class between + // the optional cv-qualifier-seq and the end of the function-definition, + // member-declarator, or declarator. It shall not appear within the + // declaration of either a static member function or an explicit object + // member function of the current class (although its type and value + // category are defined within such member functions as they are within + // an implicit object member function). + DeclContext *DC = getFunctionLevelDeclContext(); + if (const auto *Method = dyn_cast(DC); + Method && Method->isExplicitObjectMemberFunction()) { + Diag(Loc, diag::err_invalid_this_use) << 1; + } else if (isLambdaCallWithExplicitObjectParameter(CurContext)) { + Diag(Loc, diag::err_invalid_this_use) << 1; + } else { + Diag(Loc, diag::err_invalid_this_use) << 0; } - - return BuildCXXThisExpr(Loc, ThisTy, /*IsImplicit=*/false); + return true; } Expr *Sema::BuildCXXThisExpr(SourceLocation Loc, QualType Type, @@ -1451,6 +1467,42 @@ Expr *Sema::BuildCXXThisExpr(SourceLocation Loc, QualType Type, void Sema::MarkThisReferenced(CXXThisExpr *This) { CheckCXXThisCapture(This->getExprLoc()); + if (This->isTypeDependent()) + return; + + // Check if 'this' is captured by value in a lambda with a dependent explicit + // object parameter, and mark it as type-dependent as well if so. + auto IsDependent = [&]() { + for (auto *Scope : llvm::reverse(FunctionScopes)) { + auto *LSI = dyn_cast(Scope); + if (!LSI) + continue; + + if (LSI->Lambda && !LSI->Lambda->Encloses(CurContext) && + LSI->AfterParameterList) + return false; + + // If this lambda captures 'this' by value, then 'this' is dependent iff + // this lambda has a dependent explicit object parameter. If we can't + // determine whether it does (e.g. because the CXXMethodDecl's type is + // null), assume it doesn't. 
+ if (LSI->isCXXThisCaptured()) { + if (!LSI->getCXXThisCapture().isCopyCapture()) + continue; + + const auto *MD = LSI->CallOperator; + if (MD->getType().isNull()) + return false; + + const auto *Ty = MD->getType()->getAs(); + return Ty && MD->isExplicitObjectMemberFunction() && + Ty->getParamType(0)->isDependentType(); + } + } + return false; + }(); + + This->setCapturedByCopyInLambdaWithExplicitObjectParameter(IsDependent); } bool Sema::isThisOutsideMemberFunctionBody(QualType BaseType) { @@ -3947,9 +3999,8 @@ static bool resolveBuiltinNewDeleteOverload(Sema &S, CallExpr *TheCall, llvm_unreachable("Unreachable, bad result from BestViableFunction"); } -ExprResult -Sema::SemaBuiltinOperatorNewDeleteOverloaded(ExprResult TheCallResult, - bool IsDelete) { +ExprResult Sema::BuiltinOperatorNewDeleteOverloaded(ExprResult TheCallResult, + bool IsDelete) { CallExpr *TheCall = cast(TheCallResult.get()); if (!getLangOpts().CPlusPlus) { Diag(TheCall->getExprLoc(), diag::err_builtin_requires_language) @@ -5576,8 +5627,8 @@ static bool EvaluateUnaryTypeTrait(Sema &Self, TypeTrait UTT, } } -static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, QualType LhsT, - QualType RhsT, SourceLocation KeyLoc); +static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, const TypeSourceInfo *Lhs, + const TypeSourceInfo *Rhs, SourceLocation KeyLoc); static bool EvaluateBooleanTypeTrait(Sema &S, TypeTrait Kind, SourceLocation KWLoc, @@ -5593,8 +5644,8 @@ static bool EvaluateBooleanTypeTrait(Sema &S, TypeTrait Kind, // Evaluate ReferenceBindsToTemporary and ReferenceConstructsFromTemporary // alongside the IsConstructible traits to avoid duplication. if (Kind <= BTT_Last && Kind != BTT_ReferenceBindsToTemporary && Kind != BTT_ReferenceConstructsFromTemporary) - return EvaluateBinaryTypeTrait(S, Kind, Args[0]->getType(), - Args[1]->getType(), RParenLoc); + return EvaluateBinaryTypeTrait(S, Kind, Args[0], + Args[1], RParenLoc); switch (Kind) { case clang::BTT_ReferenceBindsToTemporary: @@ -5689,8 +5740,8 @@ static bool EvaluateBooleanTypeTrait(Sema &S, TypeTrait Kind, if (U->isReferenceType()) return false; - QualType TPtr = S.Context.getPointerType(S.BuiltinRemoveReference(T, UnaryTransformType::RemoveCVRef, {})); - QualType UPtr = S.Context.getPointerType(S.BuiltinRemoveReference(U, UnaryTransformType::RemoveCVRef, {})); + TypeSourceInfo *TPtr = S.Context.CreateTypeSourceInfo(S.Context.getPointerType(S.BuiltinRemoveReference(T, UnaryTransformType::RemoveCVRef, {}))); + TypeSourceInfo *UPtr = S.Context.CreateTypeSourceInfo(S.Context.getPointerType(S.BuiltinRemoveReference(U, UnaryTransformType::RemoveCVRef, {}))); return EvaluateBinaryTypeTrait(S, TypeTrait::BTT_IsConvertibleTo, UPtr, TPtr, RParenLoc); } @@ -5824,8 +5875,11 @@ ExprResult Sema::ActOnTypeTrait(TypeTrait Kind, SourceLocation KWLoc, return BuildTypeTrait(Kind, KWLoc, ConvertedArgs, RParenLoc); } -static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, QualType LhsT, - QualType RhsT, SourceLocation KeyLoc) { +static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, const TypeSourceInfo *Lhs, + const TypeSourceInfo *Rhs, SourceLocation KeyLoc) { + QualType LhsT = Lhs->getType(); + QualType RhsT = Rhs->getType(); + assert(!LhsT->isDependentType() && !RhsT->isDependentType() && "Cannot evaluate traits of dependent types"); @@ -5850,7 +5904,8 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, QualType LhsT, return false; if (Self.RequireCompleteType( - KeyLoc, RhsT, diag::err_incomplete_type_used_in_type_trait_expr)) + 
Rhs->getTypeLoc().getBeginLoc(), RhsT, + diag::err_incomplete_type_used_in_type_trait_expr)) return false; return BaseInterface->isSuperClassOf(DerivedInterface); @@ -5873,8 +5928,9 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, QualType LhsT, // If Base and Derived are class types and are different types // (ignoring possible cv-qualifiers) then Derived shall be a // complete type. - if (Self.RequireCompleteType(KeyLoc, RhsT, - diag::err_incomplete_type_used_in_type_trait_expr)) + if (Self.RequireCompleteType( + Rhs->getTypeLoc().getBeginLoc(), RhsT, + diag::err_incomplete_type_used_in_type_trait_expr)) return false; return cast(rhsRecord->getDecl()) @@ -5926,7 +5982,8 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, QualType LhsT, return LhsT->isVoidType(); // A function definition requires a complete, non-abstract return type. - if (!Self.isCompleteType(KeyLoc, RhsT) || Self.isAbstractType(KeyLoc, RhsT)) + if (!Self.isCompleteType(Rhs->getTypeLoc().getBeginLoc(), RhsT) || + Self.isAbstractType(Rhs->getTypeLoc().getBeginLoc(), RhsT)) return false; // Compute the result of add_rvalue_reference. @@ -5976,12 +6033,14 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, QualType LhsT, // For both, T and U shall be complete types, (possibly cv-qualified) // void, or arrays of unknown bound. if (!LhsT->isVoidType() && !LhsT->isIncompleteArrayType() && - Self.RequireCompleteType(KeyLoc, LhsT, - diag::err_incomplete_type_used_in_type_trait_expr)) + Self.RequireCompleteType( + Lhs->getTypeLoc().getBeginLoc(), LhsT, + diag::err_incomplete_type_used_in_type_trait_expr)) return false; if (!RhsT->isVoidType() && !RhsT->isIncompleteArrayType() && - Self.RequireCompleteType(KeyLoc, RhsT, - diag::err_incomplete_type_used_in_type_trait_expr)) + Self.RequireCompleteType( + Rhs->getTypeLoc().getBeginLoc(), RhsT, + diag::err_incomplete_type_used_in_type_trait_expr)) return false; // cv void is never assignable. @@ -6035,6 +6094,19 @@ static bool EvaluateBinaryTypeTrait(Sema &Self, TypeTrait BTT, QualType LhsT, return false; } case BTT_IsLayoutCompatible: { + if (!LhsT->isVoidType() && !LhsT->isIncompleteArrayType()) + Self.RequireCompleteType(Lhs->getTypeLoc().getBeginLoc(), LhsT, + diag::err_incomplete_type); + if (!RhsT->isVoidType() && !RhsT->isIncompleteArrayType()) + Self.RequireCompleteType(Rhs->getTypeLoc().getBeginLoc(), RhsT, + diag::err_incomplete_type); + + if (LhsT->isVariableArrayType()) + Self.Diag(Lhs->getTypeLoc().getBeginLoc(), diag::err_vla_unsupported) + << 1 << tok::kw___is_layout_compatible; + if (RhsT->isVariableArrayType()) + Self.Diag(Rhs->getTypeLoc().getBeginLoc(), diag::err_vla_unsupported) + << 1 << tok::kw___is_layout_compatible; return Self.IsLayoutCompatible(LhsT, RhsT); } default: llvm_unreachable("not a BTT"); diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index 32998ae60eafe..8cd2288d279cc 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -61,6 +61,10 @@ enum IMAKind { /// The reference is a contextually-permitted abstract member reference. IMA_Abstract, + /// Whether the context is static is dependent on the enclosing template (i.e. + /// in a dependent class scope explicit specialization). + IMA_Dependent, + /// The reference may be to an unresolved using declaration and the /// context is not an instance method. 
IMA_Unresolved_StaticOrExplicitContext, @@ -91,10 +95,18 @@ static IMAKind ClassifyImplicitMemberAccess(Sema &SemaRef, DeclContext *DC = SemaRef.getFunctionLevelDeclContext(); - bool isStaticOrExplicitContext = - SemaRef.CXXThisTypeOverride.isNull() && - (!isa(DC) || cast(DC)->isStatic() || - cast(DC)->isExplicitObjectMemberFunction()); + bool couldInstantiateToStatic = false; + bool isStaticOrExplicitContext = SemaRef.CXXThisTypeOverride.isNull(); + + if (auto *MD = dyn_cast(DC)) { + if (MD->isImplicitObjectMemberFunction()) { + isStaticOrExplicitContext = false; + // A dependent class scope function template explicit specialization + // that is neither declared 'static' nor with an explicit object + // parameter could instantiate to a static or non-static member function. + couldInstantiateToStatic = MD->getDependentSpecializationInfo(); + } + } if (R.isUnresolvableResult()) return isStaticOrExplicitContext ? IMA_Unresolved_StaticOrExplicitContext @@ -123,6 +135,9 @@ static IMAKind ClassifyImplicitMemberAccess(Sema &SemaRef, if (Classes.empty()) return IMA_Static; + if (couldInstantiateToStatic) + return IMA_Dependent; + // C++11 [expr.prim.general]p12: // An id-expression that denotes a non-static data member or non-static // member function of a class can only be used: @@ -268,27 +283,30 @@ ExprResult Sema::BuildPossibleImplicitMemberExpr( const CXXScopeSpec &SS, SourceLocation TemplateKWLoc, LookupResult &R, const TemplateArgumentListInfo *TemplateArgs, const Scope *S, UnresolvedLookupExpr *AsULE) { - switch (ClassifyImplicitMemberAccess(*this, R)) { + switch (IMAKind Classification = ClassifyImplicitMemberAccess(*this, R)) { case IMA_Instance: - return BuildImplicitMemberExpr(SS, TemplateKWLoc, R, TemplateArgs, true, S); - case IMA_Mixed: case IMA_Mixed_Unrelated: case IMA_Unresolved: - return BuildImplicitMemberExpr(SS, TemplateKWLoc, R, TemplateArgs, false, - S); - + return BuildImplicitMemberExpr( + SS, TemplateKWLoc, R, TemplateArgs, + /*IsKnownInstance=*/Classification == IMA_Instance, S); case IMA_Field_Uneval_Context: Diag(R.getNameLoc(), diag::warn_cxx98_compat_non_static_member_use) << R.getLookupNameInfo().getName(); [[fallthrough]]; case IMA_Static: case IMA_Abstract: + case IMA_Dependent: case IMA_Mixed_StaticOrExplicitContext: case IMA_Unresolved_StaticOrExplicitContext: if (TemplateArgs || TemplateKWLoc.isValid()) - return BuildTemplateIdExpr(SS, TemplateKWLoc, R, false, TemplateArgs); - return AsULE ? AsULE : BuildDeclarationNameExpr(SS, R, false); + return BuildTemplateIdExpr(SS, TemplateKWLoc, R, /*RequiresADL=*/false, + TemplateArgs); + return AsULE ? AsULE + : BuildDeclarationNameExpr( + SS, R, /*NeedsADL=*/false, /*AcceptInvalidDecl=*/false, + /*NeedUnresolved=*/Classification == IMA_Dependent); case IMA_Error_StaticOrExplicitContext: case IMA_Error_Unrelated: diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index cf82cc9bccdf5..681849d6e6c8a 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -8,27 +8,31 @@ // This implements Semantic Analysis for HLSL constructs. 
//===----------------------------------------------------------------------===// +#include "clang/Sema/SemaHLSL.h" #include "clang/Sema/Sema.h" using namespace clang; -Decl *Sema::ActOnStartHLSLBuffer(Scope *BufferScope, bool CBuffer, - SourceLocation KwLoc, IdentifierInfo *Ident, - SourceLocation IdentLoc, - SourceLocation LBrace) { +SemaHLSL::SemaHLSL(Sema &S) : SemaBase(S) {} + +Decl *SemaHLSL::ActOnStartHLSLBuffer(Scope *BufferScope, bool CBuffer, + SourceLocation KwLoc, + IdentifierInfo *Ident, + SourceLocation IdentLoc, + SourceLocation LBrace) { // For anonymous namespace, take the location of the left brace. - DeclContext *LexicalParent = getCurLexicalContext(); + DeclContext *LexicalParent = SemaRef.getCurLexicalContext(); HLSLBufferDecl *Result = HLSLBufferDecl::Create( - Context, LexicalParent, CBuffer, KwLoc, Ident, IdentLoc, LBrace); + getASTContext(), LexicalParent, CBuffer, KwLoc, Ident, IdentLoc, LBrace); - PushOnScopeChains(Result, BufferScope); - PushDeclContext(BufferScope, Result); + SemaRef.PushOnScopeChains(Result, BufferScope); + SemaRef.PushDeclContext(BufferScope, Result); return Result; } -void Sema::ActOnFinishHLSLBuffer(Decl *Dcl, SourceLocation RBrace) { +void SemaHLSL::ActOnFinishHLSLBuffer(Decl *Dcl, SourceLocation RBrace) { auto *BufDecl = cast(Dcl); BufDecl->setRBraceLoc(RBrace); - PopDeclContext(); + SemaRef.PopDeclContext(); } diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 57fce876aae73..e42fd9b5a21a7 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -10941,32 +10941,16 @@ QualType Sema::DeduceTemplateSpecializationFromInitializer( Context.getLValueReferenceType(ElementTypes[I].withConst()); } - llvm::FoldingSetNodeID ID; - ID.AddPointer(Template); - for (auto &T : ElementTypes) - T.getCanonicalType().Profile(ID); - unsigned Hash = ID.ComputeHash(); - if (AggregateDeductionCandidates.count(Hash) == 0) { - if (FunctionTemplateDecl *TD = - DeclareImplicitDeductionGuideFromInitList( - Template, ElementTypes, - TSInfo->getTypeLoc().getEndLoc())) { - auto *GD = cast(TD->getTemplatedDecl()); - GD->setDeductionCandidateKind(DeductionCandidate::Aggregate); - AggregateDeductionCandidates[Hash] = GD; - addDeductionCandidate(TD, GD, DeclAccessPair::make(TD, AS_public), - OnlyListConstructors, - /*AllowAggregateDeductionCandidate=*/true); - } - } else { - CXXDeductionGuideDecl *GD = AggregateDeductionCandidates[Hash]; - FunctionTemplateDecl *TD = GD->getDescribedFunctionTemplate(); - assert(TD && "aggregate deduction candidate is function template"); + if (FunctionTemplateDecl *TD = + DeclareAggregateDeductionGuideFromInitList( + LookupTemplateDecl, ElementTypes, + TSInfo->getTypeLoc().getEndLoc())) { + auto *GD = cast(TD->getTemplatedDecl()); addDeductionCandidate(TD, GD, DeclAccessPair::make(TD, AS_public), OnlyListConstructors, /*AllowAggregateDeductionCandidate=*/true); + HasAnyDeductionGuide = true; } - HasAnyDeductionGuide = true; } }; diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 2ac994cac71e1..2ba1e49b5739d 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -11,8 +11,8 @@ /// //===----------------------------------------------------------------------===// -#include "clang/AST/StmtOpenACC.h" #include "clang/Sema/SemaOpenACC.h" +#include "clang/AST/StmtOpenACC.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Sema/Sema.h" @@ -31,31 +31,46 @@ bool diagnoseConstructAppertainment(SemaOpenACC &S, OpenACCDirectiveKind K, case 
OpenACCDirectiveKind::Serial:
   case OpenACCDirectiveKind::Kernels:
     if (!IsStmt)
-      return S.SemaRef.Diag(StartLoc, diag::err_acc_construct_appertainment)
-             << K;
+      return S.Diag(StartLoc, diag::err_acc_construct_appertainment) << K;
     break;
   }
   return false;
 }
+
+bool doesClauseApplyToDirective(OpenACCDirectiveKind DirectiveKind,
+                                OpenACCClauseKind ClauseKind) {
+  // FIXME: For each clause, as we implement it, add it to this
+  // 'legalization' list.
+
+  // Do nothing so we can go to the 'unimplemented' diagnostic instead.
+  return true;
+}
 } // namespace
 
-SemaOpenACC::SemaOpenACC(Sema &S) : SemaRef(S) {}
-
-ASTContext &SemaOpenACC::getASTContext() const { return SemaRef.Context; }
-DiagnosticsEngine &SemaOpenACC::getDiagnostics() const { return SemaRef.Diags; }
-const LangOptions &SemaOpenACC::getLangOpts() const { return SemaRef.LangOpts; }
-
-bool SemaOpenACC::ActOnClause(OpenACCClauseKind ClauseKind,
-                              SourceLocation StartLoc) {
-  if (ClauseKind == OpenACCClauseKind::Invalid)
-    return false;
-  // For now just diagnose that it is unsupported and leave the parsing to do
-  // whatever it can do. This function will eventually need to start returning
-  // some sort of Clause AST type, but for now just return true/false based on
-  // success.
-  return SemaRef.Diag(StartLoc, diag::warn_acc_clause_unimplemented)
-         << ClauseKind;
+SemaOpenACC::SemaOpenACC(Sema &S) : SemaBase(S) {}
+
+OpenACCClause *
+SemaOpenACC::ActOnClause(ArrayRef<const OpenACCClause *> ExistingClauses,
+                         OpenACCParsedClause &Clause) {
+  if (Clause.getClauseKind() == OpenACCClauseKind::Invalid)
+    return nullptr;
+
+  // Diagnose that we don't support this clause on this directive.
+  if (!doesClauseApplyToDirective(Clause.getDirectiveKind(),
+                                  Clause.getClauseKind())) {
+    Diag(Clause.getBeginLoc(), diag::err_acc_clause_appertainment)
+        << Clause.getDirectiveKind() << Clause.getClauseKind();
+    return nullptr;
+  }
+
+  // TODO OpenACC: Switch over the clauses we implement here and 'create'
+  // them.
+
+  Diag(Clause.getBeginLoc(), diag::warn_acc_clause_unimplemented)
+      << Clause.getClauseKind();
+  return nullptr;
 }
+
 void SemaOpenACC::ActOnConstruct(OpenACCDirectiveKind K,
                                  SourceLocation StartLoc) {
   switch (K) {
@@ -72,7 +87,7 @@ void SemaOpenACC::ActOnConstruct(OpenACCDirectiveKind K,
     // here as these constructs do not take any arguments.
     break;
   default:
-    SemaRef.Diag(StartLoc, diag::warn_acc_construct_unimplemented) << K;
+    Diag(StartLoc, diag::warn_acc_construct_unimplemented) << K;
     break;
   }
 }
@@ -85,6 +100,7 @@ bool SemaOpenACC::ActOnStartStmtDirective(OpenACCDirectiveKind K,
 StmtResult SemaOpenACC::ActOnEndStmtDirective(OpenACCDirectiveKind K,
                                               SourceLocation StartLoc,
                                               SourceLocation EndLoc,
+                                              ArrayRef<const OpenACCClause *> Clauses,
                                               StmtResult AssocStmt) {
   switch (K) {
   default:
@@ -94,8 +110,9 @@ StmtResult SemaOpenACC::ActOnEndStmtDirective(OpenACCDirectiveKind K,
   case OpenACCDirectiveKind::Parallel:
   case OpenACCDirectiveKind::Serial:
   case OpenACCDirectiveKind::Kernels:
+    // TODO OpenACC: Add clauses to the construct here.
     return OpenACCComputeConstruct::Create(
-        getASTContext(), K, StartLoc, EndLoc,
+        getASTContext(), K, StartLoc, EndLoc, Clauses,
         AssocStmt.isUsable() ? AssocStmt.get() : nullptr);
   }
   llvm_unreachable("Unhandled case in directive handling?");
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 833cd6da74e64..a1d16cc2c633b 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -6371,6 +6371,7 @@ Sema::EvaluateConvertedConstantExpression(Expr *E, QualType T, APValue &Value,
     // by this point.
     assert(CE->getResultStorageKind() != ConstantResultStorageKind::None &&
            "ConstantExpr has no value associated with it");
+    (void)CE;
   } else {
     E = ConstantExpr::Create(Context, Result.get(), Value);
   }
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index d3def13f495d2..2013799b5eb81 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -2754,23 +2754,42 @@ bool hasDeclaredDeductionGuides(DeclarationName Name, DeclContext *DC) {
   return false;
 }
 
-// Build deduction guides for a type alias template.
-void DeclareImplicitDeductionGuidesForTypeAlias(
-    Sema &SemaRef, TypeAliasTemplateDecl *AliasTemplate, SourceLocation Loc) {
-  if (AliasTemplate->isInvalidDecl())
-    return;
-  auto &Context = SemaRef.Context;
-  // FIXME: if there is an explicit deduction guide declared after the first
-  // use of the type alias, we will not cover that deduction guide. Fix this
-  // case.
-  if (hasDeclaredDeductionGuides(
-          Context.DeclarationNames.getCXXDeductionGuideName(AliasTemplate),
-          AliasTemplate->getDeclContext()))
-    return;
+NamedDecl *transformTemplateParameter(Sema &SemaRef, DeclContext *DC,
+                                      NamedDecl *TemplateParam,
+                                      MultiLevelTemplateArgumentList &Args,
+                                      unsigned NewIndex) {
+  if (auto *TTP = dyn_cast<TemplateTypeParmDecl>(TemplateParam))
+    return transformTemplateTypeParam(SemaRef, DC, TTP, Args, TTP->getDepth(),
+                                      NewIndex);
+  if (auto *TTP = dyn_cast<TemplateTemplateParmDecl>(TemplateParam))
+    return transformTemplateParam(SemaRef, DC, TTP, Args, NewIndex,
+                                  TTP->getDepth());
+  if (auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(TemplateParam))
+    return transformTemplateParam(SemaRef, DC, NTTP, Args, NewIndex,
+                                  NTTP->getDepth());
+  llvm_unreachable("Unhandled template parameter types");
+}
+
+Expr *transformRequireClause(Sema &SemaRef, FunctionTemplateDecl *FTD,
+                             llvm::ArrayRef<TemplateArgument> TransformedArgs) {
+  Expr *RC = FTD->getTemplateParameters()->getRequiresClause();
+  if (!RC)
+    return nullptr;
+  MultiLevelTemplateArgumentList Args;
+  Args.setKind(TemplateSubstitutionKind::Rewrite);
+  Args.addOuterTemplateArguments(TransformedArgs);
+  ExprResult E = SemaRef.SubstExpr(RC, Args);
+  if (E.isInvalid())
+    return nullptr;
+  return E.getAs<Expr>();
+}
+
+std::pair<TemplateDecl *, llvm::ArrayRef<TemplateArgument>>
+getRHSTemplateDeclAndArgs(Sema &SemaRef, TypeAliasTemplateDecl *AliasTemplate) {
   // Unwrap the sugared ElaboratedType.
   auto RhsType = AliasTemplate->getTemplatedDecl()
                      ->getUnderlyingType()
-                     .getSingleStepDesugaredType(Context);
+                     .getSingleStepDesugaredType(SemaRef.Context);
   TemplateDecl *Template = nullptr;
   llvm::ArrayRef<TemplateArgument> AliasRhsTemplateArgs;
   if (const auto *TST = RhsType->getAs<TemplateSpecializationType>()) {
@@ -2791,6 +2810,24 @@ void DeclareImplicitDeductionGuidesForTypeAlias(
   } else {
     assert(false && "unhandled RHS type of the alias");
   }
+  return {Template, AliasRhsTemplateArgs};
+}
+
+// Build deduction guides for a type alias template.
+void DeclareImplicitDeductionGuidesForTypeAlias(
+    Sema &SemaRef, TypeAliasTemplateDecl *AliasTemplate, SourceLocation Loc) {
+  if (AliasTemplate->isInvalidDecl())
+    return;
+  auto &Context = SemaRef.Context;
+  // FIXME: if there is an explicit deduction guide declared after the first
+  // use of the type alias, we will not cover that deduction guide. Fix this
+  // case.
+  if (hasDeclaredDeductionGuides(
+          Context.DeclarationNames.getCXXDeductionGuideName(AliasTemplate),
+          AliasTemplate->getDeclContext()))
+    return;
+  auto [Template, AliasRhsTemplateArgs] =
+      getRHSTemplateDeclAndArgs(SemaRef, AliasTemplate);
   if (!Template)
     return;
   DeclarationNameInfo NameInfo(
@@ -2803,6 +2840,13 @@ void DeclareImplicitDeductionGuidesForTypeAlias(
     FunctionTemplateDecl *F = dyn_cast<FunctionTemplateDecl>(G);
     if (!F)
       continue;
+    // The **aggregate** deduction guides are handled in a different code path
+    // (DeclareImplicitDeductionGuideFromInitList), which involves a tricky
+    // cache.
+    if (cast<CXXDeductionGuideDecl>(F->getTemplatedDecl())
+            ->getDeductionCandidateKind() == DeductionCandidate::Aggregate)
+      continue;
+
     auto RType = F->getTemplatedDecl()->getReturnType();
     // The (trailing) return type of the deduction guide.
     const TemplateSpecializationType *FReturnType =
@@ -2885,21 +2929,6 @@ void DeclareImplicitDeductionGuidesForTypeAlias(
   // parameters, used for building `TemplateArgsForBuildingFPrime`.
   SmallVector<TemplateArgument> TransformedDeducedAliasArgs(
       AliasTemplate->getTemplateParameters()->size());
-  auto TransformTemplateParameter =
-      [&SemaRef](DeclContext *DC, NamedDecl *TemplateParam,
-                 MultiLevelTemplateArgumentList &Args,
-                 unsigned NewIndex) -> NamedDecl * {
-    if (auto *TTP = dyn_cast<TemplateTypeParmDecl>(TemplateParam))
-      return transformTemplateTypeParam(SemaRef, DC, TTP, Args,
-                                        TTP->getDepth(), NewIndex);
-    if (auto *TTP = dyn_cast<TemplateTemplateParmDecl>(TemplateParam))
-      return transformTemplateParam(SemaRef, DC, TTP, Args, NewIndex,
-                                    TTP->getDepth());
-    if (auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(TemplateParam))
-      return transformTemplateParam(SemaRef, DC, NTTP, Args, NewIndex,
-                                    NTTP->getDepth());
-    return nullptr;
-  };
 
   for (unsigned AliasTemplateParamIdx : DeducedAliasTemplateParams) {
     auto *TP = AliasTemplate->getTemplateParameters()->getParam(
@@ -2909,9 +2938,9 @@ void DeclareImplicitDeductionGuidesForTypeAlias(
     MultiLevelTemplateArgumentList Args;
     Args.setKind(TemplateSubstitutionKind::Rewrite);
     Args.addOuterTemplateArguments(TransformedDeducedAliasArgs);
-    NamedDecl *NewParam =
-        TransformTemplateParameter(AliasTemplate->getDeclContext(), TP, Args,
-                                   /*NewIndex*/ FPrimeTemplateParams.size());
+    NamedDecl *NewParam = transformTemplateParameter(
+        SemaRef, AliasTemplate->getDeclContext(), TP, Args,
+        /*NewIndex*/ FPrimeTemplateParams.size());
     FPrimeTemplateParams.push_back(NewParam);
 
     auto NewTemplateArgument = Context.getCanonicalTemplateArgument(
@@ -2927,8 +2956,8 @@ void DeclareImplicitDeductionGuidesForTypeAlias(
 
     // We take a shortcut here, it is ok to reuse the
    // TemplateArgsForBuildingFPrime.
Args.addOuterTemplateArguments(TemplateArgsForBuildingFPrime); - NamedDecl *NewParam = TransformTemplateParameter( - F->getDeclContext(), TP, Args, FPrimeTemplateParams.size()); + NamedDecl *NewParam = transformTemplateParameter( + SemaRef, F->getDeclContext(), TP, Args, FPrimeTemplateParams.size()); FPrimeTemplateParams.push_back(NewParam); assert(TemplateArgsForBuildingFPrime[FTemplateParamIdx].isNull() && @@ -2938,16 +2967,8 @@ void DeclareImplicitDeductionGuidesForTypeAlias( Context.getInjectedTemplateArg(NewParam)); } // Substitute new template parameters into requires-clause if present. - Expr *RequiresClause = nullptr; - if (Expr *InnerRC = F->getTemplateParameters()->getRequiresClause()) { - MultiLevelTemplateArgumentList Args; - Args.setKind(TemplateSubstitutionKind::Rewrite); - Args.addOuterTemplateArguments(TemplateArgsForBuildingFPrime); - ExprResult E = SemaRef.SubstExpr(InnerRC, Args); - if (E.isInvalid()) - return; - RequiresClause = E.getAs(); - } + Expr *RequiresClause = + transformRequireClause(SemaRef, F, TemplateArgsForBuildingFPrime); // FIXME: implement the is_deducible constraint per C++ // [over.match.class.deduct]p3.3: // ... and a constraint that is satisfied if and only if the arguments @@ -3013,11 +3034,102 @@ void DeclareImplicitDeductionGuidesForTypeAlias( } } +// Build an aggregate deduction guide for a type alias template. +FunctionTemplateDecl *DeclareAggregateDeductionGuideForTypeAlias( + Sema &SemaRef, TypeAliasTemplateDecl *AliasTemplate, + MutableArrayRef ParamTypes, SourceLocation Loc) { + TemplateDecl *RHSTemplate = + getRHSTemplateDeclAndArgs(SemaRef, AliasTemplate).first; + if (!RHSTemplate) + return nullptr; + auto *RHSDeductionGuide = SemaRef.DeclareAggregateDeductionGuideFromInitList( + RHSTemplate, ParamTypes, Loc); + if (!RHSDeductionGuide) + return nullptr; + + LocalInstantiationScope Scope(SemaRef); + + // Build a new template parameter list for the synthesized aggregate deduction + // guide by transforming the one from RHSDeductionGuide. + SmallVector TransformedTemplateParams; + // Template args that refer to the rebuilt template parameters. + // All template arguments must be initialized in advance. + SmallVector TransformedTemplateArgs( + RHSDeductionGuide->getTemplateParameters()->size()); + for (auto *TP : *RHSDeductionGuide->getTemplateParameters()) { + // Rebuild any internal references to earlier parameters and reindex as + // we go. + MultiLevelTemplateArgumentList Args; + Args.setKind(TemplateSubstitutionKind::Rewrite); + Args.addOuterTemplateArguments(TransformedTemplateArgs); + NamedDecl *NewParam = transformTemplateParameter( + SemaRef, AliasTemplate->getDeclContext(), TP, Args, + /*NewIndex=*/TransformedTemplateParams.size()); + + TransformedTemplateArgs[TransformedTemplateParams.size()] = + SemaRef.Context.getCanonicalTemplateArgument( + SemaRef.Context.getInjectedTemplateArg(NewParam)); + TransformedTemplateParams.push_back(NewParam); + } + // FIXME: implement the is_deducible constraint per C++ + // [over.match.class.deduct]p3.3. 
+ Expr *TransformedRequiresClause = transformRequireClause( + SemaRef, RHSDeductionGuide, TransformedTemplateArgs); + auto *TransformedTemplateParameterList = TemplateParameterList::Create( + SemaRef.Context, AliasTemplate->getTemplateParameters()->getTemplateLoc(), + AliasTemplate->getTemplateParameters()->getLAngleLoc(), + TransformedTemplateParams, + AliasTemplate->getTemplateParameters()->getRAngleLoc(), + TransformedRequiresClause); + auto *TransformedTemplateArgList = TemplateArgumentList::CreateCopy( + SemaRef.Context, TransformedTemplateArgs); + + if (auto *TransformedDeductionGuide = SemaRef.InstantiateFunctionDeclaration( + RHSDeductionGuide, TransformedTemplateArgList, + AliasTemplate->getLocation(), + Sema::CodeSynthesisContext::BuildingDeductionGuides)) { + auto *GD = + llvm::dyn_cast(TransformedDeductionGuide); + FunctionTemplateDecl *Result = buildDeductionGuide( + SemaRef, AliasTemplate, TransformedTemplateParameterList, + GD->getCorrespondingConstructor(), GD->getExplicitSpecifier(), + GD->getTypeSourceInfo(), AliasTemplate->getBeginLoc(), + AliasTemplate->getLocation(), AliasTemplate->getEndLoc(), + GD->isImplicit()); + cast(Result->getTemplatedDecl()) + ->setDeductionCandidateKind(DeductionCandidate::Aggregate); + return Result; + } + return nullptr; +} + } // namespace -FunctionTemplateDecl *Sema::DeclareImplicitDeductionGuideFromInitList( +FunctionTemplateDecl *Sema::DeclareAggregateDeductionGuideFromInitList( TemplateDecl *Template, MutableArrayRef ParamTypes, SourceLocation Loc) { + llvm::FoldingSetNodeID ID; + ID.AddPointer(Template); + for (auto &T : ParamTypes) + T.getCanonicalType().Profile(ID); + unsigned Hash = ID.ComputeHash(); + + auto Found = AggregateDeductionCandidates.find(Hash); + if (Found != AggregateDeductionCandidates.end()) { + CXXDeductionGuideDecl *GD = Found->getSecond(); + return GD->getDescribedFunctionTemplate(); + } + + if (auto *AliasTemplate = llvm::dyn_cast(Template)) { + if (auto *FTD = DeclareAggregateDeductionGuideForTypeAlias( + *this, AliasTemplate, ParamTypes, Loc)) { + auto *GD = cast(FTD->getTemplatedDecl()); + GD->setDeductionCandidateKind(DeductionCandidate::Aggregate); + AggregateDeductionCandidates[Hash] = GD; + return FTD; + } + } + if (CXXRecordDecl *DefRecord = cast(Template->getTemplatedDecl())->getDefinition()) { if (TemplateDecl *DescribedTemplate = @@ -3050,10 +3162,13 @@ FunctionTemplateDecl *Sema::DeclareImplicitDeductionGuideFromInitList( Transform.NestedPattern ? 
                      Transform.NestedPattern : Transform.Template;
   ContextRAII SavedContext(*this, Pattern->getTemplatedDecl());
-  auto *DG = cast<FunctionTemplateDecl>(
+  auto *FTD = cast<FunctionTemplateDecl>(
       Transform.buildSimpleDeductionGuide(ParamTypes));
   SavedContext.pop();
-  return DG;
+  auto *GD = cast<CXXDeductionGuideDecl>(FTD->getTemplatedDecl());
+  GD->setDeductionCandidateKind(DeductionCandidate::Aggregate);
+  AggregateDeductionCandidates[Hash] = GD;
+  return FTD;
 }
 
 void Sema::DeclareImplicitDeductionGuides(TemplateDecl *Template,
@@ -4379,7 +4494,10 @@ QualType Sema::CheckTemplateIdType(TemplateName Name,
         AliasTemplate->getTemplateParameters()->getDepth());
 
     LocalInstantiationScope Scope(*this);
-    InstantiatingTemplate Inst(*this, TemplateLoc, Template);
+    InstantiatingTemplate Inst(
+        *this, /*PointOfInstantiation=*/TemplateLoc,
+        /*Entity=*/AliasTemplate,
+        /*TemplateArgs=*/TemplateArgLists.getInnermost());
     if (Inst.isInvalid())
       return QualType();
 
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index 716660244537b..0b6375001f532 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -3140,13 +3140,15 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction(
   return TemplateDeductionResult::Success;
 }
 
-/// Perform template argument deduction to determine whether
-/// the given template arguments match the given class template
-/// partial specialization per C++ [temp.class.spec.match].
-TemplateDeductionResult
-Sema::DeduceTemplateArguments(ClassTemplatePartialSpecializationDecl *Partial,
-                              ArrayRef<TemplateArgument> TemplateArgs,
-                              TemplateDeductionInfo &Info) {
+/// Perform template argument deduction to determine whether the given template
+/// arguments match the given class or variable template partial specialization
+/// per C++ [temp.class.spec.match].
+template <typename T>
+static std::enable_if_t<IsPartialSpecialization<T>::value,
+                        TemplateDeductionResult>
+DeduceTemplateArguments(Sema &S, T *Partial,
+                        ArrayRef<TemplateArgument> TemplateArgs,
+                        TemplateDeductionInfo &Info) {
   if (Partial->isInvalidDecl())
     return TemplateDeductionResult::Invalid;
 
@@ -3158,25 +3160,25 @@ Sema::DeduceTemplateArguments(ClassTemplatePartialSpecializationDecl *Partial,
 
   // Unevaluated SFINAE context.
   EnterExpressionEvaluationContext Unevaluated(
-      *this, Sema::ExpressionEvaluationContext::Unevaluated);
-  SFINAETrap Trap(*this);
+      S, Sema::ExpressionEvaluationContext::Unevaluated);
+  Sema::SFINAETrap Trap(S);
 
   // This deduction has no relation to any outer instantiation we might be
   // performing.
-  LocalInstantiationScope InstantiationScope(*this);
+  LocalInstantiationScope InstantiationScope(S);
 
   SmallVector<DeducedTemplateArgument, 4> Deduced;
   Deduced.resize(Partial->getTemplateParameters()->size());
   if (TemplateDeductionResult Result = ::DeduceTemplateArguments(
-          *this, Partial->getTemplateParameters(),
+          S, Partial->getTemplateParameters(),
           Partial->getTemplateArgs().asArray(), TemplateArgs, Info, Deduced,
           /*NumberOfArgumentsMustMatch=*/false);
       Result != TemplateDeductionResult::Success)
     return Result;
 
   SmallVector<TemplateArgument, 4> DeducedArgs(Deduced.begin(), Deduced.end());
-  InstantiatingTemplate Inst(*this, Info.getLocation(), Partial, DeducedArgs,
-                             Info);
+  Sema::InstantiatingTemplate Inst(S, Info.getLocation(), Partial, DeducedArgs,
+                                   Info);
   if (Inst.isInvalid())
     return TemplateDeductionResult::InstantiationDepth;
 
@@ -3184,64 +3186,25 @@ Sema::DeduceTemplateArguments(ClassTemplatePartialSpecializationDecl *Partial,
     return TemplateDeductionResult::SubstitutionFailure;
 
   TemplateDeductionResult Result;
-  runWithSufficientStackSpace(Info.getLocation(), [&] {
-    Result = ::FinishTemplateArgumentDeduction(*this, Partial,
+  S.runWithSufficientStackSpace(Info.getLocation(), [&] {
+    Result = ::FinishTemplateArgumentDeduction(S, Partial,
                                                /*IsPartialOrdering=*/false,
                                                TemplateArgs, Deduced, Info);
   });
   return Result;
 }
 
-/// Perform template argument deduction to determine whether
-/// the given template arguments match the given variable template
-/// partial specialization per C++ [temp.class.spec.match].
+TemplateDeductionResult
+Sema::DeduceTemplateArguments(ClassTemplatePartialSpecializationDecl *Partial,
+                              ArrayRef<TemplateArgument> TemplateArgs,
+                              TemplateDeductionInfo &Info) {
+  return ::DeduceTemplateArguments(*this, Partial, TemplateArgs, Info);
+}
 TemplateDeductionResult
 Sema::DeduceTemplateArguments(VarTemplatePartialSpecializationDecl *Partial,
                               ArrayRef<TemplateArgument> TemplateArgs,
                               TemplateDeductionInfo &Info) {
-  if (Partial->isInvalidDecl())
-    return TemplateDeductionResult::Invalid;
-
-  // C++ [temp.class.spec.match]p2:
-  //   A partial specialization matches a given actual template
-  //   argument list if the template arguments of the partial
-  //   specialization can be deduced from the actual template argument
-  //   list (14.8.2).
-
-  // Unevaluated SFINAE context.
-  EnterExpressionEvaluationContext Unevaluated(
-      *this, Sema::ExpressionEvaluationContext::Unevaluated);
-  SFINAETrap Trap(*this);
-
-  // This deduction has no relation to any outer instantiation we might be
-  // performing.
-  LocalInstantiationScope InstantiationScope(*this);
-
-  SmallVector<DeducedTemplateArgument, 4> Deduced;
-  Deduced.resize(Partial->getTemplateParameters()->size());
-  if (TemplateDeductionResult Result = ::DeduceTemplateArguments(
-          *this, Partial->getTemplateParameters(),
-          Partial->getTemplateArgs().asArray(), TemplateArgs, Info, Deduced,
-          /*NumberOfArgumentsMustMatch=*/false);
-      Result != TemplateDeductionResult::Success)
-    return Result;
-
-  SmallVector<TemplateArgument, 4> DeducedArgs(Deduced.begin(), Deduced.end());
-  InstantiatingTemplate Inst(*this, Info.getLocation(), Partial, DeducedArgs,
-                             Info);
-  if (Inst.isInvalid())
-    return TemplateDeductionResult::InstantiationDepth;
-
-  if (Trap.hasErrorOccurred())
-    return TemplateDeductionResult::SubstitutionFailure;
-
-  TemplateDeductionResult Result;
-  runWithSufficientStackSpace(Info.getLocation(), [&] {
-    Result = ::FinishTemplateArgumentDeduction(*this, Partial,
-                                               /*IsPartialOrdering=*/false,
-                                               TemplateArgs, Deduced, Info);
-  });
-  return Result;
+  return ::DeduceTemplateArguments(*this, Partial, TemplateArgs, Info);
 }
 
 /// Determine whether the given type T is a simple-template-id type.
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 25e9d50bd28dc..1cef8dcb58209 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -80,6 +80,81 @@ struct Response {
     return R;
   }
 };
+
+// Retrieve the primary template for a lambda call operator. It's
+// unfortunate that we only have the mappings of call operators rather
+// than lambda classes.
+const FunctionDecl *
+getPrimaryTemplateOfGenericLambda(const FunctionDecl *LambdaCallOperator) {
+  while (true) {
+    if (auto *FTD = dyn_cast_if_present<FunctionTemplateDecl>(
+            LambdaCallOperator->getDescribedTemplate());
+        FTD && FTD->getInstantiatedFromMemberTemplate()) {
+      LambdaCallOperator =
+          FTD->getInstantiatedFromMemberTemplate()->getTemplatedDecl();
+    } else if (auto *Prev = cast<CXXMethodDecl>(LambdaCallOperator)
+                                ->getInstantiatedFromMemberFunction())
+      LambdaCallOperator = Prev;
+    else
+      break;
+  }
+  return LambdaCallOperator;
+}
+
+struct EnclosingTypeAliasTemplateDetails {
+  TypeAliasTemplateDecl *Template = nullptr;
+  TypeAliasTemplateDecl *PrimaryTypeAliasDecl = nullptr;
+  ArrayRef<TemplateArgument> AssociatedTemplateArguments;
+
+  explicit operator bool() noexcept { return Template; }
+};
+
+// Find the enclosing type alias template Decl from CodeSynthesisContexts, as
+// well as its primary template and instantiating template arguments.
+EnclosingTypeAliasTemplateDetails
+getEnclosingTypeAliasTemplateDecl(Sema &SemaRef) {
+  for (auto &CSC : llvm::reverse(SemaRef.CodeSynthesisContexts)) {
+    if (CSC.Kind != Sema::CodeSynthesisContext::SynthesisKind::
+                        TypeAliasTemplateInstantiation)
+      continue;
+    EnclosingTypeAliasTemplateDetails Result;
+    auto *TATD = cast<TypeAliasTemplateDecl>(CSC.Entity),
+         *Next = TATD->getInstantiatedFromMemberTemplate();
+    Result = {
+        /*Template=*/TATD,
+        /*PrimaryTypeAliasDecl=*/TATD,
+        /*AssociatedTemplateArguments=*/CSC.template_arguments(),
+    };
+    while (Next) {
+      Result.PrimaryTypeAliasDecl = Next;
+      Next = Next->getInstantiatedFromMemberTemplate();
+    }
+    return Result;
+  }
+  return {};
+}
+
+// Check if we are currently inside of a lambda expression that is
+// surrounded by a using alias declaration, e.g.
+//   template <class> using type = decltype([](auto) { ^ }());
+// We check that:
+//   1. The lambda expression and the using alias declaration share the
+//      same declaration context.
+//   2. They have the same template depth.
+// We have to do so since a TypeAliasTemplateDecl (or a TypeAliasDecl) is never
+// a DeclContext, nor does it have an associated specialization Decl from which
+// we could collect these template arguments.
+bool isLambdaEnclosedByTypeAliasDecl(
+    const FunctionDecl *PrimaryLambdaCallOperator,
+    const TypeAliasTemplateDecl *PrimaryTypeAliasDecl) {
+  return cast<CXXRecordDecl>(PrimaryLambdaCallOperator->getDeclContext())
+                 ->getTemplateDepth() ==
+             PrimaryTypeAliasDecl->getTemplateDepth() &&
+         getLambdaAwareParentOfDeclContext(
+             const_cast<FunctionDecl *>(PrimaryLambdaCallOperator)) ==
+             PrimaryTypeAliasDecl->getDeclContext();
+}
+
 // Add template arguments from a variable template instantiation.
 Response
 HandleVarTemplateSpec(const VarTemplateSpecializationDecl *VarTemplSpec,
@@ -176,7 +251,7 @@ HandleClassTemplateSpec(const ClassTemplateSpecializationDecl *ClassTemplSpec,
   return Response::UseNextDecl(ClassTemplSpec);
 }
 
-Response HandleFunction(const FunctionDecl *Function,
+Response HandleFunction(Sema &SemaRef, const FunctionDecl *Function,
                         MultiLevelTemplateArgumentList &Result,
                         const FunctionDecl *Pattern, bool RelativeToPrimary,
                         bool ForConstraintInstantiation) {
@@ -207,8 +282,23 @@ Response HandleFunction(const FunctionDecl *Function,
 
   // If this function is a generic lambda specialization, we are done.
   if (!ForConstraintInstantiation &&
-      isGenericLambdaCallOperatorOrStaticInvokerSpecialization(Function))
+      isGenericLambdaCallOperatorOrStaticInvokerSpecialization(Function)) {
+    // TypeAliasTemplateDecls should be taken into account, e.g.
+    // when we're deducing the return type of a lambda.
+    //
+    //   template <class> int Value = 0;
+    //   template <class T>
+    //   using T = decltype([]() { return Value<T>; }());
+    //
+    if (auto TypeAlias = getEnclosingTypeAliasTemplateDecl(SemaRef)) {
+      if (isLambdaEnclosedByTypeAliasDecl(
+              /*PrimaryLambdaCallOperator=*/getPrimaryTemplateOfGenericLambda(
+                  Function),
+              /*PrimaryTypeAliasDecl=*/TypeAlias.PrimaryTypeAliasDecl))
+        return Response::UseNextDecl(Function);
+    }
     return Response::Done();
+  }
 
   } else if (Function->getDescribedFunctionTemplate()) {
     assert(
@@ -283,7 +373,7 @@ Response HandleFunctionTemplateDecl(const FunctionTemplateDecl *FTD,
   return Response::ChangeDecl(FTD->getLexicalDeclContext());
 }
 
-Response HandleRecordDecl(const CXXRecordDecl *Rec,
+Response HandleRecordDecl(Sema &SemaRef, const CXXRecordDecl *Rec,
                           MultiLevelTemplateArgumentList &Result,
                           ASTContext &Context,
                           bool ForConstraintInstantiation) {
@@ -312,11 +402,39 @@ Response HandleRecordDecl(const CXXRecordDecl *Rec,
     return Response::ChangeDecl(Rec->getLexicalDeclContext());
   }
 
-  // This is to make sure we pick up the VarTemplateSpecializationDecl that this
-  // lambda is defined inside of.
-  if (Rec->isLambda())
+  // This is to make sure we pick up the VarTemplateSpecializationDecl or the
+  // TypeAliasTemplateDecl that this lambda is defined inside of.
+  if (Rec->isLambda()) {
     if (const Decl *LCD = Rec->getLambdaContextDecl())
       return Response::ChangeDecl(LCD);
+    // Retrieve the template arguments for a using alias declaration.
+    // This is necessary for constraint checking, since we always keep
+    // constraints relative to the primary template.
+    if (auto TypeAlias = getEnclosingTypeAliasTemplateDecl(SemaRef)) {
+      const FunctionDecl *PrimaryLambdaCallOperator =
+          getPrimaryTemplateOfGenericLambda(Rec->getLambdaCallOperator());
+      if (isLambdaEnclosedByTypeAliasDecl(PrimaryLambdaCallOperator,
+                                          TypeAlias.PrimaryTypeAliasDecl)) {
+        Result.addOuterTemplateArguments(TypeAlias.Template,
+                                         TypeAlias.AssociatedTemplateArguments,
+                                         /*Final=*/false);
+        // Visit the parent of the current type alias declaration rather than
+        // the lambda thereof.
+        // E.g., in the following example:
+        //   struct S {
+        //     template <class> using T = decltype([] {} ());
+        //   };
+        //   void foo() {
+        //     S::T<int> var;
+        //   }
+        // The instantiated lambda expression (which we're visiting at 'var')
+        // has a function DeclContext 'foo' rather than the Record DeclContext
+        // S. This seems to be an oversight: we may want to set a Sema context
+        // from the CXXScopeSpec before substituting into T.
+        return Response::ChangeDecl(TypeAlias.Template->getDeclContext());
+      }
+    }
+  }
   return Response::UseNextDecl(Rec);
 }
 
@@ -410,10 +528,11 @@ MultiLevelTemplateArgumentList Sema::getTemplateInstantiationArgs(
       R = HandleClassTemplateSpec(ClassTemplSpec, Result,
                                   SkipForSpecialization);
     } else if (const auto *Function = dyn_cast<FunctionDecl>(CurDecl)) {
-      R = HandleFunction(Function, Result, Pattern, RelativeToPrimary,
+      R = HandleFunction(*this, Function, Result, Pattern, RelativeToPrimary,
                          ForConstraintInstantiation);
     } else if (const auto *Rec = dyn_cast<CXXRecordDecl>(CurDecl)) {
-      R = HandleRecordDecl(Rec, Result, Context, ForConstraintInstantiation);
+      R = HandleRecordDecl(*this, Rec, Result, Context,
+                           ForConstraintInstantiation);
     } else if (const auto *CSD =
                    dyn_cast<ImplicitConceptSpecializationDecl>(CurDecl)) {
       R = HandleImplicitConceptSpecializationDecl(CSD, Result);
 
@@ -470,6 +589,7 @@ bool Sema::CodeSynthesisContext::isInstantiationRecord() const {
   case BuildingBuiltinDumpStructCall:
   case LambdaExpressionSubstitution:
   case BuildingDeductionGuides:
+  case TypeAliasTemplateInstantiation:
     return false;
 
  // This function should never be called when Kind's value is Memoization.
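The SemaTemplateInstantiate additions above make the template arguments of an enclosing alias template visible while a lambda on its right-hand side is instantiated. A reduced, self-contained illustration of the construct being enabled; the names Value and Alias are hypothetical, not taken from the patch:

    #include <type_traits>

    template <class U>
    constexpr int Value = sizeof(U);

    // Deducing the lambda's return type needs Value<U>, so the enclosing
    // alias template's arguments must be found on the synthesis stack.
    template <class U>
    using Alias = decltype([] { return Value<U>; }());

    static_assert(std::is_same_v<Alias<int>, int>);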
@@ -615,6 +735,15 @@ Sema::InstantiatingTemplate::InstantiatingTemplate( PointOfInstantiation, InstantiationRange, Param, Template, TemplateArgs) {} +Sema::InstantiatingTemplate::InstantiatingTemplate( + Sema &SemaRef, SourceLocation PointOfInstantiation, + TypeAliasTemplateDecl *Entity, ArrayRef TemplateArgs, + SourceRange InstantiationRange) + : InstantiatingTemplate( + SemaRef, CodeSynthesisContext::TypeAliasTemplateInstantiation, + PointOfInstantiation, InstantiationRange, /*Entity=*/Entity, + /*Template=*/nullptr, TemplateArgs) {} + Sema::InstantiatingTemplate::InstantiatingTemplate( Sema &SemaRef, SourceLocation PointOfInstantiation, TemplateDecl *Template, NamedDecl *Param, ArrayRef TemplateArgs, @@ -854,11 +983,6 @@ void Sema::PrintInstantiationStack() { Diags.Report(Active->PointOfInstantiation, diag::note_template_class_instantiation_here) << CTD << Active->InstantiationRange; - } else { - Diags.Report(Active->PointOfInstantiation, - diag::note_template_type_alias_instantiation_here) - << cast(D) - << Active->InstantiationRange; } break; } @@ -1132,6 +1256,12 @@ void Sema::PrintInstantiationStack() { Diags.Report(Active->PointOfInstantiation, diag::note_building_deduction_guide_here); break; + case CodeSynthesisContext::TypeAliasTemplateInstantiation: + Diags.Report(Active->PointOfInstantiation, + diag::note_template_type_alias_instantiation_here) + << cast(Active->Entity) + << Active->InstantiationRange; + break; } } } @@ -1147,12 +1277,13 @@ std::optional Sema::isSFINAEContext() const { ++Active) { switch (Active->Kind) { - case CodeSynthesisContext::TemplateInstantiation: + case CodeSynthesisContext::TypeAliasTemplateInstantiation: // An instantiation of an alias template may or may not be a SFINAE // context, depending on what else is on the stack. if (isa(Active->Entity)) break; [[fallthrough]]; + case CodeSynthesisContext::TemplateInstantiation: case CodeSynthesisContext::DefaultFunctionArgumentInstantiation: case CodeSynthesisContext::ExceptionSpecInstantiation: case CodeSynthesisContext::ConstraintsCheck: @@ -1557,6 +1688,18 @@ namespace { SubstTemplateTypeParmPackTypeLoc TL, bool SuppressObjCLifetime); + CXXRecordDecl::LambdaDependencyKind + ComputeLambdaDependency(LambdaScopeInfo *LSI) { + auto &CCS = SemaRef.CodeSynthesisContexts.back(); + if (CCS.Kind == + Sema::CodeSynthesisContext::TypeAliasTemplateInstantiation) { + unsigned TypeAliasDeclDepth = CCS.Entity->getTemplateDepth(); + if (TypeAliasDeclDepth >= TemplateArgs.getNumSubstitutedLevels()) + return CXXRecordDecl::LambdaDependencyKind::LDK_AlwaysDependent; + } + return inherited::ComputeLambdaDependency(LSI); + } + ExprResult TransformLambdaExpr(LambdaExpr *E) { LocalInstantiationScope Scope(SemaRef, /*CombineWithOuterScope=*/true); Sema::ConstraintEvalRAII RAII(*this); diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index b0e8f836d2b5d..7d65092e6c920 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -1582,6 +1582,15 @@ TemplateDeclInstantiator::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) { return nullptr; TypeAliasDecl *Pattern = D->getTemplatedDecl(); + Sema::InstantiatingTemplate InstTemplate( + SemaRef, D->getBeginLoc(), D, + D->getTemplateDepth() >= TemplateArgs.getNumLevels() + ? 
ArrayRef() + : (TemplateArgs.begin() + TemplateArgs.getNumLevels() - 1 - + D->getTemplateDepth()) + ->Args); + if (InstTemplate.isInvalid()) + return nullptr; TypeAliasTemplateDecl *PrevAliasTemplate = nullptr; if (getPreviousDeclForInstantiation(Pattern)) { @@ -5602,6 +5611,14 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, EnterExpressionEvaluationContext EvalContext( *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); + Qualifiers ThisTypeQuals; + CXXRecordDecl *ThisContext = nullptr; + if (CXXMethodDecl *Method = dyn_cast(Function)) { + ThisContext = Method->getParent(); + ThisTypeQuals = Method->getMethodQualifiers(); + } + CXXThisScopeRAII ThisScope(*this, ThisContext, ThisTypeQuals); + // Introduce a new scope where local variable instantiations will be // recorded, unless we're actually a member function within a local // class, in which case we need to merge our results with the parent diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 17cb804e1268f..935903ba4e3c6 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -768,6 +768,12 @@ class TreeTransform { /// the body. StmtResult SkipLambdaBody(LambdaExpr *E, Stmt *Body); + CXXRecordDecl::LambdaDependencyKind + ComputeLambdaDependency(LambdaScopeInfo *LSI) { + return static_cast( + LSI->Lambda->getLambdaDependencyKind()); + } + QualType TransformReferenceType(TypeLocBuilder &TLB, ReferenceTypeLoc TL); StmtResult TransformCompoundStmt(CompoundStmt *S, bool IsStmtExpr); @@ -3329,12 +3335,13 @@ class TreeTransform { /// Build a new C++ "this" expression. /// - /// By default, builds a new "this" expression without performing any - /// semantic analysis. Subclasses may override this routine to provide - /// different behavior. + /// By default, performs semantic analysis to build a new "this" expression. + /// Subclasses may override this routine to provide different behavior. ExprResult RebuildCXXThisExpr(SourceLocation ThisLoc, QualType ThisType, bool isImplicit) { + if (getSema().CheckCXXThisType(ThisLoc, ThisType)) + return ExprError(); return getSema().BuildCXXThisExpr(ThisLoc, ThisType, isImplicit); } @@ -3912,15 +3919,14 @@ class TreeTransform { FPOptionsOverride()); // Type-check the __builtin_shufflevector expression. - return SemaRef.SemaBuiltinShuffleVector(cast(TheCall.get())); + return SemaRef.BuiltinShuffleVector(cast(TheCall.get())); } /// Build a new convert vector expression. ExprResult RebuildConvertVectorExpr(SourceLocation BuiltinLoc, Expr *SrcExpr, TypeSourceInfo *DstTInfo, SourceLocation RParenLoc) { - return SemaRef.SemaConvertVectorExpr(SrcExpr, DstTInfo, - BuiltinLoc, RParenLoc); + return SemaRef.ConvertVectorExpr(SrcExpr, DstTInfo, BuiltinLoc, RParenLoc); } /// Build a new template argument pack expansion. @@ -4028,17 +4034,10 @@ class TreeTransform { StmtResult RebuildOpenACCComputeConstruct(OpenACCDirectiveKind K, SourceLocation BeginLoc, SourceLocation EndLoc, + ArrayRef Clauses, StmtResult StrBlock) { - getSema().OpenACC().ActOnConstruct(K, BeginLoc); - - // TODO OpenACC: Include clauses. 
-    if (getSema().OpenACC().ActOnStartStmtDirective(K, BeginLoc))
-      return StmtError();
-
-    StrBlock = getSema().OpenACC().ActOnAssociatedStmt(K, StrBlock);
     return getSema().OpenACC().ActOnEndStmtDirective(K, BeginLoc, EndLoc,
-                                                     StrBlock);
+                                                     Clauses, StrBlock);
   }
 
 private:
@@ -4059,6 +4058,10 @@ class TreeTransform {
   QualType TransformDependentNameType(TypeLocBuilder &TLB,
                                       DependentNameTypeLoc TL,
                                       bool DeducibleTSTContext);
+
+  llvm::SmallVector<OpenACCClause *>
+  TransformOpenACCClauseList(OpenACCDirectiveKind DirKind,
+                             ArrayRef<const OpenACCClause *> OldClauses);
 };
 
 template <typename Derived>
@@ -11099,16 +11102,38 @@ OMPClause *TreeTransform<Derived>::TransformOMPXBareClause(OMPXBareClause *C) {
 
 //===----------------------------------------------------------------------===//
 // OpenACC transformation
 //===----------------------------------------------------------------------===//
+template <typename Derived>
+llvm::SmallVector<OpenACCClause *>
+TreeTransform<Derived>::TransformOpenACCClauseList(
+    OpenACCDirectiveKind DirKind, ArrayRef<const OpenACCClause *> OldClauses) {
+  // TODO OpenACC: Ensure we loop through the list and transform the individual
+  // clauses.
+  return {};
+}
+
 template <typename Derived>
 StmtResult TreeTransform<Derived>::TransformOpenACCComputeConstruct(
     OpenACCComputeConstruct *C) {
-  // TODO OpenACC: Transform clauses.
+  getSema().OpenACC().ActOnConstruct(C->getDirectiveKind(), C->getBeginLoc());
+  // FIXME: When implementing this for constructs that can take arguments, we
+  // should do Sema for them here.
+
+  if (getSema().OpenACC().ActOnStartStmtDirective(C->getDirectiveKind(),
+                                                  C->getBeginLoc()))
+    return StmtError();
+
+  llvm::SmallVector<OpenACCClause *> TransformedClauses =
+      getDerived().TransformOpenACCClauseList(C->getDirectiveKind(),
+                                              C->clauses());
 
   // Transform Structured Block.
   StmtResult StrBlock = getDerived().TransformStmt(C->getStructuredBlock());
+  StrBlock =
+      getSema().OpenACC().ActOnAssociatedStmt(C->getDirectiveKind(), StrBlock);
 
   return getDerived().RebuildOpenACCComputeConstruct(
-      C->getDirectiveKind(), C->getBeginLoc(), C->getEndLoc(), StrBlock);
+      C->getDirectiveKind(), C->getBeginLoc(), C->getEndLoc(),
+      TransformedClauses, StrBlock);
 }
 
 //===----------------------------------------------------------------------===//
@@ -11205,8 +11230,8 @@ TreeTransform<Derived>::TransformDeclRefExpr(DeclRefExpr *E) {
   }
 
   if (!getDerived().AlwaysRebuild() &&
-      QualifierLoc == E->getQualifierLoc() &&
-      ND == E->getDecl() &&
+      !E->isCapturedByCopyInLambdaWithExplicitObjectParameter() &&
+      QualifierLoc == E->getQualifierLoc() && ND == E->getDecl() &&
       Found == E->getFoundDecl() &&
      NameInfo.getName() == E->getDecl()->getDeclName() &&
       !E->hasExplicitTemplateArgs()) {
@@ -12727,9 +12752,17 @@ TreeTransform<Derived>::TransformCXXThisExpr(CXXThisExpr *E) {
   //
   // In other contexts, the type of `this` may be overridden
   // for type deduction, so we need to recompute it.
-  QualType T = getSema().getCurLambda() ?
-                   getDerived().TransformType(E->getType())
-                   : getSema().getCurrentThisType();
+  //
+  // Always recompute the type if we're in the body of a lambda, and
+  // 'this' is dependent on a lambda's explicit object parameter.
+  QualType T = [&]() {
+    auto &S = getSema();
+    if (E->isCapturedByCopyInLambdaWithExplicitObjectParameter())
+      return S.getCurrentThisType();
+    if (S.getCurLambda())
+      return getDerived().TransformType(E->getType());
+    return S.getCurrentThisType();
+  }();
 
   if (!getDerived().AlwaysRebuild() && T == E->getType()) {
    // Mark it referenced in the new context regardless.
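The TransformDeclRefExpr and TransformCXXThisExpr changes above force a rebuild whenever the referenced entity is a by-copy capture in a lambda with a C++23 explicit object parameter, because the type of such a reference depends on the deduced type of that parameter. A hypothetical reduction of the affected pattern:

    template <class T>
    int f() {
      T x = {};
      // 'x' in the body goes through the by-copy capture; when f<int> is
      // instantiated, the reference must be rebuilt against the explicit
      // object parameter 'self' even though nothing else changed.
      auto l = [x](this auto self) { return x; };
      return l();
    }
    int n = f<int>();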
@@ -14102,6 +14135,46 @@ TreeTransform::TransformLambdaExpr(LambdaExpr *E) { /*IsInstantiation*/ true); SavedContext.pop(); + // Recompute the dependency of the lambda so that we can defer the lambda call + // construction until after we have all the necessary template arguments. For + // example, given + // + // template struct S { + // template + // using Type = decltype([](U){}(42.0)); + // }; + // void foo() { + // using T = S::Type; + // ^~~~~~ + // } + // + // We would end up here from instantiating S when ensuring its + // completeness. That would transform the lambda call expression regardless of + // the absence of the corresponding argument for U. + // + // Going ahead with unsubstituted type U makes things worse: we would soon + // compare the argument type (which is float) against the parameter U + // somewhere in Sema::BuildCallExpr. Then we would quickly run into a bogus + // error suggesting unmatched types 'U' and 'float'! + // + // That said, everything will be fine if we defer that semantic checking. + // Fortunately, we have such a mechanism that bypasses it if the CallExpr is + // dependent. Since the CallExpr's dependency boils down to the lambda's + // dependency in this case, we can harness that by recomputing the dependency + // from the instantiation arguments. + // + // FIXME: Creating the type of a lambda requires us to have a dependency + // value, which happens before its substitution. We update its dependency + // *after* the substitution in case we can't decide the dependency + // so early, e.g. because we want to see if any of the *substituted* + // parameters are dependent. + DependencyKind = getDerived().ComputeLambdaDependency(&LSICopy); + Class->setLambdaDependencyKind(DependencyKind); + // Clean up the type cache created previously. Then, we re-create a type for + // such Decl with the new DependencyKind. + Class->setTypeForDecl(nullptr); + getSema().Context.getTypeDeclType(Class); + return getSema().BuildLambdaExpr(E->getBeginLoc(), Body.get()->getEndLoc(), &LSICopy); } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 32c2339e4a458..12d5cf29b225c 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -32,6 +32,7 @@ #include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/ODRDiagsEmitter.h" #include "clang/AST/ODRHash.h" +#include "clang/AST/OpenACCClause.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/RawCommentList.h" #include "clang/AST/TemplateBase.h" @@ -53,6 +54,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/Module.h" #include "clang/Basic/ObjCRuntime.h" +#include "clang/Basic/OpenACCKinds.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/OperatorKinds.h" #include "clang/Basic/PragmaKinds.h" @@ -2094,7 +2096,7 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, Module::Header H = {std::string(key.Filename), "", *FE}; ModMap.addHeader(Mod, H, HeaderRole, /*Imported=*/true); } - HFI.isModuleHeader |= ModuleMap::isModular(HeaderRole); + HFI.mergeModuleMembership(HeaderRole); } // This HeaderFileInfo was externally loaded. 
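The dependency recomputation added to TransformLambdaExpr above can be exercised with a self-contained variant of the example from its comment; the concrete arguments <int> and <float> here are illustrative:

    template <class> struct S {
      template <class U>
      using Type = decltype([](U){}(42.0));
    };
    void foo() {
      // Completing S<int> transforms the lambda before U is known; keeping
      // the lambda dependent defers checking the call until Type<float>
      // itself is instantiated.
      using T = S<int>::Type<float>;
    }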
@@ -11758,3 +11760,66 @@ void ASTRecordReader::readOMPChildren(OMPChildren *Data) { for (unsigned I = 0, E = Data->getNumChildren(); I < E; ++I) Data->getChildren()[I] = readStmt(); } + +OpenACCClause *ASTRecordReader::readOpenACCClause() { + OpenACCClauseKind ClauseKind = readEnum(); + // TODO OpenACC: We don't have these used anywhere, but eventually we should + // be constructing the Clauses with them, so these attributes can go away at + // that point. + [[maybe_unused]] SourceLocation BeginLoc = readSourceLocation(); + [[maybe_unused]] SourceLocation EndLoc = readSourceLocation(); + + switch (ClauseKind) { + case OpenACCClauseKind::Default: + case OpenACCClauseKind::Finalize: + case OpenACCClauseKind::IfPresent: + case OpenACCClauseKind::Seq: + case OpenACCClauseKind::Independent: + case OpenACCClauseKind::Auto: + case OpenACCClauseKind::Worker: + case OpenACCClauseKind::Vector: + case OpenACCClauseKind::NoHost: + case OpenACCClauseKind::If: + case OpenACCClauseKind::Self: + case OpenACCClauseKind::Copy: + case OpenACCClauseKind::UseDevice: + case OpenACCClauseKind::Attach: + case OpenACCClauseKind::Delete: + case OpenACCClauseKind::Detach: + case OpenACCClauseKind::Device: + case OpenACCClauseKind::DevicePtr: + case OpenACCClauseKind::DeviceResident: + case OpenACCClauseKind::FirstPrivate: + case OpenACCClauseKind::Host: + case OpenACCClauseKind::Link: + case OpenACCClauseKind::NoCreate: + case OpenACCClauseKind::Present: + case OpenACCClauseKind::Private: + case OpenACCClauseKind::CopyOut: + case OpenACCClauseKind::CopyIn: + case OpenACCClauseKind::Create: + case OpenACCClauseKind::Reduction: + case OpenACCClauseKind::Collapse: + case OpenACCClauseKind::Bind: + case OpenACCClauseKind::VectorLength: + case OpenACCClauseKind::NumGangs: + case OpenACCClauseKind::NumWorkers: + case OpenACCClauseKind::DeviceNum: + case OpenACCClauseKind::DefaultAsync: + case OpenACCClauseKind::DeviceType: + case OpenACCClauseKind::DType: + case OpenACCClauseKind::Async: + case OpenACCClauseKind::Tile: + case OpenACCClauseKind::Gang: + case OpenACCClauseKind::Wait: + case OpenACCClauseKind::Invalid: + llvm_unreachable("Clause serialization not yet implemented"); + } + llvm_unreachable("Invalid Clause Kind"); +} + +void ASTRecordReader::readOpenACCClauseList( + MutableArrayRef Clauses) { + for (unsigned I = 0; I < Clauses.size(); ++I) + Clauses[I] = readOpenACCClause(); +} diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index a22f760408c63..78448855fba09 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -94,8 +94,6 @@ namespace clang { GlobalDeclID NamedDeclForTagDecl = 0; IdentifierInfo *TypedefNameForLinkage = nullptr; - bool HasPendingBody = false; - ///A flag to carry the information for a decl from the entity is /// used. We use it to delay the marking of the canonical decl as used until /// the entire declaration is deserialized and merged. @@ -314,9 +312,6 @@ namespace clang { static void markIncompleteDeclChainImpl(Redeclarable *D); static void markIncompleteDeclChainImpl(...); - /// Determine whether this declaration has a pending body. - bool hasPendingBody() const { return HasPendingBody; } - void ReadFunctionDefinition(FunctionDecl *FD); void Visit(Decl *D); @@ -541,7 +536,6 @@ void ASTDeclReader::ReadFunctionDefinition(FunctionDecl *FD) { } // Store the offset of the body so we can lazily load it later. 
Reader.PendingBodies[FD] = GetCurrentCursorOffset(); - HasPendingBody = true; } void ASTDeclReader::Visit(Decl *D) { @@ -1164,7 +1158,6 @@ void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) { // Load the body on-demand. Most clients won't care, because method // definitions rarely show up in headers. Reader.PendingBodies[MD] = GetCurrentCursorOffset(); - HasPendingBody = true; } MD->setSelfDecl(readDeclAs()); MD->setCmdDecl(readDeclAs()); @@ -4156,8 +4149,7 @@ Decl *ASTReader::ReadDeclRecord(DeclID ID) { // AST consumer might need to know about, queue it. // We don't pass it to the consumer immediately because we may be in recursive // loading, and some declarations may still be initializing. - PotentiallyInterestingDecls.push_back( - InterestingDecl(D, Reader.hasPendingBody())); + PotentiallyInterestingDecls.push_back(D); return D; } @@ -4179,10 +4171,10 @@ void ASTReader::PassInterestingDeclsToConsumer() { EagerlyDeserializedDecls.clear(); while (!PotentiallyInterestingDecls.empty()) { - InterestingDecl D = PotentiallyInterestingDecls.front(); + Decl *D = PotentiallyInterestingDecls.front(); PotentiallyInterestingDecls.pop_front(); - if (isConsumerInterestedIn(getContext(), D.getDecl(), D.hasPendingBody())) - PassInterestingDeclToConsumer(D.getDecl()); + if (isConsumerInterestedIn(getContext(), D, PendingBodies.count(D))) + PassInterestingDeclToConsumer(D); } } @@ -4239,9 +4231,8 @@ void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { // We might have made this declaration interesting. If so, remember that // we need to hand it off to the consumer. if (!WasInteresting && - isConsumerInterestedIn(getContext(), D, Reader.hasPendingBody())) { - PotentiallyInterestingDecls.push_back( - InterestingDecl(D, Reader.hasPendingBody())); + isConsumerInterestedIn(getContext(), D, PendingBodies.count(D))) { + PotentiallyInterestingDecls.push_back(D); WasInteresting = true; } } diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index fb37c573bdca4..47471adf64ef3 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1873,6 +1873,7 @@ void ASTStmtReader::VisitCXXThisExpr(CXXThisExpr *E) { VisitExpr(E); E->setLocation(readSourceLocation()); E->setImplicit(Record.readInt()); + E->setCapturedByCopyInLambdaWithExplicitObjectParameter(Record.readInt()); } void ASTStmtReader::VisitCXXThrowExpr(CXXThrowExpr *E) { @@ -2816,9 +2817,10 @@ void ASTStmtReader::VisitOMPTargetParallelGenericLoopDirective( // OpenACC Constructs/Directives. //===----------------------------------------------------------------------===// void ASTStmtReader::VisitOpenACCConstructStmt(OpenACCConstructStmt *S) { + (void)Record.readInt(); S->Kind = Record.readEnum(); S->Range = Record.readSourceRange(); - // TODO OpenACC: Deserialize Clauses. 
+ Record.readOpenACCClauseList(S->Clauses); } void ASTStmtReader::VisitOpenACCAssociatedStmtConstruct( @@ -4270,10 +4272,11 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = new (Context) ConceptSpecializationExpr(Empty); break; } - case STMT_OPENACC_COMPUTE_CONSTRUCT: - S = OpenACCComputeConstruct::CreateEmpty(Context, Empty); + case STMT_OPENACC_COMPUTE_CONSTRUCT: { + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields]; + S = OpenACCComputeConstruct::CreateEmpty(Context, NumClauses); break; - + } case EXPR_REQUIRES: unsigned numLocalParameters = Record[ASTStmtReader::NumExprFields]; unsigned numRequirement = Record[ASTStmtReader::NumExprFields + 1]; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index ba6a8a5e16e4e..baf03f69d7306 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -29,6 +29,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/LambdaCapture.h" #include "clang/AST/NestedNameSpecifier.h" +#include "clang/AST/OpenACCClause.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/RawCommentList.h" #include "clang/AST/TemplateName.h" @@ -44,6 +45,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/Module.h" #include "clang/Basic/ObjCRuntime.h" +#include "clang/Basic/OpenACCKinds.h" #include "clang/Basic/OpenCLOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" @@ -7397,3 +7399,63 @@ void ASTRecordWriter::writeOMPChildren(OMPChildren *Data) { for (unsigned I = 0, E = Data->getNumChildren(); I < E; ++I) AddStmt(Data->getChildren()[I]); } + +void ASTRecordWriter::writeOpenACCClause(const OpenACCClause *C) { + writeEnum(C->getClauseKind()); + writeSourceLocation(C->getBeginLoc()); + writeSourceLocation(C->getEndLoc()); + + switch (C->getClauseKind()) { + case OpenACCClauseKind::Default: + case OpenACCClauseKind::Finalize: + case OpenACCClauseKind::IfPresent: + case OpenACCClauseKind::Seq: + case OpenACCClauseKind::Independent: + case OpenACCClauseKind::Auto: + case OpenACCClauseKind::Worker: + case OpenACCClauseKind::Vector: + case OpenACCClauseKind::NoHost: + case OpenACCClauseKind::If: + case OpenACCClauseKind::Self: + case OpenACCClauseKind::Copy: + case OpenACCClauseKind::UseDevice: + case OpenACCClauseKind::Attach: + case OpenACCClauseKind::Delete: + case OpenACCClauseKind::Detach: + case OpenACCClauseKind::Device: + case OpenACCClauseKind::DevicePtr: + case OpenACCClauseKind::DeviceResident: + case OpenACCClauseKind::FirstPrivate: + case OpenACCClauseKind::Host: + case OpenACCClauseKind::Link: + case OpenACCClauseKind::NoCreate: + case OpenACCClauseKind::Present: + case OpenACCClauseKind::Private: + case OpenACCClauseKind::CopyOut: + case OpenACCClauseKind::CopyIn: + case OpenACCClauseKind::Create: + case OpenACCClauseKind::Reduction: + case OpenACCClauseKind::Collapse: + case OpenACCClauseKind::Bind: + case OpenACCClauseKind::VectorLength: + case OpenACCClauseKind::NumGangs: + case OpenACCClauseKind::NumWorkers: + case OpenACCClauseKind::DeviceNum: + case OpenACCClauseKind::DefaultAsync: + case OpenACCClauseKind::DeviceType: + case OpenACCClauseKind::DType: + case OpenACCClauseKind::Async: + case OpenACCClauseKind::Tile: + case OpenACCClauseKind::Gang: + case OpenACCClauseKind::Wait: + case OpenACCClauseKind::Invalid: + llvm_unreachable("Clause serialization not yet implemented"); + } + llvm_unreachable("Invalid Clause Kind"); +} + +void ASTRecordWriter::writeOpenACCClauseList( + ArrayRef Clauses) { + for (const 
OpenACCClause *Clause : Clauses) + writeOpenACCClause(Clause); +} diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index b7d8a39c92e22..a081eb48d24bf 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -1876,6 +1876,7 @@ void ASTStmtWriter::VisitCXXThisExpr(CXXThisExpr *E) { VisitExpr(E); Record.AddSourceLocation(E->getLocation()); Record.push_back(E->isImplicit()); + Record.push_back(E->isCapturedByCopyInLambdaWithExplicitObjectParameter()); Code = serialization::EXPR_CXX_THIS; } @@ -2876,9 +2877,10 @@ void ASTStmtWriter::VisitOMPTargetParallelGenericLoopDirective( // OpenACC Constructs/Directives. //===----------------------------------------------------------------------===// void ASTStmtWriter::VisitOpenACCConstructStmt(OpenACCConstructStmt *S) { + Record.push_back(S->clauses().size()); Record.writeEnum(S->Kind); Record.AddSourceRange(S->Range); - // TODO OpenACC: Serialize Clauses. + Record.writeOpenACCClauseList(S->clauses()); } void ASTStmtWriter::VisitOpenACCAssociatedStmtConstruct( diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 59be236ca1c76..63844563de44f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -124,34 +124,45 @@ class CStringChecker : public Checker< eval::Call, const CallEvent &)>; CallDescriptionMap Callbacks = { - {{CDM::CLibrary, {"memcpy"}, 3}, + {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3}, std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)}, - {{CDM::CLibrary, {"wmemcpy"}, 3}, + {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3}, std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)}, - {{CDM::CLibrary, {"mempcpy"}, 3}, + {{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3}, std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)}, - {{CDM::Unspecified, {"wmempcpy"}, 3}, + {{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3}, std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)}, {{CDM::CLibrary, {"memcmp"}, 3}, std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)}, {{CDM::CLibrary, {"wmemcmp"}, 3}, std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)}, - {{CDM::CLibrary, {"memmove"}, 3}, + {{CDM::CLibraryMaybeHardened, {"memmove"}, 3}, std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)}, - {{CDM::CLibrary, {"wmemmove"}, 3}, + {{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3}, std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)}, - {{CDM::CLibrary, {"memset"}, 3}, &CStringChecker::evalMemset}, + {{CDM::CLibraryMaybeHardened, {"memset"}, 3}, + &CStringChecker::evalMemset}, {{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset}, - {{CDM::CLibrary, {"strcpy"}, 2}, &CStringChecker::evalStrcpy}, - {{CDM::CLibrary, {"strncpy"}, 3}, &CStringChecker::evalStrncpy}, - {{CDM::CLibrary, {"stpcpy"}, 2}, &CStringChecker::evalStpcpy}, - {{CDM::CLibrary, {"strlcpy"}, 3}, &CStringChecker::evalStrlcpy}, - {{CDM::CLibrary, {"strcat"}, 2}, &CStringChecker::evalStrcat}, - {{CDM::CLibrary, {"strncat"}, 3}, &CStringChecker::evalStrncat}, - {{CDM::CLibrary, {"strlcat"}, 3}, &CStringChecker::evalStrlcat}, - {{CDM::CLibrary, {"strlen"}, 1}, &CStringChecker::evalstrLength}, + // FIXME: C23 introduces 'memset_explicit', maybe also model that + {{CDM::CLibraryMaybeHardened, {"strcpy"}, 2}, + &CStringChecker::evalStrcpy}, + {{CDM::CLibraryMaybeHardened, {"strncpy"}, 
3}, + &CStringChecker::evalStrncpy}, + {{CDM::CLibraryMaybeHardened, {"stpcpy"}, 2}, + &CStringChecker::evalStpcpy}, + {{CDM::CLibraryMaybeHardened, {"strlcpy"}, 3}, + &CStringChecker::evalStrlcpy}, + {{CDM::CLibraryMaybeHardened, {"strcat"}, 2}, + &CStringChecker::evalStrcat}, + {{CDM::CLibraryMaybeHardened, {"strncat"}, 3}, + &CStringChecker::evalStrncat}, + {{CDM::CLibraryMaybeHardened, {"strlcat"}, 3}, + &CStringChecker::evalStrlcat}, + {{CDM::CLibraryMaybeHardened, {"strlen"}, 1}, + &CStringChecker::evalstrLength}, {{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength}, - {{CDM::CLibrary, {"strnlen"}, 2}, &CStringChecker::evalstrnLength}, + {{CDM::CLibraryMaybeHardened, {"strnlen"}, 2}, + &CStringChecker::evalstrnLength}, {{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength}, {{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp}, {{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp}, @@ -162,9 +173,19 @@ class CStringChecker : public Checker< eval::Call, {{CDM::CLibrary, {"bcmp"}, 3}, std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)}, {{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero}, - {{CDM::CLibrary, {"explicit_bzero"}, 2}, &CStringChecker::evalBzero}, - {{CDM::CLibrary, {"sprintf"}, 2}, &CStringChecker::evalSprintf}, - {{CDM::CLibrary, {"snprintf"}, 2}, &CStringChecker::evalSnprintf}, + {{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2}, + &CStringChecker::evalBzero}, + + // When recognizing calls to the following variadic functions, we accept + // any number of arguments in the call (std::nullopt = accept any + // number), but check that in the declaration there are 2 and 3 + // parameters respectively. (Note that the parameter count does not + // include the "...". Calls where the number of arguments is too small + // will be discarded by the callback.) + {{CDM::CLibraryMaybeHardened, {"sprintf"}, std::nullopt, 2}, + &CStringChecker::evalSprintf}, + {{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3}, + &CStringChecker::evalSnprintf}, }; // These require a bit of special handling. 
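CDM::CLibraryMaybeHardened, used throughout the updated table above, lets one CallDescription match both a plain C library function and its fortified counterpart. Under glibc-style _FORTIFY_SOURCE builds, a plain call is commonly lowered to a *_chk builtin; a hedged sketch of the kind of call site this now covers (the exact lowering varies by library and fortify level):

    #include <cstdio>

    void g(int n) {
      char buf[16];
      // With -D_FORTIFY_SOURCE=2 and optimization enabled, glibc headers
      // typically emit __builtin___sprintf_chk(buf, ...) for this call;
      // the checker should treat it the same as plain sprintf.
      std::sprintf(buf, "%d", n);
    }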
@@ -218,7 +239,7 @@ class CStringChecker : public Checker< eval::Call, void evalSprintf(CheckerContext &C, const CallEvent &Call) const; void evalSnprintf(CheckerContext &C, const CallEvent &Call) const; void evalSprintfCommon(CheckerContext &C, const CallEvent &Call, - bool IsBounded, bool IsBuiltin) const; + bool IsBounded) const; // Utility methods std::pair @@ -2467,27 +2488,26 @@ void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const { void CStringChecker::evalSprintf(CheckerContext &C, const CallEvent &Call) const { CurrentFunctionDescription = "'sprintf'"; - const auto *CE = cast(Call.getOriginExpr()); - bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___sprintf_chk; - evalSprintfCommon(C, Call, /* IsBounded */ false, IsBI); + evalSprintfCommon(C, Call, /* IsBounded = */ false); } void CStringChecker::evalSnprintf(CheckerContext &C, const CallEvent &Call) const { CurrentFunctionDescription = "'snprintf'"; - const auto *CE = cast(Call.getOriginExpr()); - bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___snprintf_chk; - evalSprintfCommon(C, Call, /* IsBounded */ true, IsBI); + evalSprintfCommon(C, Call, /* IsBounded = */ true); } void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call, - bool IsBounded, bool IsBuiltin) const { + bool IsBounded) const { ProgramStateRef State = C.getState(); const auto *CE = cast(Call.getOriginExpr()); DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}}; const auto NumParams = Call.parameters().size(); - assert(CE->getNumArgs() >= NumParams); + if (CE->getNumArgs() < NumParams) { + // This is an invalid call, let's just ignore it. + return; + } const auto AllArguments = llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs()); diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index 4c48fdf498f7f..89054512d65ad 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -718,20 +718,23 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { {{{"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{{"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{CDM::CLibrary, {BI.getName(Builtin::BIstrncat)}}, + {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrncat)}}, TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {BI.getName(Builtin::BIstrlcpy)}}, + {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrlcpy)}}, TR::Prop({{1, 2}}, {{0}})}, - {{CDM::CLibrary, {BI.getName(Builtin::BIstrlcat)}}, + {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrlcat)}}, TR::Prop({{1, 2}}, {{0}})}, - {{CDM::CLibrary, {{"snprintf"}}}, + {{CDM::CLibraryMaybeHardened, {{"snprintf"}}}, TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {{"sprintf"}}}, + {{CDM::CLibraryMaybeHardened, {{"sprintf"}}}, TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {{"strcpy"}}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {{"stpcpy"}}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {{"strcat"}}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {{"wcsncat"}}}, + {{CDM::CLibraryMaybeHardened, {{"strcpy"}}}, + TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {{"stpcpy"}}}, + TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {{"strcat"}}}, + TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + 
{{CDM::CLibraryMaybeHardened, {{"wcsncat"}}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, {{CDM::CLibrary, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{CDM::CLibrary, {{"strdupa"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, @@ -759,13 +762,13 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, // SinkProps - {{CDM::CLibrary, BI.getName(Builtin::BImemcpy)}, + {{CDM::CLibraryMaybeHardened, BI.getName(Builtin::BImemcpy)}, TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, MsgTaintedBufferSize)}, - {{CDM::CLibrary, {BI.getName(Builtin::BImemmove)}}, + {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BImemmove)}}, TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, MsgTaintedBufferSize)}, - {{CDM::CLibrary, {BI.getName(Builtin::BIstrncpy)}}, + {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrncpy)}}, TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, MsgTaintedBufferSize)}, {{CDM::CLibrary, {BI.getName(Builtin::BIstrndup)}}, diff --git a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp index c3acb73ba7175..086c3e5e49b77 100644 --- a/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/RetainCountChecker/RetainCountDiagnostics.cpp @@ -977,7 +977,7 @@ void RefLeakReport::findBindingToReport(CheckerContext &Ctx, // something like derived regions if we want to construct SVal from // Sym. Instead, we take the value that is definitely stored in that // region, thus guaranteeing that trackStoredValue will work. - bugreporter::trackStoredValue(AllVarBindings[0].second.castAs(), + bugreporter::trackStoredValue(AllVarBindings[0].second, AllocBindingToReport, *this); } else { AllocBindingToReport = AllocFirstBinding; diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp index 6cc8867945814..845a5f9b390dc 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -30,7 +30,7 @@ // was not consciously intended, and therefore it might have been unreachable. // // This checker uses eval::Call for modeling pure functions (functions without -// side effets), for which their `Summary' is a precise model. This avoids +// side effects), for which their `Summary' is a precise model. This avoids // unnecessary invalidation passes. Conflicts with other checkers are unlikely // because if the function has no other effects, other checkers would probably // never want to improve upon the modeling done by this checker. diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index 069e3a633c121..31c756ab0c581 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -297,6 +297,9 @@ class StreamChecker : public Checker FnDescriptions = { {{{"fopen"}, 2}, {nullptr, &StreamChecker::evalFopen, ArgNone}}, @@ -945,6 +948,10 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc, } // Add transition for the failed state. + // At write, add failure case only if "pedantic mode" is on. 
+  if (!IsFread && !PedanticMode)
+    return;
+
   NonLoc RetVal = makeRetVal(C, E.CE).castAs<NonLoc>();
   ProgramStateRef StateFailed =
       State->BindExpr(E.CE, C.getLocationContext(), RetVal);
@@ -1057,6 +1064,9 @@ void StreamChecker::evalFputx(const FnDescription *Desc, const CallEvent &Call,
     C.addTransition(StateNotFailed);
   }
 
+  if (!PedanticMode)
+    return;
+
   // Add transition for the failed state. The resulting value of the file
   // position indicator for the stream is indeterminate.
   ProgramStateRef StateFailed = E.bindReturnValue(State, C, *EofVal);
@@ -1092,6 +1102,9 @@ void StreamChecker::evalFprintf(const FnDescription *Desc,
   E.setStreamState(StateNotFailed, StreamState::getOpened(Desc));
   C.addTransition(StateNotFailed);
 
+  if (!PedanticMode)
+    return;
+
   // Add transition for the failed state. The resulting value of the file
   // position indicator for the stream is indeterminate.
   StateFailed = E.setStreamState(
@@ -1264,21 +1277,23 @@ void StreamChecker::evalFseek(const FnDescription *Desc, const CallEvent &Call,
   if (!E.Init(Desc, Call, C, State))
     return;
 
-  // Bifurcate the state into failed and non-failed.
-  // Return zero on success, -1 on error.
+  // Add success state.
   ProgramStateRef StateNotFailed = E.bindReturnValue(State, C, 0);
-  ProgramStateRef StateFailed = E.bindReturnValue(State, C, -1);
-
   // No failure: Reset the state to opened with no error.
   StateNotFailed =
       E.setStreamState(StateNotFailed, StreamState::getOpened(Desc));
   C.addTransition(StateNotFailed);
 
+  if (!PedanticMode)
+    return;
+
+  // Add failure state.
   // At error it is possible that fseek fails but sets none of the error flags.
   // If fseek failed, assume that the file position becomes indeterminate in any
   // case.
   // It is allowed to set the position beyond the end of the file. EOF error
   // should not occur.
+  ProgramStateRef StateFailed = E.bindReturnValue(State, C, -1);
   StateFailed = E.setStreamState(
       StateFailed, StreamState::getOpened(Desc, ErrorNone | ErrorFError, true));
   C.addTransition(StateFailed, E.getFailureNoteTag(this, C));
@@ -1316,6 +1331,10 @@ void StreamChecker::evalFsetpos(const FnDescription *Desc,
   StateNotFailed = E.setStreamState(
       StateNotFailed, StreamState::getOpened(Desc, ErrorNone, false));
+  C.addTransition(StateNotFailed);
+
+  if (!PedanticMode)
+    return;
 
   // At failure ferror could be set.
   // The standards do not tell what happens with the file position at failure.
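The StreamChecker hunks above all apply one pattern: the success transition is always added, and the failure transition is only constructed and added when the new Pedantic option is on. A minimal standalone sketch of that control flow follows; FakeState, FakeStreamChecker, and evalWriteLike are invented stand-ins for the real ProgramStateRef and CheckerContext machinery, not the checker's actual API:

#include <iostream>
#include <string>
#include <vector>

// Stand-in for a ProgramStateRef; the real analyzer state is far richer.
struct FakeState { std::string Label; };

struct FakeStreamChecker {
  bool PedanticMode = false;           // set from alpha.unix.Stream:Pedantic
  std::vector<FakeState> Transitions;  // stands in for C.addTransition(...)

  void evalWriteLike() {
    // The success state is always modeled, exactly as in evalFputx above.
    Transitions.push_back({"opened, no error"});
    if (!PedanticMode)
      return;  // failure branch is modeled only in pedantic mode
    // In pedantic mode the failed state is also added, with the file
    // position indicator treated as indeterminate.
    Transitions.push_back({"opened, ferror set, indeterminate position"});
  }
};

int main() {
  FakeStreamChecker Default, Pedantic;
  Pedantic.PedanticMode = true;
  Default.evalWriteLike();
  Pedantic.evalWriteLike();
  std::cout << Default.Transitions.size() << " modeled path(s) by default, "
            << Pedantic.Transitions.size() << " in pedantic mode\n";  // 1, 2
}

Returning early before the failure state is even built means the non-pedantic configuration never bifurcates on, or reports about, the rarely-checked failure paths of fputc, fputs, fprintf, fseek, and fsetpos.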
@@ -1324,7 +1343,6 @@ void StreamChecker::evalFsetpos(const FnDescription *Desc,
   StateFailed = E.setStreamState(
       StateFailed, StreamState::getOpened(Desc, ErrorNone | ErrorFError, true));
 
-  C.addTransition(StateNotFailed);
   C.addTransition(StateFailed, E.getFailureNoteTag(this, C));
 }
 
@@ -1794,7 +1812,9 @@ ProgramStateRef StreamChecker::checkPointerEscape(
 //===----------------------------------------------------------------------===//
 
 void ento::registerStreamChecker(CheckerManager &Mgr) {
-  Mgr.registerChecker<StreamChecker>();
+  auto *Checker = Mgr.registerChecker<StreamChecker>();
+  Checker->PedanticMode =
+      Mgr.getAnalyzerOptions().getCheckerBooleanOption(Checker, "Pedantic");
 }
 
 bool ento::shouldRegisterStreamChecker(const CheckerManager &Mgr) {
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
index a0822513a6d02..984755fa7e502 100644
--- a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
+++ b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -1238,7 +1238,7 @@ class StoreSiteFinder final : public TrackingBugReporterVisitor {
   /// changes to its value in a nested stackframe could be pruned, and
   /// this visitor can prevent that without polluting the bugpath too
   /// much.
-  StoreSiteFinder(bugreporter::TrackerRef ParentTracker, KnownSVal V,
+  StoreSiteFinder(bugreporter::TrackerRef ParentTracker, SVal V,
                   const MemRegion *R, TrackingOptions Options,
                   const StackFrameContext *OriginSFC = nullptr)
       : TrackingBugReporterVisitor(ParentTracker), R(R), V(V), Options(Options),
@@ -2539,9 +2539,9 @@ class DefaultExpressionHandler final : public ExpressionHandler {
         Report.addVisitor<UndefOrNullArgVisitor>(L->getRegion());
         Result.FoundSomethingToTrack = true;
 
-        if (auto KV = RVal.getAs<KnownSVal>())
+        if (!RVal.isUnknown())
           Result.combineWith(
-              getParentTracker().track(*KV, L->getRegion(), Opts, SFC));
+              getParentTracker().track(RVal, L->getRegion(), Opts, SFC));
       }
 
       const MemRegion *RegionRVal = RVal.getAsRegion();
@@ -2663,8 +2663,8 @@ Tracker::Result Tracker::track(const Expr *E, const ExplodedNode *N,
 
 Tracker::Result Tracker::track(SVal V, const MemRegion *R, TrackingOptions Opts,
                                const StackFrameContext *Origin) {
-  if (auto KV = V.getAs<KnownSVal>()) {
-    Report.addVisitor<StoreSiteFinder>(this, *KV, R, Opts, Origin);
+  if (!V.isUnknown()) {
+    Report.addVisitor<StoreSiteFinder>(this, V, R, Opts, Origin);
     return {true};
   }
   return {};
@@ -2692,7 +2692,7 @@ bool bugreporter::trackExpressionValue(const ExplodedNode *InputNode,
       .FoundSomethingToTrack;
 }
 
-void bugreporter::trackStoredValue(KnownSVal V, const MemRegion *R,
+void bugreporter::trackStoredValue(SVal V, const MemRegion *R,
                                    PathSensitiveBugReport &Report,
                                    TrackingOptions Opts,
                                    const StackFrameContext *Origin) {
diff --git a/clang/lib/StaticAnalyzer/Core/CallDescription.cpp b/clang/lib/StaticAnalyzer/Core/CallDescription.cpp
index 459e854cd44d9..0bb0fe66e54ff 100644
--- a/clang/lib/StaticAnalyzer/Core/CallDescription.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CallDescription.cpp
@@ -75,6 +75,48 @@ bool ento::CallDescription::matchesAsWritten(const CallExpr &CE) const {
   return matchesImpl(FD, CE.getNumArgs(), FD->param_size());
 }
 
+bool ento::CallDescription::matchNameOnly(const NamedDecl *ND) const {
+  DeclarationName Name = ND->getDeclName();
+  if (const auto *NameII = Name.getAsIdentifierInfo()) {
+    if (!II)
+      II = &ND->getASTContext().Idents.get(getFunctionName());
+
+    return NameII == *II; // Fast case.
+  }
+
+  // Fallback to the slow stringification and comparison for:
+  // C++ overloaded operators, constructors, destructors, etc.
+  // FIXME This comparison is way SLOWER than comparing pointers.
+  // At some point in the future, we should compare FunctionDecl pointers.
+  return Name.getAsString() == getFunctionName();
+}
+
+bool ento::CallDescription::matchQualifiedNameParts(const Decl *D) const {
+  const auto FindNextNamespaceOrRecord =
+      [](const DeclContext *Ctx) -> const DeclContext * {
+    while (Ctx && !isa<NamespaceDecl, RecordDecl>(Ctx))
+      Ctx = Ctx->getParent();
+    return Ctx;
+  };
+
+  auto QualifierPartsIt = begin_qualified_name_parts();
+  const auto QualifierPartsEndIt = end_qualified_name_parts();
+
+  // Match namespace and record names. Skip unrelated names if they don't
+  // match.
+  const DeclContext *Ctx = FindNextNamespaceOrRecord(D->getDeclContext());
+  for (; Ctx && QualifierPartsIt != QualifierPartsEndIt;
+       Ctx = FindNextNamespaceOrRecord(Ctx->getParent())) {
+    // If not matched just continue and try matching for the next one.
+    if (cast<NamedDecl>(Ctx)->getName() != *QualifierPartsIt)
+      continue;
+    ++QualifierPartsIt;
+  }
+
+  // We matched if we consumed all expected qualifier segments.
+  return QualifierPartsIt == QualifierPartsEndIt;
+}
+
 bool ento::CallDescription::matchesImpl(const FunctionDecl *FD, size_t ArgCount,
                                         size_t ParamCount) const {
   if (!FD)
@@ -88,76 +130,34 @@ bool ento::CallDescription::matchesImpl(const FunctionDecl *FD, size_t ArgCount,
 
   if (MatchAs == Mode::CXXMethod && !isMethod)
     return false;
 
-  if (MatchAs == Mode::CLibrary) {
-    return CheckerContext::isCLibraryFunction(FD, getFunctionName()) &&
-           (!RequiredArgs || *RequiredArgs <= ArgCount) &&
-           (!RequiredParams || *RequiredParams <= ParamCount);
-  }
-
-  if (!II) {
-    II = &FD->getASTContext().Idents.get(getFunctionName());
-  }
-
-  const auto MatchNameOnly = [](const CallDescription &CD,
-                                const NamedDecl *ND) -> bool {
-    DeclarationName Name = ND->getDeclName();
-    if (const auto *II = Name.getAsIdentifierInfo())
-      return II == *CD.II; // Fast case.
-
-    // Fallback to the slow stringification and comparison for:
-    // C++ overloaded operators, constructors, destructors, etc.
-    // FIXME This comparison is way SLOWER than comparing pointers.
-    // At some point in the future, we should compare FunctionDecl pointers.
-    return Name.getAsString() == CD.getFunctionName();
-  };
-
-  const auto ExactMatchArgAndParamCounts =
-      [](size_t ArgCount, size_t ParamCount,
-         const CallDescription &CD) -> bool {
-    const bool ArgsMatch = !CD.RequiredArgs || *CD.RequiredArgs == ArgCount;
-    const bool ParamsMatch =
-        !CD.RequiredParams || *CD.RequiredParams == ParamCount;
-    return ArgsMatch && ParamsMatch;
-  };
-
-  const auto MatchQualifiedNameParts = [](const CallDescription &CD,
-                                          const Decl *D) -> bool {
-    const auto FindNextNamespaceOrRecord =
-        [](const DeclContext *Ctx) -> const DeclContext * {
-      while (Ctx && !isa<NamespaceDecl, RecordDecl>(Ctx))
-        Ctx = Ctx->getParent();
-      return Ctx;
-    };
-
-    auto QualifierPartsIt = CD.begin_qualified_name_parts();
-    const auto QualifierPartsEndIt = CD.end_qualified_name_parts();
-
-    // Match namespace and record names. Skip unrelated names if they don't
-    // match.
-    const DeclContext *Ctx = FindNextNamespaceOrRecord(D->getDeclContext());
-    for (; Ctx && QualifierPartsIt != QualifierPartsEndIt;
-         Ctx = FindNextNamespaceOrRecord(Ctx->getParent())) {
-      // If not matched just continue and try matching for the next one.
-      if (cast<NamedDecl>(Ctx)->getName() != *QualifierPartsIt)
-        continue;
-      ++QualifierPartsIt;
+  if (MatchAs == Mode::CLibraryMaybeHardened) {
+    // In addition to accepting FOO() with CLibrary rules, we also want to
+    // accept calls to __FOO_chk() and __builtin___FOO_chk().
+    if (CheckerContext::isCLibraryFunction(FD) &&
+        CheckerContext::isHardenedVariantOf(FD, getFunctionName())) {
+      // Check that the actual argument/parameter counts are greater or equal
+      // to the required counts. (Setting a requirement to std::nullopt matches
+      // anything, so in that case value_or ensures that the value is compared
+      // with itself.)
+      return (RequiredArgs.value_or(ArgCount) <= ArgCount &&
+              RequiredParams.value_or(ParamCount) <= ParamCount);
     }
+  }
 
-    // We matched if we consumed all expected qualifier segments.
-    return QualifierPartsIt == QualifierPartsEndIt;
-  };
-
-  // Let's start matching...
-  if (!ExactMatchArgAndParamCounts(ArgCount, ParamCount, *this))
+  if (RequiredArgs.value_or(ArgCount) != ArgCount ||
+      RequiredParams.value_or(ParamCount) != ParamCount)
     return false;
 
-  if (!MatchNameOnly(*this, FD))
+  if (MatchAs == Mode::CLibrary || MatchAs == Mode::CLibraryMaybeHardened)
+    return CheckerContext::isCLibraryFunction(FD, getFunctionName());
+
+  if (!matchNameOnly(FD))
     return false;
 
   if (!hasQualifiedNameParts())
     return true;
 
-  return MatchQualifiedNameParts(*this, FD);
+  return matchQualifiedNameParts(FD);
 }
 
 ento::CallDescriptionSet::CallDescriptionSet(
diff --git a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
index 1a9bff529e9bb..113abcd4c2ab0 100644
--- a/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CheckerContext.cpp
@@ -110,13 +110,24 @@ bool CheckerContext::isCLibraryFunction(const FunctionDecl *FD,
   if (FName.starts_with("__inline") && FName.contains(Name))
     return true;
 
-  if (FName.starts_with("__") && FName.ends_with("_chk") &&
-      FName.contains(Name))
-    return true;
-
   return false;
 }
 
+bool CheckerContext::isHardenedVariantOf(const FunctionDecl *FD,
+                                         StringRef Name) {
+  const IdentifierInfo *II = FD->getIdentifier();
+  if (!II)
+    return false;
+
+  auto CompletelyMatchesParts = [II](auto... Parts) -> bool {
+    StringRef FName = II->getName();
+    return (FName.consume_front(Parts) && ...) && FName.empty();
+  };
+
+  return CompletelyMatchesParts("__", Name, "_chk") ||
+         CompletelyMatchesParts("__builtin_", "__", Name, "_chk");
+}
+
 StringRef CheckerContext::getMacroNameOrSpelling(SourceLocation &Loc) {
   if (Loc.isMacroID())
     return Lexer::getImmediateMacroName(Loc, getSourceManager(),
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 492b8f1e2b386..32850f5eea92a 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -363,22 +363,17 @@ class DependencyScanningAction : public tooling::ToolAction {
                               PrebuiltModuleVFSMap, ScanInstance.getDiagnostics()))
       return false;
 
-    auto AdjustCI = [&](CompilerInstance &CI) {
-      // Set up the dependency scanning file system callback if requested.
-      if (DepFS) {
-        auto GetDependencyDirectives = [LocalDepFS = DepFS](FileEntryRef File)
-            -> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
-          if (llvm::ErrorOr<EntryRef> Entry =
-                  LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
-            if (LocalDepFS->ensureDirectiveTokensArePopulated(*Entry))
-              return Entry->getDirectiveTokens();
-          return std::nullopt;
-        };
-
-        CI.getPreprocessor().setDependencyDirectivesFn(
-            std::move(GetDependencyDirectives));
-      }
-    };
+    // Use the dependency scanning optimized file system if requested to do so.
+    if (DepFS)
+      ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile =
+          [LocalDepFS = DepFS](FileEntryRef File)
+          -> std::optional<ArrayRef<dependency_directives_scan::Directive>> {
+        if (llvm::ErrorOr<EntryRef> Entry =
+                LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
+          if (LocalDepFS->ensureDirectiveTokensArePopulated(*Entry))
+            return Entry->getDirectiveTokens();
+        return std::nullopt;
+      };
 
     // Create the dependency collector that will collect the produced
     // dependencies.
@@ -430,11 +425,9 @@ class DependencyScanningAction : public tooling::ToolAction {
 
     std::unique_ptr<FrontendAction> Action;
 
     if (ModuleName)
-      Action = std::make_unique<GetDependenciesByModuleNameAction>(
-          *ModuleName, std::move(AdjustCI));
+      Action = std::make_unique<GetDependenciesByModuleNameAction>(*ModuleName);
     else
-      Action =
-          std::make_unique<ReadPCHAndPreprocessAction>(std::move(AdjustCI));
+      Action = std::make_unique<ReadPCHAndPreprocessAction>();
 
     if (ScanInstance.getDiagnostics().hasErrorOccurred())
       return false;
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index eb5c50c35428f..94ccbd3351b09 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -175,6 +175,9 @@ makeCommonInvocationForModuleBuild(CompilerInvocation CI) {
     CI.getCodeGenOpts().CoverageCompilationDir.clear();
     CI.getCodeGenOpts().CoverageDataFile.clear();
     CI.getCodeGenOpts().CoverageNotesFile.clear();
+    CI.getCodeGenOpts().ProfileInstrumentUsePath.clear();
+    CI.getCodeGenOpts().SampleProfileFile.clear();
+    CI.getCodeGenOpts().ProfileRemappingFile.clear();
   }
 
   // Map output paths that affect behaviour to "-" so their existence is in the
diff --git a/clang/lib/Tooling/Transformer/SourceCode.cpp b/clang/lib/Tooling/Transformer/SourceCode.cpp
index 6aae834b0db56..ab7184c2c069e 100644
--- a/clang/lib/Tooling/Transformer/SourceCode.cpp
+++ b/clang/lib/Tooling/Transformer/SourceCode.cpp
@@ -101,6 +101,54 @@ static bool spelledInMacroDefinition(SourceLocation Loc,
   return false;
 }
 
+// Returns the expansion char-range of `Loc` if `Loc` is a split token. For
+// example, `>>` in nested templates needs the first `>` to be split, otherwise
+// the `SourceLocation` of the token would lex as `>>` instead of `>`.
+static std::optional<CharSourceRange>
+getExpansionForSplitToken(SourceLocation Loc, const SourceManager &SM,
+                          const LangOptions &LangOpts) {
+  if (Loc.isMacroID()) {
+    bool Invalid = false;
+    auto &SLoc = SM.getSLocEntry(SM.getFileID(Loc), &Invalid);
+    if (Invalid)
+      return std::nullopt;
+    if (auto &Expansion = SLoc.getExpansion();
+        !Expansion.isExpansionTokenRange()) {
+      // A char-range expansion is only used where a token-range would be
+      // incorrect, and so identifies this as a split token (and importantly,
+      // not as a macro).
+      return Expansion.getExpansionLocRange();
+    }
+  }
+  return std::nullopt;
+}
+
+// If `Range` covers a split token, returns the expansion range, otherwise
+// returns `Range`.
+static CharSourceRange getRangeForSplitTokens(CharSourceRange Range, + const SourceManager &SM, + const LangOptions &LangOpts) { + if (Range.isTokenRange()) { + auto BeginToken = getExpansionForSplitToken(Range.getBegin(), SM, LangOpts); + auto EndToken = getExpansionForSplitToken(Range.getEnd(), SM, LangOpts); + if (EndToken) { + SourceLocation BeginLoc = + BeginToken ? BeginToken->getBegin() : Range.getBegin(); + // We can't use the expansion location with a token-range, because that + // will incorrectly lex the end token, so use a char-range that ends at + // the split. + return CharSourceRange::getCharRange(BeginLoc, EndToken->getEnd()); + } else if (BeginToken) { + // Since the end token is not split, the whole range covers the split, so + // the only adjustment we make is to use the expansion location of the + // begin token. + return CharSourceRange::getTokenRange(BeginToken->getBegin(), + Range.getEnd()); + } + } + return Range; +} + static CharSourceRange getRange(const CharSourceRange &EditRange, const SourceManager &SM, const LangOptions &LangOpts, @@ -109,13 +157,14 @@ static CharSourceRange getRange(const CharSourceRange &EditRange, if (IncludeMacroExpansion) { Range = Lexer::makeFileCharRange(EditRange, SM, LangOpts); } else { - if (spelledInMacroDefinition(EditRange.getBegin(), SM) || - spelledInMacroDefinition(EditRange.getEnd(), SM)) + auto AdjustedRange = getRangeForSplitTokens(EditRange, SM, LangOpts); + if (spelledInMacroDefinition(AdjustedRange.getBegin(), SM) || + spelledInMacroDefinition(AdjustedRange.getEnd(), SM)) return {}; - auto B = SM.getSpellingLoc(EditRange.getBegin()); - auto E = SM.getSpellingLoc(EditRange.getEnd()); - if (EditRange.isTokenRange()) + auto B = SM.getSpellingLoc(AdjustedRange.getBegin()); + auto E = SM.getSpellingLoc(AdjustedRange.getEnd()); + if (AdjustedRange.isTokenRange()) E = Lexer::getLocForEndOfToken(E, 0, SM, LangOpts); Range = CharSourceRange::getCharRange(B, E); } diff --git a/clang/test/APINotes/instancetype.m b/clang/test/APINotes/instancetype.m index 30339e5386f63..e3c13188ae9f7 100644 --- a/clang/test/APINotes/instancetype.m +++ b/clang/test/APINotes/instancetype.m @@ -1,3 +1,4 @@ +// RUN: rm -rf %t // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fapinotes-modules -fsyntax-only -I %S/Inputs/Headers -verify %s @import InstancetypeModule; diff --git a/clang/test/APINotes/retain-count-convention.m b/clang/test/APINotes/retain-count-convention.m index 4bf9610a352a7..35cfe0581daf5 100644 --- a/clang/test/APINotes/retain-count-convention.m +++ b/clang/test/APINotes/retain-count-convention.m @@ -5,15 +5,15 @@ #import -// CHECK: void *getCFOwnedToUnowned(void) __attribute__((cf_returns_not_retained)); -// CHECK: void *getCFUnownedToOwned(void) __attribute__((cf_returns_retained)); -// CHECK: void *getCFOwnedToNone(void) __attribute__((cf_unknown_transfer)); -// CHECK: id getObjCOwnedToUnowned(void) __attribute__((ns_returns_not_retained)); -// CHECK: id getObjCUnownedToOwned(void) __attribute__((ns_returns_retained)); -// CHECK: int indirectGetCFOwnedToUnowned(void * _Nullable *out __attribute__((cf_returns_not_retained))); -// CHECK: int indirectGetCFUnownedToOwned(void * _Nullable *out __attribute__((cf_returns_retained))); +// CHECK: __attribute__((cf_returns_not_retained)) void *getCFOwnedToUnowned(void); +// CHECK: __attribute__((cf_returns_retained)) void *getCFUnownedToOwned(void); +// CHECK: __attribute__((cf_unknown_transfer)) void *getCFOwnedToNone(void); +// CHECK: 
__attribute__((ns_returns_not_retained)) id getObjCOwnedToUnowned(void); +// CHECK: __attribute__((ns_returns_retained)) id getObjCUnownedToOwned(void); +// CHECK: int indirectGetCFOwnedToUnowned(__attribute__((cf_returns_not_retained)) void * _Nullable *out); +// CHECK: int indirectGetCFUnownedToOwned(__attribute__((cf_returns_retained)) void * _Nullable *out); // CHECK: int indirectGetCFOwnedToNone(void * _Nullable *out); -// CHECK: int indirectGetCFNoneToOwned(void **out __attribute__((cf_returns_not_retained))); +// CHECK: int indirectGetCFNoneToOwned(__attribute__((cf_returns_not_retained)) void **out); // CHECK-LABEL: @interface MethodTest // CHECK: - (id)getOwnedToUnowned __attribute__((ns_returns_not_retained)); diff --git a/clang/test/APINotes/versioned.m b/clang/test/APINotes/versioned.m index 61cc8c3f7c4d1..4a8da1556f874 100644 --- a/clang/test/APINotes/versioned.m +++ b/clang/test/APINotes/versioned.m @@ -13,7 +13,7 @@ #import // CHECK-UNVERSIONED: void moveToPointDUMP(double x, double y) __attribute__((swift_name("moveTo(x:y:)"))); -// CHECK-VERSIONED: void moveToPointDUMP(double x, double y) __attribute__((swift_name("moveTo(a:b:)"))); +// CHECK-VERSIONED:__attribute__((swift_name("moveTo(a:b:)"))) void moveToPointDUMP(double x, double y); // CHECK-DUMP-LABEL: Dumping moveToPointDUMP // CHECK-VERSIONED-DUMP: SwiftVersionedAdditionAttr {{.+}} Implicit 3.0 IsReplacedByActive{{$}} @@ -65,7 +65,7 @@ // CHECK-DUMP-NOT: Dumping -// CHECK-UNVERSIONED: void acceptClosure(void (^block)(void) __attribute__((noescape))); +// CHECK-UNVERSIONED: void acceptClosure(__attribute__((noescape)) void (^block)(void)); // CHECK-VERSIONED: void acceptClosure(void (^block)(void)); // CHECK-UNVERSIONED: void privateFunc(void) __attribute__((swift_private)); diff --git a/clang/test/AST/Interp/arrays.cpp b/clang/test/AST/Interp/arrays.cpp index 2443992f75fb7..607d67bde020c 100644 --- a/clang/test/AST/Interp/arrays.cpp +++ b/clang/test/AST/Interp/arrays.cpp @@ -566,3 +566,7 @@ char melchizedek[2200000000]; typedef decltype(melchizedek[1] - melchizedek[0]) ptrdiff_t; constexpr ptrdiff_t d1 = &melchizedek[0x7fffffff] - &melchizedek[0]; // ok constexpr ptrdiff_t d3 = &melchizedek[0] - &melchizedek[0x80000000u]; // ok + +/// GH#88018 +const int SZA[] = {}; +void testZeroSizedArrayAccess() { unsigned c = SZA[4]; } diff --git a/clang/test/AST/Interp/builtin-functions.cpp b/clang/test/AST/Interp/builtin-functions.cpp index 2e9d1a831dcf6..a7adc92d3714f 100644 --- a/clang/test/AST/Interp/builtin-functions.cpp +++ b/clang/test/AST/Interp/builtin-functions.cpp @@ -482,6 +482,9 @@ void test_noexcept(int *i) { #undef TEST_TYPE } // end namespace test_launder + +/// FIXME: The commented out tests here use a IntAP value and fail. +/// This currently means we will leak the IntAP value since nothing cleans it up. namespace clz { char clz1[__builtin_clz(1) == BITSIZE(int) - 1 ? 1 : -1]; char clz2[__builtin_clz(7) == BITSIZE(int) - 3 ? 1 : -1]; @@ -492,6 +495,63 @@ namespace clz { char clz7[__builtin_clzs(0x1) == BITSIZE(short) - 1 ? 1 : -1]; char clz8[__builtin_clzs(0xf) == BITSIZE(short) - 4 ? 1 : -1]; char clz9[__builtin_clzs(0xfff) == BITSIZE(short) - 12 ? 1 : -1]; + + int clz10 = __builtin_clzg((unsigned char)0); + char clz11[__builtin_clzg((unsigned char)0, 42) == 42 ? 1 : -1]; + char clz12[__builtin_clzg((unsigned char)0x1) == BITSIZE(char) - 1 ? 1 : -1]; + char clz13[__builtin_clzg((unsigned char)0x1, 42) == BITSIZE(char) - 1 ? 1 : -1]; + char clz14[__builtin_clzg((unsigned char)0xf) == BITSIZE(char) - 4 ? 
1 : -1]; + char clz15[__builtin_clzg((unsigned char)0xf, 42) == BITSIZE(char) - 4 ? 1 : -1]; + char clz16[__builtin_clzg((unsigned char)(1 << (BITSIZE(char) - 1))) == 0 ? 1 : -1]; + char clz17[__builtin_clzg((unsigned char)(1 << (BITSIZE(char) - 1)), 42) == 0 ? 1 : -1]; + int clz18 = __builtin_clzg((unsigned short)0); + char clz19[__builtin_clzg((unsigned short)0, 42) == 42 ? 1 : -1]; + char clz20[__builtin_clzg((unsigned short)0x1) == BITSIZE(short) - 1 ? 1 : -1]; + char clz21[__builtin_clzg((unsigned short)0x1, 42) == BITSIZE(short) - 1 ? 1 : -1]; + char clz22[__builtin_clzg((unsigned short)0xf) == BITSIZE(short) - 4 ? 1 : -1]; + char clz23[__builtin_clzg((unsigned short)0xf, 42) == BITSIZE(short) - 4 ? 1 : -1]; + char clz24[__builtin_clzg((unsigned short)(1 << (BITSIZE(short) - 1))) == 0 ? 1 : -1]; + char clz25[__builtin_clzg((unsigned short)(1 << (BITSIZE(short) - 1)), 42) == 0 ? 1 : -1]; + int clz26 = __builtin_clzg(0U); + char clz27[__builtin_clzg(0U, 42) == 42 ? 1 : -1]; + char clz28[__builtin_clzg(0x1U) == BITSIZE(int) - 1 ? 1 : -1]; + char clz29[__builtin_clzg(0x1U, 42) == BITSIZE(int) - 1 ? 1 : -1]; + char clz30[__builtin_clzg(0xfU) == BITSIZE(int) - 4 ? 1 : -1]; + char clz31[__builtin_clzg(0xfU, 42) == BITSIZE(int) - 4 ? 1 : -1]; + char clz32[__builtin_clzg(1U << (BITSIZE(int) - 1)) == 0 ? 1 : -1]; + char clz33[__builtin_clzg(1U << (BITSIZE(int) - 1), 42) == 0 ? 1 : -1]; + int clz34 = __builtin_clzg(0UL); + char clz35[__builtin_clzg(0UL, 42) == 42 ? 1 : -1]; + char clz36[__builtin_clzg(0x1UL) == BITSIZE(long) - 1 ? 1 : -1]; + char clz37[__builtin_clzg(0x1UL, 42) == BITSIZE(long) - 1 ? 1 : -1]; + char clz38[__builtin_clzg(0xfUL) == BITSIZE(long) - 4 ? 1 : -1]; + char clz39[__builtin_clzg(0xfUL, 42) == BITSIZE(long) - 4 ? 1 : -1]; + char clz40[__builtin_clzg(1UL << (BITSIZE(long) - 1)) == 0 ? 1 : -1]; + char clz41[__builtin_clzg(1UL << (BITSIZE(long) - 1), 42) == 0 ? 1 : -1]; + int clz42 = __builtin_clzg(0ULL); + char clz43[__builtin_clzg(0ULL, 42) == 42 ? 1 : -1]; + char clz44[__builtin_clzg(0x1ULL) == BITSIZE(long long) - 1 ? 1 : -1]; + char clz45[__builtin_clzg(0x1ULL, 42) == BITSIZE(long long) - 1 ? 1 : -1]; + char clz46[__builtin_clzg(0xfULL) == BITSIZE(long long) - 4 ? 1 : -1]; + char clz47[__builtin_clzg(0xfULL, 42) == BITSIZE(long long) - 4 ? 1 : -1]; + char clz48[__builtin_clzg(1ULL << (BITSIZE(long long) - 1)) == 0 ? 1 : -1]; + char clz49[__builtin_clzg(1ULL << (BITSIZE(long long) - 1), 42) == 0 ? 1 : -1]; +#ifdef __SIZEOF_INT128__ + // int clz50 = __builtin_clzg((unsigned __int128)0); + char clz51[__builtin_clzg((unsigned __int128)0, 42) == 42 ? 1 : -1]; + char clz52[__builtin_clzg((unsigned __int128)0x1) == BITSIZE(__int128) - 1 ? 1 : -1]; + char clz53[__builtin_clzg((unsigned __int128)0x1, 42) == BITSIZE(__int128) - 1 ? 1 : -1]; + char clz54[__builtin_clzg((unsigned __int128)0xf) == BITSIZE(__int128) - 4 ? 1 : -1]; + char clz55[__builtin_clzg((unsigned __int128)0xf, 42) == BITSIZE(__int128) - 4 ? 1 : -1]; +#endif +#ifndef __AVR__ + // int clz58 = __builtin_clzg((unsigned _BitInt(128))0); + char clz59[__builtin_clzg((unsigned _BitInt(128))0, 42) == 42 ? 1 : -1]; + char clz60[__builtin_clzg((unsigned _BitInt(128))0x1) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1]; + char clz61[__builtin_clzg((unsigned _BitInt(128))0x1, 42) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1]; + char clz62[__builtin_clzg((unsigned _BitInt(128))0xf) == BITSIZE(_BitInt(128)) - 4 ? 1 : -1]; + char clz63[__builtin_clzg((unsigned _BitInt(128))0xf, 42) == BITSIZE(_BitInt(128)) - 4 ? 
1 : -1]; +#endif } namespace ctz { @@ -502,6 +562,66 @@ namespace ctz { char ctz5[__builtin_ctzl(0x10L) == 4 ? 1 : -1]; char ctz6[__builtin_ctzll(0x100LL) == 8 ? 1 : -1]; char ctz7[__builtin_ctzs(1 << (BITSIZE(short) - 1)) == BITSIZE(short) - 1 ? 1 : -1]; + int ctz8 = __builtin_ctzg((unsigned char)0); + char ctz9[__builtin_ctzg((unsigned char)0, 42) == 42 ? 1 : -1]; + char ctz10[__builtin_ctzg((unsigned char)0x1) == 0 ? 1 : -1]; + char ctz11[__builtin_ctzg((unsigned char)0x1, 42) == 0 ? 1 : -1]; + char ctz12[__builtin_ctzg((unsigned char)0x10) == 4 ? 1 : -1]; + char ctz13[__builtin_ctzg((unsigned char)0x10, 42) == 4 ? 1 : -1]; + char ctz14[__builtin_ctzg((unsigned char)(1 << (BITSIZE(char) - 1))) == BITSIZE(char) - 1 ? 1 : -1]; + char ctz15[__builtin_ctzg((unsigned char)(1 << (BITSIZE(char) - 1)), 42) == BITSIZE(char) - 1 ? 1 : -1]; + int ctz16 = __builtin_ctzg((unsigned short)0); + char ctz17[__builtin_ctzg((unsigned short)0, 42) == 42 ? 1 : -1]; + char ctz18[__builtin_ctzg((unsigned short)0x1) == 0 ? 1 : -1]; + char ctz19[__builtin_ctzg((unsigned short)0x1, 42) == 0 ? 1 : -1]; + char ctz20[__builtin_ctzg((unsigned short)0x10) == 4 ? 1 : -1]; + char ctz21[__builtin_ctzg((unsigned short)0x10, 42) == 4 ? 1 : -1]; + char ctz22[__builtin_ctzg((unsigned short)(1 << (BITSIZE(short) - 1))) == BITSIZE(short) - 1 ? 1 : -1]; + char ctz23[__builtin_ctzg((unsigned short)(1 << (BITSIZE(short) - 1)), 42) == BITSIZE(short) - 1 ? 1 : -1]; + int ctz24 = __builtin_ctzg(0U); + char ctz25[__builtin_ctzg(0U, 42) == 42 ? 1 : -1]; + char ctz26[__builtin_ctzg(0x1U) == 0 ? 1 : -1]; + char ctz27[__builtin_ctzg(0x1U, 42) == 0 ? 1 : -1]; + char ctz28[__builtin_ctzg(0x10U) == 4 ? 1 : -1]; + char ctz29[__builtin_ctzg(0x10U, 42) == 4 ? 1 : -1]; + char ctz30[__builtin_ctzg(1U << (BITSIZE(int) - 1)) == BITSIZE(int) - 1 ? 1 : -1]; + char ctz31[__builtin_ctzg(1U << (BITSIZE(int) - 1), 42) == BITSIZE(int) - 1 ? 1 : -1]; + int ctz32 = __builtin_ctzg(0UL); + char ctz33[__builtin_ctzg(0UL, 42) == 42 ? 1 : -1]; + char ctz34[__builtin_ctzg(0x1UL) == 0 ? 1 : -1]; + char ctz35[__builtin_ctzg(0x1UL, 42) == 0 ? 1 : -1]; + char ctz36[__builtin_ctzg(0x10UL) == 4 ? 1 : -1]; + char ctz37[__builtin_ctzg(0x10UL, 42) == 4 ? 1 : -1]; + char ctz38[__builtin_ctzg(1UL << (BITSIZE(long) - 1)) == BITSIZE(long) - 1 ? 1 : -1]; + char ctz39[__builtin_ctzg(1UL << (BITSIZE(long) - 1), 42) == BITSIZE(long) - 1 ? 1 : -1]; + int ctz40 = __builtin_ctzg(0ULL); + char ctz41[__builtin_ctzg(0ULL, 42) == 42 ? 1 : -1]; + char ctz42[__builtin_ctzg(0x1ULL) == 0 ? 1 : -1]; + char ctz43[__builtin_ctzg(0x1ULL, 42) == 0 ? 1 : -1]; + char ctz44[__builtin_ctzg(0x10ULL) == 4 ? 1 : -1]; + char ctz45[__builtin_ctzg(0x10ULL, 42) == 4 ? 1 : -1]; + char ctz46[__builtin_ctzg(1ULL << (BITSIZE(long long) - 1)) == BITSIZE(long long) - 1 ? 1 : -1]; + char ctz47[__builtin_ctzg(1ULL << (BITSIZE(long long) - 1), 42) == BITSIZE(long long) - 1 ? 1 : -1]; +#ifdef __SIZEOF_INT128__ + // int ctz48 = __builtin_ctzg((unsigned __int128)0); + char ctz49[__builtin_ctzg((unsigned __int128)0, 42) == 42 ? 1 : -1]; + char ctz50[__builtin_ctzg((unsigned __int128)0x1) == 0 ? 1 : -1]; + char ctz51[__builtin_ctzg((unsigned __int128)0x1, 42) == 0 ? 1 : -1]; + char ctz52[__builtin_ctzg((unsigned __int128)0x10) == 4 ? 1 : -1]; + char ctz53[__builtin_ctzg((unsigned __int128)0x10, 42) == 4 ? 1 : -1]; + char ctz54[__builtin_ctzg((unsigned __int128)1 << (BITSIZE(__int128) - 1)) == BITSIZE(__int128) - 1 ? 
1 : -1]; + char ctz55[__builtin_ctzg((unsigned __int128)1 << (BITSIZE(__int128) - 1), 42) == BITSIZE(__int128) - 1 ? 1 : -1]; +#endif +#ifndef __AVR__ + // int ctz56 = __builtin_ctzg((unsigned _BitInt(128))0); + char ctz57[__builtin_ctzg((unsigned _BitInt(128))0, 42) == 42 ? 1 : -1]; + char ctz58[__builtin_ctzg((unsigned _BitInt(128))0x1) == 0 ? 1 : -1]; + char ctz59[__builtin_ctzg((unsigned _BitInt(128))0x1, 42) == 0 ? 1 : -1]; + char ctz60[__builtin_ctzg((unsigned _BitInt(128))0x10) == 4 ? 1 : -1]; + char ctz61[__builtin_ctzg((unsigned _BitInt(128))0x10, 42) == 4 ? 1 : -1]; + char ctz62[__builtin_ctzg((unsigned _BitInt(128))1 << (BITSIZE(_BitInt(128)) - 1)) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1]; + char ctz63[__builtin_ctzg((unsigned _BitInt(128))1 << (BITSIZE(_BitInt(128)) - 1), 42) == BITSIZE(_BitInt(128)) - 1 ? 1 : -1]; +#endif } namespace bswap { diff --git a/clang/test/AST/Interp/c.c b/clang/test/AST/Interp/c.c index 10e23839f2ba2..cdecd3e83a997 100644 --- a/clang/test/AST/Interp/c.c +++ b/clang/test/AST/Interp/c.c @@ -209,3 +209,21 @@ const struct StrA * const sb = &sa; const struct StrA sc = *sb; _Static_assert(sc.a == 12, ""); // pedantic-ref-warning {{GNU extension}} \ // pedantic-expected-warning {{GNU extension}} + +_Static_assert(((void*)0 + 1) != (void*)0, ""); // pedantic-expected-warning {{arithmetic on a pointer to void is a GNU extension}} \ + // pedantic-expected-warning {{not an integer constant expression}} \ + // pedantic-expected-note {{cannot perform pointer arithmetic on null pointer}} \ + // pedantic-ref-warning {{arithmetic on a pointer to void is a GNU extension}} \ + // pedantic-ref-warning {{not an integer constant expression}} \ + // pedantic-ref-note {{cannot perform pointer arithmetic on null pointer}} + +typedef __INTPTR_TYPE__ intptr_t; +int array[(intptr_t)(int*)1]; // ref-warning {{variable length array folded to constant array}} \ + // pedantic-ref-warning {{variable length array folded to constant array}} \ + // expected-warning {{variable length array folded to constant array}} \ + // pedantic-expected-warning {{variable length array folded to constant array}} + +int castViaInt[*(int*)(unsigned long)"test"]; // ref-error {{variable length array}} \ + // pedantic-ref-error {{variable length array}} \ + // expected-error {{variable length array}} \ + // pedantic-expected-error {{variable length array}} diff --git a/clang/test/AST/Interp/const-eval.c b/clang/test/AST/Interp/const-eval.c new file mode 100644 index 0000000000000..72c0833a0f630 --- /dev/null +++ b/clang/test/AST/Interp/const-eval.c @@ -0,0 +1,192 @@ +// RUN: %clang_cc1 -fsyntax-only -verify=both,ref -triple x86_64-linux %s -Wno-tautological-pointer-compare -Wno-pointer-to-int-cast +// RUN: %clang_cc1 -fsyntax-only -verify=both,expected -triple x86_64-linux %s -Wno-tautological-pointer-compare -Wno-pointer-to-int-cast -fexperimental-new-constant-interpreter -DNEW_INTERP +// RUN: %clang_cc1 -fsyntax-only -verify=both,ref -triple powerpc64-ibm-aix-xcoff %s -Wno-tautological-pointer-compare -Wno-pointer-to-int-cast +// RUN: %clang_cc1 -fsyntax-only -verify=both,expected -triple powerpc64-ibm-aix-xcoff %s -Wno-tautological-pointer-compare -Wno-pointer-to-int-cast -fexperimental-new-constant-interpreter -DNEW_INTERP + +/// This is a version of test/Sema/const-eval.c with the +/// tests commented out that the new constant expression interpreter does +/// not support yet. 
They are all marked with the NEW_INTERP define: +/// +/// - builtin_constant_p +/// - unions + + +#define EVAL_EXPR(testno, expr) enum { test##testno = (expr) }; struct check_positive##testno { int a[test##testno]; }; +int x; +EVAL_EXPR(1, (_Bool)&x) +EVAL_EXPR(2, (int)(1.0+(double)4)) +EVAL_EXPR(3, (int)(1.0+(float)4.0)) +EVAL_EXPR(4, (_Bool)(1 ? (void*)&x : 0)) +EVAL_EXPR(5, (_Bool)(int[]){0}) +struct y {int x,y;}; +EVAL_EXPR(6, (int)(1+(struct y*)0)) +_Static_assert((long)&((struct y*)0)->y > 0, ""); +EVAL_EXPR(7, (int)&((struct y*)0)->y) +EVAL_EXPR(8, (_Bool)"asdf") +EVAL_EXPR(9, !!&x) +EVAL_EXPR(10, ((void)1, 12)) +void g0(void); +EVAL_EXPR(11, (g0(), 12)) // both-error {{not an integer constant expression}} +EVAL_EXPR(12, 1.0&&2.0) +EVAL_EXPR(13, x || 3.0) // both-error {{not an integer constant expression}} + +unsigned int l_19 = 1; +EVAL_EXPR(14, (1 ^ l_19) && 1); // both-error {{not an integer constant expression}} + +void f(void) +{ + int a; + EVAL_EXPR(15, (_Bool)&a); +} + +_Complex float g16 = (1.0f + 1.0fi); + +// ?: in constant expressions. +int g17[(3?:1) - 2]; + +EVAL_EXPR(18, ((int)((void*)10 + 10)) == 20 ? 1 : -1); + +struct s { + int a[(int)-1.0f]; // both-error {{array size is negative}} +}; + +EVAL_EXPR(19, ((int)&*(char*)10 == 10 ? 1 : -1)); + +#ifndef NEW_INTERP +EVAL_EXPR(20, __builtin_constant_p(*((int*) 10))); +#endif + +EVAL_EXPR(21, (__imag__ 2i) == 2 ? 1 : -1); + +EVAL_EXPR(22, (__real__ (2i+3)) == 3 ? 1 : -1); + +int g23[(int)(1.0 / 1.0)] = { 1 }; // both-warning {{folded to constant array}} +int g24[(int)(1.0 / 1.0)] = { 1 , 2 }; // both-warning {{folded to constant array}} \ + // both-warning {{excess elements in array initializer}} +int g25[(int)(1.0 + 1.0)], g26 = sizeof(g25); // both-warning {{folded to constant array}} + +EVAL_EXPR(26, (_Complex double)0 ? -1 : 1) +EVAL_EXPR(27, (_Complex int)0 ? -1 : 1) +EVAL_EXPR(28, (_Complex double)1 ? 1 : -1) +EVAL_EXPR(29, (_Complex int)1 ? 1 : -1) + +// PR4027 +struct a { int x, y; }; +static struct a V2 = (struct a)(struct a){ 1, 2}; +static const struct a V1 = (struct a){ 1, 2}; + +EVAL_EXPR(30, (int)(_Complex float)((1<<30)-1) == (1<<30) ? 1 : -1) +EVAL_EXPR(31, (int*)0 == (int*)0 ? 1 : -1) +EVAL_EXPR(32, (int*)0 != (int*)0 ? -1 : 1) +EVAL_EXPR(33, (void*)0 - (void*)0 == 0 ? 1 : -1) + +void foo(void) {} +EVAL_EXPR(34, (foo == (void *)0) ? -1 : 1) + +// No PR. Mismatched bitwidths lead to a crash on second evaluation. +const _Bool constbool = 0; +EVAL_EXPR(35, constbool) +EVAL_EXPR(36, constbool) + +EVAL_EXPR(37, ((void)1,2.0) == 2.0 ? 1 : -1) +EVAL_EXPR(38, __builtin_expect(1,1) == 1 ? 1 : -1) + +// PR7884 +EVAL_EXPR(39, __real__(1.f) == 1 ? 1 : -1) +EVAL_EXPR(40, __imag__(1.f) == 0 ? 1 : -1) + +// From gcc testsuite +EVAL_EXPR(41, (int)(1+(_Complex unsigned)2)) + +void rdar8875946(void) { + double _Complex P; + float _Complex P2 = 3.3f + P; +} + +double d = (d = 0.0); // both-error {{not a compile-time constant}} +double d2 = ++d; // both-error {{not a compile-time constant}} + +int n = 2; +int intLvalue[*(int*)((long)&n ?: 1)] = { 1, 2 }; // both-error {{variable length array}} + +union u { int a; char b[4]; }; +char c = ((union u)(123456)).b[0]; // both-error {{not a compile-time constant}} + +#ifndef NEW_INTERP +extern const int weak_int __attribute__((weak)); +const int weak_int = 42; +int weak_int_test = weak_int; // both-error {{not a compile-time constant}} +#endif + +int literalVsNull1 = "foo" == 0; +int literalVsNull2 = 0 == "foo"; + +// PR11385. 
+int castViaInt[*(int*)(unsigned long)"test"]; // both-error {{variable length array}} + +// PR11391. +#ifndef NEW_INTERP +struct PR11391 { _Complex float f; } pr11391; +EVAL_EXPR(42, __builtin_constant_p(pr11391.f = 1)) +#endif + +// PR12043 +float varfloat; +const float constfloat = 0; +EVAL_EXPR(43, varfloat && constfloat) // both-error {{not an integer constant expression}} +EVAL_EXPR(45, ((char*)-1) + 1 == 0 ? 1 : -1) +EVAL_EXPR(46, ((char*)-1) + 1 < (char*) -1 ? 1 : -1) +EVAL_EXPR(47, &x < &x + 1 ? 1 : -1) +EVAL_EXPR(48, &x != &x - 1 ? 1 : -1) +EVAL_EXPR(49, &x < &x - 100 ? 1 : -1) // ref-error {{not an integer constant expression}} + +/// FIXME: Rejecting this is correct, BUT when converting the innermost pointer +/// to an integer, we do not preserve the information where it came from. So when we later +/// create a pointer from it, it also doesn't have that information, which means +/// hasSameBase() for those two pointers will return false. And in those cases, we emit +/// the diagnostic: +/// comparison between '&Test50' and '&(631578)' has unspecified value +extern struct Test50S Test50; +EVAL_EXPR(50, &Test50 < (struct Test50S*)((unsigned long)&Test50 + 10)) // both-error {{not an integer constant expression}} \ + // expected-note {{comparison between}} + +EVAL_EXPR(51, 0 != (float)1e99) + +// PR21945 +void PR21945(void) { int i = (({}), 0l); } + +void PR24622(void); +struct PR24622 {} pr24622; +EVAL_EXPR(52, &pr24622 == (void *)&PR24622); + +// We evaluate these by providing 2s' complement semantics in constant +// expressions, like we do for integers. +void *PR28739a = (__int128)(unsigned long)-1 + &PR28739a; // both-warning {{the pointer incremented by 18446744073709551615 refers past the last possible element for an array in 64-bit address space containing 64-bit (8-byte) elements (max possible 2305843009213693952 elements)}} + +void *PR28739b = &PR28739b + (__int128)(unsigned long)-1; // both-warning {{refers past the last possible element}} +__int128 PR28739c = (&PR28739c + (__int128)(unsigned long)-1) - &PR28739c; // both-warning {{refers past the last possible element}} +void *PR28739d = &(&PR28739d)[(__int128)(unsigned long)-1]; // both-warning {{refers past the last possible element}} + +struct PR35214_X { + int k; + int arr[]; +}; +int PR35214_x; +int PR35214_y = ((struct PR35214_X *)&PR35214_x)->arr[1]; // both-error {{not a compile-time constant}} +#ifndef NEW_INTERP +int *PR35214_z = &((struct PR35214_X *)&PR35214_x)->arr[1]; // ok, &PR35214_x + 2 +#endif + +/// From const-eval-64.c +EVAL_EXPR(53, ((char*)-1LL) + 1 == 0 ? 1 : -1) +EVAL_EXPR(54, ((char*)-1LL) + 1 < (char*) -1 ? 
1 : -1) + +/// === Additions === +#if __SIZEOF_INT__ == 4 +typedef __INTPTR_TYPE__ intptr_t; +const intptr_t A = (intptr_t)(((int*) 0) + 1); +const intptr_t B = (intptr_t)(((char*)0) + 3); +_Static_assert(A > B, ""); +#else +#error :( +#endif diff --git a/clang/test/AST/Interp/functions.cpp b/clang/test/AST/Interp/functions.cpp index 67fd9036d81e7..4fb3c816000ab 100644 --- a/clang/test/AST/Interp/functions.cpp +++ b/clang/test/AST/Interp/functions.cpp @@ -185,6 +185,21 @@ namespace FunctionReturnType { constexpr int (*invalidFnPtr)() = m; static_assert(invalidFnPtr() == 5, ""); // both-error {{not an integral constant expression}} \ // both-note {{non-constexpr function 'm'}} + + +namespace ToBool { + void mismatched(int x) {} + typedef void (*callback_t)(int); + void foo() { + callback_t callback = (callback_t)mismatched; // warns + /// Casts a function pointer to a boolean and then back to a function pointer. + /// This is extracted from test/Sema/callingconv-cast.c + callback = (callback_t)!mismatched; // both-warning {{address of function 'mismatched' will always evaluate to 'true'}} \ + // both-note {{prefix with the address-of operator to silence this warning}} + } +} + + } namespace Comparison { diff --git a/clang/test/AST/Interp/ms.cpp b/clang/test/AST/Interp/ms.cpp index 99716e90c7a1d..fe5ed219946e7 100644 --- a/clang/test/AST/Interp/ms.cpp +++ b/clang/test/AST/Interp/ms.cpp @@ -6,3 +6,5 @@ /// Used to assert because the two parameters to _rotl do not have the same type. static_assert(_rotl(0x01, 5) == 32); + +static_assert(alignof(__unaligned int) == 1, ""); diff --git a/clang/test/AST/Interp/vectors.cpp b/clang/test/AST/Interp/vectors.cpp new file mode 100644 index 0000000000000..8afef3c897bff --- /dev/null +++ b/clang/test/AST/Interp/vectors.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -fexperimental-new-constant-interpreter -verify=expected,both %s +// RUN: %clang_cc1 -verify=ref,both %s + +// both-no-diagnostics + +typedef int __attribute__((vector_size(16))) VI4; +constexpr VI4 A = {1,2,3,4}; + +/// From constant-expression-cxx11.cpp +namespace Vector { + typedef int __attribute__((vector_size(16))) VI4; + constexpr VI4 f(int n) { + return VI4 { n * 3, n + 4, n - 5, n / 6 }; + } + constexpr auto v1 = f(10); + + typedef double __attribute__((vector_size(32))) VD4; + constexpr VD4 g(int n) { + return (VD4) { n / 2.0, n + 1.5, n - 5.4, n * 0.9 }; + } + constexpr auto v2 = g(4); +} diff --git a/clang/test/AST/Interp/weak.cpp b/clang/test/AST/Interp/weak.cpp new file mode 100644 index 0000000000000..d4aac3ff764dd --- /dev/null +++ b/clang/test/AST/Interp/weak.cpp @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -std=c++20 -fexperimental-new-constant-interpreter -verify=expected,both %s +// RUN: %clang_cc1 -std=c++20 -verify=ref,both %s + + + + +/// FIXME: The new interpreter also emits the "address of weak declaration" note in the pointer-to-bool case. 
+ +[[gnu::weak]] extern int a; +int ha[(bool)&a]; // both-warning {{variable length arrays in C++ are a Clang extension}} \ + // expected-note {{comparison against address of weak declaration}} \ + // both-error {{variable length array declaration not allowed at file scope}} +int ha2[&a == nullptr]; // both-warning {{variable length arrays in C++ are a Clang extension}} \ + // both-note {{comparison against address of weak declaration '&a' can only be performed at runtime}} \ + // both-error {{variable length array declaration not allowed at file scope}} diff --git a/clang/test/AST/ast-print-method-decl.cpp b/clang/test/AST/ast-print-method-decl.cpp index 75dea0cac16be..cb5d10096381a 100644 --- a/clang/test/AST/ast-print-method-decl.cpp +++ b/clang/test/AST/ast-print-method-decl.cpp @@ -94,7 +94,7 @@ struct DefMethodsWithoutBody { // CHECK-NEXT: DefMethodsWithoutBody() = default; ~DefMethodsWithoutBody() = default; - // CHECK-NEXT: __attribute__((alias("X"))) void m1(); + // CHECK-NEXT: void m1() __attribute__((alias("X"))); void m1() __attribute__((alias("X"))); // CHECK-NEXT: }; diff --git a/clang/test/AST/ast-print-no-sanitize.cpp b/clang/test/AST/ast-print-no-sanitize.cpp index 4ff97190955ad..a5ada8246f0c0 100644 --- a/clang/test/AST/ast-print-no-sanitize.cpp +++ b/clang/test/AST/ast-print-no-sanitize.cpp @@ -4,4 +4,4 @@ void should_not_crash_1() __attribute__((no_sanitize_memory)); [[clang::no_sanitize_memory]] void should_not_crash_2(); // CHECK: void should_not_crash_1() __attribute__((no_sanitize("memory"))); -// CHECK: void should_not_crash_2() {{\[\[}}clang::no_sanitize("memory"){{\]\]}}; +// CHECK: {{\[\[}}clang::no_sanitize("memory"){{\]\]}} void should_not_crash_2(); diff --git a/clang/test/AST/attr-print-emit.cpp b/clang/test/AST/attr-print-emit.cpp index 8c48eb92daba5..8c8a2b2080599 100644 --- a/clang/test/AST/attr-print-emit.cpp +++ b/clang/test/AST/attr-print-emit.cpp @@ -73,3 +73,18 @@ class C { // CHECK: void pwtt(void *, int) __attribute__((pointer_with_type_tag(foo, 2, 3))); void pwtt(void *, int) __attribute__((pointer_with_type_tag(foo, 2, 3))); }; + +#define ANNOTATE_ATTR __attribute__((annotate("Annotated"))) +ANNOTATE_ATTR int annotated_attr ANNOTATE_ATTR = 0; +// CHECK: __attribute__((annotate("Annotated"))) int annotated_attr __attribute__((annotate("Annotated"))) = 0; + +// FIXME: We do not print the attribute as written after the type specifier. 
+int ANNOTATE_ATTR annotated_attr_fixme = 0; +// CHECK: __attribute__((annotate("Annotated"))) int annotated_attr_fixme = 0; + +#define NONNULL_ATTR __attribute__((nonnull(1))) +ANNOTATE_ATTR NONNULL_ATTR void fn_non_null_annotated_attr(int *) __attribute__((annotate("AnnotatedRHS"))); +// CHECK:__attribute__((annotate("Annotated"))) __attribute__((nonnull(1))) void fn_non_null_annotated_attr(int *) __attribute__((annotate("AnnotatedRHS"))); + +[[gnu::nonnull(1)]] [[gnu::always_inline]] void cxx11_attr(int*) ANNOTATE_ATTR; +// CHECK: {{\[\[}}gnu::nonnull(1)]] {{\[\[}}gnu::always_inline]] void cxx11_attr(int *) __attribute__((annotate("Annotated"))); diff --git a/clang/test/Analysis/analyzer-config.c b/clang/test/Analysis/analyzer-config.c index 2167a2b32f596..23e37a856d09f 100644 --- a/clang/test/Analysis/analyzer-config.c +++ b/clang/test/Analysis/analyzer-config.c @@ -12,6 +12,7 @@ // CHECK-NEXT: alpha.security.MmapWriteExec:MmapProtExec = 0x04 // CHECK-NEXT: alpha.security.MmapWriteExec:MmapProtRead = 0x01 // CHECK-NEXT: alpha.security.taint.TaintPropagation:Config = "" +// CHECK-NEXT: alpha.unix.Stream:Pedantic = false // CHECK-NEXT: apply-fixits = false // CHECK-NEXT: assume-controlled-environment = false // CHECK-NEXT: avoid-suppressing-null-argument-paths = false diff --git a/clang/test/Analysis/scopes-cfg-output.cpp b/clang/test/Analysis/scopes-cfg-output.cpp index 4eb8967e37351..5e6706602d456 100644 --- a/clang/test/Analysis/scopes-cfg-output.cpp +++ b/clang/test/Analysis/scopes-cfg-output.cpp @@ -1469,7 +1469,7 @@ void test_cleanup_functions2(int m) { // CHECK: [B1] // CHECK-NEXT: 1: CFGScopeBegin(f) // CHECK-NEXT: 2: (CXXConstructExpr, [B1.3], F) -// CHECK-NEXT: 3: __attribute__((cleanup(cleanup_F))) F f; +// CHECK-NEXT: 3: F f __attribute__((cleanup(cleanup_F))); // CHECK-NEXT: 4: CleanupFunction (cleanup_F) // CHECK-NEXT: 5: [B1.3].~F() (Implicit destructor) // CHECK-NEXT: 6: CFGScopeEnd(f) diff --git a/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c b/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c index 281fbaaffe703..cac3fe5c5151c 100644 --- a/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c +++ b/clang/test/Analysis/std-c-library-functions-vs-stream-checker.c @@ -1,6 +1,7 @@ // Check the case when only the StreamChecker is enabled. // RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core,alpha.unix.Stream \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true \ // RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config eagerly-assume=false \ // RUN: -triple x86_64-unknown-linux \ @@ -19,6 +20,7 @@ // StdLibraryFunctionsChecker are enabled. 
// RUN: %clang_analyze_cc1 %s \ // RUN: -analyzer-checker=core,alpha.unix.Stream \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true \ // RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-config unix.StdCLibraryFunctions:DisplayLoadedSummaries=true \ // RUN: -analyzer-checker=debug.ExprInspection \ diff --git a/clang/test/Analysis/stream-errno-note.c b/clang/test/Analysis/stream-errno-note.c index 2411a2d9a00a7..fb12f0bace937 100644 --- a/clang/test/Analysis/stream-errno-note.c +++ b/clang/test/Analysis/stream-errno-note.c @@ -1,5 +1,6 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=core \ // RUN: -analyzer-checker=alpha.unix.Stream \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true \ // RUN: -analyzer-checker=unix.Errno \ // RUN: -analyzer-checker=unix.StdCLibraryFunctions \ // RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true \ diff --git a/clang/test/Analysis/stream-errno.c b/clang/test/Analysis/stream-errno.c index 5f0a58032fa26..08382eaf6abf9 100644 --- a/clang/test/Analysis/stream-errno.c +++ b/clang/test/Analysis/stream-errno.c @@ -1,4 +1,5 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,unix.Errno,unix.StdCLibraryFunctions,debug.ExprInspection \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true \ // RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify %s #include "Inputs/system-header-simulator.h" diff --git a/clang/test/Analysis/stream-error.c b/clang/test/Analysis/stream-error.c index 7f9116ff40144..2abf4b900a047 100644 --- a/clang/test/Analysis/stream-error.c +++ b/clang/test/Analysis/stream-error.c @@ -1,6 +1,7 @@ // RUN: %clang_analyze_cc1 -verify %s \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=alpha.unix.Stream \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true \ // RUN: -analyzer-checker=debug.StreamTester \ // RUN: -analyzer-checker=debug.ExprInspection diff --git a/clang/test/Analysis/stream-note.c b/clang/test/Analysis/stream-note.c index 54ea699f46674..03a8ff4e468f6 100644 --- a/clang/test/Analysis/stream-note.c +++ b/clang/test/Analysis/stream-note.c @@ -1,6 +1,8 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream -analyzer-output text \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true \ // RUN: -verify %s // RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,unix.StdCLibraryFunctions -analyzer-output text \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true \ // RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify=expected,stdargs %s #include "Inputs/system-header-simulator.h" diff --git a/clang/test/Analysis/stream-pedantic.c b/clang/test/Analysis/stream-pedantic.c new file mode 100644 index 0000000000000..2bbea81d47ef6 --- /dev/null +++ b/clang/test/Analysis/stream-pedantic.c @@ -0,0 +1,95 @@ +// RUN: %clang_analyze_cc1 -triple=x86_64-pc-linux-gnu -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=false -verify=nopedantic %s + +// RUN: %clang_analyze_cc1 -triple=x86_64-pc-linux-gnu -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true -verify=pedantic %s + +#include "Inputs/system-header-simulator.h" + +void clang_analyzer_eval(int); + +void check_fwrite(void) { + char *Buf = "123456789"; + FILE *Fp = tmpfile(); + if (!Fp) + return; + size_t Ret = fwrite(Buf, 1, 10, Fp); + clang_analyzer_eval(Ret == 0); // nopedantic-warning {{FALSE}} \ + // 
pedantic-warning {{FALSE}} \ + // pedantic-warning {{TRUE}} + fputc('A', Fp); // pedantic-warning {{might be 'indeterminate'}} + fclose(Fp); +} + +void check_fputc(void) { + FILE *Fp = tmpfile(); + if (!Fp) + return; + int Ret = fputc('A', Fp); + clang_analyzer_eval(Ret == EOF); // nopedantic-warning {{FALSE}} \ + // pedantic-warning {{FALSE}} \ + // pedantic-warning {{TRUE}} + fputc('A', Fp); // pedantic-warning {{might be 'indeterminate'}} + fclose(Fp); +} + +void check_fputs(void) { + FILE *Fp = tmpfile(); + if (!Fp) + return; + int Ret = fputs("ABC", Fp); + clang_analyzer_eval(Ret == EOF); // nopedantic-warning {{FALSE}} \ + // pedantic-warning {{FALSE}} \ + // pedantic-warning {{TRUE}} + fputc('A', Fp); // pedantic-warning {{might be 'indeterminate'}} + fclose(Fp); +} + +void check_fprintf(void) { + FILE *Fp = tmpfile(); + if (!Fp) + return; + int Ret = fprintf(Fp, "ABC"); + clang_analyzer_eval(Ret < 0); // nopedantic-warning {{FALSE}} \ + // pedantic-warning {{FALSE}} \ + // pedantic-warning {{TRUE}} + fputc('A', Fp); // pedantic-warning {{might be 'indeterminate'}} + fclose(Fp); +} + +void check_fseek(void) { + FILE *Fp = tmpfile(); + if (!Fp) + return; + int Ret = fseek(Fp, 0, 0); + clang_analyzer_eval(Ret == -1); // nopedantic-warning {{FALSE}} \ + // pedantic-warning {{FALSE}} \ + // pedantic-warning {{TRUE}} + fputc('A', Fp); // pedantic-warning {{might be 'indeterminate'}} + fclose(Fp); +} + +void check_fseeko(void) { + FILE *Fp = tmpfile(); + if (!Fp) + return; + int Ret = fseeko(Fp, 0, 0); + clang_analyzer_eval(Ret == -1); // nopedantic-warning {{FALSE}} \ + // pedantic-warning {{FALSE}} \ + // pedantic-warning {{TRUE}} + fputc('A', Fp); // pedantic-warning {{might be 'indeterminate'}} + fclose(Fp); +} + +void check_fsetpos(void) { + FILE *Fp = tmpfile(); + if (!Fp) + return; + fpos_t Pos; + int Ret = fsetpos(Fp, &Pos); + clang_analyzer_eval(Ret); // nopedantic-warning {{FALSE}} \ + // pedantic-warning {{FALSE}} \ + // pedantic-warning {{TRUE}} + fputc('A', Fp); // pedantic-warning {{might be 'indeterminate'}} + fclose(Fp); +} diff --git a/clang/test/Analysis/stream-stdlibraryfunctionargs.c b/clang/test/Analysis/stream-stdlibraryfunctionargs.c index 0053510163efc..2ea6a8c472c61 100644 --- a/clang/test/Analysis/stream-stdlibraryfunctionargs.c +++ b/clang/test/Analysis/stream-stdlibraryfunctionargs.c @@ -1,10 +1,13 @@ // RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,unix.StdCLibraryFunctions,debug.ExprInspection \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true \ // RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify=stream,any %s // RUN: %clang_analyze_cc1 -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true \ // RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify=stream,any %s // RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.StdCLibraryFunctions,debug.ExprInspection \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true \ // RUN: -analyzer-config unix.StdCLibraryFunctions:ModelPOSIX=true -verify=stdfunc,any %s #include "Inputs/system-header-simulator.h" diff --git a/clang/test/Analysis/stream.c b/clang/test/Analysis/stream.c index ba5e66a4102e3..93ed555c89ebd 100644 --- a/clang/test/Analysis/stream.c +++ b/clang/test/Analysis/stream.c @@ -1,7 +1,11 @@ -// RUN: %clang_analyze_cc1 -triple=x86_64-pc-linux-gnu -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection -verify %s -// RUN: %clang_analyze_cc1 
-triple=armv8-none-linux-eabi -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection -verify %s -// RUN: %clang_analyze_cc1 -triple=aarch64-linux-gnu -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection -verify %s -// RUN: %clang_analyze_cc1 -triple=hexagon -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection -verify %s +// RUN: %clang_analyze_cc1 -triple=x86_64-pc-linux-gnu -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true -verify %s +// RUN: %clang_analyze_cc1 -triple=armv8-none-linux-eabi -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true -verify %s +// RUN: %clang_analyze_cc1 -triple=aarch64-linux-gnu -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true -verify %s +// RUN: %clang_analyze_cc1 -triple=hexagon -analyzer-checker=core,alpha.unix.Stream,debug.ExprInspection \ +// RUN: -analyzer-config alpha.unix.Stream:Pedantic=true -verify %s #include "Inputs/system-header-simulator.h" #include "Inputs/system-header-simulator-for-malloc.h" diff --git a/clang/test/C/C11/n1514.c b/clang/test/C/C11/n1514.c new file mode 100644 index 0000000000000..c4c3c1cb86a10 --- /dev/null +++ b/clang/test/C/C11/n1514.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -verify -std=c11 %s +// expected-no-diagnostics + +/* WG14 N1514: Yes + * Conditional normative status for Annex G + */ + +// We don't support Annex G (which introduces imaginary types), but support for +// this annex is conditional in C11. So we can test for conformance to this +// paper by ensuring we don't define the macro claiming we support Annex G. + +#ifdef __STDC_IEC_559_COMPLEX__ +#error "when did this happen??" +#endif diff --git a/clang/test/C/C99/n717.c b/clang/test/C/C99/n717.c new file mode 100644 index 0000000000000..25010b4137065 --- /dev/null +++ b/clang/test/C/C99/n717.c @@ -0,0 +1,75 @@ +// RUN: %clang_cc1 -verify -std=c99 %s +// RUN: %clang_cc1 -verify -std=c99 -fno-dollars-in-identifiers %s + +/* WG14 N717: Clang 17 + * Extended identifiers + */ + +// Used as a sink for UCNs. +#define M(arg) + +// C99 6.4.3p1 specifies the grammar for UCNs. A \u must be followed by exactly +// four hex digits, and \U must be followed by exactly eight. 
+M(\u1) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} +M(\u12) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} +M(\u123) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} +M(\u1234) // Okay +M(\u12345)// Okay, two tokens (UCN followed by 5) + +M(\U1) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} +M(\U12) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} +M(\U123) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} +M(\U1234) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} \ + expected-note {{did you mean to use '\u'?}} +M(\U12345) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} +M(\U123456) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} +M(\U1234567) // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} +M(\U12345678) // Okay +M(\U123456789) // Okay-ish, two tokens (valid-per-spec-but-actually-invalid UCN followed by 9) + +// Now test the ones that should work. Note, these work in C17 and earlier but +// are part of the basic character set in C23 and thus should be diagnosed in +// that mode. They're valid in a character constant, but not valid in an +// identifier, except for U+0024 which is allowed if -fdollars-in-identifiers +// is enabled. +// FIXME: These three should be handled the same way, and should be accepted +// when dollar signs are allowed in identifiers, rather than rejected, see +// GH87106. +M(\u0024) // expected-error {{character '$' cannot be specified by a universal character name}} +M(\U00000024) // expected-error {{character '$' cannot be specified by a universal character name}} +M($) + +// These should always be rejected because they're not valid identifier +// characters. +// FIXME: the diagnostic could be improved to make it clear this is an issue +// with forming an identifier rather than a UCN. +M(\u0040) // expected-error {{character '@' cannot be specified by a universal character name}} +M(\u0060) // expected-error {{character '`' cannot be specified by a universal character name}} +M(\U00000040) // expected-error {{character '@' cannot be specified by a universal character name}} +M(\U00000060) // expected-error {{character '`' cannot be specified by a universal character name}} + +// UCNs outside of identifiers are handled in Phase 5 of translation, so we +// cannot use the macro expansion to test their behavior. + +// This is outside of the range of values specified by ISO 10646. +const char *c1 = "\U00110000"; // expected-error {{invalid universal character}} +// This does not fall outside of the range +const char *c2 = "\U0010FFFF"; + +// These should always be accepted because they're a valid in a character +// constant. +int c3 = '\u0024'; +int c4 = '\u0040'; +int c5 = '\u0060'; + +int c6 = '\U00000024'; +int c7 = '\U00000040'; +int c8 = '\U00000060'; + +// Valid lone surrogates. +M(\uD799) +const char *c9 = "\U0000E000"; + +// Invalid lone surrogates, which are excluded explicitly by 6.4.3p2. 
+M(\uD800) // expected-error {{invalid universal character}} +const char *c10 = "\U0000DFFF"; // expected-error {{invalid universal character}} diff --git a/clang/test/CXX/drs/dr392.cpp b/clang/test/CXX/drs/dr392.cpp new file mode 100644 index 0000000000000..26e6259f71961 --- /dev/null +++ b/clang/test/CXX/drs/dr392.cpp @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -disable-llvm-passes -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -disable-llvm-passes -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -disable-llvm-passes -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -disable-llvm-passes -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -disable-llvm-passes -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -disable-llvm-passes -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -disable-llvm-passes -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK + +#if __cplusplus == 199711L +#define NOTHROW throw() +#else +#define NOTHROW noexcept(true) +#endif + +namespace dr392 { // dr392: 2.8 + +struct A { + operator bool() NOTHROW; +}; + +class C { +public: + C() NOTHROW; + ~C() NOTHROW; + A& get() NOTHROW { return p; } +private: + A p; +}; + +void f() +{ + if (C().get()) {} +} + +} // namespace dr392 + +// CHECK-LABEL: define {{.*}} void @dr392::f()() +// CHECK: call {{.*}} i1 @dr392::A::operator bool() +// CHECK: call void @dr392::C::~C() +// CHECK-LABEL: } diff --git a/clang/test/CXX/drs/dr3xx.cpp b/clang/test/CXX/drs/dr3xx.cpp index 4584801f9f971..483ebf7a08aad 100644 --- a/clang/test/CXX/drs/dr3xx.cpp +++ b/clang/test/CXX/drs/dr3xx.cpp @@ -1401,7 +1401,7 @@ namespace dr387 { // dr387: 2.8 } } -// FIXME: dr388 needs codegen test +// FIXME: dr388 needs libc++abi test namespace dr389 { // dr389: no struct S { @@ -1567,7 +1567,7 @@ namespace dr391 { // dr391: 2.8 c++11 const C &c = fc(); } -// dr392 FIXME write codegen test +// dr392 is in dr392.cpp // dr394: na namespace dr395 { // dr395: 3.0 diff --git a/clang/test/CXX/drs/dr4xx.cpp b/clang/test/CXX/drs/dr4xx.cpp index 343c4ee6f3344..34dd638c1d9b0 100644 --- a/clang/test/CXX/drs/dr4xx.cpp +++ b/clang/test/CXX/drs/dr4xx.cpp @@ -948,7 +948,7 @@ namespace dr460 { // dr460: yes // dr464: na // dr465: na -namespace dr466 { // dr466: no +namespace dr466 { // dr466: 2.8 typedef int I; typedef const int CI; typedef volatile int VI; @@ -960,7 +960,7 @@ namespace dr466 { // dr466: no a->CI::~CI(); a->VI::~VI(); - a->CI::~VI(); // FIXME: This is invalid; CI and VI are not the same scalar type. 
+ a->CI::~VI(); // allowed by changes to [expr.prim.id.qual]/2 introduced in P1131R2
 
   b->~I();
   b->~CI();
diff --git a/clang/test/CXX/drs/dr593.cpp b/clang/test/CXX/drs/dr593.cpp
new file mode 100644
index 0000000000000..4998af966ebb9
--- /dev/null
+++ b/clang/test/CXX/drs/dr593.cpp
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+
+#if __cplusplus == 199711L
+#define NOTHROW throw()
+#else
+#define NOTHROW noexcept(true)
+#endif
+
+namespace dr593 { // dr593: 2.8
+
+void f();
+void fence() NOTHROW;
+
+struct A {
+  ~A() try {
+    f();
+  } catch (...) {
+    fence();
+  }
+};
+
+void g() {
+  A();
+}
+
+} // namespace dr593
+
+// CHECK: call void @dr593::fence()()
+// CHECK-NEXT: invoke void @__cxa_rethrow()
diff --git a/clang/test/CXX/drs/dr5xx.cpp b/clang/test/CXX/drs/dr5xx.cpp
index 426b368b390ae..0ea306a041167
--- a/clang/test/CXX/drs/dr5xx.cpp
+++ b/clang/test/CXX/drs/dr5xx.cpp
@@ -1098,7 +1098,7 @@ namespace dr591 { // dr591: no
 }
 
 // dr592: na
-// dr593 needs an IRGen test.
+// dr593 is in dr593.cpp
 // dr594: na
 
 namespace dr595 { // dr595: dup 1330
diff --git a/clang/test/CXX/drs/dr605.cpp b/clang/test/CXX/drs/dr605.cpp
new file mode 100644
index 0000000000000..6c212d8dabc06
--- /dev/null
+++ b/clang/test/CXX/drs/dr605.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+
+namespace dr605 { // dr605: 2.7
+
+template <typename T>
+static T f(T t) {}
+
+template <>
+int f(int t) {}
+
+void g(int a) {
+  f(a);
+}
+
+} // namespace dr605
+
+// CHECK: define internal {{.*}} i32 @int dr605::f<int>(int)
diff --git a/clang/test/CXX/drs/dr650.cpp b/clang/test/CXX/drs/dr650.cpp
new file mode 100644
index 0000000000000..715b4fdf04a7f
--- /dev/null
+++ b/clang/test/CXX/drs/dr650.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+
+#if __cplusplus == 199711L
+#define NOTHROW throw()
+#else
+#define NOTHROW noexcept(true)
+#endif
+
+namespace dr650 { // dr650: 2.8
+
+struct Q {
+  ~Q() NOTHROW;
+};
+
+struct R {
+  ~R() NOTHROW;
+};
+
+struct S {
+  ~S() NOTHROW;
+};
+
+const S& f() {
+  Q q;
+  return (R(), S());
+}
+
+} // namespace dr650
+
+// CHECK-LABEL: define {{.*}} @dr650::f()()
+// CHECK: call void 
@dr650::S::~S()
+// CHECK: call void @dr650::R::~R()
+// CHECK: call void @dr650::Q::~Q()
+// CHECK-LABEL: }
diff --git a/clang/test/CXX/drs/dr653.cpp b/clang/test/CXX/drs/dr653.cpp
new file mode 100644
index 0000000000000..fd1f0153bfb74
--- /dev/null
+++ b/clang/test/CXX/drs/dr653.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+
+namespace dr653 { // dr653: 2.7
+
+union U {
+  int a;
+  float b;
+};
+
+void f(U u) {
+  U v;
+  v = u;
+}
+
+} // namespace dr653
+
+// CHECK-LABEL: define {{.*}} void @dr653::f(dr653::U)
+// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %v, ptr {{.*}} %u, {{.*}})
+// CHECK-LABEL: }
diff --git a/clang/test/CXX/drs/dr658.cpp b/clang/test/CXX/drs/dr658.cpp
new file mode 100644
index 0000000000000..51034c2af3bf3
--- /dev/null
+++ b/clang/test/CXX/drs/dr658.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK
+
+namespace dr658 { // dr658: 2.7
+
+void f(int* p1) {
+  char* p2 = reinterpret_cast<char*>(p1);
+}
+
+} // namespace dr658
+
+// We're checking that p1 is stored into p2 without changes.
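+// A pointer-to-pointer reinterpret_cast lowers to nothing at the IR level, so
+// the FileCheck lines below expect only a load of p1's value followed by a
+// plain store into p2's slot, with no conversion in between.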
+ +// CHECK-LABEL: define {{.*}} void @dr658::f(int*)(ptr noundef %p1) +// CHECK: [[P1_ADDR:%.+]] = alloca ptr, align 8 +// CHECK-NEXT: [[P2:%.+]] = alloca ptr, align 8 +// CHECK: store ptr %p1, ptr [[P1_ADDR]] +// CHECK-NEXT: [[TEMP:%.+]] = load ptr, ptr [[P1_ADDR]] +// CHECK-NEXT: store ptr [[TEMP]], ptr [[P2]] +// CHECK-LABEL: } diff --git a/clang/test/CXX/drs/dr661.cpp b/clang/test/CXX/drs/dr661.cpp new file mode 100644 index 0000000000000..4e97bb7088476 --- /dev/null +++ b/clang/test/CXX/drs/dr661.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK + +namespace dr661 { + +void f(int a, int b) { // dr661: 2.7 + a == b; + a != b; + a < b; + a <= b; + a > b; + a >= b; +} + +} // namespace dr661 + +// CHECK-LABEL: define {{.*}} void @dr661::f(int, int) +// CHECK: icmp eq +// CHECK: icmp ne +// CHECK: icmp slt +// CHECK: icmp sle +// CHECK: icmp sgt +// CHECK: icmp sge +// CHECK-LABEL: } diff --git a/clang/test/CXX/drs/dr672.cpp b/clang/test/CXX/drs/dr672.cpp new file mode 100644 index 0000000000000..d5f0530ecbc9d --- /dev/null +++ b/clang/test/CXX/drs/dr672.cpp @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | llvm-cxxfilt -n | FileCheck %s --check-prefixes 
CHECK
+
+#if __cplusplus == 199711L
+#define NOTHROW throw()
+#else
+#define NOTHROW noexcept(true)
+#endif
+
+namespace dr672 { // dr672: 2.7
+
+struct A {
+  A() NOTHROW;
+};
+
+void f() {
+  A *a = new A;
+}
+
+} // namespace dr672
+
+// CHECK-LABEL: define {{.*}} void @dr672::f()()
+// CHECK: [[A:%.+]] = alloca ptr
+// CHECK: [[CALL:%.+]] = call {{.*}} ptr @operator new(unsigned long)
+// CHECK: call void @dr672::A::A()
+// CHECK: store ptr [[CALL]], ptr [[A]]
+// CHECK-LABEL: }
diff --git a/clang/test/CXX/drs/dr6xx.cpp b/clang/test/CXX/drs/dr6xx.cpp
index 190e05784f32b..eeb41eee9c30f 100644
--- a/clang/test/CXX/drs/dr6xx.cpp
+++ b/clang/test/CXX/drs/dr6xx.cpp
@@ -81,7 +81,7 @@ namespace dr603 { // dr603: yes
 }
 
 // dr604: na
-// dr605 needs IRGen test
+// dr605 is in dr605.cpp
 
 namespace dr606 { // dr606: 3.0
 #if __cplusplus >= 201103L
@@ -253,7 +253,7 @@ namespace dr621 { // dr621: yes
 
 // dr623: na
 // FIXME: Add documentation saying we allow invalid pointer values.
-// dr624 needs an IRGen check.
+// dr624 needs a libc++abi test.
 
 namespace dr625 { // dr625: yes
   template struct A {};
@@ -650,7 +650,7 @@ struct Y {
 }
 #endif
 
-// dr650 FIXME: add codegen test
+// dr650 is in dr650.cpp
 
 #if __cplusplus >= 201103L
 namespace dr651 { // dr651: yes
@@ -672,7 +672,7 @@ namespace dr652 { // dr652: yes
 }
 #endif
 
-// dr653 FIXME: add codegen test
+// dr653 is in dr653.cpp
 
 #if __cplusplus >= 201103L
 namespace dr654 { // dr654: sup 1423
@@ -798,7 +798,7 @@ namespace dr657 { // dr657: partial
   Cnvt2::type err;
 }
 
-// dr658 FIXME: add codegen test
+// dr658 is in dr658.cpp
 
 #if __cplusplus >= 201103L
 namespace dr659 { // dr659: 3.0
@@ -829,7 +829,7 @@ namespace dr660 { // dr660: 3.0
 }
 #endif
 
-// dr661 FIXME: add codegen test
+// dr661 is in dr661.cpp
 
 namespace dr662 { // dr662: yes
   template <typename T> void f(T t) {
@@ -931,7 +931,7 @@ namespace dr667 { // dr667: 8
 }
 #endif
 
-// dr668 FIXME: add codegen test
+// dr668 needs a libc++abi test
 
 #if __cplusplus >= 201103L
 namespace dr669 { // dr669: yes
@@ -971,7 +971,7 @@ namespace dr671 { // dr671: 2.9
   int m = static_cast<int>(e);
 }
 
-// dr672 FIXME: add codegen test
+// dr672 is in dr672.cpp
 
 namespace dr673 { // dr673: yes
   template struct X { static const int n = 0; };
diff --git a/clang/test/ClangScanDeps/Inputs/removed-args/cdb.json.template b/clang/test/ClangScanDeps/Inputs/removed-args/cdb.json.template
index 7ae3c88aedd8d..ca56799cd08ea 100644
--- a/clang/test/ClangScanDeps/Inputs/removed-args/cdb.json.template
+++ b/clang/test/ClangScanDeps/Inputs/removed-args/cdb.json.template
@@ -1,7 +1,7 @@
 [
   {
     "directory": "DIR",
-    "command": "clang -fsyntax-only DIR/tu.c -fmodules -fimplicit-module-maps -fmodules-validate-once-per-build-session -fbuild-session-file=DIR/build-session -fmodules-prune-interval=123 -fmodules-prune-after=123 -fmodules-cache-path=DIR/cache -include DIR/header.h -grecord-command-line -fdebug-compilation-dir=DIR/debug -fcoverage-compilation-dir=DIR/coverage -ftest-coverage -o DIR/tu.o -serialize-diagnostics DIR/tu.diag -MT tu -MD -MF DIR/tu.d",
+    "command": "clang -fsyntax-only DIR/tu.c -fmodules -fimplicit-module-maps -fmodules-validate-once-per-build-session -fbuild-session-file=DIR/build-session -fmodules-prune-interval=123 -fmodules-prune-after=123 -fmodules-cache-path=DIR/cache -include DIR/header.h -grecord-command-line -fdebug-compilation-dir=DIR/debug -fcoverage-compilation-dir=DIR/coverage -ftest-coverage -fprofile-instr-use=DIR/tu.profdata -o DIR/tu.o -serialize-diagnostics DIR/tu.diag -MT tu -MD -MF DIR/tu.d",
     "file": "DIR/tu.c"
   }
 ]
diff --git 
a/clang/test/ClangScanDeps/modules-extension.c b/clang/test/ClangScanDeps/modules-minimize-extension.c similarity index 100% rename from clang/test/ClangScanDeps/modules-extension.c rename to clang/test/ClangScanDeps/modules-minimize-extension.c diff --git a/clang/test/ClangScanDeps/modules-minimize-module.c b/clang/test/ClangScanDeps/modules-minimize-module.c new file mode 100644 index 0000000000000..fb58c61bc5094 --- /dev/null +++ b/clang/test/ClangScanDeps/modules-minimize-module.c @@ -0,0 +1,24 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +// This test checks that source files of modules undergo dependency directives +// scan. If a.h would not, the scan would fail when lexing `#error`. + +//--- module.modulemap +module A { header "a.h" } + +//--- a.h +#error blah + +//--- tu.c +#include "a.h" + +//--- cdb.json.in +[{ + "directory": "DIR", + "file": "DIR/tu.c", + "command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache" +}] + +// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json +// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/deps.json diff --git a/clang/test/ClangScanDeps/removed-args.c b/clang/test/ClangScanDeps/removed-args.c index 9a4ef25838e46..f49e4ead82f7b 100644 --- a/clang/test/ClangScanDeps/removed-args.c +++ b/clang/test/ClangScanDeps/removed-args.c @@ -9,6 +9,8 @@ // RUN: rm -rf %t && mkdir %t // RUN: cp %S/Inputs/removed-args/* %t // RUN: touch %t/build-session +// RUN: touch %t/tu.proftext +// RUN: llvm-profdata merge %t/tu.proftext -o %t/tu.profdata // RUN: sed "s|DIR|%/t|g" %S/Inputs/removed-args/cdb.json.template > %t/cdb.json // RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full > %t/result.json @@ -25,6 +27,7 @@ // CHECK-NOT: "-fcoverage-compilation-dir=" // CHECK-NOT: "-coverage-notes-file // CHECK-NOT: "-coverage-data-file +// CHECK-NOT: "-fprofile-instrument-use-path // CHECK-NOT: "-dwarf-debug-flags" // CHECK-NOT: "-main-file-name" // CHECK-NOT: "-include" @@ -50,6 +53,7 @@ // CHECK-NOT: "-fcoverage-compilation-dir= // CHECK-NOT: "-coverage-notes-file // CHECK-NOT: "-coverage-data-file +// CHECK-NOT: "-fprofile-instrument-use-path // CHECK-NOT: "-dwarf-debug-flags" // CHECK-NOT: "-main-file-name" // CHECK-NOT: "-include" diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c index 272e0222dc9e4..f7f357df62af1 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c @@ -24,7 +24,6 @@ void test_trap(void) { __tw(ia, ib, 0); //expected-error {{argument value 0 is outside the valid range [1, 31]}} } -#ifdef __PPC64__ void test_builtin_ppc_rldimi() { unsigned int shift; unsigned long long mask; @@ -33,7 +32,6 @@ void test_builtin_ppc_rldimi() { res = __builtin_ppc_rldimi(ull, ull, 63, 0xFFFF000000000F00); // expected-error {{argument 3 value should represent a contiguous bit field}} res = __builtin_ppc_rldimi(ull, ull, 64, 0xFFFF000000000000); // expected-error {{argument value 64 is outside the valid range [0, 63]}} } -#endif void test_builtin_ppc_rlwimi() { unsigned int shift; @@ -86,10 +84,6 @@ void testalignx(const void *pointer, unsigned int alignment) { } #ifndef __PPC64__ -unsigned long long testrldimi32() { - return __rldimi(ull, ui, 3, 0x7ffff8ULL); //expected-error {{this builtin is only available on 64-bit targets}} -} - long long testbpermd(long long bit_selector, long long source) { return __bpermd(bit_selector, 
source); //expected-error {{this builtin is only available on 64-bit targets}} } diff --git a/clang/test/CodeGen/X86/x86_64-arguments.c b/clang/test/CodeGen/X86/x86_64-arguments.c index cf5636cfd518b..82845f0a2b31f 100644 --- a/clang/test/CodeGen/X86/x86_64-arguments.c +++ b/clang/test/CodeGen/X86/x86_64-arguments.c @@ -533,6 +533,24 @@ typedef float t66 __attribute__((__vector_size__(128), __aligned__(128))); void f66(t66 a0) { } +typedef long long t67 __attribute__((aligned (4))); +struct s67 { + int a; + t67 b; +}; +// CHECK-LABEL: define{{.*}} void @f67(ptr noundef byval(%struct.s67) align 8 %x) +void f67(struct s67 x) { +} + +typedef double t68 __attribute__((aligned (4))); +struct s68 { + int a; + t68 b; +}; +// CHECK-LABEL: define{{.*}} void @f68(ptr noundef byval(%struct.s68) align 8 %x) +void f68(struct s68 x) { +} + /// The synthesized __va_list_tag does not have file/line fields. // CHECK: = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "__va_list_tag", // CHECK-NOT: file: diff --git a/clang/test/CodeGen/allow-ubsan-check.c b/clang/test/CodeGen/allow-ubsan-check.c index bc425230c8ec7..5232d24085466 100644 --- a/clang/test/CodeGen/allow-ubsan-check.c +++ b/clang/test/CodeGen/allow-ubsan-check.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s -fsanitize=signed-integer-overflow,integer-divide-by-zero,null | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s -fsanitize=signed-integer-overflow,integer-divide-by-zero,null -fsanitize-trap=signed-integer-overflow,integer-divide-by-zero,null | FileCheck %s --check-prefixes=TRAP -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s -fsanitize=signed-integer-overflow,integer-divide-by-zero,null -fsanitize-recover=signed-integer-overflow,integer-divide-by-zero,null | FileCheck %s --check-prefixes=RECOVER +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s -fsanitize=signed-integer-overflow,integer-divide-by-zero,null -mllvm -ubsan-guard-checks | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s -fsanitize=signed-integer-overflow,integer-divide-by-zero,null -mllvm -ubsan-guard-checks -fsanitize-trap=signed-integer-overflow,integer-divide-by-zero,null | FileCheck %s --check-prefixes=TRAP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s -fsanitize=signed-integer-overflow,integer-divide-by-zero,null -mllvm -ubsan-guard-checks -fsanitize-recover=signed-integer-overflow,integer-divide-by-zero,null | FileCheck %s --check-prefixes=RECOVER // CHECK-LABEL: define dso_local i32 @div( @@ -18,11 +18,14 @@ // CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], -1, !nosanitize [[META2]] // CHECK-NEXT: [[OR:%.*]] = or i1 [[TMP3]], [[TMP4]], !nosanitize [[META2]] // CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP2]], [[OR]], !nosanitize [[META2]] -// CHECK-NEXT: br i1 [[TMP5]], label [[CONT:%.*]], label [[HANDLER_DIVREM_OVERFLOW:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] +// CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.allow.ubsan.check(i8 3), !nosanitize [[META2]] +// CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]], !nosanitize [[META2]] +// CHECK-NEXT: br i1 [[TMP8]], label [[CONT:%.*]], label [[HANDLER_DIVREM_OVERFLOW:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] // CHECK: handler.divrem_overflow: -// CHECK-NEXT: 
[[TMP6:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize [[META2]] -// CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP1]] to i64, !nosanitize [[META2]] -// CHECK-NEXT: call void @__ubsan_handle_divrem_overflow_abort(ptr @[[GLOB1:[0-9]+]], i64 [[TMP6]], i64 [[TMP7]]) #[[ATTR3:[0-9]+]], !nosanitize [[META2]] +// CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP1]] to i64, !nosanitize [[META2]] +// CHECK-NEXT: call void @__ubsan_handle_divrem_overflow_abort(ptr @[[GLOB1:[0-9]+]], i64 [[TMP9]], i64 [[TMP10]]) #[[ATTR4:[0-9]+]], !nosanitize [[META2]] // CHECK-NEXT: unreachable, !nosanitize [[META2]] // CHECK: cont: // CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], [[TMP1]] @@ -42,9 +45,12 @@ // TRAP-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], -1, !nosanitize [[META2]] // TRAP-NEXT: [[OR:%.*]] = or i1 [[TMP3]], [[TMP4]], !nosanitize [[META2]] // TRAP-NEXT: [[TMP5:%.*]] = and i1 [[TMP2]], [[OR]], !nosanitize [[META2]] -// TRAP-NEXT: br i1 [[TMP5]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META2]] +// TRAP-NEXT: [[TMP6:%.*]] = call i1 @llvm.allow.ubsan.check(i8 3), !nosanitize [[META2]] +// TRAP-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true, !nosanitize [[META2]] +// TRAP-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]], !nosanitize [[META2]] +// TRAP-NEXT: br i1 [[TMP8]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META2]] // TRAP: trap: -// TRAP-NEXT: call void @llvm.ubsantrap(i8 3) #[[ATTR3:[0-9]+]], !nosanitize [[META2]] +// TRAP-NEXT: call void @llvm.ubsantrap(i8 3) #[[ATTR4:[0-9]+]], !nosanitize [[META2]] // TRAP-NEXT: unreachable, !nosanitize [[META2]] // TRAP: cont: // TRAP-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], [[TMP1]] @@ -64,11 +70,14 @@ // RECOVER-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], -1, !nosanitize [[META2]] // RECOVER-NEXT: [[OR:%.*]] = or i1 [[TMP3]], [[TMP4]], !nosanitize [[META2]] // RECOVER-NEXT: [[TMP5:%.*]] = and i1 [[TMP2]], [[OR]], !nosanitize [[META2]] -// RECOVER-NEXT: br i1 [[TMP5]], label [[CONT:%.*]], label [[HANDLER_DIVREM_OVERFLOW:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP6:%.*]] = call i1 @llvm.allow.ubsan.check(i8 3), !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true, !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]], !nosanitize [[META2]] +// RECOVER-NEXT: br i1 [[TMP8]], label [[CONT:%.*]], label [[HANDLER_DIVREM_OVERFLOW:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] // RECOVER: handler.divrem_overflow: -// RECOVER-NEXT: [[TMP6:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize [[META2]] -// RECOVER-NEXT: [[TMP7:%.*]] = zext i32 [[TMP1]] to i64, !nosanitize [[META2]] -// RECOVER-NEXT: call void @__ubsan_handle_divrem_overflow(ptr @[[GLOB1:[0-9]+]], i64 [[TMP6]], i64 [[TMP7]]) #[[ATTR3:[0-9]+]], !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP9:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP10:%.*]] = zext i32 [[TMP1]] to i64, !nosanitize [[META2]] +// RECOVER-NEXT: call void @__ubsan_handle_divrem_overflow(ptr @[[GLOB1:[0-9]+]], i64 [[TMP9]], i64 [[TMP10]]) #[[ATTR4:[0-9]+]], !nosanitize [[META2]] // RECOVER-NEXT: br label [[CONT]], !nosanitize [[META2]] // RECOVER: cont: // RECOVER-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], [[TMP1]] @@ -85,14 +94,17 @@ int div(int x, int y) { // CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = icmp ne ptr [[TMP0]], null, !nosanitize 
[[META2]] -// CHECK-NEXT: br i1 [[TMP1]], label [[CONT:%.*]], label [[HANDLER_TYPE_MISMATCH:%.*]], !prof [[PROF3]], !nosanitize [[META2]] +// CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.allow.ubsan.check(i8 22), !nosanitize [[META2]] +// CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]], !nosanitize [[META2]] +// CHECK-NEXT: br i1 [[TMP4]], label [[CONT:%.*]], label [[HANDLER_TYPE_MISMATCH:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // CHECK: handler.type_mismatch: -// CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64, !nosanitize [[META2]] -// CHECK-NEXT: call void @__ubsan_handle_type_mismatch_v1_abort(ptr @[[GLOB2:[0-9]+]], i64 [[TMP2]]) #[[ATTR3]], !nosanitize [[META2]] +// CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP0]] to i64, !nosanitize [[META2]] +// CHECK-NEXT: call void @__ubsan_handle_type_mismatch_v1_abort(ptr @[[GLOB2:[0-9]+]], i64 [[TMP5]]) #[[ATTR4]], !nosanitize [[META2]] // CHECK-NEXT: unreachable, !nosanitize [[META2]] // CHECK: cont: -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK-NEXT: ret i32 [[TMP3]] +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: ret i32 [[TMP6]] // // TRAP-LABEL: define dso_local i32 @null( // TRAP-SAME: ptr noundef [[X:%.*]]) #[[ATTR0]] { @@ -101,13 +113,16 @@ int div(int x, int y) { // TRAP-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // TRAP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 // TRAP-NEXT: [[TMP1:%.*]] = icmp ne ptr [[TMP0]], null, !nosanitize [[META2]] -// TRAP-NEXT: br i1 [[TMP1]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META2]] +// TRAP-NEXT: [[TMP2:%.*]] = call i1 @llvm.allow.ubsan.check(i8 22), !nosanitize [[META2]] +// TRAP-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true, !nosanitize [[META2]] +// TRAP-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]], !nosanitize [[META2]] +// TRAP-NEXT: br i1 [[TMP4]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META2]] // TRAP: trap: -// TRAP-NEXT: call void @llvm.ubsantrap(i8 22) #[[ATTR3]], !nosanitize [[META2]] +// TRAP-NEXT: call void @llvm.ubsantrap(i8 22) #[[ATTR4]], !nosanitize [[META2]] // TRAP-NEXT: unreachable, !nosanitize [[META2]] // TRAP: cont: -// TRAP-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// TRAP-NEXT: ret i32 [[TMP2]] +// TRAP-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +// TRAP-NEXT: ret i32 [[TMP5]] // // RECOVER-LABEL: define dso_local i32 @null( // RECOVER-SAME: ptr noundef [[X:%.*]]) #[[ATTR0]] { @@ -116,14 +131,17 @@ int div(int x, int y) { // RECOVER-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // RECOVER-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 // RECOVER-NEXT: [[TMP1:%.*]] = icmp ne ptr [[TMP0]], null, !nosanitize [[META2]] -// RECOVER-NEXT: br i1 [[TMP1]], label [[CONT:%.*]], label [[HANDLER_TYPE_MISMATCH:%.*]], !prof [[PROF3]], !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP2:%.*]] = call i1 @llvm.allow.ubsan.check(i8 22), !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true, !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]], !nosanitize [[META2]] +// RECOVER-NEXT: br i1 [[TMP4]], label [[CONT:%.*]], label [[HANDLER_TYPE_MISMATCH:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // RECOVER: handler.type_mismatch: -// RECOVER-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64, !nosanitize [[META2]] -// RECOVER-NEXT: call void @__ubsan_handle_type_mismatch_v1(ptr @[[GLOB2:[0-9]+]], i64 [[TMP2]]) #[[ATTR3]], !nosanitize 
[[META2]] +// RECOVER-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP0]] to i64, !nosanitize [[META2]] +// RECOVER-NEXT: call void @__ubsan_handle_type_mismatch_v1(ptr @[[GLOB2:[0-9]+]], i64 [[TMP5]]) #[[ATTR4]], !nosanitize [[META2]] // RECOVER-NEXT: br label [[CONT]], !nosanitize [[META2]] // RECOVER: cont: -// RECOVER-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// RECOVER-NEXT: ret i32 [[TMP3]] +// RECOVER-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 +// RECOVER-NEXT: ret i32 [[TMP6]] // int null(int* x) { return *x; @@ -142,11 +160,14 @@ int null(int* x) { // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0, !nosanitize [[META2]] // CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1, !nosanitize [[META2]] // CHECK-NEXT: [[TMP5:%.*]] = xor i1 [[TMP4]], true, !nosanitize [[META2]] -// CHECK-NEXT: br i1 [[TMP5]], label [[CONT:%.*]], label [[HANDLER_ADD_OVERFLOW:%.*]], !prof [[PROF3]], !nosanitize [[META2]] +// CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.allow.ubsan.check(i8 0), !nosanitize [[META2]] +// CHECK-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]], !nosanitize [[META2]] +// CHECK-NEXT: br i1 [[TMP8]], label [[CONT:%.*]], label [[HANDLER_ADD_OVERFLOW:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // CHECK: handler.add_overflow: -// CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize [[META2]] -// CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP1]] to i64, !nosanitize [[META2]] -// CHECK-NEXT: call void @__ubsan_handle_add_overflow_abort(ptr @[[GLOB3:[0-9]+]], i64 [[TMP6]], i64 [[TMP7]]) #[[ATTR3]], !nosanitize [[META2]] +// CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP1]] to i64, !nosanitize [[META2]] +// CHECK-NEXT: call void @__ubsan_handle_add_overflow_abort(ptr @[[GLOB3:[0-9]+]], i64 [[TMP9]], i64 [[TMP10]]) #[[ATTR4]], !nosanitize [[META2]] // CHECK-NEXT: unreachable, !nosanitize [[META2]] // CHECK: cont: // CHECK-NEXT: ret i32 [[TMP3]] @@ -164,9 +185,12 @@ int null(int* x) { // TRAP-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0, !nosanitize [[META2]] // TRAP-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1, !nosanitize [[META2]] // TRAP-NEXT: [[TMP5:%.*]] = xor i1 [[TMP4]], true, !nosanitize [[META2]] -// TRAP-NEXT: br i1 [[TMP5]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META2]] +// TRAP-NEXT: [[TMP6:%.*]] = call i1 @llvm.allow.ubsan.check(i8 0), !nosanitize [[META2]] +// TRAP-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true, !nosanitize [[META2]] +// TRAP-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]], !nosanitize [[META2]] +// TRAP-NEXT: br i1 [[TMP8]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META2]] // TRAP: trap: -// TRAP-NEXT: call void @llvm.ubsantrap(i8 0) #[[ATTR3]], !nosanitize [[META2]] +// TRAP-NEXT: call void @llvm.ubsantrap(i8 0) #[[ATTR4]], !nosanitize [[META2]] // TRAP-NEXT: unreachable, !nosanitize [[META2]] // TRAP: cont: // TRAP-NEXT: ret i32 [[TMP3]] @@ -184,11 +208,14 @@ int null(int* x) { // RECOVER-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0, !nosanitize [[META2]] // RECOVER-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1, !nosanitize [[META2]] // RECOVER-NEXT: [[TMP5:%.*]] = xor i1 [[TMP4]], true, !nosanitize [[META2]] -// RECOVER-NEXT: br i1 [[TMP5]], label [[CONT:%.*]], label [[HANDLER_ADD_OVERFLOW:%.*]], !prof [[PROF3]], !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP6:%.*]] = call i1 
@llvm.allow.ubsan.check(i8 0), !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true, !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP8:%.*]] = or i1 [[TMP5]], [[TMP7]], !nosanitize [[META2]] +// RECOVER-NEXT: br i1 [[TMP8]], label [[CONT:%.*]], label [[HANDLER_ADD_OVERFLOW:%.*]], !prof [[PROF3]], !nosanitize [[META2]] // RECOVER: handler.add_overflow: -// RECOVER-NEXT: [[TMP6:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize [[META2]] -// RECOVER-NEXT: [[TMP7:%.*]] = zext i32 [[TMP1]] to i64, !nosanitize [[META2]] -// RECOVER-NEXT: call void @__ubsan_handle_add_overflow(ptr @[[GLOB3:[0-9]+]], i64 [[TMP6]], i64 [[TMP7]]) #[[ATTR3]], !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP9:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP10:%.*]] = zext i32 [[TMP1]] to i64, !nosanitize [[META2]] +// RECOVER-NEXT: call void @__ubsan_handle_add_overflow(ptr @[[GLOB3:[0-9]+]], i64 [[TMP9]], i64 [[TMP10]]) #[[ATTR4]], !nosanitize [[META2]] // RECOVER-NEXT: br label [[CONT]], !nosanitize [[META2]] // RECOVER: cont: // RECOVER-NEXT: ret i32 [[TMP3]] diff --git a/clang/test/CodeGen/catch-implicit-conversions-basics.c b/clang/test/CodeGen/catch-implicit-conversions-basics.c index 6bc5472bf39db..e658a9aab50f5 100644 --- a/clang/test/CodeGen/catch-implicit-conversions-basics.c +++ b/clang/test/CodeGen/catch-implicit-conversions-basics.c @@ -9,15 +9,15 @@ // However, not all of them should result in the check. // So here, we *only* check which should and which should not result in checks. -// CHECK-DAG: @[[LINE_500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, {{.*}}, {{.*}}, i8 1 } -// CHECK-DAG: @[[LINE_900_SIGN_CHANGE:.*]] = {{.*}}, i32 900, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1000_SIGN_CHANGE:.*]] = {{.*}}, i32 1000, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1100, i32 10 }, {{.*}}, {{.*}}, i8 2 } -// CHECK-DAG: @[[LINE_1200_SIGN_CHANGE:.*]] = {{.*}}, i32 1200, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1300_SIGN_CHANGE:.*]] = {{.*}}, i32 1300, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1400_SIGN_CHANGE:.*]] = {{.*}}, i32 1400, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1500_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 1500, i32 10 }, {{.*}}, {{.*}}, i8 4 } -// CHECK-DAG: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, {{.*}}, {{.*}}, i8 2 } +// CHECK-DAG: @[[LINE_500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, {{.*}}, {{.*}}, i8 1, i32 0 } +// CHECK-DAG: @[[LINE_900_SIGN_CHANGE:.*]] = {{.*}}, i32 900, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1000_SIGN_CHANGE:.*]] = {{.*}}, i32 1000, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1100, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1200_SIGN_CHANGE:.*]] = {{.*}}, i32 1200, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1300_SIGN_CHANGE:.*]] = {{.*}}, i32 1300, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1400_SIGN_CHANGE:.*]] = {{.*}}, i32 1400, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1500_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 1500, i32 10 }, {{.*}}, {{.*}}, i8 4, i32 0 } +// CHECK-DAG: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } // CHECK-LABEL: @convert_unsigned_int_to_unsigned_int unsigned int convert_unsigned_int_to_unsigned_int(unsigned 
int x) { diff --git a/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c b/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c index 2ce0c6ef21b42..da2e0d00d51e2 100644 --- a/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c +++ b/clang/test/CodeGen/catch-implicit-conversions-incdec-basics.c @@ -2,25 +2,25 @@ // CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } // CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } // CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } // CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } // CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: 
@[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } // CHECK-LABEL: @t0( unsigned short t0(unsigned short x) { diff --git a/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-basics.c b/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-basics.c index 32ed6b1a7bac9..645c0708e3690 100644 --- a/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-basics.c +++ b/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-basics.c @@ -9,14 +9,14 @@ // However, not all of them should result in the check. // So here, we *only* check which should and which should not result in checks. -// CHECK-DAG: @[[LINE_900_SIGN_CHANGE:.*]] = {{.*}}, i32 900, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1000_SIGN_CHANGE:.*]] = {{.*}}, i32 1000, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1100, i32 10 }, {{.*}}, {{.*}}, i8 2 } -// CHECK-DAG: @[[LINE_1200_SIGN_CHANGE:.*]] = {{.*}}, i32 1200, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1300_SIGN_CHANGE:.*]] = {{.*}}, i32 1300, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1400_SIGN_CHANGE:.*]] = {{.*}}, i32 1400, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1500_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 1500, i32 10 }, {{.*}}, {{.*}}, i8 4 } -// CHECK-DAG: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, {{.*}}, {{.*}}, i8 2 } +// CHECK-DAG: @[[LINE_900_SIGN_CHANGE:.*]] = {{.*}}, i32 900, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1000_SIGN_CHANGE:.*]] = {{.*}}, i32 1000, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1100, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1200_SIGN_CHANGE:.*]] = {{.*}}, i32 1200, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1300_SIGN_CHANGE:.*]] = {{.*}}, i32 1300, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1400_SIGN_CHANGE:.*]] = {{.*}}, i32 1400, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1500_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 1500, i32 10 }, {{.*}}, {{.*}}, i8 4, i32 0 } +// CHECK-DAG: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } // CHECK-LABEL: @convert_unsigned_int_to_unsigned_int unsigned int convert_unsigned_int_to_unsigned_int(unsigned int x) { diff --git a/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c index 8bc89f43e55c3..50cde38bb8547 100644 --- a/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c +++ b/clang/test/CodeGen/catch-implicit-integer-arithmetic-value-change-incdec-basics.c @@ -2,25 +2,25 @@ // CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } // CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, ptr @[[INT]], ptr 
@[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } // CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } // CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } // CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } // CHECK-LABEL: 
@t0( unsigned short t0(unsigned short x) { diff --git a/clang/test/CodeGen/catch-implicit-integer-conversions-basics.c b/clang/test/CodeGen/catch-implicit-integer-conversions-basics.c index 6bc5472bf39db..e658a9aab50f5 100644 --- a/clang/test/CodeGen/catch-implicit-integer-conversions-basics.c +++ b/clang/test/CodeGen/catch-implicit-integer-conversions-basics.c @@ -9,15 +9,15 @@ // However, not all of them should result in the check. // So here, we *only* check which should and which should not result in checks. -// CHECK-DAG: @[[LINE_500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, {{.*}}, {{.*}}, i8 1 } -// CHECK-DAG: @[[LINE_900_SIGN_CHANGE:.*]] = {{.*}}, i32 900, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1000_SIGN_CHANGE:.*]] = {{.*}}, i32 1000, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1100, i32 10 }, {{.*}}, {{.*}}, i8 2 } -// CHECK-DAG: @[[LINE_1200_SIGN_CHANGE:.*]] = {{.*}}, i32 1200, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1300_SIGN_CHANGE:.*]] = {{.*}}, i32 1300, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1400_SIGN_CHANGE:.*]] = {{.*}}, i32 1400, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1500_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 1500, i32 10 }, {{.*}}, {{.*}}, i8 4 } -// CHECK-DAG: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, {{.*}}, {{.*}}, i8 2 } +// CHECK-DAG: @[[LINE_500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, {{.*}}, {{.*}}, i8 1, i32 0 } +// CHECK-DAG: @[[LINE_900_SIGN_CHANGE:.*]] = {{.*}}, i32 900, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1000_SIGN_CHANGE:.*]] = {{.*}}, i32 1000, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1100, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1200_SIGN_CHANGE:.*]] = {{.*}}, i32 1200, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1300_SIGN_CHANGE:.*]] = {{.*}}, i32 1300, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1400_SIGN_CHANGE:.*]] = {{.*}}, i32 1400, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1500_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 1500, i32 10 }, {{.*}}, {{.*}}, i8 4, i32 0 } +// CHECK-DAG: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } // CHECK-LABEL: @convert_unsigned_int_to_unsigned_int unsigned int convert_unsigned_int_to_unsigned_int(unsigned int x) { diff --git a/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c index 2ce0c6ef21b42..da2e0d00d51e2 100644 --- a/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c +++ b/clang/test/CodeGen/catch-implicit-integer-conversions-incdec-basics.c @@ -2,25 +2,25 @@ // CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } // CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 
2, i32 0 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } // CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } // CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } // CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } // CHECK-LABEL: @t0( unsigned short t0(unsigned short x) { diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-CompoundAssignOperator.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes-CompoundAssignOperator.c index d6aff6c0b54ea..197ec53edaedb 100644 --- a/clang/test/CodeGen/catch-implicit-integer-sign-changes-CompoundAssignOperator.c +++ b/clang/test/CodeGen/catch-implicit-integer-sign-changes-CompoundAssignOperator.c @@ -12,89 +12,89 
@@ // CHECK-SANITIZE-ANYRECOVER: @[[INT:.*]] = {{.*}} c"'int'\00" } // CHECK-SANITIZE-ANYRECOVER: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_100_SIGN_CHANGE:.*]] = {{.*}}, i32 100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_200_SIGN_CHANGE:.*]] = {{.*}}, i32 200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_400_SIGN_CHANGE:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_100_SIGN_CHANGE:.*]] = {{.*}}, i32 100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_200_SIGN_CHANGE:.*]] = {{.*}}, i32 200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_400_SIGN_CHANGE:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } // CHECK-SANITIZE-ANYRECOVER: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_500_SIGN_CHANGE:.*]] = {{.*}}, i32 500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_600_SIGN_CHANGE:.*]] = {{.*}}, i32 600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_500_SIGN_CHANGE:.*]] = {{.*}}, i32 500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_600_SIGN_CHANGE:.*]] = {{.*}}, i32 600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } // CHECK-SANITIZE-ANYRECOVER: @[[UNSIGNED_INT:.*]] = {{.*}} c"'unsigned int'\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_700_SIGN_CHANGE:.*]] = {{.*}}, i32 700, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_800_SIGN_CHANGE:.*]] = {{.*}}, i32 800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_900_SIGN_CHANGE:.*]] = {{.*}}, i32 900, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1000_SIGN_CHANGE:.*]] = {{.*}}, i32 1000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1200_SIGN_CHANGE:.*]] = {{.*}}, i32 1200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1300_SIGN_CHANGE:.*]] = {{.*}}, i32 1300, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1400_SIGN_CHANGE:.*]] = {{.*}}, i32 1400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1500_SIGN_CHANGE:.*]] = {{.*}}, i32 1500, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1600_SIGN_CHANGE:.*]] = {{.*}}, i32 1600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1700_SIGN_CHANGE:.*]] = {{.*}}, i32 1700, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1800_SIGN_CHANGE:.*]] = {{.*}}, i32 1800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2000_SIGN_CHANGE:.*]] = {{.*}}, i32 2000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2100_SIGN_CHANGE:.*]] = {{.*}}, i32 2100, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2200_SIGN_CHANGE:.*]] = {{.*}}, i32 2200, i32 10 }, ptr 
@[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2300_SIGN_CHANGE:.*]] = {{.*}}, i32 2300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2400_SIGN_CHANGE:.*]] = {{.*}}, i32 2400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2500_SIGN_CHANGE:.*]] = {{.*}}, i32 2500, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2600_SIGN_CHANGE:.*]] = {{.*}}, i32 2600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2800_SIGN_CHANGE:.*]] = {{.*}}, i32 2800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2900_SIGN_CHANGE:.*]] = {{.*}}, i32 2900, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3000_SIGN_CHANGE:.*]] = {{.*}}, i32 3000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3100_SIGN_CHANGE:.*]] = {{.*}}, i32 3100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3200_SIGN_CHANGE:.*]] = {{.*}}, i32 3200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3300_SIGN_CHANGE:.*]] = {{.*}}, i32 3300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3400_SIGN_CHANGE:.*]] = {{.*}}, i32 3400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3600_SIGN_CHANGE:.*]] = {{.*}}, i32 3600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3700_SIGN_CHANGE:.*]] = {{.*}}, i32 3700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3800_SIGN_CHANGE:.*]] = {{.*}}, i32 3800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3900_SIGN_CHANGE:.*]] = {{.*}}, i32 3900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4000_SIGN_CHANGE:.*]] = {{.*}}, i32 4000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4100_SIGN_CHANGE:.*]] = {{.*}}, i32 4100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4200_SIGN_CHANGE:.*]] = {{.*}}, i32 4200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4300_SIGN_CHANGE:.*]] = {{.*}}, i32 4300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4400_SIGN_CHANGE:.*]] = {{.*}}, i32 4400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4500_SIGN_CHANGE:.*]] = {{.*}}, i32 4500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4600_SIGN_CHANGE:.*]] = {{.*}}, i32 4600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4700_SIGN_CHANGE:.*]] = {{.*}}, i32 4700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4800_SIGN_CHANGE:.*]] = {{.*}}, i32 4800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4900_SIGN_CHANGE:.*]] = {{.*}}, i32 4900, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5000_SIGN_CHANGE:.*]] = {{.*}}, i32 5000, i32 10 }, ptr @[[INT]], ptr 
@[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5100_SIGN_CHANGE:.*]] = {{.*}}, i32 5100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5200_SIGN_CHANGE:.*]] = {{.*}}, i32 5200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5300_SIGN_CHANGE:.*]] = {{.*}}, i32 5300, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5400_SIGN_CHANGE:.*]] = {{.*}}, i32 5400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5500_SIGN_CHANGE:.*]] = {{.*}}, i32 5500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5600_SIGN_CHANGE:.*]] = {{.*}}, i32 5600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5700_SIGN_CHANGE:.*]] = {{.*}}, i32 5700, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5800_SIGN_CHANGE:.*]] = {{.*}}, i32 5800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6000_SIGN_CHANGE:.*]] = {{.*}}, i32 6000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6100_SIGN_CHANGE:.*]] = {{.*}}, i32 6100, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6200_SIGN_CHANGE:.*]] = {{.*}}, i32 6200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6300_SIGN_CHANGE:.*]] = {{.*}}, i32 6300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6400_SIGN_CHANGE:.*]] = {{.*}}, i32 6400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6500_SIGN_CHANGE:.*]] = {{.*}}, i32 6500, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6600_SIGN_CHANGE:.*]] = {{.*}}, i32 6600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6800_SIGN_CHANGE:.*]] = {{.*}}, i32 6800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6900_SIGN_CHANGE:.*]] = {{.*}}, i32 6900, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7000_SIGN_CHANGE:.*]] = {{.*}}, i32 7000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7100_SIGN_CHANGE:.*]] = {{.*}}, i32 7100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7200_SIGN_CHANGE:.*]] = {{.*}}, i32 7200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7300_SIGN_CHANGE:.*]] = {{.*}}, i32 7300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7400_SIGN_CHANGE:.*]] = {{.*}}, i32 7400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7600_SIGN_CHANGE:.*]] = {{.*}}, i32 7600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7700_SIGN_CHANGE:.*]] = {{.*}}, i32 7700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7800_SIGN_CHANGE:.*]] = {{.*}}, i32 7800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7900_SIGN_CHANGE:.*]] = {{.*}}, i32 7900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr 
@[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_8000_SIGN_CHANGE:.*]] = {{.*}}, i32 8000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_700_SIGN_CHANGE:.*]] = {{.*}}, i32 700, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_800_SIGN_CHANGE:.*]] = {{.*}}, i32 800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_900_SIGN_CHANGE:.*]] = {{.*}}, i32 900, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1000_SIGN_CHANGE:.*]] = {{.*}}, i32 1000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1200_SIGN_CHANGE:.*]] = {{.*}}, i32 1200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1300_SIGN_CHANGE:.*]] = {{.*}}, i32 1300, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1400_SIGN_CHANGE:.*]] = {{.*}}, i32 1400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1500_SIGN_CHANGE:.*]] = {{.*}}, i32 1500, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1600_SIGN_CHANGE:.*]] = {{.*}}, i32 1600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1700_SIGN_CHANGE:.*]] = {{.*}}, i32 1700, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1800_SIGN_CHANGE:.*]] = {{.*}}, i32 1800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2000_SIGN_CHANGE:.*]] = {{.*}}, i32 2000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2100_SIGN_CHANGE:.*]] = {{.*}}, i32 2100, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2200_SIGN_CHANGE:.*]] = {{.*}}, i32 2200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2300_SIGN_CHANGE:.*]] = {{.*}}, i32 2300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2400_SIGN_CHANGE:.*]] = {{.*}}, i32 2400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2500_SIGN_CHANGE:.*]] = {{.*}}, i32 2500, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2600_SIGN_CHANGE:.*]] = {{.*}}, i32 2600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2800_SIGN_CHANGE:.*]] = {{.*}}, i32 2800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2900_SIGN_CHANGE:.*]] = {{.*}}, i32 2900, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3000_SIGN_CHANGE:.*]] = {{.*}}, i32 3000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3100_SIGN_CHANGE:.*]] = {{.*}}, i32 3100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3200_SIGN_CHANGE:.*]] = {{.*}}, i32 3200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3300_SIGN_CHANGE:.*]] = {{.*}}, i32 3300, i32 10 }, ptr 
@[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3400_SIGN_CHANGE:.*]] = {{.*}}, i32 3400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3600_SIGN_CHANGE:.*]] = {{.*}}, i32 3600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3700_SIGN_CHANGE:.*]] = {{.*}}, i32 3700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3800_SIGN_CHANGE:.*]] = {{.*}}, i32 3800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3900_SIGN_CHANGE:.*]] = {{.*}}, i32 3900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4000_SIGN_CHANGE:.*]] = {{.*}}, i32 4000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4100_SIGN_CHANGE:.*]] = {{.*}}, i32 4100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4200_SIGN_CHANGE:.*]] = {{.*}}, i32 4200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4300_SIGN_CHANGE:.*]] = {{.*}}, i32 4300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4400_SIGN_CHANGE:.*]] = {{.*}}, i32 4400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4500_SIGN_CHANGE:.*]] = {{.*}}, i32 4500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4600_SIGN_CHANGE:.*]] = {{.*}}, i32 4600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4700_SIGN_CHANGE:.*]] = {{.*}}, i32 4700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4800_SIGN_CHANGE:.*]] = {{.*}}, i32 4800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4900_SIGN_CHANGE:.*]] = {{.*}}, i32 4900, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5000_SIGN_CHANGE:.*]] = {{.*}}, i32 5000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5100_SIGN_CHANGE:.*]] = {{.*}}, i32 5100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5200_SIGN_CHANGE:.*]] = {{.*}}, i32 5200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5300_SIGN_CHANGE:.*]] = {{.*}}, i32 5300, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5400_SIGN_CHANGE:.*]] = {{.*}}, i32 5400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5500_SIGN_CHANGE:.*]] = {{.*}}, i32 5500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5600_SIGN_CHANGE:.*]] = {{.*}}, i32 5600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5700_SIGN_CHANGE:.*]] = {{.*}}, i32 5700, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5800_SIGN_CHANGE:.*]] = {{.*}}, i32 5800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6000_SIGN_CHANGE:.*]] = {{.*}}, i32 6000, 
i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6100_SIGN_CHANGE:.*]] = {{.*}}, i32 6100, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6200_SIGN_CHANGE:.*]] = {{.*}}, i32 6200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6300_SIGN_CHANGE:.*]] = {{.*}}, i32 6300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6400_SIGN_CHANGE:.*]] = {{.*}}, i32 6400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6500_SIGN_CHANGE:.*]] = {{.*}}, i32 6500, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6600_SIGN_CHANGE:.*]] = {{.*}}, i32 6600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6800_SIGN_CHANGE:.*]] = {{.*}}, i32 6800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6900_SIGN_CHANGE:.*]] = {{.*}}, i32 6900, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7000_SIGN_CHANGE:.*]] = {{.*}}, i32 7000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7100_SIGN_CHANGE:.*]] = {{.*}}, i32 7100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7200_SIGN_CHANGE:.*]] = {{.*}}, i32 7200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7300_SIGN_CHANGE:.*]] = {{.*}}, i32 7300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7400_SIGN_CHANGE:.*]] = {{.*}}, i32 7400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7600_SIGN_CHANGE:.*]] = {{.*}}, i32 7600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7700_SIGN_CHANGE:.*]] = {{.*}}, i32 7700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7800_SIGN_CHANGE:.*]] = {{.*}}, i32 7800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7900_SIGN_CHANGE:.*]] = {{.*}}, i32 7900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_8000_SIGN_CHANGE:.*]] = {{.*}}, i32 8000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } //----------------------------------------------------------------------------// // Compound add operator. // diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-basics.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes-basics.c index 22d44e6ab9e47..bc912827d29d7 100644 --- a/clang/test/CodeGen/catch-implicit-integer-sign-changes-basics.c +++ b/clang/test/CodeGen/catch-implicit-integer-sign-changes-basics.c @@ -9,14 +9,14 @@ // However, not all of them should result in the check. // So here, we *only* check which should and which should not result in checks. 
-// CHECK-DAG: @[[LINE_900_SIGN_CHANGE:.*]] = {{.*}}, i32 900, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1000_SIGN_CHANGE:.*]] = {{.*}}, i32 1000, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1100_SIGN_CHANGE:.*]] = {{.*}}, i32 1100, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1200_SIGN_CHANGE:.*]] = {{.*}}, i32 1200, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1300_SIGN_CHANGE:.*]] = {{.*}}, i32 1300, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1400_SIGN_CHANGE:.*]] = {{.*}}, i32 1400, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1500_SIGN_CHANGE:.*]] = {{.*}}, i32 1500, i32 10 }, {{.*}}, {{.*}}, i8 3 } -// CHECK-DAG: @[[LINE_1600_SIGN_CHANGE:.*]] = {{.*}}, i32 1600, i32 10 }, {{.*}}, {{.*}}, i8 3 } +// CHECK-DAG: @[[LINE_900_SIGN_CHANGE:.*]] = {{.*}}, i32 900, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1000_SIGN_CHANGE:.*]] = {{.*}}, i32 1000, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1100_SIGN_CHANGE:.*]] = {{.*}}, i32 1100, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1200_SIGN_CHANGE:.*]] = {{.*}}, i32 1200, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1300_SIGN_CHANGE:.*]] = {{.*}}, i32 1300, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1400_SIGN_CHANGE:.*]] = {{.*}}, i32 1400, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1500_SIGN_CHANGE:.*]] = {{.*}}, i32 1500, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1600_SIGN_CHANGE:.*]] = {{.*}}, i32 1600, i32 10 }, {{.*}}, {{.*}}, i8 3, i32 0 } //============================================================================// // Half of the cases do not need the check. // diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c index 9f3f53fa1fd0a..fec90fa4b6c7f 100644 --- a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c +++ b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec-basics.c @@ -2,25 +2,25 @@ // CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } // CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3, i32 0 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3, i32 0 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3, i32 0 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3, i32 0 } // CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 3 } -// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 3 } -// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 3 } -// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 3 
} +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 3, i32 0 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 3, i32 0 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 3, i32 0 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 3, i32 0 } // CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } // CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3 } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } // CHECK-LABEL: @t0( unsigned short t0(unsigned short x) { diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c index 0de45720e9560..00cf75af22865 100644 --- a/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c +++ b/clang/test/CodeGen/catch-implicit-integer-sign-changes-incdec.c @@ -6,15 +6,15 @@ // CHECK-SANITIZE-ANYRECOVER-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } // CHECK-SANITIZE-ANYRECOVER-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 11 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 11 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 11 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-DAG: 
@[[LINE_200:.*]] = {{.*}}, i32 200, i32 11 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 3, i32 0 } // CHECK-SANITIZE-ANYRECOVER-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 11 }, ptr @[[INT]], ptr @[[SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 11 }, ptr @[[INT]], ptr @[[SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, ptr @[[INT]], ptr @[[SHORT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 10 }, ptr @[[INT]], ptr @[[SHORT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 11 }, ptr @[[INT]], ptr @[[SHORT]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 11 }, ptr @[[INT]], ptr @[[SHORT]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, ptr @[[INT]], ptr @[[SHORT]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 10 }, ptr @[[INT]], ptr @[[SHORT]], i8 3, i32 0 } unsigned short t0(unsigned short x) { // CHECK-NOSANITIZE-LABEL: @t0( diff --git a/clang/test/CodeGen/catch-implicit-integer-sign-changes.c b/clang/test/CodeGen/catch-implicit-integer-sign-changes.c index e91aec61a430d..8d5bad73112e0 100644 --- a/clang/test/CodeGen/catch-implicit-integer-sign-changes.c +++ b/clang/test/CodeGen/catch-implicit-integer-sign-changes.c @@ -5,19 +5,19 @@ // CHECK-SANITIZE-ANYRECOVER: @[[UNSIGNED_INT:.*]] = {{.*}} c"'unsigned int'\00" } // CHECK-SANITIZE-ANYRECOVER-NEXT: @[[SIGNED_INT:.*]] = {{.*}} c"'int'\00" } -// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_INT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 10 }, ptr @[[SIGNED_INT]], ptr @[[UNSIGNED_INT]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_INT]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 10 }, ptr @[[SIGNED_INT]], ptr @[[UNSIGNED_INT]], i8 3, i32 0 } // CHECK-SANITIZE-ANYRECOVER-NEXT: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, ptr @[[SIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, ptr @[[SIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } // CHECK-SANITIZE-ANYRECOVER-NEXT: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[SIGNED_CHAR]], ptr @[[UNSIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 10 }, ptr @[[UNSIGNED_CHAR]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 10 }, ptr @[[SIGNED_CHAR]], ptr @[[UNSIGNED_INT]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_800:.*]] = {{.*}}, 
i32 800, i32 10 }, ptr @[[SIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[SIGNED_CHAR]], ptr @[[UNSIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 10 }, ptr @[[UNSIGNED_CHAR]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 10 }, ptr @[[SIGNED_CHAR]], ptr @[[UNSIGNED_INT]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 10 }, ptr @[[SIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } // CHECK-SANITIZE-ANYRECOVER: @[[UINT32:.*]] = {{.*}} c"'uint32_t' (aka 'unsigned int')\00" } // CHECK-SANITIZE-ANYRECOVER: @[[INT32:.*]] = {{.*}} c"'int32_t' (aka 'int')\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 10 }, ptr @[[UINT32]], ptr @[[INT32]], i8 3 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 10 }, ptr @[[UINT32]], ptr @[[INT32]], i8 3, i32 0 } // ========================================================================== // // The expected true-positives. diff --git a/clang/test/CodeGen/catch-implicit-integer-truncations-CompoundAssignOperator.c b/clang/test/CodeGen/catch-implicit-integer-truncations-CompoundAssignOperator.c index 866e25ef127fa..2c81a5c93ee16 100644 --- a/clang/test/CodeGen/catch-implicit-integer-truncations-CompoundAssignOperator.c +++ b/clang/test/CodeGen/catch-implicit-integer-truncations-CompoundAssignOperator.c @@ -12,97 +12,97 @@ // CHECK-SANITIZE-ANYRECOVER: @[[INT:.*]] = {{.*}} c"'int'\00" } // CHECK-SANITIZE-ANYRECOVER: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } // CHECK-SANITIZE-ANYRECOVER: @[[UNSIGNED_INT:.*]] = {{.*}} c"'unsigned int'\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_300_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_300_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } // CHECK-SANITIZE-ANYRECOVER: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 700, 
i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 900, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1100_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 1100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1300, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1500, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1700, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1900_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 1900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2100, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2500, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2700_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 2700, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2900, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: 
@[[LINE_3200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 3500, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4900, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5300, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: 
@[[LINE_5700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5700, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5900_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 5900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6100, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6500, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6700_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 6700, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6900, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 7500, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_8000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 8000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, 
i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 700, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 900, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1100_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 1100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1300, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1500, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1700, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1900_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 1900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2100, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2500, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2700_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 2700, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 
2900, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 3500, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4900, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: 
@[[LINE_5300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5300, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5700, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5900_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 5900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6100, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6500, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6700_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 6700, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6900, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 7500, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7600, i32 10 }, ptr @[[INT]], ptr 
@[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_8000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 8000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } //----------------------------------------------------------------------------// // Compound add operator. // diff --git a/clang/test/CodeGen/catch-implicit-integer-truncations-basics-negatives.c b/clang/test/CodeGen/catch-implicit-integer-truncations-basics-negatives.c index 3acd1c5e5f3bb..c1837f39180d7 100644 --- a/clang/test/CodeGen/catch-implicit-integer-truncations-basics-negatives.c +++ b/clang/test/CodeGen/catch-implicit-integer-truncations-basics-negatives.c @@ -1,9 +1,9 @@ // RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK -// CHECK-DAG: @[[LINE_100_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 100, i32 10 }, {{.*}}, {{.*}}, i8 1 } -// CHECK-DAG: @[[LINE_200_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 200, i32 10 }, {{.*}}, {{.*}}, i8 1 } -// CHECK-DAG: @[[LINE_300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 300, i32 10 }, {{.*}}, {{.*}}, i8 2 } -// CHECK-DAG: @[[LINE_400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 400, i32 10 }, {{.*}}, {{.*}}, i8 2 } +// CHECK-DAG: @[[LINE_100_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 100, i32 10 }, {{.*}}, {{.*}}, i8 1, i32 0 } +// CHECK-DAG: @[[LINE_200_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 200, i32 10 }, {{.*}}, {{.*}}, i8 1, i32 0 } +// CHECK-DAG: @[[LINE_300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 300, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } +// CHECK-DAG: @[[LINE_400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 400, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } //----------------------------------------------------------------------------// // Unsigned case. diff --git a/clang/test/CodeGen/catch-implicit-integer-truncations-basics.c b/clang/test/CodeGen/catch-implicit-integer-truncations-basics.c index eccfc374c05c3..16320b8822d75 100644 --- a/clang/test/CodeGen/catch-implicit-integer-truncations-basics.c +++ b/clang/test/CodeGen/catch-implicit-integer-truncations-basics.c @@ -9,10 +9,10 @@ // However, not all of them should result in the check. // So here, we *only* check which should and which should not result in checks. 
-// CHECK-DAG: @[[LINE_500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, {{.*}}, {{.*}}, i8 1 } -// CHECK-DAG: @[[LINE_1100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1100, i32 10 }, {{.*}}, {{.*}}, i8 2 } -// CHECK-DAG: @[[LINE_1500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1500, i32 10 }, {{.*}}, {{.*}}, i8 2 } -// CHECK-DAG: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, {{.*}}, {{.*}}, i8 2 } +// CHECK-DAG: @[[LINE_500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, {{.*}}, {{.*}}, i8 1, i32 0 } +// CHECK-DAG: @[[LINE_1100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1100, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1500, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } // CHECK-LABEL: @convert_unsigned_int_to_unsigned_int unsigned int convert_unsigned_int_to_unsigned_int(unsigned int x) { diff --git a/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c index 5bf859dd893a2..40a1e789b543c 100644 --- a/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c +++ b/clang/test/CodeGen/catch-implicit-integer-truncations-incdec-basics.c @@ -2,25 +2,25 @@ // CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } // CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } // CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } // CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, ptr 
@[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } // CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } // CHECK-LABEL: @t0( unsigned short t0(unsigned short x) { diff --git a/clang/test/CodeGen/catch-implicit-integer-truncations.c b/clang/test/CodeGen/catch-implicit-integer-truncations.c index 17b64b9bbe397..8f991b14ad778 100644 --- a/clang/test/CodeGen/catch-implicit-integer-truncations.c +++ b/clang/test/CodeGen/catch-implicit-integer-truncations.c @@ -6,16 +6,16 @@ // CHECK-SANITIZE-ANYRECOVER: @[[UNSIGNED_INT:.*]] = {{.*}} c"'unsigned int'\00" } // CHECK-SANITIZE-ANYRECOVER: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_100_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_100_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 1, i32 0 } // CHECK-SANITIZE-ANYRECOVER: @[[SIGNED_INT:.*]] = {{.*}} c"'int'\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 200, i32 10 }, ptr @[[SIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 200, i32 10 }, ptr @[[SIGNED_INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } // CHECK-SANITIZE-ANYRECOVER: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[SIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[SIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } // CHECK-SANITIZE-ANYRECOVER: @[[UINT32:.*]] = {{.*}} c"'uint32_t' (aka 'unsigned int')\00" } // 
CHECK-SANITIZE-ANYRECOVER: @[[UINT8:.*]] = {{.*}} c"'uint8_t' (aka 'unsigned char')\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, ptr @[[UINT32]], ptr @[[UINT8]], i8 1 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, ptr @[[UINT32]], ptr @[[UINT8]], i8 1, i32 0 } // ========================================================================== // // The expected true-positives. These are implicit conversions, and they truncate. diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncation-or-sign-change-CompoundAssignOperator.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncation-or-sign-change-CompoundAssignOperator.c index c131bbba1c586..34ffc8bf56a44 100644 --- a/clang/test/CodeGen/catch-implicit-signed-integer-truncation-or-sign-change-CompoundAssignOperator.c +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncation-or-sign-change-CompoundAssignOperator.c @@ -12,89 +12,89 @@ // CHECK-SANITIZE-ANYRECOVER: @[[INT:.*]] = {{.*}} c"'int'\00" } // CHECK-SANITIZE-ANYRECOVER: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } // CHECK-SANITIZE-ANYRECOVER: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } // CHECK-SANITIZE-ANYRECOVER: @[[UNSIGNED_INT:.*]] = {{.*}} c"'unsigned int'\00" } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_700_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 700, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 900, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: 
@[[LINE_1300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1300, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1500_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 1500, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1700, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2100, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2300_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 2300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2500, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2900, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3100_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 3100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3900_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 3900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4000, i32 10 }, ptr 
@[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4900, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5300, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5700, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6100, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6300_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 6300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6500, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6600, i32 10 }, 
ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6900, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7100_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 7100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } - -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7900_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 7900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4 } -// CHECK-SANITIZE-ANYRECOVER: @[[LINE_8000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 8000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_700_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 700, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 900, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1300, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1500_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 1500, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1700, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_1800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2000, i32 10 }, 
ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2100, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2300_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 2300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2500, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_2900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 2900, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3100_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 3100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 3800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_3900_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 3900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// 
CHECK-SANITIZE-ANYRECOVER: @[[LINE_4600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_4900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 4900, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5100, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5200, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5300, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5500, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5600, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5700, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_5800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 5800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6000, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6100, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6300_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 6300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6400, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6500, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6800, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_6900_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 6900, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7100_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 7100, i32 10 }, ptr 
@[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7200, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } + +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7300_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7600, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7700_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7700, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7800_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 7800, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_7900_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 7900, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4, i32 0 } +// CHECK-SANITIZE-ANYRECOVER: @[[LINE_8000_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 8000, i32 10 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } //----------------------------------------------------------------------------// // Compound add operator. // diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncation-or-sign-change.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncation-or-sign-change.c index b26d4b1924c1c..758fc85f894f0 100644 --- a/clang/test/CodeGen/catch-implicit-signed-integer-truncation-or-sign-change.c +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncation-or-sign-change.c @@ -5,10 +5,10 @@ // CHECK-SANITIZE-ANYRECOVER: @[[UNSIGNED_INT:.*]] = {{.*}} c"'unsigned int'\00" } // CHECK-SANITIZE-ANYRECOVER-NEXT: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_100_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4 } -// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_200_SIGN_CHANGE:.*]] = {{.*}}, i32 200, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_300_SIGN_CHANGE:.*]] = {{.*}}, i32 300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3 } -// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_100_SIGNED_TRUNCATION_OR_SIGN_CHANGE:.*]] = {{.*}}, i32 100, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 4, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_200_SIGN_CHANGE:.*]] = {{.*}}, i32 200, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_300_SIGN_CHANGE:.*]] = {{.*}}, i32 300, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 3, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-NEXT: @[[LINE_400_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[UNSIGNED_INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } //============================================================================// // Both sanitizers are enabled, and not disabled per-function. 
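The common thread in the conversion-test hunks above and below is purely mechanical: every static check-data global gains a trailing `, i32 0`. Reading the shape off the CHECK lines themselves, the record now carries one extra `i32` after the `i8` conversion-kind byte, and that field is 0 in every pre-existing check because none of them involves a bit-field; the bit-field case is exercised by the new ubsan-bitfield-conversion.c test further down. A minimal C mirror of that layout, as a sketch only — the struct and member names (including `BitfieldBits`) are illustrative assumptions, not taken from this patch:

    /* Shape read off the CHECK lines: "{{.*}}, i32 <line>, i32 <col> },
       ptr <from-type>, ptr <to-type>, i8 <kind>, i32 0 }". */
    struct SourceLocation {
      const char *Filename;
      unsigned Line;   /* e.g. i32 5300 */
      unsigned Column; /* e.g. i32 10 */
    };
    struct TypeDescriptor; /* opaque here; @[[INT]] etc. point at these */
    struct ImplicitConversionData {
      struct SourceLocation Loc;
      const struct TypeDescriptor *FromType; /* e.g. ptr @[[INT]] */
      const struct TypeDescriptor *ToType;   /* e.g. ptr @[[SIGNED_CHAR]] */
      unsigned char Kind;    /* i8: 1 = unsigned truncation, 2 = signed
                                truncation, 3 = sign change,
                                4 = truncation-or-sign-change */
      unsigned BitfieldBits; /* i32: the new trailing field; stays 0
                                throughout these non-bit-field tests */
    };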
diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-basics-negatives.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-basics-negatives.c index 496599a650d2d..9466af731478a 100644 --- a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-basics-negatives.c +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-basics-negatives.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsanitize=implicit-signed-integer-truncation -fsanitize-recover=implicit-signed-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK -// CHECK-DAG: @[[LINE_100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 100, i32 10 }, {{.*}}, {{.*}}, i8 2 } -// CHECK-DAG: @[[LINE_200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 200, i32 10 }, {{.*}}, {{.*}}, i8 2 } +// CHECK-DAG: @[[LINE_100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 100, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } +// CHECK-DAG: @[[LINE_200_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 200, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } // CHECK-LABEL: @ignorelist_0_convert_signed_int_to_signed_char __attribute__((no_sanitize("undefined"))) signed char ignorelist_0_convert_signed_int_to_signed_char(signed int x) { diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-basics.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-basics.c index 12cb864b7df60..76d522799a6bb 100644 --- a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-basics.c +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-basics.c @@ -9,9 +9,9 @@ // However, not all of them should result in the check. // So here, we *only* check which should and which should not result in checks. -// CHECK-DAG: @[[LINE_1100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1100, i32 10 }, {{.*}}, {{.*}}, i8 2 } -// CHECK-DAG: @[[LINE_1500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1500, i32 10 }, {{.*}}, {{.*}}, i8 2 } -// CHECK-DAG: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, {{.*}}, {{.*}}, i8 2 } +// CHECK-DAG: @[[LINE_1100_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1100, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1500_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1500, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1600_SIGNED_TRUNCATION:.*]] = {{.*}}, i32 1600, i32 10 }, {{.*}}, {{.*}}, i8 2, i32 0 } // CHECK-LABEL: @convert_unsigned_int_to_unsigned_int unsigned int convert_unsigned_int_to_unsigned_int(unsigned int x) { diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c index d0c016a994b6e..5885d2dba8f90 100644 --- a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec-basics.c @@ -2,25 +2,25 @@ // CHECK-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } // CHECK-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 4 }, ptr 
@[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } // CHECK-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } +// CHECK-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 4 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 3 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } // CHECK-DAG: @[[UNSIGNED_CHAR:.*]] = {{.*}} c"'unsigned char'\00" } -// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_900:.*]] = {{.*}}, i32 900, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1000:.*]] = {{.*}}, i32 1000, i32 4 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1100:.*]] = {{.*}}, i32 1100, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1200:.*]] = {{.*}}, i32 1200, i32 3 }, ptr @[[INT]], ptr @[[UNSIGNED_CHAR]], i8 2, i32 0 } // CHECK-DAG: @[[SIGNED_CHAR:.*]] = {{.*}} c"'signed char'\00" } -// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } -// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2 } +// CHECK-DAG: @[[LINE_1300:.*]] = {{.*}}, i32 1300, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1400:.*]] = {{.*}}, i32 1400, i32 4 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1500:.*]] = {{.*}}, i32 1500, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } +// CHECK-DAG: @[[LINE_1600:.*]] = {{.*}}, i32 1600, i32 3 }, ptr @[[INT]], ptr @[[SIGNED_CHAR]], i8 2, i32 0 } // CHECK-LABEL: @t0( unsigned short t0(unsigned short x) { diff --git a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c index 101cdc83c8dc0..84183d7eb72a0 100644 --- a/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c +++ b/clang/test/CodeGen/catch-implicit-signed-integer-truncations-incdec.c @@ -6,15 +6,15 @@ 
// CHECK-SANITIZE-ANYRECOVER-DAG: @[[INT:.*]] = {{.*}} c"'int'\00" } // CHECK-SANITIZE-ANYRECOVER-DAG: @[[UNSIGNED_SHORT:.*]] = {{.*}} c"'unsigned short'\00" } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 11 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 11 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_100:.*]] = {{.*}}, i32 100, i32 11 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_200:.*]] = {{.*}}, i32 200, i32 11 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_300:.*]] = {{.*}}, i32 300, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_400:.*]] = {{.*}}, i32 400, i32 10 }, ptr @[[INT]], ptr @[[UNSIGNED_SHORT]], i8 2, i32 0 } // CHECK-SANITIZE-ANYRECOVER-DAG: @[[SHORT:.*]] = {{.*}} c"'short'\00" } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 11 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 11 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } -// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 10 }, ptr @[[INT]], ptr @[[SHORT]], i8 2 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_500:.*]] = {{.*}}, i32 500, i32 11 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_600:.*]] = {{.*}}, i32 600, i32 11 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_700:.*]] = {{.*}}, i32 700, i32 10 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } +// CHECK-SANITIZE-ANYRECOVER-DAG: @[[LINE_800:.*]] = {{.*}}, i32 800, i32 10 }, ptr @[[INT]], ptr @[[SHORT]], i8 2, i32 0 } unsigned short t0(unsigned short x) { // CHECK-NOSANITIZE-LABEL: @t0( diff --git a/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-basics-negatives.c b/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-basics-negatives.c index 82f6afd36f2f4..297962ec2e536 100644 --- a/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-basics-negatives.c +++ b/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-basics-negatives.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation -fsanitize-recover=implicit-unsigned-integer-truncation -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK -// CHECK-DAG: @[[LINE_100_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 100, i32 10 }, {{.*}}, {{.*}}, i8 1 } -// CHECK-DAG: @[[LINE_200_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 200, i32 10 }, {{.*}}, {{.*}}, i8 1 } +// CHECK-DAG: @[[LINE_100_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 100, i32 10 }, {{.*}}, {{.*}}, i8 1, i32 0 } +// CHECK-DAG: @[[LINE_200_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 200, i32 10 }, {{.*}}, {{.*}}, i8 1, i32 0 } // CHECK-LABEL: @ignorelist_0_convert_unsigned_int_to_unsigned_char __attribute__((no_sanitize("undefined"))) unsigned 
char ignorelist_0_convert_unsigned_int_to_unsigned_char(unsigned int x) { diff --git a/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-basics.c b/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-basics.c index 7eca6e1957c2e..156ab208107d2 100644 --- a/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-basics.c +++ b/clang/test/CodeGen/catch-implicit-unsigned-integer-truncations-basics.c @@ -9,7 +9,7 @@ // However, not all of them should result in the check. // So here, we *only* check which should and which should not result in checks. -// CHECK-DAG: @[[LINE_500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, {{.*}}, {{.*}}, i8 1 } +// CHECK-DAG: @[[LINE_500_UNSIGNED_TRUNCATION:.*]] = {{.*}}, i32 500, i32 10 }, {{.*}}, {{.*}}, i8 1, i32 0 } // CHECK-LABEL: @convert_unsigned_int_to_unsigned_int unsigned int convert_unsigned_int_to_unsigned_int(unsigned int x) { diff --git a/clang/test/CodeGen/remote-traps.c b/clang/test/CodeGen/remote-traps.c deleted file mode 100644 index 6751afb96d25f..0000000000000 --- a/clang/test/CodeGen/remote-traps.c +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow %s -o - | FileCheck %s -// RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow -mllvm -clang-remove-traps -mllvm -remove-traps-random-rate=1 %s -o - | FileCheck %s --implicit-check-not="call void @llvm.ubsantrap" --check-prefixes=REMOVE - -int test(int x) { - return x + 123; -} - -// CHECK-LABEL: define {{.*}}i32 @test( -// CHECK: call { i32, i1 } @llvm.sadd.with.overflow.i32( -// CHECK: trap: -// CHECK-NEXT: call void @llvm.ubsantrap(i8 0) -// CHECK-NEXT: unreachable - -// REMOVE-LABEL: define {{.*}}i32 @test( -// REMOVE: call { i32, i1 } @llvm.sadd.with.overflow.i32( diff --git a/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp b/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp new file mode 100644 index 0000000000000..e8bb46982537b --- /dev/null +++ b/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -triple aarch64_be-apple-darwin -emit-llvm -o - -O1 %s | \ +// RUN: FileCheck -check-prefixes=CHECK %s +// RUN: %clang_cc1 -triple aarch64-apple-darwin -emit-llvm -o - -O1 %s | \ +// RUN: FileCheck -check-prefixes=CHECK %s +// +// Check that TBAA metadata for structs containing bitfields is +// consistent between big and little endian layouts. 
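+//
+// Concretely, the copy below is expected to lower to a single 16-byte memcpy
+// whose !tbaa.struct metadata describes the same three regions under either
+// layout: the four bit-fields packed into bytes [0,4) (typed as "omnipotent
+// char"), the plain int f5 in bytes [4,8), and the double f6 in bytes
+// [8,16). Only the placement of the bit-fields inside the first 32-bit unit
+// differs between the big- and little-endian targets.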
+ +struct NamedBitfields { + int f1 : 8; + int f2 : 8; + unsigned f3 : 1; + unsigned f4 : 15; + int f5; + double f6; +}; + +// CHECK-LABEL: _Z4copyP14NamedBitfieldsS0_ +// CHECK-SAME: ptr nocapture noundef writeonly [[A1:%.*]], ptr nocapture noundef readonly [[A2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) [[A1]], ptr noundef nonnull align 8 dereferenceable(16) [[A2]], i64 16, i1 false), !tbaa.struct [[TBAA_STRUCT2:![0-9]+]] +// CHECK-NEXT: ret void +// +void copy(NamedBitfields *a1, NamedBitfields *a2) { + *a1 = *a2; +} + +// CHECK: [[TBAA_STRUCT2]] = !{i64 0, i64 4, [[META3:![0-9]+]], i64 4, i64 4, [[META6:![0-9]+]], i64 8, i64 8, [[META8:![0-9]+]]} +// CHECK: [[META3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK: [[META6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"int", [[META4]], i64 0} +// CHECK: [[META8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// CHECK: [[META9]] = !{!"double", [[META4]], i64 0} diff --git a/clang/test/CodeGen/ubsan-bitfield-conversion.c b/clang/test/CodeGen/ubsan-bitfield-conversion.c new file mode 100644 index 0000000000000..61d7634f9a336 --- /dev/null +++ b/clang/test/CodeGen/ubsan-bitfield-conversion.c @@ -0,0 +1,61 @@ +// RUN: %clang -fsanitize=implicit-bitfield-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION +// RUN: %clang -fsanitize=implicit-integer-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK +// RUN: %clang -fsanitize=implicit-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION + +typedef struct _xx { + int x1:3; + char x2:2; +} xx, *pxx; + +xx vxx; + +// CHECK-LABEL: define{{.*}} void @foo1 +void foo1(int x) { + vxx.x1 = x; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @foo2 +void foo2(int x) { + vxx.x2 = x; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @foo3 +void foo3() { + vxx.x1++; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @foo4 +void foo4(int x) { + vxx.x1 += x; + // CHECK: store i8 
%{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} \ No newline at end of file diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu index 4f5cf65ecd0bd..ec21f018607ff 100644 --- a/clang/test/CodeGenCUDA/offloading-entries.cu +++ b/clang/test/CodeGenCUDA/offloading-entries.cu @@ -1,4 +1,4 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".omp_offloading.entry.*" +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \ // RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \ // RUN: --check-prefix=CUDA %s @@ -15,49 +15,49 @@ #include "Inputs/cuda.h" //. -// CUDA: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" -// CUDA: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 -// CUDA: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00" -// CUDA: @.omp_offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 -// CUDA: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00" -// CUDA: @.omp_offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 -// CUDA: @.omp_offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00" -// CUDA: @.omp_offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.omp_offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1 -// CUDA: @.omp_offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00" -// CUDA: @.omp_offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.omp_offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1 +// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" +// CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 +// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00" +// CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 +// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00" +// CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, 
i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 +// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00" +// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1 +// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00" +// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1 //. -// HIP: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" -// HIP: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1 -// HIP: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00" -// HIP: @.omp_offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1 -// HIP: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00" -// HIP: @.omp_offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1 -// HIP: @.omp_offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00" -// HIP: @.omp_offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.omp_offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1 -// HIP: @.omp_offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00" -// HIP: @.omp_offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.omp_offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1 +// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" +// HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1 +// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00" +// HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1 +// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00" +// HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1 +// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00" +// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1 +// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00" +// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1 //. 
-// CUDA-COFF: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" -// CUDA-COFF: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 -// CUDA-COFF: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00" -// CUDA-COFF: @.omp_offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 -// CUDA-COFF: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00" -// CUDA-COFF: @.omp_offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 -// CUDA-COFF: @.omp_offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00" -// CUDA-COFF: @.omp_offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.omp_offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1 -// CUDA-COFF: @.omp_offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00" -// CUDA-COFF: @.omp_offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.omp_offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1 +// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" +// CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 +// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00" +// CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 +// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00" +// CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 +// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00" +// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1 +// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00" +// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1 //. 
-// HIP-COFF: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" -// HIP-COFF: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 -// HIP-COFF: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00" -// HIP-COFF: @.omp_offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 -// HIP-COFF: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00" -// HIP-COFF: @.omp_offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 -// HIP-COFF: @.omp_offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00" -// HIP-COFF: @.omp_offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.omp_offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1 -// HIP-COFF: @.omp_offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00" -// HIP-COFF: @.omp_offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.omp_offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1 +// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" +// HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 +// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00" +// HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 +// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00" +// HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 +// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00" +// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1 +// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00" +// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1 //. // CUDA-LABEL: @_Z18__device_stub__foov( // CUDA-NEXT: entry: diff --git a/clang/test/CodeGenCXX/arm64ec-vectorcall.cpp b/clang/test/CodeGenCXX/arm64ec-vectorcall.cpp new file mode 100644 index 0000000000000..73d2d63835917 --- /dev/null +++ b/clang/test/CodeGenCXX/arm64ec-vectorcall.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple arm64ec-windows-msvc -emit-llvm -o - %s -verify + +// ARM64EC doesn't support generating __vectorcall calls... but __vectorcall +// function types need to be distinct from __cdecl function types to support +// compiling the STL. 
Make sure we only diagnose constructs that actually
+// require generating code.
+void __vectorcall f1();
+void f2(void __vectorcall p()) {}
+void f2(void p()) {}
+void __vectorcall (*f3)();
+void __vectorcall f4(); // expected-error {{__vectorcall}}
+void __vectorcall f5() { // expected-error {{__vectorcall}}
+  f4(); // expected-error{{__vectorcall}}
+}
diff --git a/clang/test/CodeGenCXX/control-flow-in-stmt-expr.cpp b/clang/test/CodeGenCXX/control-flow-in-stmt-expr.cpp
new file mode 100644
index 0000000000000..ffde1bd6a724d
--- /dev/null
+++ b/clang/test/CodeGenCXX/control-flow-in-stmt-expr.cpp
@@ -0,0 +1,364 @@
+// RUN: %clang_cc1 --std=c++20 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+struct Printy {
+  Printy(const char *name) : name(name) {}
+  ~Printy() {}
+  const char *name;
+};
+
+int foo() { return 2; }
+
+struct Printies {
+  Printy a;
+  Printy b;
+  Printy c;
+};
+
+void ParenInit() {
+  // CHECK-LABEL: define dso_local void @_Z9ParenInitv()
+  // CHECK: [[CLEANUP_DEST:%.+]] = alloca i32, align 4
+  Printies ps(Printy("a"),
+              // CHECK: call void @_ZN6PrintyC1EPKc
+              ({
+                if (foo()) return;
+                // CHECK: if.then:
+                // CHECK-NEXT: store i32 1, ptr [[CLEANUP_DEST]], align 4
+                // CHECK-NEXT: br label %cleanup
+                Printy("b");
+                // CHECK: if.end:
+                // CHECK-NEXT: call void @_ZN6PrintyC1EPKc
+              }),
+              ({
+                if (foo()) return;
+                // CHECK: if.then{{.*}}:
+                // CHECK-NEXT: store i32 1, ptr [[CLEANUP_DEST]], align 4
+                // CHECK-NEXT: call void @_ZN6PrintyD1Ev
+                // CHECK-NEXT: br label %cleanup
+                Printy("c");
+                // CHECK: if.end{{.*}}:
+                // CHECK-NEXT: call void @_ZN6PrintyC1EPKc
+                // CHECK-NEXT: call void @_ZN8PrintiesD1Ev
+                // CHECK-NEXT: br label %return
+              }));
+  // CHECK: cleanup:
+  // CHECK-NEXT: call void @_ZN6PrintyD1Ev
+  // CHECK-NEXT: br label %return
+}
+
+void break_in_stmt_expr() {
+  // Verify that the "break" in "if.then" calls the dtor before jumping to "for.end".
+
+  // CHECK-LABEL: define dso_local void @_Z18break_in_stmt_exprv()
+  Printies p{Printy("a"),
+             // CHECK: call void @_ZN6PrintyC1EPKc
+             ({
+               for (;;) {
+                 Printies ps{
+                     Printy("b"),
+                     // CHECK: for.cond:
+                     // CHECK: call void @_ZN6PrintyC1EPKc
+                     ({
+                       if (foo()) {
+                         break;
+                         // CHECK: if.then:
+                         // CHECK-NEXT: call void @_ZN6PrintyD1Ev
+                         // CHECK-NEXT: br label %for.end
+                       }
+                       Printy("c");
+                       // CHECK: if.end:
+                       // CHECK-NEXT: call void @_ZN6PrintyC1EPKc
+                     }),
+                     Printy("d")};
+                 // CHECK: call void @_ZN6PrintyC1EPKc
+                 // CHECK-NEXT: call void @_ZN8PrintiesD1Ev
+                 // CHECK-NEXT: br label %for.cond
+               }
+               Printy("e");
+               // CHECK: for.end:
+               // CHECK-NEXT: call void @_ZN6PrintyC1EPKc
+             }),
+             Printy("f")};
+  // CHECK: call void @_ZN6PrintyC1EPKc
+  // CHECK-NEXT: call void @_ZN8PrintiesD1Ev
+}
+
+void goto_in_stmt_expr() {
+  // Verify that:
+  // - branch fixups for deactivated normal cleanups are generated correctly.
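+  // A branch fixup here works roughly as follows (a sketch in C++-ish
+  // pseudocode; names mirror the IR labels checked below, this is not the
+  // emitted code itself):
+  //
+  //   int cleanup_dest = 2;        // index stored to %cleanup.dest.slot
+  //   b.~Printy();                 // pending cleanup for the partial Printies
+  //   switch (cleanup_dest) {
+  //   case 2: goto in_label;       // fixup resolved to its goto target
+  //   default: goto next_cleanup;  // unresolved: fall through to outer cleanup
+  //   }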
+ + // CHECK-LABEL: define dso_local void @_Z17goto_in_stmt_exprv() + // CHECK: [[CLEANUP_DEST_SLOT:%cleanup.dest.slot.*]] = alloca i32, align 4 + { + Printies p1{Printy("a"), // CHECK: call void @_ZN6PrintyC1EPKc + ({ + { + Printies p2{Printy("b"), + // CHECK: call void @_ZN6PrintyC1EPKc + ({ + if (foo() == 1) { + goto in; + // CHECK: if.then: + // CHECK-NEXT: store i32 2, ptr [[CLEANUP_DEST_SLOT]], align 4 + // CHECK-NEXT: br label %[[CLEANUP1:.+]] + } + if (foo() == 2) { + goto out; + // CHECK: if.then{{.*}}: + // CHECK-NEXT: store i32 3, ptr [[CLEANUP_DEST_SLOT]], align 4 + // CHECK-NEXT: br label %[[CLEANUP1]] + } + Printy("c"); + // CHECK: if.end{{.*}}: + // CHECK-NEXT: call void @_ZN6PrintyC1EPKc + }), + Printy("d")}; + // CHECK: call void @_ZN6PrintyC1EPKc + // CHECK-NEXT: call void @_ZN8PrintiesD1Ev + // CHECK-NEXT: br label %in + + } + in: + Printy("e"); + // CHECK: in: ; preds = %if.end{{.*}}, %[[CLEANUP1]] + // CHECK-NEXT: call void @_ZN6PrintyC1EPKc + }), + Printy("f")}; + // CHECK: call void @_ZN6PrintyC1EPKc + // CHECK-NEXT: call void @_ZN8PrintiesD1Ev + // CHECK-NEXT: br label %out + } +out: + return; + // CHECK: out: + // CHECK-NEXT: ret void + + // CHECK: [[CLEANUP1]]: ; preds = %if.then{{.*}}, %if.then + // CHECK-NEXT: call void @_ZN6PrintyD1Ev + // CHECK-NEXT: %cleanup.dest = load i32, ptr [[CLEANUP_DEST_SLOT]], align 4 + // CHECK-NEXT: switch i32 %cleanup.dest, label %[[CLEANUP2:.+]] [ + // CHECK-NEXT: i32 2, label %in + // CHECK-NEXT: ] + + // CHECK: [[CLEANUP2]]: ; preds = %[[CLEANUP1]] + // CHECK-NEXT: call void @_ZN6PrintyD1Ev + // CHECK-NEXT: %cleanup.dest{{.*}} = load i32, ptr [[CLEANUP_DEST_SLOT]], align 4 + // CHECK-NEXT: switch i32 %cleanup.dest{{.*}}, label %unreachable [ + // CHECK-NEXT: i32 3, label %out + // CHECK-NEXT: ] +} + +void ArrayInit() { + // Printy arr[4] = {ctorA, ctorB, stmt-exprC, stmt-exprD}; + // Verify that: + // - We do the necessary stores for array cleanups (endOfInit and last constructed element). + // - We update the array init element correctly for ctorA, ctorB and stmt-exprC. + // - stmt-exprC and stmt-exprD share the array body dtor code (see %cleanup). 
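+  // The shared %cleanup block only destroys elements that were actually
+  // constructed, walking backwards from the pointer saved in
+  // %arrayinit.endOfInit. A C++ sketch of that loop (names follow the IR):
+  //
+  //   Printy *end = *arrayinit_endOfInit;  // one past the last completed ctor
+  //   while (end != arrayinit_begin)
+  //     (--end)->~Printy();                // destroy in reverse order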
+ + // CHECK-LABEL: define dso_local void @_Z9ArrayInitv() + // CHECK: %arrayinit.endOfInit = alloca ptr, align 8 + // CHECK: %cleanup.dest.slot = alloca i32, align 4 + // CHECK: %arrayinit.begin = getelementptr inbounds [4 x %struct.Printy], ptr %arr, i64 0, i64 0 + // CHECK: store ptr %arrayinit.begin, ptr %arrayinit.endOfInit, align 8 + Printy arr[4] = { + Printy("a"), + // CHECK: call void @_ZN6PrintyC1EPKc(ptr noundef nonnull align 8 dereferenceable(8) %arrayinit.begin, ptr noundef @.str) + // CHECK: [[ARRAYINIT_ELEMENT1:%.+]] = getelementptr inbounds %struct.Printy, ptr %arrayinit.begin, i64 1 + // CHECK: store ptr [[ARRAYINIT_ELEMENT1]], ptr %arrayinit.endOfInit, align 8 + Printy("b"), + // CHECK: call void @_ZN6PrintyC1EPKc(ptr noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT1]], ptr noundef @.str.1) + // CHECK: [[ARRAYINIT_ELEMENT2:%.+]] = getelementptr inbounds %struct.Printy, ptr [[ARRAYINIT_ELEMENT1]], i64 1 + // CHECK: store ptr [[ARRAYINIT_ELEMENT2]], ptr %arrayinit.endOfInit, align 8 + ({ + // CHECK: br i1 {{.*}}, label %if.then, label %if.end + if (foo()) { + return; + // CHECK: if.then: + // CHECK-NEXT: store i32 1, ptr %cleanup.dest.slot, align 4 + // CHECK-NEXT: br label %cleanup + } + // CHECK: if.end: + Printy("c"); + // CHECK-NEXT: call void @_ZN6PrintyC1EPKc + // CHECK-NEXT: %arrayinit.element2 = getelementptr inbounds %struct.Printy, ptr %arrayinit.element1, i64 1 + // CHECK-NEXT: store ptr %arrayinit.element2, ptr %arrayinit.endOfInit, align 8 + }), + ({ + // CHECK: br i1 {{%.+}} label %[[IF_THEN2:.+]], label %[[IF_END2:.+]] + if (foo()) { + return; + // CHECK: [[IF_THEN2]]: + // CHECK-NEXT: store i32 1, ptr %cleanup.dest.slot, align 4 + // CHECK-NEXT: br label %cleanup + } + // CHECK: [[IF_END2]]: + Printy("d"); + // CHECK-NEXT: call void @_ZN6PrintyC1EPKc + // CHECK-NEXT: %array.begin = getelementptr inbounds [4 x %struct.Printy], ptr %arr, i32 0, i32 0 + // CHECK-NEXT: %0 = getelementptr inbounds %struct.Printy, ptr %array.begin, i64 4 + // CHECK-NEXT: br label %[[ARRAY_DESTROY_BODY1:.+]] + }), + }; + + // CHECK: [[ARRAY_DESTROY_BODY1]]: + // CHECK-NEXT: %arraydestroy.elementPast{{.*}} = phi ptr [ %0, %[[IF_END2]] ], [ %arraydestroy.element{{.*}}, %[[ARRAY_DESTROY_BODY1]] ] + // CHECK-NEXT: %arraydestroy.element{{.*}} = getelementptr inbounds %struct.Printy, ptr %arraydestroy.elementPast{{.*}}, i64 -1 + // CHECK-NEXT: call void @_ZN6PrintyD1Ev + // CHECK-NEXT: %arraydestroy.done{{.*}} = icmp eq ptr %arraydestroy.element{{.*}}, %array.begin + // CHECK-NEXT: br i1 %arraydestroy.done{{.*}}, label %[[ARRAY_DESTROY_DONE1:.+]], label %[[ARRAY_DESTROY_BODY1]] + + // CHECK: [[ARRAY_DESTROY_DONE1]]: + // CHECK-NEXT: ret void + + // CHECK: cleanup: + // CHECK-NEXT: %1 = load ptr, ptr %arrayinit.endOfInit, align 8 + // CHECK-NEXT: %arraydestroy.isempty = icmp eq ptr %arrayinit.begin, %1 + // CHECK-NEXT: br i1 %arraydestroy.isempty, label %[[ARRAY_DESTROY_DONE2:.+]], label %[[ARRAY_DESTROY_BODY2:.+]] + + // CHECK: [[ARRAY_DESTROY_BODY2]]: + // CHECK-NEXT: %arraydestroy.elementPast = phi ptr [ %1, %cleanup ], [ %arraydestroy.element, %[[ARRAY_DESTROY_BODY2]] ] + // CHECK-NEXT: %arraydestroy.element = getelementptr inbounds %struct.Printy, ptr %arraydestroy.elementPast, i64 -1 + // CHECK-NEXT: call void @_ZN6PrintyD1Ev(ptr noundef nonnull align 8 dereferenceable(8) %arraydestroy.element) + // CHECK-NEXT: %arraydestroy.done = icmp eq ptr %arraydestroy.element, %arrayinit.begin + // CHECK-NEXT: br i1 %arraydestroy.done, label %[[ARRAY_DESTROY_DONE2]], label 
%[[ARRAY_DESTROY_BODY2]] + + // CHECK: [[ARRAY_DESTROY_DONE2]]: + // CHECK-NEXT: br label %[[ARRAY_DESTROY_DONE1]] +} + +void ArraySubobjects() { + struct S { + Printy arr1[2]; + Printy arr2[2]; + Printy p; + }; + // CHECK-LABEL: define dso_local void @_Z15ArraySubobjectsv() + // CHECK: %arrayinit.endOfInit = alloca ptr, align 8 + S s{{Printy("a"), Printy("b")}, + // CHECK: call void @_ZN6PrintyC1EPKc + // CHECK: call void @_ZN6PrintyC1EPKc + {Printy("a"), + // CHECK: [[ARRAYINIT_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.Printy] + // CHECK: store ptr [[ARRAYINIT_BEGIN]], ptr %arrayinit.endOfInit, align 8 + // CHECK: call void @_ZN6PrintyC1EPKc + // CHECK: [[ARRAYINIT_ELEMENT:%.+]] = getelementptr inbounds %struct.Printy + // CHECK: store ptr [[ARRAYINIT_ELEMENT]], ptr %arrayinit.endOfInit, align 8 + ({ + if (foo()) { + return; + // CHECK: if.then: + // CHECK-NEXT: [[V0:%.+]] = load ptr, ptr %arrayinit.endOfInit, align 8 + // CHECK-NEXT: %arraydestroy.isempty = icmp eq ptr [[ARRAYINIT_BEGIN]], [[V0]] + // CHECK-NEXT: br i1 %arraydestroy.isempty, label %[[ARRAY_DESTROY_DONE:.+]], label %[[ARRAY_DESTROY_BODY:.+]] + } + Printy("b"); + }) + }, + Printy("c") + // CHECK: if.end: + // CHECK-NEXT: call void @_ZN6PrintyC1EPKc + // CHECK: call void @_ZN6PrintyC1EPKc + // CHECK-NEXT: call void @_ZZ15ArraySubobjectsvEN1SD1Ev + // CHECK-NEXT: br label %return + }; + // CHECK: return: + // CHECK-NEXT: ret void + + // CHECK: [[ARRAY_DESTROY_BODY]]: + // CHECK-NEXT: %arraydestroy.elementPast = phi ptr [ %0, %if.then ], [ %arraydestroy.element, %[[ARRAY_DESTROY_BODY]] ] + // CHECK-NEXT: %arraydestroy.element = getelementptr inbounds %struct.Printy, ptr %arraydestroy.elementPast, i64 -1 + // CHECK-NEXT: call void @_ZN6PrintyD1Ev(ptr noundef nonnull align 8 dereferenceable(8) %arraydestroy.element) + // CHECK-NEXT: %arraydestroy.done = icmp eq ptr %arraydestroy.element, [[ARRAYINIT_BEGIN]] + // CHECK-NEXT: br i1 %arraydestroy.done, label %[[ARRAY_DESTROY_DONE]], label %[[ARRAY_DESTROY_BODY]] + + // CHECK: [[ARRAY_DESTROY_DONE]] + // CHECK-NEXT: [[ARRAY_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.Printy], ptr %arr1, i32 0, i32 0 + // CHECK-NEXT: [[V1:%.+]] = getelementptr inbounds %struct.Printy, ptr [[ARRAY_BEGIN]], i64 2 + // CHECK-NEXT: br label %[[ARRAY_DESTROY_BODY2:.+]] + + // CHECK: [[ARRAY_DESTROY_BODY2]]: + // CHECK-NEXT: %arraydestroy.elementPast5 = phi ptr [ %1, %[[ARRAY_DESTROY_DONE]] ], [ %arraydestroy.element6, %[[ARRAY_DESTROY_BODY2]] ] + // CHECK-NEXT: %arraydestroy.element6 = getelementptr inbounds %struct.Printy, ptr %arraydestroy.elementPast5, i64 -1 + // CHECK-NEXT: call void @_ZN6PrintyD1Ev(ptr noundef nonnull align 8 dereferenceable(8) %arraydestroy.element6) + // CHECK-NEXT: %arraydestroy.done7 = icmp eq ptr %arraydestroy.element6, [[ARRAY_BEGIN]] + // CHECK-NEXT: br i1 %arraydestroy.done7, label %[[ARRAY_DESTROY_DONE2:.+]], label %[[ARRAY_DESTROY_BODY2]] + + + // CHECK: [[ARRAY_DESTROY_DONE2]]: + // CHECK-NEXT: br label %return +} + +void LambdaInit() { + // CHECK-LABEL: define dso_local void @_Z10LambdaInitv() + auto S = [a = Printy("a"), b = ({ + if (foo()) { + return; + // CHECK: if.then: + // CHECK-NEXT: call void @_ZN6PrintyD1Ev + // CHECK-NEXT: br label %return + } + Printy("b"); + })]() { return a; }; +} + +void LifetimeExtended() { + // CHECK-LABEL: define dso_local void @_Z16LifetimeExtendedv + struct PrintyRefBind { + const Printy &a; + const Printy &b; + }; + PrintyRefBind ps = {Printy("a"), ({ + if (foo()) { + return; + // CHECK: if.then: + // CHECK-NEXT: call 
void @_ZN6PrintyD1Ev + // CHECK-NEXT: br label %return + } + Printy("b"); + })}; +} + +void NewArrayInit() { + // CHECK-LABEL: define dso_local void @_Z12NewArrayInitv() + // CHECK: %array.init.end = alloca ptr, align 8 + // CHECK: store ptr %0, ptr %array.init.end, align 8 + Printy *array = new Printy[3]{ + "a", + // CHECK: call void @_ZN6PrintyC1EPKc + // CHECK: store ptr %array.exp.next, ptr %array.init.end, align 8 + "b", + // CHECK: call void @_ZN6PrintyC1EPKc + // CHECK: store ptr %array.exp.next1, ptr %array.init.end, align 8 + ({ + if (foo()) { + return; + // CHECK: if.then: + // CHECK: br i1 %arraydestroy.isempty, label %arraydestroy.done{{.*}}, label %arraydestroy.body + } + "b"; + // CHECK: if.end: + // CHECK: call void @_ZN6PrintyC1EPKc + })}; + // CHECK: arraydestroy.body: + // CHECK-NEXT: %arraydestroy.elementPast = phi ptr [ %{{.*}}, %if.then ], [ %arraydestroy.element, %arraydestroy.body ] + // CHECK-NEXT: %arraydestroy.element = getelementptr inbounds %struct.Printy, ptr %arraydestroy.elementPast, i64 -1 + // CHECK-NEXT: call void @_ZN6PrintyD1Ev(ptr noundef nonnull align 8 dereferenceable(8) %arraydestroy.element) + // CHECK-NEXT: %arraydestroy.done = icmp eq ptr %arraydestroy.element, %0 + // CHECK-NEXT: br i1 %arraydestroy.done, label %arraydestroy.done{{.*}}, label %arraydestroy.body + + // CHECK: arraydestroy.done{{.*}}: ; preds = %arraydestroy.body, %if.then + // CHECK-NEXT: br label %return +} + +void ArrayInitWithContinue() { + // CHECK-LABEL: @_Z21ArrayInitWithContinuev + // Verify that we start to emit the array destructor. + // CHECK: %arrayinit.endOfInit = alloca ptr, align 8 + for (int i = 0; i < 1; ++i) { + Printy arr[2] = {"a", ({ + if (foo()) { + continue; + } + "b"; + })}; + } +} diff --git a/clang/test/CodeGenCXX/cxx2b-deducing-this.cpp b/clang/test/CodeGenCXX/cxx2b-deducing-this.cpp index de8c124c050eb..b755e80db35a1 100644 --- a/clang/test/CodeGenCXX/cxx2b-deducing-this.cpp +++ b/clang/test/CodeGenCXX/cxx2b-deducing-this.cpp @@ -109,3 +109,76 @@ void test_temporary() { //CHECK: %ref.tmp = alloca %struct.MaterializedTemporary, align 1 //CHECK: call void @_ZN21MaterializedTemporaryC1Ev(ptr noundef nonnull align 1 dereferenceable(1) %ref.tmp){{.*}} //CHECK invoke void @_ZNH21MaterializedTemporary3fooEOS_(ptr noundef nonnull align 1 dereferenceable(1) %ref.tmp){{.*}} + +namespace GH86399 { +volatile int a = 0; +struct function { + function& operator=(function const&) { + a = 1; + return *this; + } +}; + +void f() { + function list; + + //CHECK-LABEL: define internal void @"_ZZN7GH863991f{{.*}}"(ptr %{{.*}}) + //CHECK: call {{.*}} @_ZN7GH863998functionaSERKS0_ + //CHECK-NEXT: ret void + [&list](this auto self) { + list = function{}; + }(); +} +} + +namespace GH84163 { +// Just check that this doesn't crash (we were previously not instantiating +// everything that needs instantiating in here). +template struct S {}; + +void a() { + int x; + const auto l = [&x](this auto&) { S q; }; + l(); +} +} + +namespace GH84425 { +// As above. +void do_thing(int x) { + auto second = [&](this auto const& self, int b) -> int { + if (x) return x; + else return self(x); + }; + + second(1); +} + +void do_thing2(int x) { + auto second = [&](this auto const& self) { + if (true) return x; + else return x; + }; + + second(); +} +} + +namespace GH79754 { +// As above. 
+void f() { + int x; + [&x](this auto&&) {return x;}(); +} +} + +namespace GH70604 { +auto dothing(int num) +{ + auto fun = [&num](this auto&& self) -> void { + auto copy = num; + }; + + fun(); +} +} diff --git a/clang/test/CodeGenCXX/skip-vtable-pointer-initialization.cpp b/clang/test/CodeGenCXX/skip-vtable-pointer-initialization.cpp index c001ce9b755d1..714ce3e67be7c 100644 --- a/clang/test/CodeGenCXX/skip-vtable-pointer-initialization.cpp +++ b/clang/test/CodeGenCXX/skip-vtable-pointer-initialization.cpp @@ -198,3 +198,65 @@ struct C : virtual B { C::~C() {} } + +namespace Test10 { + +// Check that we don't initialize the vtable pointer in A::~A(), since the class has an anonymous union which +// never has its destructor invoked. +struct A { + virtual void f(); + ~A(); + + union + { + int i; + unsigned u; + }; +}; + +// CHECK-LABEL: define{{.*}} void @_ZN6Test101AD2Ev +// CHECK-NOT: store ptr getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTVN6Test101AE, i32 0, inrange i32 0, i32 2), ptr +A::~A() { +} + +} + +namespace Test11 { + +// Check that we don't initialize the vtable pointer in A::~A(), even if the base class has a non trivial destructor. +struct Field { + ~Field(); +}; + +struct A : public Field { + virtual void f(); + ~A(); +}; + +// CHECK-LABEL: define{{.*}} void @_ZN6Test111AD2Ev +// CHECK-NOT: store ptr getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTVN6Test111AE, i32 0, inrange i32 0, i32 2), ptr +A::~A() { +} + +} + +namespace Test12 { + +// Check that we don't initialize the vtable pointer in A::~A(), since the class has an anonymous struct with trivial fields. +struct A { + virtual void f(); + ~A(); + + struct + { + int i; + unsigned u; + }; +}; + +// CHECK-LABEL: define{{.*}} void @_ZN6Test121AD2Ev +// CHECK-NOT: store ptr getelementptr inbounds ({ [3 x ptr] }, ptr @_ZTVN6Test121AE, i32 0, inrange i32 0, i32 2), ptr +A::~A() { +} + +} diff --git a/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp b/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp new file mode 100644 index 0000000000000..c0248871ddc26 --- /dev/null +++ b/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp @@ -0,0 +1,94 @@ +// RUN: %clang -x c++ -fsanitize=implicit-bitfield-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION +// RUN: %clang -x c++ -fsanitize=implicit-integer-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION + +struct S { + int a:3; + char b:2; +}; + +class C : public S { + public: + short c:3; +}; + +S s; +C c; + +// CHECK-LABEL: define{{.*}} void @{{.*foo1.*}} +void foo1(int x) { + s.a = x; + // CHECK: store i8 %{{.*}} + // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize + c.a = x; + // CHECK: store i8 %{{.*}} + // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // 
CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion
+  // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize
+  // CHECK-BITFIELD-CONVERSION: [[CONT]]:
+  // CHECK-NEXT: ret void
+}
+
+// CHECK-LABEL: define{{.*}} void @{{.*foo2.*}}
+void foo2(int x) {
+  s.b = x;
+  // CHECK: store i8 %{{.*}}
+  // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6
+  // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6
+  // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion
+  // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize
+  c.b = x;
+  // CHECK: store i8 %{{.*}}
+  // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6
+  // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6
+  // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion
+  // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize
+  // CHECK-BITFIELD-CONVERSION: [[CONT]]:
+  // CHECK-NEXT: ret void
+}
+
+// CHECK-LABEL: define{{.*}} void @{{.*foo3.*}}
+void foo3() {
+  s.a++;
+  // CHECK: store i8 %{{.*}}
+  // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5
+  // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5
+  // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32
+  // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion
+  // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize
+  c.a++;
+  // CHECK: store i8 %{{.*}}
+  // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5
+  // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5
+  // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32
+  // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion
+  // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize
+  // CHECK-BITFIELD-CONVERSION: [[CONT]]:
+  // CHECK-NEXT: ret void
+}
+
+// CHECK-LABEL: define{{.*}} void @{{.*foo4.*}}
+void foo4(int x) {
+  s.a += x;
+  // CHECK: store i8 %{{.*}}
+  // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5
+  // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5
+  // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32
+  // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion
+  // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize
+  c.a += x;
+  // CHECK: store i8 %{{.*}}
+  // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5
+  // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5
+  // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32
+  // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion
+  // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize
+  // CHECK-BITFIELD-CONVERSION: [[CONT]]:
+  // CHECK-NEXT: ret void
+}
\ No newline at end of file
diff --git a/clang/test/CodeGenCoroutines/coro-suspend-cleanups.cpp b/clang/test/CodeGenCoroutines/coro-suspend-cleanups.cpp
new file mode 100644
index 0000000000000..06cc2069dbe9a
--- /dev/null
+++ b/clang/test/CodeGenCoroutines/coro-suspend-cleanups.cpp
@@ -0,0 +1,93 @@
+// RUN: %clang_cc1 --std=c++20 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+#include "Inputs/coroutine.h"
+
+struct Printy {
+  Printy(const char *name) : name(name) {}
+  ~Printy() {}
+  const char *name;
+};
+
+struct coroutine {
+  struct promise_type;
+  std::coroutine_handle<promise_type> handle;
+  ~coroutine() {
+    if (handle) handle.destroy();
+  }
+};
+
+struct coroutine::promise_type {
+  coroutine get_return_object() {
+    return {std::coroutine_handle<promise_type>::from_promise(*this)};
+  }
+  std::suspend_never initial_suspend() noexcept { return {}; }
+  std::suspend_always final_suspend() noexcept { return {}; }
+  void return_void() {}
+  void unhandled_exception() {}
+};
+
+struct Awaiter : std::suspend_always {
+  Printy await_resume() { return {"awaited"}; }
+};
+
+int foo() { return 2; }
+
+coroutine ArrayInitCoro() {
+  // Verify that:
+  // - We do the necessary stores for array cleanups.
+  // - Array cleanups are called by await.cleanup.
+  // - We activate the cleanup after the first element and deactivate it in await.ready (see cleanup.isactive).
+
+  // CHECK-LABEL: define dso_local void @_Z13ArrayInitCorov
+  // CHECK: %arrayinit.endOfInit = alloca ptr, align 8
+  // CHECK: %cleanup.isactive = alloca i1, align 1
+  Printy arr[2] = {
+    Printy("a"),
+    // CHECK: %arrayinit.begin = getelementptr inbounds [2 x %struct.Printy], ptr %arr.reload.addr, i64 0, i64 0
+    // CHECK-NEXT: %arrayinit.begin.spill.addr = getelementptr inbounds %_Z13ArrayInitCorov.Frame, ptr %0, i32 0, i32 10
+    // CHECK-NEXT: store ptr %arrayinit.begin, ptr %arrayinit.begin.spill.addr, align 8
+    // CHECK-NEXT: store i1 true, ptr %cleanup.isactive.reload.addr, align 1
+    // CHECK-NEXT: store ptr %arrayinit.begin, ptr %arrayinit.endOfInit.reload.addr, align 8
+    // CHECK-NEXT: call void @_ZN6PrintyC1EPKc(ptr noundef nonnull align 8 dereferenceable(8) %arrayinit.begin, ptr noundef @.str)
+    // CHECK-NEXT: %arrayinit.element = getelementptr inbounds %struct.Printy, ptr %arrayinit.begin, i64 1
+    // CHECK-NEXT: %arrayinit.element.spill.addr = getelementptr inbounds %_Z13ArrayInitCorov.Frame, ptr %0, i32 0, i32 11
+    // CHECK-NEXT: store ptr %arrayinit.element, ptr %arrayinit.element.spill.addr, align 8
+    // CHECK-NEXT: store ptr %arrayinit.element, ptr %arrayinit.endOfInit.reload.addr, align 8
+    co_await Awaiter{}
+    // CHECK-NEXT: @_ZNSt14suspend_always11await_readyEv
+    // CHECK-NEXT: br i1 %{{.+}}, label %await.ready, label %CoroSave30
+  };
+  // CHECK: await.cleanup: ; preds = %AfterCoroSuspend{{.*}}
+  // CHECK-NEXT: br label %cleanup{{.*}}.from.await.cleanup
+
+  // CHECK: cleanup{{.*}}.from.await.cleanup: ; preds = %await.cleanup
+  // CHECK: br label %cleanup{{.*}}
+
+  // CHECK: await.ready:
+  // CHECK-NEXT: %arrayinit.element.reload.addr = getelementptr inbounds %_Z13ArrayInitCorov.Frame, ptr %0, i32 0, i32 11
+  // CHECK-NEXT: %arrayinit.element.reload = load ptr, ptr %arrayinit.element.reload.addr, align 8
+  // CHECK-NEXT: call void @_ZN7Awaiter12await_resumeEv
+  // CHECK-NEXT: store i1 false, ptr %cleanup.isactive.reload.addr, align 1
+  // CHECK-NEXT: br label %cleanup{{.*}}.from.await.ready
+
+  // CHECK: cleanup{{.*}}: ; preds = %cleanup{{.*}}.from.await.ready, %cleanup{{.*}}.from.await.cleanup
+  // CHECK: %cleanup.is_active = load i1, ptr %cleanup.isactive.reload.addr, align 1
+  // CHECK-NEXT: br i1 %cleanup.is_active, label %cleanup.action, label %cleanup.done
+
+  // CHECK: cleanup.action:
+  // CHECK: %arraydestroy.isempty = icmp eq ptr %arrayinit.begin.reload{{.*}}, %{{.*}}
+  // CHECK-NEXT: br i1 %arraydestroy.isempty, label %arraydestroy.done{{.*}}, label %arraydestroy.body.from.cleanup.action
+  // Ignore rest of the array cleanup.
+}
+
+coroutine ArrayInitWithCoReturn() {
+  // CHECK-LABEL: define dso_local void @_Z21ArrayInitWithCoReturnv
+  // Verify that we start to emit the array destructor.
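+  // As in ArrayInitCoro above, co_return must unwind the partially-constructed
+  // arr through the cleanup guarded by %arrayinit.endOfInit; only the alloca
+  // is checked here.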
+ // CHECK: %arrayinit.endOfInit = alloca ptr, align 8 + Printy arr[2] = {"a", ({ + if (foo()) { + co_return; + } + "b"; + })}; +} diff --git a/clang/test/CodeGenHLSL/builtins/all.hlsl b/clang/test/CodeGenHLSL/builtins/all.hlsl new file mode 100644 index 0000000000000..b48daa287480f --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/all.hlsl @@ -0,0 +1,277 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,SPIR_NO_HALF,SPIR_CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: --check-prefixes=CHECK,NATIVE_HALF,DXIL_NATIVE_HALF,DXIL_CHECK +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \ +// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXIL_NO_HALF,DXIL_CHECK + +#ifdef __HLSL_ENABLE_16_BIT +// DXIL_NATIVE_HALF: define noundef i1 @ +// SPIR_NATIVE_HALF: define spir_func noundef i1 @ +// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.i16 +// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.i16 +// NATIVE_HALF: ret i1 %hlsl.all +bool test_all_int16_t(int16_t p0) { return all(p0); } +// DXIL_NATIVE_HALF: define noundef i1 @ +// SPIR_NATIVE_HALF: define spir_func noundef i1 @ +// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v2i16 +// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v2i16 +// NATIVE_HALF: ret i1 %hlsl.all +bool test_all_int16_t2(int16_t2 p0) { return all(p0); } +// DXIL_NATIVE_HALF: define noundef i1 @ +// SPIR_NATIVE_HALF: define spir_func noundef i1 @ +// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v3i16 +// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v3i16 +// NATIVE_HALF: ret i1 %hlsl.all +bool test_all_int16_t3(int16_t3 p0) { return all(p0); } +// DXIL_NATIVE_HALF: define noundef i1 @ +// SPIR_NATIVE_HALF: define spir_func noundef i1 @ +// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v4i16 +// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v4i16 +// NATIVE_HALF: ret i1 %hlsl.all +bool test_all_int16_t4(int16_t4 p0) { return all(p0); } + +// DXIL_NATIVE_HALF: define noundef i1 @ +// SPIR_NATIVE_HALF: define spir_func noundef i1 @ +// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.i16 +// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.i16 +// NATIVE_HALF: ret i1 %hlsl.all +bool test_all_uint16_t(uint16_t p0) { return all(p0); } +// DXIL_NATIVE_HALF: define noundef i1 @ +// SPIR_NATIVE_HALF: define spir_func noundef i1 @ +// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v2i16 +// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v2i16 +// NATIVE_HALF: ret i1 %hlsl.all +bool test_all_uint16_t2(uint16_t2 p0) { return all(p0); } +// DXIL_NATIVE_HALF: define noundef i1 @ +// SPIR_NATIVE_HALF: define spir_func noundef i1 @ +// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v3i16 +// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v3i16 +// NATIVE_HALF: ret i1 %hlsl.all +bool test_all_uint16_t3(uint16_t3 p0) { return all(p0); } +// DXIL_NATIVE_HALF: define noundef i1 @ +// 
SPIR_NATIVE_HALF: define spir_func noundef i1 @ +// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v4i16 +// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v4i16 +// NATIVE_HALF: ret i1 %hlsl.all +bool test_all_uint16_t4(uint16_t4 p0) { return all(p0); } +#endif // __HLSL_ENABLE_16_BIT + +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.f16 +// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.f16 +// DXIL_NO_HALF: %hlsl.all = call i1 @llvm.dx.all.f32 +// SPIR_NO_HALF: %hlsl.all = call i1 @llvm.spv.all.f32 +// CHECK: ret i1 %hlsl.all +bool test_all_half(half p0) { return all(p0); } + +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v2f16 +// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v2f16 +// DXIL_NO_HALF: %hlsl.all = call i1 @llvm.dx.all.v2f32 +// SPIR_NO_HALF: %hlsl.all = call i1 @llvm.spv.all.v2f32 +// CHECK: ret i1 %hlsl.all +bool test_all_half2(half2 p0) { return all(p0); } + +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v3f16 +// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v3f16 +// DXIL_NO_HALF: %hlsl.all = call i1 @llvm.dx.all.v3f32 +// SPIR_NO_HALF: %hlsl.all = call i1 @llvm.spv.all.v3f32 +// CHECK: ret i1 %hlsl.all +bool test_all_half3(half3 p0) { return all(p0); } + +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_NATIVE_HALF: %hlsl.all = call i1 @llvm.dx.all.v4f16 +// SPIR_NATIVE_HALF: %hlsl.all = call i1 @llvm.spv.all.v4f16 +// DXIL_NO_HALF: %hlsl.all = call i1 @llvm.dx.all.v4f32 +// SPIR_NO_HALF: %hlsl.all = call i1 @llvm.spv.all.v4f32 +// CHECK: ret i1 %hlsl.all +bool test_all_half4(half4 p0) { return all(p0); } + +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.f32 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.f32 +// CHECK: ret i1 %hlsl.all +bool test_all_float(float p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2f32 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2f32 +// CHECK: ret i1 %hlsl.all +bool test_all_float2(float2 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3f32 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3f32 +// CHECK: ret i1 %hlsl.all +bool test_all_float3(float3 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4f32 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4f32 +// CHECK: ret i1 %hlsl.all +bool test_all_float4(float4 p0) { return all(p0); } + +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.f64 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.f64 +// CHECK: ret i1 %hlsl.all +bool test_all_double(double p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2f64 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2f64 +// CHECK: ret i1 %hlsl.all +bool test_all_double2(double2 p0) { return all(p0); } +// DXIL_CHECK: define 
noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3f64 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3f64 +// CHECK: ret i1 %hlsl.all +bool test_all_double3(double3 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4f64 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4f64 +// CHECK: ret i1 %hlsl.all +bool test_all_double4(double4 p0) { return all(p0); } + +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.i32 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.i32 +// CHECK: ret i1 %hlsl.all +bool test_all_int(int p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2i32 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2i32 +// CHECK: ret i1 %hlsl.all +bool test_all_int2(int2 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3i32 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3i32 +// CHECK: ret i1 %hlsl.all +bool test_all_int3(int3 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4i32 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4i32 +// CHECK: ret i1 %hlsl.all +bool test_all_int4(int4 p0) { return all(p0); } + +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.i32 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.i32 +// CHECK: ret i1 %hlsl.all +bool test_all_uint(uint p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2i32 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2i32 +// CHECK: ret i1 %hlsl.all +bool test_all_uint2(uint2 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3i32 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3i32 +// CHECK: ret i1 %hlsl.all +bool test_all_uint3(uint3 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4i32 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4i32 +// CHECK: ret i1 %hlsl.all +bool test_all_uint4(uint4 p0) { return all(p0); } + +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.i64 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.i64 +// CHECK: ret i1 %hlsl.all +bool test_all_int64_t(int64_t p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2i64 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2i64 +// CHECK: ret i1 %hlsl.all +bool test_all_int64_t2(int64_t2 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3i64 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3i64 +// CHECK: ret i1 %hlsl.all +bool test_all_int64_t3(int64_t3 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ 
+// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4i64 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4i64 +// CHECK: ret i1 %hlsl.all +bool test_all_int64_t4(int64_t4 p0) { return all(p0); } + +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.i64 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.i64 +// CHECK: ret i1 %hlsl.all +bool test_all_uint64_t(uint64_t p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2i64 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2i64 +// CHECK: ret i1 %hlsl.all +bool test_all_uint64_t2(uint64_t2 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3i64 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3i64 +// CHECK: ret i1 %hlsl.all +bool test_all_uint64_t3(uint64_t3 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4i64 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4i64 +// CHECK: ret i1 %hlsl.all +bool test_all_uint64_t4(uint64_t4 p0) { return all(p0); } + +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.i1 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.i1 +// CHECK: ret i1 %hlsl.all +bool test_all_bool(bool p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v2i1 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v2i1 +// CHECK: ret i1 %hlsl.all +bool test_all_bool2(bool2 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v3i1 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v3i1 +// CHECK: ret i1 %hlsl.all +bool test_all_bool3(bool3 p0) { return all(p0); } +// DXIL_CHECK: define noundef i1 @ +// SPIR_CHECK: define spir_func noundef i1 @ +// DXIL_CHECK: %hlsl.all = call i1 @llvm.dx.all.v4i1 +// SPIR_CHECK: %hlsl.all = call i1 @llvm.spv.all.v4i1 +// CHECK: ret i1 %hlsl.all +bool test_all_bool4(bool4 p0) { return all(p0); } diff --git a/clang/test/Driver/aarch64-fixed-x-register.c b/clang/test/Driver/aarch64-fixed-x-register.c index 29024fde41254..7fc3e3e61105d 100644 --- a/clang/test/Driver/aarch64-fixed-x-register.c +++ b/clang/test/Driver/aarch64-fixed-x-register.c @@ -94,10 +94,6 @@ // RUN: FileCheck --check-prefix=CHECK-FIXED-X28 < %t %s // CHECK-FIXED-X28: "-target-feature" "+reserve-x28" -// RUN: %clang --target=aarch64-none-gnu -ffixed-x30 -### %s 2> %t -// RUN: FileCheck --check-prefix=CHECK-FIXED-X30 < %t %s -// CHECK-FIXED-X30: "-target-feature" "+reserve-x30" - // Test multiple of reserve-x# options together. 
// RUN: %clang --target=aarch64-none-gnu \ // RUN: -ffixed-x1 \ diff --git a/clang/test/Driver/arm-compiler-rt.c b/clang/test/Driver/arm-compiler-rt.c index 5e9e528400d08..cb6c29f48a781 100644 --- a/clang/test/Driver/arm-compiler-rt.c +++ b/clang/test/Driver/arm-compiler-rt.c @@ -10,47 +10,47 @@ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-GNUEABI -// ARM-GNUEABI: "{{.*[/\\]}}libclang_rt.builtins-arm.a" +// ARM-GNUEABI: "{{.*[/\\]}}libclang_rt.builtins.a" // RUN: %clang -target arm-linux-gnueabi \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -mfloat-abi=hard -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-GNUEABI-ABI -// ARM-GNUEABI-ABI: "{{.*[/\\]}}libclang_rt.builtins-armhf.a" +// ARM-GNUEABI-ABI: "{{.*[/\\]}}libclang_rt.builtins.a" // RUN: %clang -target arm-linux-gnueabihf \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-GNUEABIHF -// ARM-GNUEABIHF: "{{.*[/\\]}}libclang_rt.builtins-armhf.a" +// ARM-GNUEABIHF: "{{.*[/\\]}}libclang_rt.builtins.a" // RUN: %clang -target arm-linux-gnueabihf \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -mfloat-abi=soft -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-GNUEABIHF-ABI -// ARM-GNUEABIHF-ABI: "{{.*[/\\]}}libclang_rt.builtins-arm.a" +// ARM-GNUEABIHF-ABI: "{{.*[/\\]}}libclang_rt.builtins.a" // RUN: %clang -target arm-windows-itanium \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-WINDOWS -// ARM-WINDOWS: "{{.*[/\\]}}clang_rt.builtins-arm.lib" +// ARM-WINDOWS: "{{.*[/\\]}}clang_rt.builtins.lib" // RUN: %clang -target arm-linux-androideabi \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-ANDROID -// ARM-ANDROID: "{{.*[/\\]}}libclang_rt.builtins-arm-android.a" +// ARM-ANDROID: "{{.*[/\\]}}libclang_rt.builtins.a" // RUN: not %clang --target=arm-linux-androideabi \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -rtlib=compiler-rt -mfloat-abi=hard -### %s 2>&1 \ // RUN: | FileCheck %s -check-prefix ARM-ANDROIDHF -// ARM-ANDROIDHF: "{{.*[/\\]}}libclang_rt.builtins-armhf-android.a" +// ARM-ANDROIDHF: "{{.*[/\\]}}libclang_rt.builtins.a" diff --git a/clang/test/Driver/cl-link.c b/clang/test/Driver/cl-link.c index 444f0c01b3f99..ffd0b5ac4bade 100644 --- a/clang/test/Driver/cl-link.c +++ b/clang/test/Driver/cl-link.c @@ -13,20 +13,20 @@ // ASAN: link.exe // ASAN: "-debug" // ASAN: "-incremental:no" -// ASAN: "{{[^"]*}}clang_rt.asan-i386.lib" -// ASAN: "-wholearchive:{{.*}}clang_rt.asan-i386.lib" -// ASAN: "{{[^"]*}}clang_rt.asan_cxx-i386.lib" -// ASAN: "-wholearchive:{{.*}}clang_rt.asan_cxx-i386.lib" +// ASAN: "{{[^"]*}}clang_rt.asan.lib" +// ASAN: "-wholearchive:{{.*}}clang_rt.asan.lib" +// ASAN: "{{[^"]*}}clang_rt.asan_cxx.lib" +// ASAN: 
"-wholearchive:{{.*}}clang_rt.asan_cxx.lib" // ASAN: "{{.*}}cl-link{{.*}}.obj" // RUN: %clang_cl -m32 -arch:IA32 --target=i386-pc-win32 /MD /Tc%s -fuse-ld=link -### -fsanitize=address 2>&1 | FileCheck --check-prefix=ASAN-MD %s // ASAN-MD: link.exe // ASAN-MD: "-debug" // ASAN-MD: "-incremental:no" -// ASAN-MD: "{{.*}}clang_rt.asan_dynamic-i386.lib" -// ASAN-MD: "{{[^"]*}}clang_rt.asan_dynamic_runtime_thunk-i386.lib" +// ASAN-MD: "{{.*}}clang_rt.asan_dynamic.lib" +// ASAN-MD: "{{[^"]*}}clang_rt.asan_dynamic_runtime_thunk.lib" // ASAN-MD: "-include:___asan_seh_interceptor" -// ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk-i386.lib" +// ASAN-MD: "-wholearchive:{{.*}}clang_rt.asan_dynamic_runtime_thunk.lib" // ASAN-MD: "{{.*}}cl-link{{.*}}.obj" // RUN: %clang_cl /LD -fuse-ld=link -### /Tc%s 2>&1 | FileCheck --check-prefix=DLL %s @@ -40,7 +40,7 @@ // ASAN-DLL: "-dll" // ASAN-DLL: "-debug" // ASAN-DLL: "-incremental:no" -// ASAN-DLL: "{{.*}}clang_rt.asan_dll_thunk-i386.lib" +// ASAN-DLL: "{{.*}}clang_rt.asan_dll_thunk.lib" // ASAN-DLL: "{{.*}}cl-link{{.*}}.obj" // RUN: %clang_cl /Zi /Tc%s -fuse-ld=link -### 2>&1 | FileCheck --check-prefix=DEBUG %s diff --git a/clang/test/Driver/compiler-rt-unwind.c b/clang/test/Driver/compiler-rt-unwind.c index 7f4e3f22ab19a..c5040d7fd900b 100644 --- a/clang/test/Driver/compiler-rt-unwind.c +++ b/clang/test/Driver/compiler-rt-unwind.c @@ -98,14 +98,14 @@ // RUN: --target=x86_64-w64-mingw32 -rtlib=compiler-rt --unwindlib=libunwind \ // RUN: -shared-libgcc \ // RUN: | FileCheck --check-prefix=MINGW-RTLIB-COMPILER-RT-SHARED-UNWINDLIB-COMPILER-RT %s -// MINGW-RTLIB-COMPILER-RT-SHARED-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins-x86_64.a" +// MINGW-RTLIB-COMPILER-RT-SHARED-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins.a" // MINGW-RTLIB-COMPILER-RT-SHARED-UNWINDLIB-COMPILER-RT-SAME: "-l:libunwind.dll.a" // // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-w64-mingw32 -rtlib=compiler-rt --unwindlib=libunwind \ // RUN: -static-libgcc \ // RUN: | FileCheck --check-prefix=MINGW-RTLIB-COMPILER-RT-STATIC-UNWINDLIB-COMPILER-RT %s -// MINGW-RTLIB-COMPILER-RT-STATIC-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins-x86_64.a" +// MINGW-RTLIB-COMPILER-RT-STATIC-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins.a" // MINGW-RTLIB-COMPILER-RT-STATIC-UNWINDLIB-COMPILER-RT-SAME: "-l:libunwind.a" // // RUN: %clang -### %s 2>&1 \ @@ -114,5 +114,5 @@ // RUN: %clangxx -### %s 2>&1 \ // RUN: --target=x86_64-w64-mingw32 -rtlib=compiler-rt --unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=MINGW-RTLIB-COMPILER-RT-UNWINDLIB-COMPILER-RT %s -// MINGW-RTLIB-COMPILER-RT-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins-x86_64.a" +// MINGW-RTLIB-COMPILER-RT-UNWINDLIB-COMPILER-RT: "{{.*}}libclang_rt.builtins.a" // MINGW-RTLIB-COMPILER-RT-UNWINDLIB-COMPILER-RT-SAME: "-lunwind" diff --git a/clang/test/Driver/coverage-ld.c b/clang/test/Driver/coverage-ld.c index acb08eb5db59a..be1d8320ab8be 100644 --- a/clang/test/Driver/coverage-ld.c +++ b/clang/test/Driver/coverage-ld.c @@ -33,7 +33,7 @@ // RUN: | FileCheck --check-prefix=CHECK-FREEBSD-X86-64 %s // // CHECK-FREEBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-FREEBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}freebsd{{/|\\\\}}libclang_rt.profile-x86_64.a" +// CHECK-FREEBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-freebsd{{/|\\\\}}libclang_rt.profile.a" // // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-unknown-netbsd --coverage 
-fuse-ld=ld \ @@ -42,7 +42,7 @@ // RUN: | FileCheck --check-prefix=CHECK-NETBSD-X86-64 %s // CHECK-NETBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-NETBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}netbsd{{/|\\\\}}libclang_rt.profile-x86_64.a" +// CHECK-NETBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-netbsd{{/|\\\\}}libclang_rt.profile.a" // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-unknown-openbsd --coverage -fuse-ld=ld \ @@ -51,7 +51,7 @@ // RUN: | FileCheck --check-prefix=CHECK-OPENBSD-X86-64 %s // CHECK-OPENBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-OPENBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}openbsd{{/|\\\\}}libclang_rt.profile-x86_64.a" +// CHECK-OPENBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-openbsd{{/|\\\\}}libclang_rt.profile.a" // RUN: %clang -### %s 2>&1 \ // RUN: --target=arm-linux-androideabi --coverage -fuse-ld=ld \ @@ -60,4 +60,4 @@ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-ARM %s // // CHECK-ANDROID-ARM: "{{(.*[^.0-9A-Z_a-z])?}}ld.lld{{(.exe)?}}" -// CHECK-ANDROID-ARM: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}linux{{/|\\\\}}libclang_rt.profile-arm-android.a" +// CHECK-ANDROID-ARM: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}arm-unknown-linux-android{{/|\\\\}}libclang_rt.profile.a" diff --git a/clang/test/Driver/cuda-external-tools.cu b/clang/test/Driver/cuda-external-tools.cu index 5c82f2bf70aff..bd5ea5d28d760 100644 --- a/clang/test/Driver/cuda-external-tools.cu +++ b/clang/test/Driver/cuda-external-tools.cu @@ -86,6 +86,12 @@ // RUN: -Xcuda-fatbinary -bar1 -Xcuda-ptxas -foo2 -Xcuda-fatbinary -bar2 %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHECK,SM35,PTXAS-EXTRA,FATBINARY-EXTRA %s +// Check -Xcuda-ptxas with clang-cl +// RUN: %clang_cl -### -c -Xcuda-ptxas -foo1 \ +// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ +// RUN: -Xcuda-ptxas -foo2 -- %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=CHECK,SM35,PTXAS-EXTRA %s + // MacOS spot-checks // RUN: %clang -### --target=x86_64-apple-macosx -O0 -c %s 2>&1 \ // RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ @@ -140,6 +146,8 @@ // CHECK-SAME: "[[PTXFILE]]" // PTXAS-EXTRA-SAME: "-foo1" // PTXAS-EXTRA-SAME: "-foo2" +// CHECK-NOT: "-foo1" +// CHECK-NOT: "-foo2" // RDC-SAME: "-c" // CHECK-NOT: "-c" diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 1671825042c32..571f79a6e7f70 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -35,20 +35,20 @@ // RUN: %clang --target=%itanium_abi_triple -fsanitize=integer %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-INTEGER -implicit-check-not="-fsanitize-address-use-after-scope" // CHECK-INTEGER: "-fsanitize={{((signed-integer-overflow|unsigned-integer-overflow|integer-divide-by-zero|shift-base|shift-exponent|implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change|unsigned-shift-base),?){9}"}} -// RUN: %clang -fsanitize=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-RECOVER -// RUN: %clang -fsanitize=implicit-conversion -fsanitize-recover=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-RECOVER -// RUN: %clang -fsanitize=implicit-conversion -fno-sanitize-recover=implicit-conversion %s -### 2>&1 | FileCheck %s 
--check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-NORECOVER -// RUN: %clang -fsanitize=implicit-conversion -fsanitize-trap=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-TRAP -// CHECK-implicit-conversion: "-fsanitize={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-RECOVER: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-RECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-RECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-NORECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // ??? -// CHECK-implicit-conversion-NORECOVER-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-NORECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-TRAP: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-TRAP-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-TRAP-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// RUN: %clang -fsanitize=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-RECOVER +// RUN: %clang -fsanitize=implicit-integer-conversion -fsanitize-recover=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-RECOVER +// RUN: %clang -fsanitize=implicit-integer-conversion -fno-sanitize-recover=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-NORECOVER +// RUN: %clang -fsanitize=implicit-integer-conversion -fsanitize-trap=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-TRAP +// CHECK-implicit-integer-conversion: "-fsanitize={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-RECOVER: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-RECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-RECOVER-NOT: 
"-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // ??? +// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-TRAP: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-TRAP-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-TRAP-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // RUN: %clang -fsanitize=implicit-integer-arithmetic-value-change %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-arithmetic-value-change,CHECK-implicit-integer-arithmetic-value-change-RECOVER // RUN: %clang -fsanitize=implicit-integer-arithmetic-value-change -fsanitize-recover=implicit-integer-arithmetic-value-change %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-arithmetic-value-change,CHECK-implicit-integer-arithmetic-value-change-RECOVER diff --git a/clang/test/Driver/instrprof-ld.c b/clang/test/Driver/instrprof-ld.c index 674580b349d42..a96bba4a1e763 100644 --- a/clang/test/Driver/instrprof-ld.c +++ b/clang/test/Driver/instrprof-ld.c @@ -34,7 +34,7 @@ // RUN: | FileCheck --check-prefix=CHECK-FREEBSD-X86-64 %s // // CHECK-FREEBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-FREEBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}freebsd{{/|\\\\}}libclang_rt.profile-x86_64.a" +// CHECK-FREEBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-freebsd{{/|\\\\}}libclang_rt.profile.a" // // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-unknown-netbsd -fprofile-instr-generate -fuse-ld=ld \ @@ -43,7 +43,7 @@ // RUN: | FileCheck --check-prefix=CHECK-NETBSD-X86-64 %s // CHECK-NETBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-NETBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}netbsd{{/|\\\\}}libclang_rt.profile-x86_64.a" +// CHECK-NETBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-netbsd{{/|\\\\}}libclang_rt.profile.a" // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-unknown-openbsd -fprofile-instr-generate -fuse-ld=ld \ @@ -52,7 +52,7 @@ // RUN: | FileCheck --check-prefix=CHECK-OPENBSD-X86-64 %s // CHECK-OPENBSD-X86-64: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-OPENBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}openbsd{{/|\\\\}}libclang_rt.profile-x86_64.a" +// CHECK-OPENBSD-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-openbsd{{/|\\\\}}libclang_rt.profile.a" // RUN: %clang -### %s 2>&1 \ // RUN: -shared \ @@ -72,7 +72,7 @@ // RUN: | FileCheck --check-prefix=CHECK-LINUX-X86-64-SHARED %s // // CHECK-LINUX-X86-64-SHARED: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// 
CHECK-LINUX-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{.*}}linux{{.*}}libclang_rt.profile.a" {{.*}} "-lc" +// CHECK-LINUX-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{.*}}x86_64-unknown-linux{{.*}}libclang_rt.profile.a" {{.*}} "-lc" // // RUN: %clang -### %s 2>&1 \ // RUN: -shared \ @@ -82,7 +82,7 @@ // RUN: | FileCheck --check-prefix=CHECK-FREEBSD-X86-64-SHARED %s // // CHECK-FREEBSD-X86-64-SHARED: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-FREEBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}freebsd{{/|\\\\}}libclang_rt.profile-x86_64.a" +// CHECK-FREEBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-freebsd{{/|\\\\}}libclang_rt.profile.a" // // RUN: %clang -### %s 2>&1 \ // RUN: -shared \ @@ -92,7 +92,7 @@ // RUN: | FileCheck --check-prefix=CHECK-NETBSD-X86-64-SHARED %s // CHECK-NETBSD-X86-64-SHARED: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-NETBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}netbsd{{/|\\\\}}libclang_rt.profile-x86_64.a" +// CHECK-NETBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-netbsd{{/|\\\\}}libclang_rt.profile.a" // RUN: %clang -### %s 2>&1 \ // RUN: -shared \ @@ -102,7 +102,7 @@ // RUN: | FileCheck --check-prefix=CHECK-OPENBSD-X86-64-SHARED %s // CHECK-OPENBSD-X86-64-SHARED: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-OPENBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}openbsd{{/|\\\\}}libclang_rt.profile-x86_64.a" +// CHECK-OPENBSD-X86-64-SHARED: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-openbsd{{/|\\\\}}libclang_rt.profile.a" // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-apple-darwin14 -fprofile-instr-generate -fuse-ld=ld \ @@ -174,7 +174,7 @@ // RUN: | FileCheck --check-prefix=CHECK-MINGW-X86-64 %s // // CHECK-MINGW-X86-64: "{{(.*[^.0-9A-Z_a-z])?}}ld{{(.exe)?}}" -// CHECK-MINGW-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}windows{{/|\\\\}}libclang_rt.profile-x86_64.a" +// CHECK-MINGW-X86-64: "{{.*}}/Inputs/resource_dir{{/|\\\\}}lib{{/|\\\\}}x86_64-unknown-windows-gnu{{/|\\\\}}libclang_rt.profile.a" // Test instrumented profiling dependent-lib flags // diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c index 4020b138dc8fd..e5c5563673858 100644 --- a/clang/test/Driver/linux-ld.c +++ b/clang/test/Driver/linux-ld.c @@ -99,9 +99,9 @@ // CHECK-LD-RT-ANDROID: "--eh-frame-hdr" // CHECK-LD-RT-ANDROID: "-m" "armelf_linux_eabi" // CHECK-LD-RT-ANDROID: "-dynamic-linker" -// CHECK-LD-RT-ANDROID: libclang_rt.builtins-arm-android.a" +// CHECK-LD-RT-ANDROID: libclang_rt.builtins.a" // CHECK-LD-RT-ANDROID: "-lc" -// CHECK-LD-RT-ANDROID: libclang_rt.builtins-arm-android.a" +// CHECK-LD-RT-ANDROID: libclang_rt.builtins.a" // // RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \ @@ -264,7 +264,7 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-CLANG-ANDROID-STATIC %s // CHECK-CLANG-ANDROID-STATIC: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-CLANG-ANDROID-STATIC: "--start-group" "{{[^"]*}}{{/|\\\\}}libclang_rt.builtins-aarch64-android.a" "-l:libunwind.a" "-lc" "--end-group" +// CHECK-CLANG-ANDROID-STATIC: "--start-group" "{{[^"]*}}{{/|\\\\}}libclang_rt.builtins.a" "-l:libunwind.a" "-lc" "--end-group" // // RUN: %clang -### %s 2>&1 \ // RUN: --target=x86_64-unknown-linux -rtlib=platform --unwindlib=platform \ diff --git 
a/clang/test/Driver/mingw-sanitizers.c b/clang/test/Driver/mingw-sanitizers.c index d165648a8fdf6..2325f8f0f1f23 100644 --- a/clang/test/Driver/mingw-sanitizers.c +++ b/clang/test/Driver/mingw-sanitizers.c @@ -4,17 +4,17 @@ // // ASAN-ALL-NOT:"-l{{[^"]+"]}}" // ASAN-ALL-NOT:"[[INPUT]]" -// ASAN-I686: "{{[^"]*}}libclang_rt.asan_dynamic-i386.dll.a" -// ASAN-X86_64: "{{[^"]*}}libclang_rt.asan_dynamic-x86_64.dll.a" +// ASAN-I686: "{{[^"]*}}libclang_rt.asan_dynamic.dll.a" +// ASAN-X86_64: "{{[^"]*}}libclang_rt.asan_dynamic.dll.a" // ASAN-ALL: "-lcomponent" // ASAN-ALL: "[[INPUT]]" -// ASAN-I686: "{{[^"]*}}libclang_rt.asan_dynamic-i386.dll.a" -// ASAN-I686: "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk-i386.a" +// ASAN-I686: "{{[^"]*}}libclang_rt.asan_dynamic.dll.a" +// ASAN-I686: "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk.a" // ASAN-I686: "--require-defined" "___asan_seh_interceptor" -// ASAN-I686: "--whole-archive" "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk-i386.a" "--no-whole-archive" -// ASAN-X86_64: "{{[^"]*}}libclang_rt.asan_dynamic-x86_64.dll.a" -// ASAN-X86_64: "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk-x86_64.a" +// ASAN-I686: "--whole-archive" "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk.a" "--no-whole-archive" +// ASAN-X86_64: "{{[^"]*}}libclang_rt.asan_dynamic.dll.a" +// ASAN-X86_64: "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk.a" // ASAN-X86_64: "--require-defined" "__asan_seh_interceptor" -// ASAN-X86_64: "--whole-archive" "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk-x86_64.a" "--no-whole-archive" +// ASAN-X86_64: "--whole-archive" "{{[^"]*}}libclang_rt.asan_dynamic_runtime_thunk.a" "--no-whole-archive" // RUN: %clang -target x86_64-windows-gnu %s -### -fsanitize=vptr diff --git a/clang/test/Driver/msp430-toolchain.c b/clang/test/Driver/msp430-toolchain.c index ef6780c38f2ee..3c3042b482ef2 100644 --- a/clang/test/Driver/msp430-toolchain.c +++ b/clang/test/Driver/msp430-toolchain.c @@ -103,8 +103,8 @@ // LIBS-COMPILER-RT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtbegin_no_eh.o" // LIBS-COMPILER-RT-POS: "-L{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430" // LIBS-COMPILER-RT-POS: "-L{{.*}}/Inputs/basic_msp430_tree{{/|\\\\}}msp430-elf{{/|\\\\}}lib/430" -// LIBS-COMPILER-RT-POS: "{{[^"]*}}libclang_rt.builtins-msp430.a" "--start-group" "-lmul_none" "-lc" "{{[^"]*}}libclang_rt.builtins-msp430.a" "-lcrt" "-lnosys" "--end-group" "{{[^"]*}}libclang_rt.builtins-msp430.a" -// LIBS-COMPILER-RT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtend_no_eh.o" "{{[^"]*}}libclang_rt.builtins-msp430.a" +// LIBS-COMPILER-RT-POS: "{{[^"]*}}libclang_rt.builtins.a" "--start-group" "-lmul_none" "-lc" "{{[^"]*}}libclang_rt.builtins.a" "-lcrt" "-lnosys" "--end-group" "{{[^"]*}}libclang_rt.builtins.a" +// LIBS-COMPILER-RT-POS: "{{.*}}/Inputs/basic_msp430_tree/lib/gcc/msp430-elf/8.3.1/430{{/|\\\\}}crtend_no_eh.o" "{{[^"]*}}libclang_rt.builtins.a" // LIBS-COMPILER-RT-NEG-NOT: crtbegin.o // LIBS-COMPILER-RT-NEG-NOT: -lssp_nonshared // LIBS-COMPILER-RT-NEG-NOT: -lssp diff --git a/clang/test/Driver/print-libgcc-file-name-clangrt.c b/clang/test/Driver/print-libgcc-file-name-clangrt.c index ed740e0d2917d..a902eedc85209 100644 --- a/clang/test/Driver/print-libgcc-file-name-clangrt.c +++ b/clang/test/Driver/print-libgcc-file-name-clangrt.c @@ -5,14 +5,14 @@ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir 2>&1 \ // RUN: | FileCheck 
--check-prefix=CHECK-CLANGRT-X8664 %s -// CHECK-CLANGRT-X8664: libclang_rt.builtins-x86_64.a +// CHECK-CLANGRT-X8664: libclang_rt.builtins.a // RUN: %clang -rtlib=compiler-rt -print-libgcc-file-name \ // RUN: --target=i386-pc-linux \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-I386 %s -// CHECK-CLANGRT-I386: libclang_rt.builtins-i386.a +// CHECK-CLANGRT-I386: libclang_rt.builtins.a // Check whether alternate arch values map to the correct library. // @@ -27,28 +27,28 @@ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-ARM %s -// CHECK-CLANGRT-ARM: libclang_rt.builtins-arm.a +// CHECK-CLANGRT-ARM: libclang_rt.builtins.a // RUN: %clang -rtlib=compiler-rt -print-libgcc-file-name \ // RUN: --target=arm-linux-androideabi \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-ARM-ANDROID %s -// CHECK-CLANGRT-ARM-ANDROID: libclang_rt.builtins-arm-android.a +// CHECK-CLANGRT-ARM-ANDROID: libclang_rt.builtins.a // RUN: %clang -rtlib=compiler-rt -print-libgcc-file-name \ // RUN: --target=arm-linux-gnueabihf \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-ARMHF %s -// CHECK-CLANGRT-ARMHF: libclang_rt.builtins-armhf.a +// CHECK-CLANGRT-ARMHF: libclang_rt.builtins.a // RUN: %clang -rtlib=compiler-rt -print-libgcc-file-name \ // RUN: --target=arm-linux-gnueabi -mfloat-abi=hard \ // RUN: --sysroot=%S/Inputs/resource_dir_with_arch_subdir \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_arch_subdir 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-CLANGRT-ARM-ABI %s -// CHECK-CLANGRT-ARM-ABI: libclang_rt.builtins-armhf.a +// CHECK-CLANGRT-ARM-ABI: libclang_rt.builtins.a // RUN: %clang -rtlib=compiler-rt -print-libgcc-file-name \ // RUN: --target=armv7m-none-eabi \ diff --git a/clang/test/Driver/print-runtime-dir.c b/clang/test/Driver/print-runtime-dir.c index 550ffef1aaf6e..ac1ff7e634b81 100644 --- a/clang/test/Driver/print-runtime-dir.c +++ b/clang/test/Driver/print-runtime-dir.c @@ -1,9 +1,3 @@ -// Default directory layout -// RUN: %clang -print-runtime-dir --target=x86_64-pc-windows-msvc \ -// RUN: -resource-dir=%S/Inputs/resource_dir \ -// RUN: | FileCheck --check-prefix=PRINT-RUNTIME-DIR -DFILE=%S/Inputs/resource_dir %s -// PRINT-RUNTIME-DIR: [[FILE]]{{/|\\}}lib{{/|\\}}windows - // Per-target directory layout // RUN: %clang -print-runtime-dir --target=x86_64-pc-windows-msvc \ // RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ diff --git a/clang/test/Driver/riscv-profiles.c b/clang/test/Driver/riscv-profiles.c index 0227487015ba7..647567d4c971f 100644 --- a/clang/test/Driver/riscv-profiles.c +++ b/clang/test/Driver/riscv-profiles.c @@ -50,7 +50,7 @@ // RVA20S64: "-target-feature" "+svade" // RVA20S64: "-target-feature" "+svbare" -// RUN: %clang --target=riscv64 --target=riscv64 -### -c %s 2>&1 -march=rva22u64 \ +// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva22u64 \ // RUN: | FileCheck -check-prefix=RVA22U64 %s // RVA22U64: "-target-feature" "+m" // RVA22U64: "-target-feature" "+a" @@ -76,7 +76,7 @@ // RVA22U64: "-target-feature" "+zbs" // RVA22U64: 
"-target-feature" "+zkt" -// RUN: %clang --target=riscv64 --target=riscv64 -### -c %s 2>&1 -march=rva22s64 \ +// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva22s64 \ // RUN: | FileCheck -check-prefix=RVA22S64 %s // RVA22S64: "-target-feature" "+m" // RVA22S64: "-target-feature" "+a" @@ -111,7 +111,7 @@ // RVA22S64: "-target-feature" "+svinval" // RVA22S64: "-target-feature" "+svpbmt" -// RUN: %clang --target=riscv64 --target=riscv64 -### -c %s 2>&1 -march=rva23u64 -menable-experimental-extensions \ +// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva23u64 \ // RUN: | FileCheck -check-prefix=RVA23U64 %s // RVA23U64: "-target-feature" "+m" // RVA23U64: "-target-feature" "+a" @@ -133,13 +133,13 @@ // RVA23U64: "-target-feature" "+zihintntl" // RVA23U64: "-target-feature" "+zihintpause" // RVA23U64: "-target-feature" "+zihpm" -// RVA23U64: "-target-feature" "+experimental-zimop" +// RVA23U64: "-target-feature" "+zimop" // RVA23U64: "-target-feature" "+za64rs" // RVA23U64: "-target-feature" "+zawrs" // RVA23U64: "-target-feature" "+zfa" // RVA23U64: "-target-feature" "+zfhmin" // RVA23U64: "-target-feature" "+zcb" -// RVA23U64: "-target-feature" "+experimental-zcmop" +// RVA23U64: "-target-feature" "+zcmop" // RVA23U64: "-target-feature" "+zba" // RVA23U64: "-target-feature" "+zbb" // RVA23U64: "-target-feature" "+zbs" @@ -172,13 +172,13 @@ // RVA23S64: "-target-feature" "+zihintntl" // RVA23S64: "-target-feature" "+zihintpause" // RVA23S64: "-target-feature" "+zihpm" -// RVA23S64: "-target-feature" "+experimental-zimop" +// RVA23S64: "-target-feature" "+zimop" // RVA23S64: "-target-feature" "+za64rs" // RVA23S64: "-target-feature" "+zawrs" // RVA23S64: "-target-feature" "+zfa" // RVA23S64: "-target-feature" "+zfhmin" // RVA23S64: "-target-feature" "+zcb" -// RVA23S64: "-target-feature" "+experimental-zcmop" +// RVA23S64: "-target-feature" "+zcmop" // RVA23S64: "-target-feature" "+zba" // RVA23S64: "-target-feature" "+zbb" // RVA23S64: "-target-feature" "+zbs" @@ -207,7 +207,7 @@ // RVA23S64: "-target-feature" "+svnapot" // RVA23S64: "-target-feature" "+svpbmt" -// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rvb23u64 -menable-experimental-extensions \ +// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rvb23u64 \ // RUN: | FileCheck -check-prefix=RVB23U64 %s // RVB23U64: "-target-feature" "+m" // RVB23U64: "-target-feature" "+a" @@ -228,12 +228,12 @@ // RVB23U64: "-target-feature" "+zihintntl" // RVB23U64: "-target-feature" "+zihintpause" // RVB23U64: "-target-feature" "+zihpm" -// RVB23U64: "-target-feature" "+experimental-zimop" +// RVB23U64: "-target-feature" "+zimop" // RVB23U64: "-target-feature" "+za64rs" // RVB23U64: "-target-feature" "+zawrs" // RVB23U64: "-target-feature" "+zfa" // RVB23U64: "-target-feature" "+zcb" -// RVB23U64: "-target-feature" "+experimental-zcmop" +// RVB23U64: "-target-feature" "+zcmop" // RVB23U64: "-target-feature" "+zba" // RVB23U64: "-target-feature" "+zbb" // RVB23U64: "-target-feature" "+zbs" @@ -261,12 +261,12 @@ // RVB23S64: "-target-feature" "+zihintntl" // RVB23S64: "-target-feature" "+zihintpause" // RVB23S64: "-target-feature" "+zihpm" -// RVB23S64: "-target-feature" "+experimental-zimop" +// RVB23S64: "-target-feature" "+zimop" // RVB23S64: "-target-feature" "+za64rs" // RVB23S64: "-target-feature" "+zawrs" // RVB23S64: "-target-feature" "+zfa" // RVB23S64: "-target-feature" "+zcb" -// RVB23S64: "-target-feature" "+experimental-zcmop" +// RVB23S64: "-target-feature" "+zcmop" // RVB23S64: "-target-feature" "+zba" // 
RVB23S64: "-target-feature" "+zbb" // RVB23S64: "-target-feature" "+zbs" @@ -284,7 +284,7 @@ // RVB23S64: "-target-feature" "+svnapot" // RVB23S64: "-target-feature" "+svpbmt" -// RUN: %clang --target=riscv32 -### -c %s 2>&1 -march=rvm23u32 -menable-experimental-extensions \ +// RUN: %clang --target=riscv32 -### -c %s 2>&1 -march=rvm23u32 \ // RUN: | FileCheck -check-prefix=RVM23U32 %s // RVM23U32: "-target-feature" "+m" // RVM23U32: "-target-feature" "+zicbop" @@ -292,9 +292,9 @@ // RVM23U32: "-target-feature" "+zicsr" // RVM23U32: "-target-feature" "+zihintntl" // RVM23U32: "-target-feature" "+zihintpause" -// RVM23U32: "-target-feature" "+experimental-zimop" +// RVM23U32: "-target-feature" "+zimop" // RVM23U32: "-target-feature" "+zce" -// RVM23U32: "-target-feature" "+experimental-zcmop" +// RVM23U32: "-target-feature" "+zcmop" // RVM23U32: "-target-feature" "+zba" // RVM23U32: "-target-feature" "+zbb" // RVM23U32: "-target-feature" "+zbs" diff --git a/clang/test/Driver/riscv32-toolchain-extra.c b/clang/test/Driver/riscv32-toolchain-extra.c index 2d38aa3b545fe..aab6b36f3cfca 100644 --- a/clang/test/Driver/riscv32-toolchain-extra.c +++ b/clang/test/Driver/riscv32-toolchain-extra.c @@ -29,8 +29,8 @@ // C-RV32-BAREMETAL-ILP32-NOGCC: "-internal-isystem" "{{.*}}/riscv32-nogcc/bin/../riscv32-unknown-elf/include" // C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/bin/riscv32-unknown-elf-ld" // C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/bin/../riscv32-unknown-elf/lib/crt0.o" -// C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/lib/clang_rt.crtbegin-riscv32.o" +// C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/riscv32-unknown-unknown-elf/clang_rt.crtbegin.o" // C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/bin/../riscv32-unknown-elf/lib" // C-RV32-BAREMETAL-ILP32-NOGCC: "--start-group" "-lc" "-lgloss" "--end-group" -// C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/lib/libclang_rt.builtins-riscv32.a" -// C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/lib/clang_rt.crtend-riscv32.o" +// C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/riscv32-unknown-unknown-elf/libclang_rt.builtins.a" +// C-RV32-BAREMETAL-ILP32-NOGCC: "{{.*}}/riscv32-nogcc/{{.*}}/riscv32-unknown-unknown-elf/clang_rt.crtend.o" diff --git a/clang/test/Driver/riscv32-toolchain.c b/clang/test/Driver/riscv32-toolchain.c index bb2533cdf1bce..322a6ca2840fb 100644 --- a/clang/test/Driver/riscv32-toolchain.c +++ b/clang/test/Driver/riscv32-toolchain.c @@ -195,9 +195,9 @@ // RUN: --target=riscv32-unknown-elf --rtlib=compiler-rt --unwindlib=compiler-rt 2>&1 \ // RUN: | FileCheck -check-prefix=C-RV32-RTLIB-COMPILERRT-ILP32 %s // C-RV32-RTLIB-COMPILERRT-ILP32: "{{.*}}crt0.o" -// C-RV32-RTLIB-COMPILERRT-ILP32: "{{.*}}clang_rt.crtbegin-riscv32.o" -// C-RV32-RTLIB-COMPILERRT-ILP32: "--start-group" "-lc" "-lgloss" "--end-group" "{{.*}}libclang_rt.builtins-riscv32.a" -// C-RV32-RTLIB-COMPILERRT-ILP32: "{{.*}}clang_rt.crtend-riscv32.o" +// C-RV32-RTLIB-COMPILERRT-ILP32: "{{.*}}clang_rt.crtbegin.o" +// C-RV32-RTLIB-COMPILERRT-ILP32: "--start-group" "-lc" "-lgloss" "--end-group" "{{.*}}libclang_rt.builtins.a" +// C-RV32-RTLIB-COMPILERRT-ILP32: "{{.*}}clang_rt.crtend.o" // RUN: %clang -### %s --target=riscv32 \ // RUN: --gcc-toolchain=%S/Inputs/basic_riscv32_tree --sysroot= \ diff --git a/clang/test/Driver/riscv64-toolchain-extra.c b/clang/test/Driver/riscv64-toolchain-extra.c index a6ec9b16cc5ca..d8d9b58441676 100644 --- a/clang/test/Driver/riscv64-toolchain-extra.c +++ 
b/clang/test/Driver/riscv64-toolchain-extra.c @@ -29,8 +29,8 @@ // C-RV64-BAREMETAL-LP64-NOGCC: "-internal-isystem" "{{.*}}/riscv64-nogcc/bin/../riscv64-unknown-elf/include" // C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/bin/riscv64-unknown-elf-ld" // C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/bin/../riscv64-unknown-elf/lib/crt0.o" -// C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/{{.*}}/lib/clang_rt.crtbegin-riscv64.o" +// C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/{{.*}}/riscv64-unknown-unknown-elf/clang_rt.crtbegin.o" // C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/bin/../riscv64-unknown-elf/lib" // C-RV64-BAREMETAL-LP64-NOGCC: "--start-group" "-lc" "-lgloss" "--end-group" -// C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/{{.*}}/lib/libclang_rt.builtins-riscv64.a" -// C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/{{.*}}/lib/clang_rt.crtend-riscv64.o" +// C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/{{.*}}/riscv64-unknown-unknown-elf/libclang_rt.builtins.a" +// C-RV64-BAREMETAL-LP64-NOGCC: "{{.*}}/riscv64-nogcc/{{.*}}/riscv64-unknown-unknown-elf/clang_rt.crtend.o" diff --git a/clang/test/Driver/riscv64-toolchain.c b/clang/test/Driver/riscv64-toolchain.c index 381ee58c470c5..b3216de307540 100644 --- a/clang/test/Driver/riscv64-toolchain.c +++ b/clang/test/Driver/riscv64-toolchain.c @@ -151,9 +151,9 @@ // RUN: --target=riscv64-unknown-elf --rtlib=compiler-rt --unwindlib=compiler-rt 2>&1 \ // RUN: | FileCheck -check-prefix=C-RV64-RTLIB-COMPILERRT-LP64 %s // C-RV64-RTLIB-COMPILERRT-LP64: "{{.*}}crt0.o" -// C-RV64-RTLIB-COMPILERRT-LP64: "{{.*}}clang_rt.crtbegin-riscv64.o" -// C-RV64-RTLIB-COMPILERRT-LP64: "--start-group" "-lc" "-lgloss" "--end-group" "{{.*}}libclang_rt.builtins-riscv64.a" -// C-RV64-RTLIB-COMPILERRT-LP64: "{{.*}}clang_rt.crtend-riscv64.o" +// C-RV64-RTLIB-COMPILERRT-LP64: "{{.*}}clang_rt.crtbegin.o" +// C-RV64-RTLIB-COMPILERRT-LP64: "--start-group" "-lc" "-lgloss" "--end-group" "{{.*}}libclang_rt.builtins.a" +// C-RV64-RTLIB-COMPILERRT-LP64: "{{.*}}clang_rt.crtend.o" // RUN: %clang -### %s --target=riscv64 \ // RUN: --gcc-toolchain=%S/Inputs/basic_riscv64_tree --sysroot= \ diff --git a/clang/test/Driver/sanitizer-ld.c b/clang/test/Driver/sanitizer-ld.c index 53e536d772924..1d52fc1260959 100644 --- a/clang/test/Driver/sanitizer-ld.c +++ b/clang/test/Driver/sanitizer-ld.c @@ -111,7 +111,7 @@ // CHECK-ASAN-FREEBSD: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" // CHECK-ASAN-FREEBSD-NOT: "-lc" // CHECK-ASAN-FREEBSD-NOT: libclang_rt.asan_cxx -// CHECK-ASAN-FREEBSD: freebsd{{/|\\+}}libclang_rt.asan-i386.a" +// CHECK-ASAN-FREEBSD: freebsd{{/|\\+}}libclang_rt.asan.a" // CHECK-ASAN-FREEBSD-NOT: libclang_rt.asan_cxx // CHECK-ASAN-FREEBSD-NOT: "--dynamic-list" // CHECK-ASAN-FREEBSD: "--export-dynamic" @@ -135,8 +135,8 @@ // // CHECK-ASAN-LINUX-CXX: "{{(.*[^-.0-9A-Z_a-z])?}}ld{{(.exe)?}}" // CHECK-ASAN-LINUX-CXX-NOT: "-lc" -// CHECK-ASAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan-i386.a" "--no-whole-archive" -// CHECK-ASAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan_cxx-i386.a" "--no-whole-archive" +// CHECK-ASAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan.a" "--no-whole-archive" +// CHECK-ASAN-LINUX-CXX: "--whole-archive" "{{.*}}libclang_rt.asan_cxx.a" "--no-whole-archive" // CHECK-ASAN-LINUX-CXX-NOT: "--dynamic-list" // CHECK-ASAN-LINUX-CXX: "--export-dynamic" // CHECK-ASAN-LINUX-CXX: stdc++ @@ -163,7 +163,7 @@ // // CHECK-ASAN-ARM: "{{(.*[^.0-9A-Z_a-z])?}}ld{{(.exe)?}}" // CHECK-ASAN-ARM-NOT: "-lc" -// 
CHECK-ASAN-ARM: libclang_rt.asan-arm.a" +// CHECK-ASAN-ARM: libclang_rt.asan.a" // // RUN: %clang -### %s 2>&1 \ // RUN: --target=armv7l-linux-gnueabi -fuse-ld=ld -fsanitize=address \ @@ -172,7 +172,7 @@ // // CHECK-ASAN-ARMv7: "{{(.*[^.0-9A-Z_a-z])?}}ld{{(.exe)?}}" // CHECK-ASAN-ARMv7-NOT: "-lc" -// CHECK-ASAN-ARMv7: libclang_rt.asan-arm.a" +// CHECK-ASAN-ARMv7: libclang_rt.asan.a" // RUN: %clang -### %s 2>&1 \ // RUN: --target=arm-linux-androideabi -fuse-ld=ld -fsanitize=address \ @@ -184,7 +184,7 @@ // CHECK-ASAN-ANDROID-NOT: "-lc" // CHECK-ASAN-ANDROID-NOT: "-lpthread" // CHECK-ASAN-ANDROID-NOT: "-lresolv" -// CHECK-ASAN-ANDROID: libclang_rt.asan-arm-android.so" +// CHECK-ASAN-ANDROID: libclang_rt.asan.so" // CHECK-ASAN-ANDROID-NOT: "-lpthread" // CHECK-ASAN-ANDROID-NOT: "-lresolv" @@ -195,7 +195,7 @@ // RUN: | FileCheck --check-prefix=CHECK-ASAN-ANDROID-STATICLIBASAN %s // // CHECK-ASAN-ANDROID-STATICLIBASAN: "{{(.*[^.0-9A-Z_a-z])?}}ld.lld{{(.exe)?}}" -// CHECK-ASAN-ANDROID-STATICLIBASAN: libclang_rt.asan-arm-android.a" +// CHECK-ASAN-ANDROID-STATICLIBASAN: libclang_rt.asan.a" // CHECK-ASAN-ANDROID-STATICLIBASAN-NOT: "-lpthread" // CHECK-ASAN-ANDROID-STATICLIBASAN-NOT: "-lrt" // CHECK-ASAN-ANDROID-STATICLIBASAN-NOT: "-lresolv" @@ -210,7 +210,7 @@ // CHECK-UBSAN-ANDROID-NOT: "-lc" // CHECK-UBSAN-ANDROID-NOT: "-lpthread" // CHECK-UBSAN-ANDROID-NOT: "-lresolv" -// CHECK-UBSAN-ANDROID: libclang_rt.ubsan_standalone-arm-android.so" +// CHECK-UBSAN-ANDROID: libclang_rt.ubsan_standalone.so" // CHECK-UBSAN-ANDROID-NOT: "-lpthread" // CHECK-UBSAN-ANDROID-NOT: "-lresolv" @@ -221,7 +221,7 @@ // RUN: | FileCheck --check-prefix=CHECK-UBSAN-ANDROID-STATICLIBASAN %s // // CHECK-UBSAN-ANDROID-STATICLIBASAN: "{{(.*[^.0-9A-Z_a-z])?}}ld.lld{{(.exe)?}}" -// CHECK-UBSAN-ANDROID-STATICLIBASAN: libclang_rt.ubsan_standalone-arm-android.a" +// CHECK-UBSAN-ANDROID-STATICLIBASAN: libclang_rt.ubsan_standalone.a" // CHECK-UBSAN-ANDROID-STATICLIBASAN-NOT: "-lpthread" // CHECK-UBSAN-ANDROID-STATICLIBASAN-NOT: "-lrt" // CHECK-UBSAN-ANDROID-STATICLIBASAN-NOT: "-lresolv" @@ -237,7 +237,7 @@ // CHECK-ASAN-ANDROID-X86-NOT: "-lc" // CHECK-ASAN-ANDROID-X86-NOT: "-lpthread" // CHECK-ASAN-ANDROID-X86-NOT: "-lresolv" -// CHECK-ASAN-ANDROID-X86: libclang_rt.asan-i686-android.so" +// CHECK-ASAN-ANDROID-X86: libclang_rt.asan.so" // CHECK-ASAN-ANDROID-X86-NOT: "-lpthread" // CHECK-ASAN-ANDROID-X86-NOT: "-lresolv" // @@ -257,7 +257,7 @@ // // CHECK-ASAN-ANDROID-SHARED: "{{(.*[^.0-9A-Z_a-z])?}}ld.lld{{(.exe)?}}" // CHECK-ASAN-ANDROID-SHARED-NOT: "-lc" -// CHECK-ASAN-ANDROID-SHARED: libclang_rt.asan-arm-android.so" +// CHECK-ASAN-ANDROID-SHARED: libclang_rt.asan.so" // CHECK-ASAN-ANDROID-SHARED-NOT: "-lpthread" // CHECK-ASAN-ANDROID-SHARED-NOT: "-lresolv" @@ -347,7 +347,7 @@ // CHECK-UBSAN-LINUX: "{{.*}}ld{{(.exe)?}}" // CHECK-UBSAN-LINUX-NOT: libclang_rt.asan // CHECK-UBSAN-LINUX-NOT: libclang_rt.ubsan_standalone_cxx -// CHECK-UBSAN-LINUX: "--whole-archive" "{{.*}}libclang_rt.ubsan_standalone-x32.a" "--no-whole-archive" +// CHECK-UBSAN-LINUX: "--whole-archive" "{{.*}}libclang_rt.ubsan_standalone.a" "--no-whole-archive" // CHECK-UBSAN-LINUX-NOT: libclang_rt.asan // CHECK-UBSAN-LINUX-NOT: libclang_rt.ubsan_standalone_cxx // CHECK-UBSAN-LINUX-NOT: "-lstdc++" @@ -678,7 +678,7 @@ // RUN: --sysroot=%S/Inputs/basic_android_tree \ // RUN: | FileCheck --check-prefix=CHECK-CFI-CROSS-DSO-DIAG-ANDROID %s // CHECK-CFI-CROSS-DSO-DIAG-ANDROID: "{{.*}}ld{{(.exe)?}}" -// CHECK-CFI-CROSS-DSO-DIAG-ANDROID: 
"{{[^"]*}}libclang_rt.ubsan_standalone-aarch64-android.so" +// CHECK-CFI-CROSS-DSO-DIAG-ANDROID: "{{[^"]*}}libclang_rt.ubsan_standalone.so" // CHECK-CFI-CROSS-DSO-DIAG-ANDROID: "--export-dynamic-symbol=__cfi_check" // RUN: %clangxx -fsanitize=address -### %s 2>&1 \ @@ -929,7 +929,7 @@ // CHECK-SCUDO-ANDROID: "-pie" // CHECK-SCUDO-ANDROID-NOT: "-lpthread" // CHECK-SCUDO-ANDROID-NOT: "-lresolv" -// CHECK-SCUDO-ANDROID: libclang_rt.scudo_standalone-arm-android.so" +// CHECK-SCUDO-ANDROID: libclang_rt.scudo_standalone.so" // CHECK-SCUDO-ANDROID-NOT: "-lpthread" // CHECK-SCUDO-ANDROID-NOT: "-lresolv" @@ -940,7 +940,7 @@ // RUN: | FileCheck --check-prefix=CHECK-SCUDO-ANDROID-STATIC %s // CHECK-SCUDO-ANDROID-STATIC: "{{(.*[^.0-9A-Z_a-z])?}}ld.lld{{(.exe)?}}" // CHECK-SCUDO-ANDROID-STATIC: "-pie" -// CHECK-SCUDO-ANDROID-STATIC: "--whole-archive" "{{.*}}libclang_rt.scudo_standalone-arm-android.a" "--no-whole-archive" +// CHECK-SCUDO-ANDROID-STATIC: "--whole-archive" "{{.*}}libclang_rt.scudo_standalone.a" "--no-whole-archive" // CHECK-SCUDO-ANDROID-STATIC-NOT: "-lstdc++" // CHECK-SCUDO-ANDROID-STATIC-NOT: "-lpthread" // CHECK-SCUDO-ANDROID-STATIC-NOT: "-lrt" diff --git a/clang/test/Driver/version-build-config.test b/clang/test/Driver/version-build-config.test new file mode 100644 index 0000000000000..4cedf1e63181f --- /dev/null +++ b/clang/test/Driver/version-build-config.test @@ -0,0 +1,6 @@ +# REQUIRES: asserts +# RUN: %clang --version 2>&1 | FileCheck %s + +# CHECK: clang version +# When assertions are enabled, we should have a build configuration line that reflects that +# CHECK: Build config: {{.*}}+assertions diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c index 88590a3ba4c45..dabf0ac2433bb 100644 --- a/clang/test/Driver/wasm-toolchain.c +++ b/clang/test/Driver/wasm-toolchain.c @@ -17,42 +17,42 @@ // RUN: %clang -### --target=wasm32-unknown-unknown --sysroot=/foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK %s // LINK: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // A basic C link command-line with optimization with unknown OS. // RUN: %clang -### -O2 --target=wasm32-unknown-unknown --sysroot=/foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT %s // LINK_OPT: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_OPT: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_OPT: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // A basic C link command-line with known OS. // RUN: %clang -### --target=wasm32-wasi --sysroot=/foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_KNOWN %s // LINK_KNOWN: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // -shared should be passed through to `wasm-ld` and include crt1-reactor.o with a known OS. 
// RUN: %clang -### -shared -mexec-model=reactor --target=wasm32-wasi --sysroot=/foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_KNOWN_SHARED %s // LINK_KNOWN_SHARED: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_KNOWN_SHARED: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1-reactor.o" "--entry" "_initialize" "-shared" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_KNOWN_SHARED: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1-reactor.o" "--entry" "_initialize" "-shared" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // -shared should be passed through to `wasm-ld` and include crt1-reactor.o with an unknown OS. // RUN: %clang -### -shared -mexec-model=reactor --target=wasm32-unknown-unknown --sysroot=/foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_UNKNOWN_SHARED %s // LINK_UNKNOWN_SHARED: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_UNKNOWN_SHARED: wasm-ld{{.*}}" "crt1-reactor.o" "--entry" "_initialize" "-shared" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_UNKNOWN_SHARED: wasm-ld{{.*}}" "crt1-reactor.o" "--entry" "_initialize" "-shared" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // A basic C link command-line with optimization with known OS. // RUN: %clang -### -O2 --target=wasm32-wasi --sysroot=/foo %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT_KNOWN %s // LINK_OPT_KNOWN: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_OPT_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_OPT_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // A basic C compile command-line with known OS. @@ -180,12 +180,12 @@ // RUN: %clang -### %s --target=wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -mexec-model=command 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-COMMAND %s // CHECK-COMMAND: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// CHECK-COMMAND: wasm-ld{{.*}}" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// CHECK-COMMAND: wasm-ld{{.*}}" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // RUN: %clang -### %s --target=wasm32-unknown-unknown --sysroot=%s/no-sysroot-there -mexec-model=reactor 2>&1 \ // RUN: | FileCheck -check-prefix=CHECK-REACTOR %s // CHECK-REACTOR: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// CHECK-REACTOR: wasm-ld{{.*}}" "crt1-reactor.o" "--entry" "_initialize" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// CHECK-REACTOR: wasm-ld{{.*}}" "crt1-reactor.o" "--entry" "_initialize" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // -fPIC implies +mutable-globals @@ -204,7 +204,7 @@ // RUN: %clang -### -O2 --target=wasm32-wasip2 %s --sysroot /foo 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_WASIP2 %s // LINK_WASIP2: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_WASIP2: wasm-component-ld{{.*}}" "-L/foo/lib/wasm32-wasip2" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_WASIP2: wasm-component-ld{{.*}}" "-L/foo/lib/wasm32-wasip2" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // Test that on `wasm32-wasip2` the `wasm-component-ld` programs is told where // to find `wasm-ld` by default. 
diff --git a/clang/test/Driver/wasm-toolchain.cpp b/clang/test/Driver/wasm-toolchain.cpp index 4af011097021f..ba1c55b33edca 100644 --- a/clang/test/Driver/wasm-toolchain.cpp +++ b/clang/test/Driver/wasm-toolchain.cpp @@ -17,48 +17,48 @@ // RUN: %clangxx -### --target=wasm32-unknown-unknown --sysroot=/foo --stdlib=libc++ %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK %s // LINK: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // RUN: %clangxx -### --target=wasm32-unknown-unknown --sysroot=/foo --stdlib=libstdc++ %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_STDCXX %s // LINK_STDCXX: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_STDCXX: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_STDCXX: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // A basic C++ link command-line with optimization with unknown OS. // RUN: %clangxx -### -O2 --target=wasm32-unknown-unknown --sysroot=/foo %s --stdlib=libc++ 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT %s // LINK_OPT: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_OPT: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_OPT: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // RUN: %clangxx -### -O2 --target=wasm32-unknown-unknown --sysroot=/foo %s --stdlib=libstdc++ 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT_STDCXX %s // LINK_OPT_STDCXX: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_OPT_STDCXX: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_OPT_STDCXX: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // A basic C++ link command-line with known OS. // RUN: %clangxx -### --target=wasm32-wasi --sysroot=/foo --stdlib=libc++ %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_KNOWN %s // LINK_KNOWN: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // RUN: %clangxx -### --target=wasm32-wasi --sysroot=/foo --stdlib=libstdc++ %s 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_KNOWN_STDCXX %s // LINK_KNOWN_STDCXX: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_KNOWN_STDCXX: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_KNOWN_STDCXX: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // A basic C++ link command-line with optimization with known OS. 
// RUN: %clangxx -### -O2 --target=wasm32-wasi --sysroot=/foo %s --stdlib=libc++ 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT_KNOWN %s // LINK_OPT_KNOWN: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_OPT_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_OPT_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // RUN: %clangxx -### -O2 --target=wasm32-wasi --sysroot=/foo %s --stdlib=libstdc++ 2>&1 \ // RUN: | FileCheck -check-prefix=LINK_OPT_KNOWN_STDCXX %s // LINK_OPT_KNOWN_STDCXX: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" -// LINK_OPT_KNOWN_STDCXX: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out" +// LINK_OPT_KNOWN_STDCXX: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lstdc++" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" // A basic C++ compile command-line with known OS. diff --git a/clang/test/Driver/windows-cross.c b/clang/test/Driver/windows-cross.c index 75490b992d78d..f6e831f00e13a 100644 --- a/clang/test/Driver/windows-cross.c +++ b/clang/test/Driver/windows-cross.c @@ -11,32 +11,32 @@ // RUN: %clang -### -target armv7-windows-itanium --sysroot %s/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libstdc++ -o /dev/null %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-RTLIB -// CHECK-RTLIB: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" +// CHECK-RTLIB: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libc++ -o /dev/null %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-C-LIBCXX -// CHECK-C-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" +// CHECK-C-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins.lib" // RUN: %clangxx -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libc++ -o /dev/null %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-LIBCXX -// CHECK-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lc++" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" +// CHECK-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lc++" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 
-B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -o shared.dll %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SHARED -// CHECK-SHARED: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" +// CHECK-SHARED: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -static -o shared.dll %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SHARED-STATIC -// CHECK-SHARED-STATIC: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bstatic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" +// CHECK-SHARED-STATIC: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bstatic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %s/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -nostartfiles -o shared.dll %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-NOSTARTFILES -// CHECK-NOSTARTFILES: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" +// CHECK-NOSTARTFILES: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -nostartfiles -nodefaultlibs -o shared.dll %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-STANDALONE @@ -52,19 +52,19 @@ // RUN: | FileCheck %s --check-prefix CHECK-SANITIZE-ADDRESS // CHECK-SANITIZE-ADDRESS: "-fsanitize=address" -// CHECK-SANITIZE-ADDRESS: "{{.*}}clang_rt.asan_dll_thunk-arm.lib" +// CHECK-SANITIZE-ADDRESS: "{{.*}}clang_rt.asan_dll_thunk.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=lld-link2 -o test.exe -fsanitize=address -x c++ %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SANITIZE-ADDRESS-EXE // CHECK-SANITIZE-ADDRESS-EXE: "-fsanitize=address" -// CHECK-SANITIZE-ADDRESS-EXE: "{{.*}}clang_rt.asan_dynamic-arm.lib" "{{.*}}clang_rt.asan_dynamic_runtime_thunk-arm.lib" "--undefined" 
"__asan_seh_interceptor" +// CHECK-SANITIZE-ADDRESS-EXE: "{{.*}}clang_rt.asan_dynamic.lib" "{{.*}}clang_rt.asan_dynamic_runtime_thunk.lib" "--undefined" "__asan_seh_interceptor" // RUN: %clang -### -target i686-windows-itanium -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=lld-link2 -o test.exe -fsanitize=address -x c++ %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SANITIZE-ADDRESS-EXE-X86 // CHECK-SANITIZE-ADDRESS-EXE-X86: "-fsanitize=address" -// CHECK-SANITIZE-ADDRESS-EXE-X86: "{{.*}}clang_rt.asan_dynamic-i386.lib" "{{.*}}clang_rt.asan_dynamic_runtime_thunk-i386.lib" "--undefined" "___asan_seh_interceptor" +// CHECK-SANITIZE-ADDRESS-EXE-X86: "{{.*}}clang_rt.asan_dynamic.lib" "{{.*}}clang_rt.asan_dynamic_runtime_thunk.lib" "--undefined" "___asan_seh_interceptor" // RUN: not %clang -### --target=armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=lld-link2 -shared -o shared.dll -fsanitize=tsan -x c++ %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SANITIZE-TSAN diff --git a/clang/test/Driver/zos-ld.c b/clang/test/Driver/zos-ld.c index 4d4decdd0e65b..87d169936e129 100644 --- a/clang/test/Driver/zos-ld.c +++ b/clang/test/Driver/zos-ld.c @@ -14,7 +14,7 @@ // C-LD-SAME: "-S" "//'SYS1.CSSLIB'" // C-LD-SAME: "//'CEE.SCEELIB(CELQS001)'" // C-LD-SAME: "//'CEE.SCEELIB(CELQS003)'" -// C-LD-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}zos{{/|\\\\}}libclang_rt.builtins-s390x.a" +// C-LD-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}s390x-ibm-zos{{/|\\\\}}libclang_rt.builtins.a" // 2. General C link for dll // RUN: %clang -### --shared --target=s390x-ibm-zos %s 2>&1 \ @@ -30,7 +30,7 @@ // C-LD-DLL-SAME: "-S" "//'SYS1.CSSLIB'" // C-LD-DLL-SAME: "//'CEE.SCEELIB(CELQS001)'" // C-LD-DLL-SAME: "//'CEE.SCEELIB(CELQS003)'" -// C-LD-DLL-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}zos{{/|\\\\}}libclang_rt.builtins-s390x.a" +// C-LD-DLL-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}s390x-ibm-zos{{/|\\\\}}libclang_rt.builtins.a" // 3. General C++ link for executable // RUN: %clangxx -### --target=s390x-ibm-zos %s 2>&1 \ @@ -52,7 +52,7 @@ // CXX-LD-SAME: "//'CEE.SCEELIB(CRTDQCXA)'" // CXX-LD-SAME: "//'CEE.SCEELIB(CRTDQXLA)'" // CXX-LD-SAME: "//'CEE.SCEELIB(CRTDQUNW)'" -// CXX-LD-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}zos{{/|\\\\}}libclang_rt.builtins-s390x.a" +// CXX-LD-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}s390x-ibm-zos{{/|\\\\}}libclang_rt.builtins.a" // 4. General C++ link for dll // RUN: %clangxx -### --shared --target=s390x-ibm-zos %s 2>&1 \ @@ -74,7 +74,7 @@ // CXX-LD-DLL-SAME: "//'CEE.SCEELIB(CRTDQCXA)'" // CXX-LD-DLL-SAME: "//'CEE.SCEELIB(CRTDQXLA)'" // CXX-LD-DLL-SAME: "//'CEE.SCEELIB(CRTDQUNW)'" -// CXX-LD-DLL-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}zos{{/|\\\\}}libclang_rt.builtins-s390x.a" +// CXX-LD-DLL-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}s390x-ibm-zos{{/|\\\\}}libclang_rt.builtins.a" // 5. C++ link for executable w/ -mzos-hlq-le=, -mzos-hlq-csslib= // RUN: %clangxx -### --target=s390x-ibm-zos %s 2>&1 \ @@ -97,7 +97,7 @@ // CXX-LD5-SAME: "//'AAAA.SCEELIB(CRTDQCXA)'" // CXX-LD5-SAME: "//'AAAA.SCEELIB(CRTDQXLA)'" // CXX-LD5-SAME: "//'AAAA.SCEELIB(CRTDQUNW)'" -// CXX-LD5-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}zos{{/|\\\\}}libclang_rt.builtins-s390x.a" +// CXX-LD5-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}s390x-ibm-zos{{/|\\\\}}libclang_rt.builtins.a" // 6. 
C++ link for executable w/ -mzos-hlq-clang= // RUN: %clangxx -### --target=s390x-ibm-zos %s 2>&1 \ @@ -120,4 +120,4 @@ // CXX-LD6-SAME: "//'AAAA.SCEELIB(CRTDQCXA)'" // CXX-LD6-SAME: "//'AAAA.SCEELIB(CRTDQXLA)'" // CXX-LD6-SAME: "//'AAAA.SCEELIB(CRTDQUNW)'" -// CXX-LD6-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}zos{{/|\\\\}}libclang_rt.builtins-s390x.a" +// CXX-LD6-SAME: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}s390x-ibm-zos{{/|\\\\}}libclang_rt.builtins.a" diff --git a/clang/test/Headers/Inputs/include/stdint.h b/clang/test/Headers/Inputs/include/stdint.h index 5bf26a7b67b06..67b27b8dfc7b9 100644 --- a/clang/test/Headers/Inputs/include/stdint.h +++ b/clang/test/Headers/Inputs/include/stdint.h @@ -16,4 +16,12 @@ typedef unsigned __INTPTR_TYPE__ uintptr_t; #error Every target should have __INTPTR_TYPE__ #endif +#ifdef __INTPTR_MAX__ +#define INTPTR_MAX __INTPTR_MAX__ +#endif + +#ifdef __UINTPTR_MAX__ +#define UINTPTR_MAX __UINTPTR_MAX__ +#endif + #endif /* STDINT_H */ diff --git a/clang/test/Headers/ms-intrin.cpp b/clang/test/Headers/ms-intrin.cpp index d3b6a1a278dab..cb7cd47956205 100644 --- a/clang/test/Headers/ms-intrin.cpp +++ b/clang/test/Headers/ms-intrin.cpp @@ -18,6 +18,16 @@ // RUN: -ffreestanding -fsyntax-only -Werror \ // RUN: -isystem %S/Inputs/include %s +// RUN: %clang_cc1 -triple aarch64--windows \ +// RUN: -fms-compatibility -fms-compatibility-version=17.00 \ +// RUN: -ffreestanding -fsyntax-only -Werror \ +// RUN: -isystem %S/Inputs/include %s + +// RUN: %clang_cc1 -triple arm64ec--windows \ +// RUN: -fms-compatibility -fms-compatibility-version=17.00 \ +// RUN: -ffreestanding -fsyntax-only -Werror \ +// RUN: -isystem %S/Inputs/include %s + // REQUIRES: x86-registered-target // intrin.h needs size_t, but -ffreestanding prevents us from getting it from @@ -41,7 +51,7 @@ void f() { __stosd(0, 0, 0); __stosw(0, 0, 0); -#ifdef _M_X64 +#if defined(_M_X64) && !defined(_M_ARM64EC) __movsq(0, 0, 0); __stosq(0, 0, 0); #endif @@ -49,7 +59,7 @@ void f() { int info[4]; __cpuid(info, 0); __cpuidex(info, 0, 0); -#if defined(_M_X64) || defined(_M_IX86) +#if (defined(_M_X64) && !defined(_M_ARM64EC)) || defined(_M_IX86) _xgetbv(0); #endif __halt(); diff --git a/clang/test/InstallAPI/Inputs/Foundation/Foundation.framework/Headers/Foundation.h b/clang/test/InstallAPI/Inputs/Foundation/Foundation.framework/Headers/Foundation.h new file mode 100644 index 0000000000000..e731b59ac5308 --- /dev/null +++ b/clang/test/InstallAPI/Inputs/Foundation/Foundation.framework/Headers/Foundation.h @@ -0,0 +1,19 @@ +@interface NSObject +@end + +typedef unsigned char BOOL; +#ifndef NS_AVAILABLE +#define NS_AVAILABLE(x,y) __attribute__((availability(macosx,introduced=x))) +#endif +#ifndef NS_UNAVAILABLE +#define NS_UNAVAILABLE __attribute__((unavailable)) +#endif +#ifndef NS_DEPRECATED_MAC +#define NS_DEPRECATED_MAC(x,y) __attribute__((availability(macosx,introduced=x,deprecated=y,message="" ))); +#endif + +@interface NSManagedObject +@end + +@interface NSSet +@end diff --git a/clang/test/InstallAPI/binary-attributes.test b/clang/test/InstallAPI/binary-attributes.test new file mode 100644 index 0000000000000..d97c7a14a98d7 --- /dev/null +++ b/clang/test/InstallAPI/binary-attributes.test @@ -0,0 +1,76 @@ +; RUN: rm -rf %t +; RUN: split-file %s %t +; RUN: mkdir -p %t/System/Library/Frameworks +; RUN: cp -r %S/Inputs/Simple/Simple.framework %t/System/Library/Frameworks/ +; RUN: yaml2obj %S/Inputs/Simple/Simple.yaml -o %t/Simple + +; RUN: not clang-installapi -target x86_64h-apple-macos10.12 \ +; RUN: -install_name 
Simple -current_version 3 -compatibility_version 2 \ +; RUN: -o tmp.tbd --verify-against=%t/Simple 2>&1 | FileCheck -check-prefix=ARCHITECTURE %s +; ARCHITECTURE: error: architectures do not match: 'x86_64h' (provided) vs 'x86_64' (found) + +; RUN: not clang-installapi -target x86_64-apple-macos10.12 \ +; RUN: -install_name Simple -current_version 3 -compatibility_version 2 \ +; RUN: -o tmp.tbd --verify-against=%t/Simple 2>&1 | FileCheck -check-prefix=INSTALL_NAME %s +; INSTALL_NAME: error: install_name does not match: 'Simple' (provided) vs '/System/Library/Frameworks/Simple.framework/Versions/A/Simple' (found) + +; RUN: not clang-installapi -target x86_64-apple-macos10.12 \ +; RUN: -install_name /System/Library/Frameworks/Simple.framework/Versions/A/Simple \ +; RUN: -current_version 3 -compatibility_version 2 \ +; RUN: -o tmp.tbd --verify-against=%t/Simple 2>&1 | FileCheck -check-prefix=CURRENT_VERSION %s +; CURRENT_VERSION: error: current_version does not match: '3' (provided) vs '1.2.3' (found) + +; RUN: not clang-installapi -target x86_64-apple-macos10.12 \ +; RUN: -install_name /System/Library/Frameworks/Simple.framework/Versions/A/Simple \ +; RUN: -current_version 1.2.3 -compatibility_version 2 \ +; RUN: -o tmp.tbd --verify-against=%t/Simple 2>&1 | FileCheck -check-prefix=COMPATIBILITY_VERSION %s +; COMPATIBILITY_VERSION: error: compatibility_version does not match: '2' (provided) vs '1' (found) + +; RUN: not clang-installapi -target x86_64-apple-macos10.12 \ +; RUN: -install_name /System/Library/Frameworks/Simple.framework/Versions/A/Simple \ +; RUN: -current_version 1.2.3 -compatibility_version 1 -fapplication-extension \ +; RUN: -o tmp.tbd --verify-against=%t/Simple 2>&1 | FileCheck -check-prefix=APPEXTSAFE %s +; APPEXTSAFE: error: ApplicationExtensionSafe flag does not match: 'true' (provided) vs 'false' (found) + +; RUN: not clang-installapi -target x86_64-apple-macos10.12 \ +; RUN: -install_name /System/Library/Frameworks/Simple.framework/Versions/A/Simple \ +; RUN: -current_version 1.2.3 -compatibility_version 1 -not_for_dyld_shared_cache \ +; RUN: -o tmp.tbd --verify-against=%t/Simple 2>&1 | FileCheck -check-prefix=SHARED_CACHE %s +; SHARED_CACHE: error: NotForDyldSharedCache flag does not match: 'true' (provided) vs 'false' (found) + +; RUN: not clang-installapi -target x86_64-apple-macos10.12 \ +; RUN: -install_name /System/Library/Frameworks/Simple.framework/Versions/A/Simple \ +; RUN: -current_version 1.2.3 -compatibility_version 1 \ +; RUN: -allowable_client Foo -allowable_client Bar \ +; RUN: -o tmp.tbd --verify-against=%t/Simple 2>&1 | FileCheck -check-prefix=ALLOWABLE %s +; ALLOWABLE: error: allowable client missing from binary file: 'Foo [ x86_64 ]' + +; RUN: not clang-installapi -target x86_64-apple-macos10.12 \ +; RUN: -install_name /System/Library/Frameworks/Simple.framework/Versions/A/Simple \ +; RUN: -current_version 1.2.3 -compatibility_version 1 -reexport_library %t/Foo.tbd \ +; RUN: -o tmp.tbd --verify-against=%t/Simple 2>&1 | FileCheck -check-prefix=REEXPORT %s +; REEXPORT: error: re-exported library missing from binary file: 'Foo [ x86_64 ]' + +; RUN: not clang-installapi -target x86_64-apple-macos10.12 \ +; RUN: -install_name /System/Library/Frameworks/Simple.framework/Versions/A/Simple \ +; RUN: -current_version 1.2.3 -compatibility_version 1 -umbrella Bogus \ +; RUN: -o tmp.tbd --verify-against=%t/Simple 2>&1 | FileCheck -check-prefix=UMBRELLA %s +; UMBRELLA: error: parent umbrella missing from binary file: 'Bogus' + +;--- Foo.tbd +{ + "main_library": { 
+ "install_names": [ + { + "name": "Foo" + } + ], + "target_info": [ + { + "min_deployment": "13.0", + "target": "arm64-macos" + } + ] + }, + "tapi_tbd_version": 5 +} diff --git a/clang/test/InstallAPI/driver-invalid-options.test b/clang/test/InstallAPI/driver-invalid-options.test index 69f3b2d66ab8b..2b2c551fca202 100644 --- a/clang/test/InstallAPI/driver-invalid-options.test +++ b/clang/test/InstallAPI/driver-invalid-options.test @@ -7,3 +7,15 @@ // RUN: not clang-installapi -target x86_64-apple-ios-simulator %s -o tmp.tbd 2> %t // RUN: FileCheck --check-prefix INVALID_INSTALL_NAME -input-file %t %s // INVALID_INSTALL_NAME: error: no install name specified: add -install_name + +/// Check invalid verification mode. +// RUN: not clang-installapi -install_name Foo -target arm64-apple-ios13 \ +// RUN: --verify-mode=Invalid -o tmp.tbd 2> %t +// RUN: FileCheck --check-prefix INVALID_VERIFY_MODE -input-file %t %s +// INVALID_VERIFY_MODE: error: invalid value 'Invalid' in '--verify-mode=Invalid' + +/// Check that invalid sysroot is fatal. +// RUN: not clang-installapi -install_name Foo -target arm64-apple-ios13 \ +// RUN: -isysroot /no/such/path -o tmp.tbd 2> %t +// RUN: FileCheck --check-prefix INVALID_ISYSROOT -input-file %t %s +// INVALID_ISYSROOT: error: no such sysroot directory: {{.*}}no/such/path' diff --git a/clang/test/InstallAPI/extra-exclude-headers.test b/clang/test/InstallAPI/extra-exclude-headers.test index 663ca1a5d5000..addb81f5386f3 100644 --- a/clang/test/InstallAPI/extra-exclude-headers.test +++ b/clang/test/InstallAPI/extra-exclude-headers.test @@ -2,6 +2,7 @@ ; RUN: split-file %s %t ; RUN: mkdir -p %t/System/Library/Frameworks ; RUN: cp -r %S/Inputs/Simple/Simple.framework %t/System/Library/Frameworks/ +; RUN: cp -r %S/Inputs/Foundation/Foundation.framework %t/System/Library/Frameworks/ ; RUN: sed -e "s|DSTROOT|%/t|g" %t/inputs.json.in > %t/inputs.json ; RUN: yaml2obj %S/Inputs/Simple/Simple.yaml -o %t/Simple @@ -184,24 +185,3 @@ ], "version": "3" } - -;--- System/Library/Frameworks/Foundation.framework/Headers/Foundation.h -@interface NSObject -@end - -typedef unsigned char BOOL; -#ifndef NS_AVAILABLE -#define NS_AVAILABLE(x,y) __attribute__((availability(macosx,introduced=x))) -#endif -#ifndef NS_UNAVAILABLE -#define NS_UNAVAILABLE __attribute__((unavailable)) -#endif -#ifndef NS_DEPRECATED_MAC -#define NS_DEPRECATED_MAC(x,y) __attribute__((availability(macosx,introduced=x,deprecated=y,message="" ))); -#endif - -@interface NSManagedObject -@end - -@interface NSSet -@end diff --git a/clang/test/InstallAPI/forwarded-search-paths.test b/clang/test/InstallAPI/forwarded-search-paths.test new file mode 100644 index 0000000000000..dc1e9006060f4 --- /dev/null +++ b/clang/test/InstallAPI/forwarded-search-paths.test @@ -0,0 +1,34 @@ +; RUN: rm -rf %t +; RUN: split-file %s %t +; RUN: sed -e "s|DSTROOT|%/t|g" %t/input.json.in > %t/input.json + +; RUN: mkdir -p %t/System/Library/Frameworks +; RUN: cp -r %S/Inputs/Foundation/Foundation.framework %t/System/Library/Frameworks/ +; RUN: cp -r %S/Inputs/Simple/Simple.framework %t/System/Library/Frameworks/ +; RUN: yaml2obj %S/Inputs/Simple/Simple.yaml -o %t/Simple +; RUN: mkdir -p %t/usr/include/after + +; RUN: clang-installapi -target x86_64-apple-macosx10.12 \ +; RUN: -install_name /System/Library/Frameworks/Simple.framework/Versions/A/Simple \ +; RUN: -current_version 1.2.3 -compatibility_version 1 -o %t/Simple.tbd \ +; RUN: -idirafter %t/usr/include/after \ +; RUN: -F %t/System/Library/Frameworks \ +; RUN: --verify-against=%t/Simple 
--verify-mode=ErrorsOnly \
+; RUN: %t/input.json -v 2>&1 | FileCheck %s
+
+; CHECK: "-idirafter" {{.*}}/usr/include/after"
+; CHECK: #include "..." search starts here:
+; CHECK: #include <...> search starts here:
+; CHECK: usr/include/after
+; CHECK-NEXT: End of search list.
+
+;--- input.json.in
+{
+  "version" : "3",
+  "headers" : [
+    {
+      "type" : "public",
+      "path" : "DSTROOT/System/Library/Frameworks/Simple.framework/Headers/Basic.h"
+    }
+  ]
+}
diff --git a/clang/test/InstallAPI/reexported-frameworks.test b/clang/test/InstallAPI/reexported-frameworks.test
new file mode 100644
index 0000000000000..41c4f539c0b1f
--- /dev/null
+++ b/clang/test/InstallAPI/reexported-frameworks.test
@@ -0,0 +1,638 @@
+; RUN: rm -rf %t
+; RUN: split-file %s %t
+; RUN: sed -e "s|DSTROOT|%/t|g" %t/inputs.json.in > %t/inputs.json
+
+; RUN: yaml2obj %t/Umbrella.yaml -o %t/Umbrella
+; RUN: mkdir -p %t/System/Library/Frameworks/Bar.framework
+; RUN: yaml2obj %t/Bar.yaml -o %t/System/Library/Frameworks/Bar.framework/Bar
+
+; RUN: clang-installapi -target x86_64-apple-macosx13 -install_name \
+; RUN: /System/Library/Frameworks/Umbrella3.framework/Versions/A/Umbrella3 \
+; RUN: -current_version 1 -compatibility_version 1 \
+; RUN: --verify-against=%t/Umbrella \
+; RUN: -F %t/System/Library/Frameworks -L %t/usr/lib \
+; RUN: %t/inputs.json --verify-mode=Pedantic \
+; RUN: -reexport_framework Foo -reexport_framework Bar -reexport-lBaz \
+; RUN: -o %t/Umbrella.tbd 2>&1 | FileCheck -allow-empty %s
+; RUN: llvm-readtapi -compare %t/Umbrella.tbd %t/expected.tbd 2>&1 | FileCheck -allow-empty %s
+
+// Checks that a reexported framework found earlier in the search paths doesn't
+// resolve a missing export from a declaration.
+; RUN: not clang-installapi -target x86_64-apple-macosx13 -install_name \
+; RUN: /System/Library/Frameworks/Umbrella3.framework/Versions/A/Umbrella3 \
+; RUN: -current_version 1 -compatibility_version 1 \
+; RUN: --verify-against=%t/Umbrella \
+; RUN: %t/inputs.json -F %t/BadFoo \
+; RUN: -F %t/System/Library/Frameworks -L %t/usr/lib \
+; RUN: --verify-mode=ErrorsOnly \
+; RUN: -reexport_framework Foo -reexport_framework Bar -reexport-lBaz \
+; RUN: -o %t/Umbrella.tbd 2>&1 | FileCheck %s --check-prefix MISSING_SYMBOL
+
+; MISSING_SYMBOL: error: declaration has external linkage, but dynamic library doesn't have symbol 'foo'
+; MISSING_SYMBOL-NEXT: extern int foo();
+
+
+; CHECK-NOT: error
+; CHECK-NOT: warning
+
+;--- System/Library/Frameworks/Umbrella.framework/Headers/Bar.h
+extern int bar();
+
+;--- System/Library/Frameworks/Umbrella.framework/Headers/Baz.h
+extern int baz();
+
+;--- System/Library/Frameworks/Umbrella.framework/Headers/Foo.h
+extern int foo();
+
+;--- System/Library/Frameworks/Umbrella.framework/Headers/Umbrella.h
+#import <Umbrella/Bar.h>
+#import <Umbrella/Baz.h>
+#import <Umbrella/Foo.h>
+
+;--- inputs.json.in
+{
+  "headers": [ {
+    "path" : "DSTROOT/System/Library/Frameworks/Umbrella.framework/Headers/Bar.h",
+    "type" : "public"
+  },
+  {
+    "path" : "DSTROOT/System/Library/Frameworks/Umbrella.framework/Headers/Baz.h",
+    "type" : "public"
+  },
+  {
+    "path" : "DSTROOT/System/Library/Frameworks/Umbrella.framework/Headers/Umbrella.h",
+    "type" : "public"
+  },
+  {
+    "path" : "DSTROOT/System/Library/Frameworks/Umbrella.framework/Headers/Foo.h",
+    "type" : "public"
+  }
+  ],
+  "version": "3"
+}
+
+;--- Umbrella.yaml
+--- !mach-o
+FileHeader:
+  magic: 0xFEEDFACF
+  cputype: 0x1000007
+  cpusubtype: 0x3
+  filetype: 0x6
+  ncmds: 18
+  sizeofcmds: 1184
+  flags: 0x85
+  reserved: 0x0
+LoadCommands:
+  - cmd: LC_SEGMENT_64
+    cmdsize: 152
+    segname: __TEXT
+
vmaddr: 0 + vmsize: 12288 + fileoff: 0 + filesize: 12288 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x24C0 + size: 0 + offset: 0x24C0 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x80000000 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '' + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DATA_CONST + vmaddr: 12288 + vmsize: 4096 + fileoff: 12288 + filesize: 4096 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 16 + Sections: + - sectname: __objc_imageinfo + segname: __DATA_CONST + addr: 0x3000 + size: 8 + offset: 0x3000 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '0000000040000000' + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 16384 + vmsize: 48 + fileoff: 16384 + filesize: 48 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_INFO_ONLY + cmdsize: 48 + rebase_off: 0 + rebase_size: 0 + bind_off: 0 + bind_size: 0 + weak_bind_off: 0 + weak_bind_size: 0 + lazy_bind_off: 0 + lazy_bind_size: 0 + export_off: 0 + export_size: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 16392 + nsyms: 1 + stroff: 16408 + strsize: 24 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 0 + iundefsym: 0 + nundefsym: 1 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_ID_DYLIB + cmdsize: 96 + dylib: + name: 24 + timestamp: 0 + current_version: 65536 + compatibility_version: 65536 + Content: '/System/Library/Frameworks/Umbrella3.framework/Versions/A/Umbrella3' + ZeroPadBytes: 5 + - cmd: LC_UUID + cmdsize: 24 + uuid: 4C4C44AE-5555-3144-A1D3-33A5C6F7B36A + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 1 + minos: 851968 + sdk: 983040 + ntools: 1 + Tools: + - tool: 4 + version: 1245184 + - cmd: LC_LOAD_DYLIB + cmdsize: 80 + dylib: + name: 24 + timestamp: 0 + current_version: 65536 + compatibility_version: 65536 + Content: '/System/Library/Frameworks/Foo.framework/Versions/A/Foo' + ZeroPadBytes: 1 + - cmd: LC_REEXPORT_DYLIB + cmdsize: 80 + dylib: + name: 24 + timestamp: 0 + current_version: 0 + compatibility_version: 0 + Content: '/System/Library/Frameworks/Foo.framework/Versions/A/Foo' + ZeroPadBytes: 1 + - cmd: LC_LOAD_DYLIB + cmdsize: 80 + dylib: + name: 24 + timestamp: 0 + current_version: 65536 + compatibility_version: 65536 + Content: '/System/Library/Frameworks/Bar.framework/Versions/A/Bar' + ZeroPadBytes: 1 + - cmd: LC_REEXPORT_DYLIB + cmdsize: 80 + dylib: + name: 24 + timestamp: 0 + current_version: 0 + compatibility_version: 0 + Content: '/System/Library/Frameworks/Bar.framework/Versions/A/Bar' + ZeroPadBytes: 1 + - cmd: LC_LOAD_DYLIB + cmdsize: 48 + dylib: + name: 24 + timestamp: 0 + current_version: 65536 + compatibility_version: 65536 + Content: '/usr/lib/libBaz.1.dylib' + ZeroPadBytes: 1 + - cmd: LC_REEXPORT_DYLIB + cmdsize: 48 + dylib: + name: 24 + timestamp: 0 + current_version: 0 + compatibility_version: 0 + Content: '/usr/lib/libBaz.1.dylib' + ZeroPadBytes: 1 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 0 + current_version: 88539136 + compatibility_version: 65536 + Content: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 16384 + datasize: 8 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 16392 + datasize: 0 +LinkEditData: + NameList: + - n_strx: 2 + n_type: 
0x1 + n_sect: 0 + n_desc: 1024 + n_value: 0 + StringTable: + - ' ' + - dyld_stub_binder + - '' + - '' + - '' + - '' + - '' +... + +;--- System/Library/Frameworks/Foo.framework/Foo.tbd +{ + "main_library": { + "exported_symbols": [ + { + "text": { + "global": [ + "_foo" + ] + } + } + ], + "flags": [ + { + "attributes": [ + "not_app_extension_safe" + ] + } + ], + "install_names": [ + { + "name": "/System/Library/Frameworks/Foo.framework/Versions/A/Foo" + } + ], + "target_info": [ + { + "min_deployment": "13", + "target": "x86_64-macos" + } + ] + }, + "tapi_tbd_version": 5 +} + +;--- Bar.yaml +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x1000007 + cpusubtype: 0x3 + filetype: 0x6 + ncmds: 12 + sizeofcmds: 912 + flags: 0x100085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 312 + segname: __TEXT + vmaddr: 0 + vmsize: 8192 + fileoff: 0 + filesize: 8192 + maxprot: 5 + initprot: 5 + nsects: 3 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0xBB0 + size: 8 + offset: 0xBB0 + align: 4 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 554889E531C05DC3 + - sectname: __unwind_info + segname: __TEXT + addr: 0xBB8 + size: 4152 + offset: 0xBB8 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 010000001C000000010000002000000000000000200000000200000000000001B00B00003800000038000000B80B00000000000038000000030000000C0001001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + - sectname: __eh_frame + segname: __TEXT + addr: 0x1BF0 + size: 24 + offset: 0x1BF0 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x6000000B + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: 1400000000000000017A520001781001100C070890010000 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DATA_CONST + vmaddr: 8192 + vmsize: 4096 + fileoff: 8192 + filesize: 4096 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 16 + Sections: + - sectname: __objc_imageinfo + segname: __DATA_CONST + addr: 0x2000 + size: 8 + offset: 0x2000 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '0000000040000000' + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 12288 + vmsize: 80 + fileoff: 12288 + filesize: 80 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_INFO_ONLY + cmdsize: 48 + rebase_off: 0 + rebase_size: 0 + bind_off: 0 + bind_size: 0 + weak_bind_off: 0 + weak_bind_size: 0 + 
lazy_bind_off: 0 + lazy_bind_size: 0 + export_off: 12288 + export_size: 16 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 12312 + nsyms: 2 + stroff: 12344 + strsize: 24 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 1 + iundefsym: 1 + nundefsym: 1 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_ID_DYLIB + cmdsize: 80 + dylib: + name: 24 + timestamp: 0 + current_version: 65536 + compatibility_version: 65536 + Content: '/System/Library/Frameworks/Bar.framework/Versions/A/Bar' + ZeroPadBytes: 1 + - cmd: LC_UUID + cmdsize: 24 + uuid: 4C4C4415-5555-3144-A11E-3C68D85CC061 + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 1 + minos: 851968 + sdk: 983040 + ntools: 1 + Tools: + - tool: 4 + version: 1245184 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 0 + current_version: 88539136 + compatibility_version: 65536 + Content: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 12304 + datasize: 8 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 12312 + datasize: 0 +LinkEditData: + ExportTrie: + TerminalSize: 0 + NodeOffset: 0 + Name: '' + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + Children: + - TerminalSize: 3 + NodeOffset: 8 + Name: _bar + Flags: 0x0 + Address: 0xBB0 + Other: 0x0 + ImportName: '' + NameList: + - n_strx: 2 + n_type: 0xF + n_sect: 1 + n_desc: 0 + n_value: 2992 + - n_strx: 7 + n_type: 0x1 + n_sect: 0 + n_desc: 256 + n_value: 0 + StringTable: + - ' ' + - _bar + - dyld_stub_binder + FunctionStarts: [ 0xBB0 ] +... + +;--- usr/lib/libBaz.tbd +{ + "main_library": { + "exported_symbols": [ + { + "text": { + "global": [ + "_baz" + ] + } + } + ], + "flags": [ + { + "attributes": [ + "not_app_extension_safe" + ] + } + ], + "install_names": [ + { + "name": "/usr/lib/libBaz.1.dylib" + } + ], + "target_info": [ + { + "min_deployment": "13", + "target": "x86_64-macos" + } + ] + }, + "tapi_tbd_version": 5 +} + +;--- BadFoo/Foo.framework/Foo.tbd +{ + "main_library": { + "exported_symbols": [ + { + "text": { + "global": [ + "_not_so_foo" + ] + } + } + ], + "flags": [ + { + "attributes": [ + "not_app_extension_safe" + ] + } + ], + "install_names": [ + { + "name": "/System/Library/Frameworks/Foo.framework/Versions/A/Foo" + } + ], + "target_info": [ + { + "min_deployment": "13", + "target": "x86_64-macos" + } + ] + }, + "tapi_tbd_version": 5 +} + +;--- expected.tbd +{ + "main_library": { + "flags": [ + { + "attributes": [ + "not_app_extension_safe" + ] + } + ], + "install_names": [ + { + "name": "/System/Library/Frameworks/Umbrella3.framework/Versions/A/Umbrella3" + } + ], + "reexported_libraries": [ + { + "names": [ + "/System/Library/Frameworks/Bar.framework/Versions/A/Bar", + "/System/Library/Frameworks/Foo.framework/Versions/A/Foo", + "/usr/lib/libBaz.1.dylib" + ] + } + ], + "target_info": [ + { + "min_deployment": "13", + "target": "x86_64-macos" + } + ] + }, + "tapi_tbd_version": 5 +} diff --git a/clang/test/InstallAPI/rpath.test b/clang/test/InstallAPI/rpath.test new file mode 100644 index 0000000000000..083a15419abaa --- /dev/null +++ b/clang/test/InstallAPI/rpath.test @@ -0,0 +1,663 @@ +; RUN: rm -rf %t +; RUN: split-file %s %t +; RUN: yaml2obj %t/RPath.yaml -o %t/RPath + +; RUN: clang-installapi --filetype=tbd-v5 \ +; RUN: -target arm64-apple-macos13.0 -target x86_64-apple-macos13.0 \ +; RUN: -install_name 
@rpath/Frameworks/RPath.framework/Versions/A/RPath \ +; RUN: -current_version 1 -compatibility_version 1 \ +; RUN: --extra-public-header=%t/public.h \ +; RUN: -o %t/RPath_warnings.tbd \ +; RUN: --verify-against=%t/RPath \ +; RUN: --verify-mode=Pedantic 2>&1 | FileCheck %s --check-prefix=MISSING +; RUN: llvm-readtapi --compare %t/RPath_warnings.tbd %t/expected_no_rpaths.tbd + +; MISSING: warning: runpath search paths missing from installAPI option: '@loader_path/../../../SharedFrameworks/ [ x86_64 arm64 ]' +; MISSING: warning: runpath search paths missing from installAPI option: '@loader_path/../../PrivateFrameworks/ [ x86_64 arm64 ]' + +; RUN: clang-installapi --filetype=tbd-v5 \ +; RUN: -target arm64-apple-macos13.0 -target x86_64-apple-macos13.0 \ +; RUN: -install_name @rpath/Frameworks/RPath.framework/Versions/A/RPath \ +; RUN: -current_version 1 -compatibility_version 1 \ +; RUN: --extra-public-header=%t/public.h \ +; RUN: -Xarch_arm64 -rpath @loader_path/../../../SharedFrameworks/ \ +; RUN: -o %t/RPath_Xarch.tbd \ +; RUN: --verify-against=%t/RPath \ +; RUN: --verify-mode=Pedantic 2>&1 | FileCheck %s --check-prefix=XARCH +; RUN: llvm-readtapi --compare %t/RPath_Xarch.tbd %t/expected_xarch_rpaths.tbd + +; XARCH: warning: runpath search paths do not match: '@loader_path/../../../SharedFrameworks/ [ arm64 ]' (provided) vs '@loader_path/../../../SharedFrameworks/ [ x86_64 arm64 ]' +; XARCH: warning: runpath search paths missing from installAPI option: '@loader_path/../../PrivateFrameworks/ [ x86_64 arm64 ]' + +; RUN: clang-installapi --filetype=tbd-v5 \ +; RUN: -target arm64-apple-macos13.0 -target x86_64-apple-macos13.0 \ +; RUN: -install_name @rpath/Frameworks/RPath.framework/Versions/A/RPath \ +; RUN: -current_version 1 -compatibility_version 1 \ +; RUN: --extra-public-header=%t/public.h \ +; RUN: -rpath @loader_path/../../../SharedFrameworks/ \ +; RUN: -rpath @loader_path/../../PrivateFrameworks/ \ +; RUN: --verify-against=%t/RPath --verify-mode=Pedantic \ +; RUN: -o %t/RPath.tbd 2>&1 | FileCheck -allow-empty %s +; RUN: llvm-readtapi --compare %t/RPath.tbd %t/expected.tbd + +CHECK-NOT: error +CHECK-NOT: warning + +;--- public.h +extern int publicGlobalVariable; + +;--- expected.tbd +{ + "main_library": { + "exported_symbols": [ + { + "data": { + "global": [ + "_publicGlobalVariable" + ] + } + } + ], + "flags": [ + { + "attributes": [ + "not_app_extension_safe" + ] + } + ], + "install_names": [ + { + "name": "@rpath/Frameworks/RPath.framework/Versions/A/RPath" + } + ], + "rpaths": [ + { + "paths": [ + "@loader_path/../../../SharedFrameworks/", + "@loader_path/../../PrivateFrameworks/" + ] + } + ], + "target_info": [ + { + "min_deployment": "13.0", + "target": "x86_64-macos" + }, + { + "min_deployment": "13.0", + "target": "arm64-macos" + } + ] + }, + "tapi_tbd_version": 5 +} + +;--- expected_no_rpaths.tbd +{ + "main_library": { + "exported_symbols": [ + { + "data": { + "global": [ + "_publicGlobalVariable" + ] + } + } + ], + "flags": [ + { + "attributes": [ + "not_app_extension_safe" + ] + } + ], + "install_names": [ + { + "name": "@rpath/Frameworks/RPath.framework/Versions/A/RPath" + } + ], + "target_info": [ + { + "min_deployment": "13.0", + "target": "x86_64-macos" + }, + { + "min_deployment": "13.0", + "target": "arm64-macos" + } + ] + }, + "tapi_tbd_version": 5 +} + +;--- expected_xarch_rpaths.tbd +{ + "main_library": { + "exported_symbols": [ + { + "data": { + "global": [ + "_publicGlobalVariable" + ] + } + } + ], + "flags": [ + { + "attributes": [ + "not_app_extension_safe" + ] + 
} + ], + "install_names": [ + { + "name": "@rpath/Frameworks/RPath.framework/Versions/A/RPath" + } + ], + "rpaths": [ + { + "paths": [ + "@loader_path/../../../SharedFrameworks/" + ], + "targets": [ + "arm64-macos" + ] + } + ], + "target_info": [ + { + "min_deployment": "13.0", + "target": "x86_64-macos" + }, + { + "min_deployment": "13.0", + "target": "arm64-macos" + } + ] + }, + "tapi_tbd_version": 5 +} + +;--- RPath.yaml +--- !fat-mach-o +FatHeader: + magic: 0xCAFEBABE + nfat_arch: 2 +FatArchs: + - cputype: 0x1000007 + cpusubtype: 0x3 + offset: 0x1000 + size: 12408 + align: 12 + - cputype: 0x100000C + cpusubtype: 0x0 + offset: 0x8000 + size: 33312 + align: 14 +Slices: + - !mach-o + FileHeader: + magic: 0xFEEDFACF + cputype: 0x1000007 + cpusubtype: 0x3 + filetype: 0x6 + ncmds: 16 + sizeofcmds: 1072 + flags: 0x100085 + reserved: 0x0 + LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 0 + vmsize: 8192 + fileoff: 0 + filesize: 8192 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x1050 + size: 0 + offset: 0x1050 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x80000000 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '' + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DATA_CONST + vmaddr: 8192 + vmsize: 4096 + fileoff: 8192 + filesize: 4096 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 16 + Sections: + - sectname: __objc_imageinfo + segname: __DATA_CONST + addr: 0x2000 + size: 8 + offset: 0x2000 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '0000000040000000' + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DATA + vmaddr: 12288 + vmsize: 4096 + fileoff: 12288 + filesize: 0 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 0 + Sections: + - sectname: __common + segname: __DATA + addr: 0x3000 + size: 4 + offset: 0x0 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x1 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 16384 + vmsize: 120 + fileoff: 12288 + filesize: 120 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_INFO_ONLY + cmdsize: 48 + rebase_off: 0 + rebase_size: 0 + bind_off: 0 + bind_size: 0 + weak_bind_off: 0 + weak_bind_size: 0 + lazy_bind_off: 0 + lazy_bind_size: 0 + export_off: 12288 + export_size: 32 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 12328 + nsyms: 2 + stroff: 12360 + strsize: 48 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 1 + iundefsym: 1 + nundefsym: 1 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_RPATH + cmdsize: 56 + path: 12 + Content: '@loader_path/../../../SharedFrameworks/' + ZeroPadBytes: 5 + - cmd: LC_RPATH + cmdsize: 56 + path: 12 + Content: '@loader_path/../../PrivateFrameworks/' + ZeroPadBytes: 7 + - cmd: LC_ID_DYLIB + cmdsize: 80 + dylib: + name: 24 + timestamp: 0 + current_version: 65536 + compatibility_version: 65536 + Content: '@rpath/Frameworks/RPath.framework/Versions/A/RPath' + ZeroPadBytes: 6 + - cmd: LC_UUID + cmdsize: 24 + uuid: 4C4C4489-5555-3144-A1D1-28C8EA66FB24 + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 1 + minos: 851968 + sdk: 983040 + ntools: 1 + Tools: + - tool: 4 + version: 1245184 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 0 + current_version: 14942208 + 
compatibility_version: 65536 + Content: '/usr/lib/libobjc.A.dylib' + ZeroPadBytes: 8 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 0 + current_version: 88539136 + compatibility_version: 65536 + Content: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 12320 + datasize: 8 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 12328 + datasize: 0 + LinkEditData: + ExportTrie: + TerminalSize: 0 + NodeOffset: 0 + Name: '' + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + Children: + - TerminalSize: 3 + NodeOffset: 25 + Name: _publicGlobalVariable + Flags: 0x0 + Address: 0x3000 + Other: 0x0 + ImportName: '' + NameList: + - n_strx: 2 + n_type: 0xF + n_sect: 3 + n_desc: 0 + n_value: 12288 + - n_strx: 24 + n_type: 0x1 + n_sect: 0 + n_desc: 512 + n_value: 0 + StringTable: + - ' ' + - _publicGlobalVariable + - dyld_stub_binder + - '' + - '' + - '' + - '' + - '' + - '' + - '' + - !mach-o + FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0x6 + ncmds: 17 + sizeofcmds: 1088 + flags: 0x100085 + reserved: 0x0 + LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 0 + vmsize: 16384 + fileoff: 0 + filesize: 16384 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x1060 + size: 0 + offset: 0x1060 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x80000000 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '' + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DATA_CONST + vmaddr: 16384 + vmsize: 16384 + fileoff: 16384 + filesize: 16384 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 16 + Sections: + - sectname: __objc_imageinfo + segname: __DATA_CONST + addr: 0x4000 + size: 8 + offset: 0x4000 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '0000000040000000' + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DATA + vmaddr: 32768 + vmsize: 16384 + fileoff: 32768 + filesize: 0 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 0 + Sections: + - sectname: __common + segname: __DATA + addr: 0x8000 + size: 4 + offset: 0x0 + align: 2 + reloff: 0x0 + nreloc: 0 + flags: 0x1 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 49152 + vmsize: 544 + fileoff: 32768 + filesize: 544 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_DYLD_INFO_ONLY + cmdsize: 48 + rebase_off: 0 + rebase_size: 0 + bind_off: 0 + bind_size: 0 + weak_bind_off: 0 + weak_bind_size: 0 + lazy_bind_off: 0 + lazy_bind_size: 0 + export_off: 32768 + export_size: 32 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 32808 + nsyms: 2 + stroff: 32840 + strsize: 48 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 1 + iundefsym: 1 + nundefsym: 1 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_RPATH + cmdsize: 56 + path: 12 + Content: '@loader_path/../../../SharedFrameworks/' + ZeroPadBytes: 5 + - cmd: LC_RPATH + cmdsize: 56 + path: 12 + Content: '@loader_path/../../PrivateFrameworks/' + ZeroPadBytes: 7 + - cmd: LC_ID_DYLIB + cmdsize: 80 + dylib: + name: 24 + timestamp: 0 + current_version: 65536 + compatibility_version: 65536 + Content: '@rpath/Frameworks/RPath.framework/Versions/A/RPath' + ZeroPadBytes: 6 + - cmd: LC_UUID + cmdsize: 24 
+ uuid: 4C4C440D-5555-3144-A18B-DB67A0A12202 + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 1 + minos: 851968 + sdk: 983040 + ntools: 1 + Tools: + - tool: 4 + version: 1245184 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 0 + current_version: 14942208 + compatibility_version: 65536 + Content: '/usr/lib/libobjc.A.dylib' + ZeroPadBytes: 8 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 0 + current_version: 88539136 + compatibility_version: 65536 + Content: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 32800 + datasize: 8 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 32808 + datasize: 0 + - cmd: LC_CODE_SIGNATURE + cmdsize: 16 + dataoff: 32896 + datasize: 416 + LinkEditData: + ExportTrie: + TerminalSize: 0 + NodeOffset: 0 + Name: '' + Flags: 0x0 + Address: 0x0 + Other: 0x0 + ImportName: '' + Children: + - TerminalSize: 4 + NodeOffset: 25 + Name: _publicGlobalVariable + Flags: 0x0 + Address: 0x8000 + Other: 0x0 + ImportName: '' + NameList: + - n_strx: 2 + n_type: 0xF + n_sect: 3 + n_desc: 0 + n_value: 32768 + - n_strx: 24 + n_type: 0x1 + n_sect: 0 + n_desc: 512 + n_value: 0 + StringTable: + - ' ' + - _publicGlobalVariable + - dyld_stub_binder + - '' + - '' + - '' + - '' + - '' + - '' + - '' + FunctionStarts: [ 0x1060 ] +... diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index 9496746c6fd66..1de32498cd345 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -85,7 +85,7 @@ #error "wrong value for __cpp_char8_t" #endif -#if check(concepts, 0, 0, 0, 0, 201907, 201907, 201907) +#if check(concepts, 0, 0, 0, 0, 202002, 202002, 202002) #error "wrong value for __cpp_concepts" #endif diff --git a/clang/test/Misc/warning-flags.c b/clang/test/Misc/warning-flags.c index bb3c7d816d2f0..dd73331913c6f 100644 --- a/clang/test/Misc/warning-flags.c +++ b/clang/test/Misc/warning-flags.c @@ -18,7 +18,7 @@ This test serves two purposes: The list of warnings below should NEVER grow. It should gradually shrink to 0. 
-CHECK: Warnings without flags (67):
+CHECK: Warnings without flags (66):
 
 CHECK-NEXT: ext_expected_semi_decl_list
 CHECK-NEXT: ext_explicit_specialization_storage_class
@@ -80,7 +80,6 @@ CHECK-NEXT: warn_register_objc_catch_parm
 CHECK-NEXT: warn_related_result_type_compatibility_class
 CHECK-NEXT: warn_related_result_type_compatibility_protocol
 CHECK-NEXT: warn_template_export_unsupported
-CHECK-NEXT: warn_tentative_incomplete_array
 CHECK-NEXT: warn_typecheck_function_qualifiers
 CHECK-NEXT: warn_undef_interface
 CHECK-NEXT: warn_undef_interface_suggest
diff --git a/clang/test/Modules/builtin-import.mm b/clang/test/Modules/builtin-import.mm
index 8a27cb358484c..52db9c15803ce 100644
--- a/clang/test/Modules/builtin-import.mm
+++ b/clang/test/Modules/builtin-import.mm
@@ -1,4 +1,6 @@
 // RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -nobuiltininc -nostdinc++ -isysroot %S/Inputs/libc-libcxx/sysroot -isystem %S/Inputs/libc-libcxx/sysroot/usr/include/c++/v1 -isystem %S/Inputs/libc-libcxx/sysroot/usr/include -std=c++11 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x objective-c++ %s
+// RUN: rm -rf %t
 // RUN: %clang_cc1 -fsyntax-only -nobuiltininc -nostdinc++ -isysroot %S/Inputs/libc-libcxx/sysroot -isystem %S/Inputs/libc-libcxx/sysroot/usr/include/c++/v1 -isystem %S/Inputs/libc-libcxx/sysroot/usr/include -std=c++11 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t -x objective-c++ -fmodules-local-submodule-visibility %s
 
 #include
diff --git a/clang/test/Modules/import-textual-noguard.mm b/clang/test/Modules/import-textual-noguard.mm
index dd124b6609d00..ffc506117f519 100644
--- a/clang/test/Modules/import-textual-noguard.mm
+++ b/clang/test/Modules/import-textual-noguard.mm
@@ -1,7 +1,11 @@
 // RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 -fmodules -fimplicit-module-maps -I%S/Inputs/import-textual/M2 -fmodules-cache-path=%t -x objective-c++ %s -verify
+// RUN: rm -rf %t
 // RUN: %clang_cc1 -fsyntax-only -std=c++11 -fmodules -fimplicit-module-maps -I%S/Inputs/import-textual/M2 -fmodules-cache-path=%t -x objective-c++ -fmodules-local-submodule-visibility %s -verify
 
-#include "A/A.h" // expected-error {{could not build module 'M'}}
+// expected-no-diagnostics
+
+#include "A/A.h"
 #include "B/B.h"
 
 typedef aint xxx;
diff --git a/clang/test/Modules/import-textual.mm b/clang/test/Modules/import-textual.mm
index 6593239d7fd70..94a6aa448cdc2 100644
--- a/clang/test/Modules/import-textual.mm
+++ b/clang/test/Modules/import-textual.mm
@@ -1,4 +1,6 @@
 // RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 -fmodules -fimplicit-module-maps -I%S/Inputs/import-textual/M -fmodules-cache-path=%t -x objective-c++ %s -verify
+// RUN: rm -rf %t
 // RUN: %clang_cc1 -fsyntax-only -std=c++11 -fmodules -fimplicit-module-maps -I%S/Inputs/import-textual/M -fmodules-cache-path=%t -x objective-c++ -fmodules-local-submodule-visibility %s -verify
 
 // expected-no-diagnostics
diff --git a/clang/test/Modules/multiple-import.m b/clang/test/Modules/multiple-import.m
new file mode 100644
index 0000000000000..95ca7dbcd438d
--- /dev/null
+++ b/clang/test/Modules/multiple-import.m
@@ -0,0 +1,43 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -std=c17 -fmodules-cache-path=%t/no-lsv -fmodules -fimplicit-module-maps -I%t %t/multiple-imports.m -verify
+// RUN: %clang_cc1 -std=c17 -fmodules-cache-path=%t/lsv -fmodules -fimplicit-module-maps -fmodules-local-submodule-visibility -I%t %t/multiple-imports.m -verify
+
+//--- multiple-imports.m
+// expected-no-diagnostics
+#import <one.h>
+#import <assert.h>
+void test(void) {
+  assert(0);
+}
+
+//--- module.modulemap
+module Submodules [system] {
+  module one {
+    header "one.h"
+    export *
+  }
+  module two {
+    header "two.h"
+    export *
+  }
+}
+
+module libc [system] {
+  textual header "assert.h"
+}
+
+//--- one.h
+#ifndef one_h
+#define one_h
+#endif
+
+//--- two.h
+#ifndef two_h
+#define two_h
+#include <assert.h>
+#endif
+
+//--- assert.h
+#undef assert
+#define assert(expression) ((void)0)
diff --git a/clang/test/OpenMP/nesting_of_regions.cpp b/clang/test/OpenMP/Inputs/nesting_of_regions.cpp
similarity index 99%
rename from clang/test/OpenMP/nesting_of_regions.cpp
rename to clang/test/OpenMP/Inputs/nesting_of_regions.cpp
index 9442fb20647d0..e671f9b0cf412 100644
--- a/clang/test/OpenMP/nesting_of_regions.cpp
+++ b/clang/test/OpenMP/Inputs/nesting_of_regions.cpp
@@ -1,15 +1,3 @@
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=45 -fno-openmp-extensions -verify=expected,omp45,omp45warn,omp %s
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -fno-openmp-extensions -verify=expected,omp50,omp %s
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-extensions -verify=expected,omp50 %s
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=45 -verify=expected,omp45,omp -fno-openmp-extensions -Wno-openmp %s
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=45 -verify=expected,omp45,omp -fno-openmp-extensions -Wno-source-uses-openmp %s
-// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=51 -verify=expected,omp51,omp -fno-openmp-extensions -Wno-source-uses-openmp %s
-
-// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -fopenmp-version=45 -fno-openmp-extensions -verify=expected,omp45,omp45warn,omp %s
-// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify=expected,omp50,omp -fno-openmp-extensions %s
-// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -fopenmp-version=51 -verify=expected,omp51,omp -fno-openmp-extensions %s
-// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
-
 void bar();
 
 template
@@ -19577,4 +19565,3 @@ void foo() {
   return foo();
 }
 
-
diff --git a/clang/test/OpenMP/assumes_codegen.cpp b/clang/test/OpenMP/assumes_codegen.cpp
index 6a5871c303aad..4a2518a51ec34 100644
--- a/clang/test/OpenMP/assumes_codegen.cpp
+++ b/clang/test/OpenMP/assumes_codegen.cpp
@@ -67,7 +67,7 @@ int lambda_outer() {
 }
 #pragma omp end assumes
 
-// AST: __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) void foo() {
+// AST: void foo() __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) {
 // AST-NEXT: }
 // AST-NEXT: class BAR {
 // AST-NEXT: public:
@@ -81,7 +81,7 @@ int lambda_outer() {
 // AST-NEXT: __attribute__((assume("ompx_range_bar_only"))) __attribute__((assume("ompx_range_bar_only_2"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) void bar() {
 // AST-NEXT: BAR b;
 // AST-NEXT: }
-// AST-NEXT: void baz() __attribute__((assume("ompx_1234"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp")));
+// AST-NEXT: __attribute__((assume("ompx_1234"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) void baz();
 // AST-NEXT: template class BAZ {
 // AST-NEXT: public:
 // AST-NEXT: __attribute__((assume("ompx_1234")))
__attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) BAZ() { @@ -95,8 +95,8 @@ int lambda_outer() { // AST-NEXT: public: // AST-NEXT: __attribute__((assume("ompx_1234"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) BAZ() { // AST-NEXT: } -// AST-NEXT: void baz1() __attribute__((assume("ompx_1234"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))); -// AST-NEXT: static void baz2() __attribute__((assume("ompx_1234"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))); +// AST-NEXT: __attribute__((assume("ompx_1234"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) void baz1(); +// AST-NEXT: __attribute__((assume("ompx_1234"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) static void baz2(); // AST-NEXT: }; // AST-NEXT: __attribute__((assume("ompx_1234"))) __attribute__((assume("omp_no_openmp_routines,ompx_another_warning,ompx_after_invalid_clauses"))) __attribute__((assume("omp_no_openmp"))) void baz() { // AST-NEXT: BAZ b; diff --git a/clang/test/OpenMP/assumes_print.cpp b/clang/test/OpenMP/assumes_print.cpp index a7f04edb3b1af..da3629f70408d 100644 --- a/clang/test/OpenMP/assumes_print.cpp +++ b/clang/test/OpenMP/assumes_print.cpp @@ -37,7 +37,7 @@ void baz() { } #pragma omp end assumes -// CHECK: __attribute__((assume("omp_no_openmp_routines"))) __attribute__((assume("omp_no_openmp"))) void foo() +// CHECK: void foo() __attribute__((assume("omp_no_openmp_routines"))) __attribute__((assume("omp_no_openmp"))) // CHECK: __attribute__((assume("ompx_range_bar_only"))) __attribute__((assume("ompx_range_bar_only_2"))) __attribute__((assume("omp_no_openmp_routines"))) __attribute__((assume("omp_no_openmp"))) void bar() // CHECK: __attribute__((assume("ompx_1234"))) __attribute__((assume("omp_no_openmp_routines"))) __attribute__((assume("omp_no_openmp"))) void baz() diff --git a/clang/test/OpenMP/assumes_template_print.cpp b/clang/test/OpenMP/assumes_template_print.cpp index bd1100fbefffc..e0bc3e9884ca5 100644 --- a/clang/test/OpenMP/assumes_template_print.cpp +++ b/clang/test/OpenMP/assumes_template_print.cpp @@ -17,7 +17,7 @@ template struct S { int a; // CHECK: template struct S { -// CHECK: __attribute__((assume("ompx_global_assumption"))) void foo() { +// CHECK: void foo() __attribute__((assume("ompx_global_assumption"))) { void foo() { #pragma omp parallel {} @@ -25,15 
+25,15 @@ struct S { }; // CHECK: template<> struct S { -// CHECK: __attribute__((assume("ompx_global_assumption"))) void foo() { +// CHECK: void foo() __attribute__((assume("ompx_global_assumption"))) { #pragma omp begin assumes no_openmp -// CHECK: __attribute__((assume("omp_no_openmp"))) __attribute__((assume("ompx_global_assumption"))) void S_with_assumes_no_call() { +// CHECK: __attribute__((assume("omp_no_openmp"))) void S_with_assumes_no_call() __attribute__((assume("ompx_global_assumption"))) { void S_with_assumes_no_call() { S s; s.a = 0; } -// CHECK: __attribute__((assume("omp_no_openmp"))) __attribute__((assume("ompx_global_assumption"))) void S_with_assumes_call() { +// CHECK: __attribute__((assume("omp_no_openmp"))) void S_with_assumes_call() __attribute__((assume("ompx_global_assumption"))) { void S_with_assumes_call() { S s; s.a = 0; @@ -42,7 +42,7 @@ void S_with_assumes_call() { } #pragma omp end assumes -// CHECK: __attribute__((assume("ompx_global_assumption"))) void S_without_assumes() { +// CHECK: void S_without_assumes() __attribute__((assume("ompx_global_assumption"))) { void S_without_assumes() { S s; s.foo(); diff --git a/clang/test/OpenMP/declare_simd_ast_print.cpp b/clang/test/OpenMP/declare_simd_ast_print.cpp index 1adf95226c8be..565dc2dfc04d1 100644 --- a/clang/test/OpenMP/declare_simd_ast_print.cpp +++ b/clang/test/OpenMP/declare_simd_ast_print.cpp @@ -60,11 +60,11 @@ void h(int *hp, int *hp2, int *hq, int *lin) class VV { // CHECK: #pragma omp declare simd uniform(this, a) linear(val(b): a) - // CHECK-NEXT: __attribute__((cold)) int add(int a, int b) { + // CHECK-NEXT: int add(int a, int b) __attribute__((cold)) { // CHECK-NEXT: return a + b; // CHECK-NEXT: } #pragma omp declare simd uniform(this, a) linear(val(b): a) - __attribute__((cold)) int add(int a, int b) { return a + b; } + int add(int a, int b) __attribute__((cold)) { return a + b; } // CHECK: #pragma omp declare simd aligned(b: 4) aligned(a) linear(ref(b): 4) linear(val(this)) linear(val(a)) // CHECK-NEXT: float taddpf(float *a, float *&b) { diff --git a/clang/test/OpenMP/declare_target_link_codegen.cpp b/clang/test/OpenMP/declare_target_link_codegen.cpp index 2372b2738b5be..dd1ac813efaaf 100644 --- a/clang/test/OpenMP/declare_target_link_codegen.cpp +++ b/clang/test/OpenMP/declare_target_link_codegen.cpp @@ -26,12 +26,12 @@ // DEVICE: @c_decl_tgt_ref_ptr = weak global ptr null // HOST: [[SIZES:@.+]] = private unnamed_addr constant [3 x i64] [i64 4, i64 4, i64 4] // HOST: [[MAPTYPES:@.+]] = private unnamed_addr constant [3 x i64] [i64 35, i64 531, i64 531] -// HOST: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_decl_tgt_ref_ptr\00" -// HOST: @.omp_offloading.entry.c_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @c_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 -// HOST-COFF: @.omp_offloading.entry.{{.*}} = weak constant %struct.__tgt_offload_entry { ptr @.{{.*}}, ptr @.{{.*}}, i64 0, i32 0, i32 0 }, section "omp_offloading_entries$OE", align 1 +// HOST: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_decl_tgt_ref_ptr\00" +// HOST: @.offloading.entry.c_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @c_decl_tgt_ref_ptr, ptr @.offloading.entry_name, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 +// HOST-COFF: @.offloading.entry.{{.*}} = weak constant %struct.__tgt_offload_entry { ptr @.{{.*}}, ptr 
@.{{.*}}, i64 0, i32 0, i32 0 }, section "omp_offloading_entries$OE", align 1 // DEVICE-NOT: internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_{{.*}}_decl_tgt_ref_ptr\00" -// HOST: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"_{{.*}}d_{{.*}}_decl_tgt_ref_ptr\00" -// HOST: @.omp_offloading.entry.[[D_PTR]] = weak constant %struct.__tgt_offload_entry { ptr @[[D_PTR]], ptr @.omp_offloading.entry_name{{.*}} +// HOST: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"_{{.*}}d_{{.*}}_decl_tgt_ref_ptr\00" +// HOST: @.offloading.entry.[[D_PTR]] = weak constant %struct.__tgt_offload_entry { ptr @[[D_PTR]], ptr @.offloading.entry_name{{.*}} extern int c; #pragma omp declare target link(c) diff --git a/clang/test/OpenMP/declare_target_visibility_codegen.cpp b/clang/test/OpenMP/declare_target_visibility_codegen.cpp index 6518bac50623a..6643a9bb4f0cb 100644 --- a/clang/test/OpenMP/declare_target_visibility_codegen.cpp +++ b/clang/test/OpenMP/declare_target_visibility_codegen.cpp @@ -8,10 +8,10 @@ class C { // HOST: @[[X:.+]] = internal global i32 0, align 4 // HOST: @y = hidden global i32 0 // HOST: @z = global i32 0 -// HOST-NOT: @.omp_offloading.entry.c -// HOST-NOT: @.omp_offloading.entry.x -// HOST-NOT: @.omp_offloading.entry.y -// HOST: @.omp_offloading.entry.z +// HOST-NOT: @.offloading.entry.c +// HOST-NOT: @.offloading.entry.x +// HOST-NOT: @.offloading.entry.y +// HOST: @.offloading.entry.z C() : x(0) {} int x; diff --git a/clang/test/OpenMP/nesting_of_regions_45.cpp b/clang/test/OpenMP/nesting_of_regions_45.cpp new file mode 100644 index 0000000000000..d5870ec36486e --- /dev/null +++ b/clang/test/OpenMP/nesting_of_regions_45.cpp @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=45 -fno-openmp-extensions -verify=expected,omp45,omp45warn,omp %s +// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=45 -verify=expected,omp45,omp -fno-openmp-extensions -Wno-openmp %s + +#include "Inputs/nesting_of_regions.cpp" diff --git a/clang/test/OpenMP/nesting_of_regions_50.cpp b/clang/test/OpenMP/nesting_of_regions_50.cpp new file mode 100644 index 0000000000000..f2061553a8046 --- /dev/null +++ b/clang/test/OpenMP/nesting_of_regions_50.cpp @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -fsyntax-only -fopenmp -fno-openmp-extensions -verify=expected,omp50,omp %s +// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-extensions -verify=expected,omp50 %s + +#include "Inputs/nesting_of_regions.cpp" diff --git a/clang/test/OpenMP/nesting_of_regions_51.cpp b/clang/test/OpenMP/nesting_of_regions_51.cpp new file mode 100644 index 0000000000000..856489b042821 --- /dev/null +++ b/clang/test/OpenMP/nesting_of_regions_51.cpp @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -fopenmp-version=51 -verify=expected,omp51,omp -fno-openmp-extensions %s +// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=51 -verify=expected,omp51,omp -fno-openmp-extensions -Wno-source-uses-openmp %s + +#include "Inputs/nesting_of_regions.cpp" diff --git a/clang/test/OpenMP/nesting_of_regions_simd_45.cpp b/clang/test/OpenMP/nesting_of_regions_simd_45.cpp new file mode 100644 index 0000000000000..fb0d8bbfe3e48 --- /dev/null +++ b/clang/test/OpenMP/nesting_of_regions_simd_45.cpp @@ -0,0 +1,3 @@ +// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -fopenmp-version=45 -fno-openmp-extensions -verify=expected,omp45,omp45warn,omp %s + +#include "Inputs/nesting_of_regions.cpp" diff --git a/clang/test/OpenMP/nesting_of_regions_simd_50.cpp 
b/clang/test/OpenMP/nesting_of_regions_simd_50.cpp new file mode 100644 index 0000000000000..ba87ba38e74a6 --- /dev/null +++ b/clang/test/OpenMP/nesting_of_regions_simd_50.cpp @@ -0,0 +1,3 @@ +// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -verify=expected,omp50,omp -fno-openmp-extensions %s + +#include "Inputs/nesting_of_regions.cpp" diff --git a/clang/test/OpenMP/target_codegen_registration.cpp b/clang/test/OpenMP/target_codegen_registration.cpp index 5313da30c4ecf..4927147d080f2 100644 --- a/clang/test/OpenMP/target_codegen_registration.cpp +++ b/clang/test/OpenMP/target_codegen_registration.cpp @@ -119,54 +119,54 @@ // CHECK-NTARGET-NOT: private unnamed_addr constant [1 x i // CHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" -// CHECK-DAG: @.omp_offloading.entry.[[NAME1]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR1]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-DAG: @.offloading.entry.[[NAME1]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR1]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" -// CHECK-DAG: @.omp_offloading.entry.[[NAME2]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR2]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-DAG: @.offloading.entry.[[NAME2]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR2]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" -// CHECK-DAG: @.omp_offloading.entry.[[NAME3]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR3]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-DAG: @.offloading.entry.[[NAME3]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR3]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" -// CHECK-DAG: @.omp_offloading.entry.[[NAME4]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR4]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-DAG: @.offloading.entry.[[NAME4]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR4]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" -// CHECK-DAG: @.omp_offloading.entry.[[NAME5]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR5]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-DAG: @.offloading.entry.[[NAME5]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR5]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" -// CHECK-DAG: @.omp_offloading.entry.[[NAME6]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR6]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-DAG: @.offloading.entry.[[NAME6]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR6]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] 
c"[[NAME7:.+]]\00" -// CHECK-DAG: @.omp_offloading.entry.[[NAME7]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR7]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-DAG: @.offloading.entry.[[NAME7]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR7]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" -// CHECK-DAG: @.omp_offloading.entry.[[NAME8]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR8]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-DAG: @.offloading.entry.[[NAME8]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR8]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" -// CHECK-DAG: @.omp_offloading.entry.[[NAME9]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR9]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-DAG: @.offloading.entry.[[NAME9]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR9]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" -// CHECK-DAG: @.omp_offloading.entry.[[NAME10]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR10]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-DAG: @.offloading.entry.[[NAME10]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR10]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" -// CHECK-DAG: @.omp_offloading.entry.[[NAME11]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR11]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-DAG: @.offloading.entry.[[NAME11]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR11]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" -// CHECK-DAG: @.omp_offloading.entry.[[NAME12]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR12]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-DAG: @.offloading.entry.[[NAME12]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR12]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // TCHECK-DAG: [[NAMEPTR1:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME1:__omp_offloading_[0-9a-f]+_[0-9a-f]+__Z.+_l[0-9]+]]\00" -// TCHECK-DAG: @.omp_offloading.entry.[[NAME1]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR1]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// TCHECK-DAG: @.offloading.entry.[[NAME1]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR1]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // TCHECK-DAG: [[NAMEPTR2:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME2:.+]]\00" -// TCHECK-DAG: @.omp_offloading.entry.[[NAME2]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR2]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// TCHECK-DAG: @.offloading.entry.[[NAME2]] = weak{{.*}} constant [[ENTTY]] { ptr 
@{{.*}}, ptr [[NAMEPTR2]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // TCHECK-DAG: [[NAMEPTR3:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME3:.+]]\00" -// TCHECK-DAG: @.omp_offloading.entry.[[NAME3]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR3]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// TCHECK-DAG: @.offloading.entry.[[NAME3]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR3]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // TCHECK-DAG: [[NAMEPTR4:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME4:.+]]\00" -// TCHECK-DAG: @.omp_offloading.entry.[[NAME4]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR4]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// TCHECK-DAG: @.offloading.entry.[[NAME4]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR4]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // TCHECK-DAG: [[NAMEPTR5:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME5:.+]]\00" -// TCHECK-DAG: @.omp_offloading.entry.[[NAME5]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR5]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// TCHECK-DAG: @.offloading.entry.[[NAME5]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR5]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // TCHECK-DAG: [[NAMEPTR6:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME6:.+]]\00" -// TCHECK-DAG: @.omp_offloading.entry.[[NAME6]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR6]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// TCHECK-DAG: @.offloading.entry.[[NAME6]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR6]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // TCHECK-DAG: [[NAMEPTR7:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME7:.+]]\00" -// TCHECK-DAG: @.omp_offloading.entry.[[NAME7]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR7]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// TCHECK-DAG: @.offloading.entry.[[NAME7]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR7]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // TCHECK-DAG: [[NAMEPTR8:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME8:.+]]\00" -// TCHECK-DAG: @.omp_offloading.entry.[[NAME8]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR8]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// TCHECK-DAG: @.offloading.entry.[[NAME8]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR8]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // TCHECK-DAG: [[NAMEPTR9:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME9:.+]]\00" -// TCHECK-DAG: @.omp_offloading.entry.[[NAME9]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR9]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// TCHECK-DAG: @.offloading.entry.[[NAME9]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR9]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // TCHECK-DAG: [[NAMEPTR10:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME10:.+]]\00" -// TCHECK-DAG: @.omp_offloading.entry.[[NAME10]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR10]], i[[SZ]] 0, i32 0, i32 
0 }, section "omp_offloading_entries", align 1 +// TCHECK-DAG: @.offloading.entry.[[NAME10]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR10]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // TCHECK-DAG: [[NAMEPTR11:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME11:.+]]\00" -// TCHECK-DAG: @.omp_offloading.entry.[[NAME11]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR11]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// TCHECK-DAG: @.offloading.entry.[[NAME11]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR11]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // TCHECK-DAG: [[NAMEPTR12:@.+]] = internal unnamed_addr constant [{{.*}} x i8] c"[[NAME12:.+]]\00" -// TCHECK-DAG: @.omp_offloading.entry.[[NAME12]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR12]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// TCHECK-DAG: @.offloading.entry.[[NAME12]] = weak{{.*}} constant [[ENTTY]] { ptr @{{.*}}, ptr [[NAMEPTR12]], i[[SZ]] 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // We have 4 initializers, one for the 500 priority, another one for 501, or more for the default priority, and the last one for the offloading registration function. // CHECK: @llvm.global_ctors = appending global [3 x { i32, ptr, ptr }] [ diff --git a/clang/test/OpenMP/target_indirect_codegen.cpp b/clang/test/OpenMP/target_indirect_codegen.cpp index 0ecdb7279e20a..bc0aa45417036 100644 --- a/clang/test/OpenMP/target_indirect_codegen.cpp +++ b/clang/test/OpenMP/target_indirect_codegen.cpp @@ -11,13 +11,13 @@ //. // HOST: @[[VAR:.+]] = global i8 0, align 1 // HOST: @[[FOO_ENTRY_NAME:.+]] = internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[FOO_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_foo_l[0-9]+]]\00" -// HOST: @.omp_offloading.entry.[[FOO_NAME]] = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @[[FOO_ENTRY_NAME]], i64 8, i32 8, i32 0 }, section "omp_offloading_entries", align 1 +// HOST: @.offloading.entry.[[FOO_NAME]] = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @[[FOO_ENTRY_NAME]], i64 8, i32 8, i32 0 }, section "omp_offloading_entries", align 1 // HOST: @[[BAZ_ENTRY_NAME:.+]] = internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[BAZ_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_baz_l[0-9]+]]\00" -// HOST: @.omp_offloading.entry.[[BAZ_NAME]] = weak constant %struct.__tgt_offload_entry { ptr @_Z3bazv, ptr @[[BAZ_ENTRY_NAME]], i64 8, i32 8, i32 0 }, section "omp_offloading_entries", align 1 +// HOST: @.offloading.entry.[[BAZ_NAME]] = weak constant %struct.__tgt_offload_entry { ptr @_Z3bazv, ptr @[[BAZ_ENTRY_NAME]], i64 8, i32 8, i32 0 }, section "omp_offloading_entries", align 1 // HOST: @[[VAR_ENTRY_NAME:.+]] = internal unnamed_addr constant [4 x i8] c"var\00" -// HOST: @.omp_offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @[[VAR]], ptr @[[VAR_ENTRY_NAME]], i64 1, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// HOST: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @[[VAR]], ptr @[[VAR_ENTRY_NAME]], i64 1, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // HOST: @[[BAR_ENTRY_NAME:.+]] = internal unnamed_addr constant [{{[0-9]+}} x i8] c"[[BAR_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_bar_l[0-9]+]]\00" -// HOST: @.omp_offloading.entry.[[BAR_NAME]] = weak constant %struct.__tgt_offload_entry { ptr @_ZL3barv, ptr @[[BAR_ENTRY_NAME]], i64 8, i32 8, i32 
0 }, section "omp_offloading_entries", align 1 +// HOST: @.offloading.entry.[[BAR_NAME]] = weak constant %struct.__tgt_offload_entry { ptr @_ZL3barv, ptr @[[BAR_ENTRY_NAME]], i64 8, i32 8, i32 0 }, section "omp_offloading_entries", align 1 //. // DEVICE: @[[FOO_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_foo_l[0-9]+]] = protected addrspace(1) constant ptr @_Z3foov // DEVICE: @[[BAZ_NAME:__omp_offloading_[0-9a-z]+_[0-9a-z]+_baz_l[0-9]+]] = protected addrspace(1) constant ptr @_Z3bazv diff --git a/clang/test/OpenMP/thread_limit_amdgpu.c b/clang/test/OpenMP/thread_limit_amdgpu.c new file mode 100644 index 0000000000000..f884eeb73c3ff --- /dev/null +++ b/clang/test/OpenMP/thread_limit_amdgpu.c @@ -0,0 +1,34 @@ +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +void foo(int N) { +#pragma omp target teams distribute parallel for simd + for (int i = 0; i < N; ++i) + ; +#pragma omp target teams distribute parallel for simd thread_limit(4) + for (int i = 0; i < N; ++i) + ; +#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42)))) + for (int i = 0; i < N; ++i) + ; +#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42)))) num_threads(22) + for (int i = 0; i < N; ++i) + ; +} + +#endif + +// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l10({{.*}}) #[[ATTR1:.+]] { +// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l13({{.*}}) #[[ATTR2:.+]] { +// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l16({{.*}}) #[[ATTR3:.+]] { +// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l19({{.*}}) #[[ATTR4:.+]] { + +// CHECK: attributes #[[ATTR1]] = { {{.*}} "amdgpu-flat-work-group-size"="1,256" {{.*}} } +// CHECK: attributes #[[ATTR2]] = { {{.*}} "amdgpu-flat-work-group-size"="1,4" {{.*}} } +// CHECK: attributes #[[ATTR3]] = { {{.*}} "amdgpu-flat-work-group-size"="1,42" "amdgpu-max-num-workgroups"="42,1,1"{{.*}} } +// CHECK: attributes #[[ATTR4]] = { {{.*}} "amdgpu-flat-work-group-size"="1,22" "amdgpu-max-num-workgroups"="42,1,1"{{.*}} } diff --git a/clang/test/PCH/cxx23-deducing-this-lambda.cpp b/clang/test/PCH/cxx23-deducing-this-lambda.cpp new file mode 100644 index 0000000000000..21b4bf0d633f2 --- /dev/null +++ b/clang/test/PCH/cxx23-deducing-this-lambda.cpp @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -emit-pch -std=c++23 -o %t %s +// RUN: %clang_cc1 -include-pch %t -verify -fsyntax-only -DTEST -std=c++23 %s + +// Test that dependence of 'this' and DREs due to by-value capture by a +// lambda with an explicit object parameter is serialised/deserialised +// properly. 
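The test above hinges on a C++23 subtlety: when a lambda has an explicit object parameter, its by-value captures are accessed through the deduced type of that parameter, so a const closure object makes the captures read-only even though no 'mutable' is involved. A minimal standalone sketch of that behaviour (not part of the patch; the names are hypothetical, and it assumes a recent Clang, 18 or later, in -std=c++23 mode):

struct Widget {            // hypothetical type, stands in for 'S' below
  int x;
  auto make() {
    // Capture *this by value; whether the copy of 'x' is writable is
    // decided per call site by the deduced type of 'self'.
    return [*this](this auto &&self) {
      x = 42;              // OK through a non-const closure object,
                           // rejected through a const one (the kind of
                           // error the test expects to survive the
                           // PCH round-trip)
    };
  }
};

void demo() {
  auto m = Widget{}.make();
  m();                     // compiles: 'self' deduces to a non-const
                           // lvalue reference, so the capture is writable
  const auto c = Widget{}.make();
  // c();                  // would not compile: 'self' deduces const,
                           // so the captured copy is read-only
}

Because the call operator is a template, the error fires only when a const call is actually instantiated, which is why the test's diagnostics carry "in instantiation of" notes.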
+ +#ifndef HEADER +#define HEADER +struct S { + int x; + auto f() { + return [*this] (this auto&&) { + int y; + x = 42; + + const auto l = [y] (this auto&&) { y = 42; }; + l(); + }; + } +}; +#endif + +// expected-error@* {{read-only variable is not assignable}} +// expected-error@* {{cannot assign to a variable captured by copy in a non-mutable lambda}} +// expected-note@* 2 {{in instantiation of}} + +#ifdef TEST +void f() { + const auto l = S{}.f(); + l(); // expected-note {{in instantiation of}} +} +#endif + + diff --git a/clang/test/ParserOpenACC/parse-clauses.c b/clang/test/ParserOpenACC/parse-clauses.c index a82c3662f2ad9..b58b332ad3245 100644 --- a/clang/test/ParserOpenACC/parse-clauses.c +++ b/clang/test/ParserOpenACC/parse-clauses.c @@ -86,27 +86,23 @@ void func() { // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop seq, - // expected-error@+3{{expected '('}} - // expected-warning@+2{{OpenACC clause 'collapse' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop collapse for(;;){} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'collapse' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop collapse() for(;;){} - // expected-error@+4{{invalid tag 'unknown' on 'collapse' clause}} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'collapse' not yet implemented, clause ignored}} + // expected-error@+3{{invalid tag 'unknown' on 'collapse' clause}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop collapse(unknown:) for(;;){} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'collapse' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop collapse(force:) for(;;){} @@ -127,62 +123,53 @@ void func() { #pragma acc loop collapse(5) for(;;){} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'collapse' not yet implemented, clause ignored}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop collapse(5, 6) for(;;){} } void DefaultClause() { - // expected-error@+3{{expected '('}} - // expected-warning@+2{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} // expected-warning@+1{{OpenACC construct 'serial loop' not yet implemented, pragma ignored}} #pragma acc serial loop default for(;;){} - // expected-error@+2{{expected '('}} - // expected-warning@+1{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+1{{expected '('}} #pragma acc serial default seq for(;;){} - // expected-error@+3{{expected '('}} - // expected-warning@+2{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial default, seq for(;;){} - // 
expected-error@+4{{expected identifier}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+3{{expected identifier}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc serial default( for(;;){} - // expected-error@+4{{invalid value for 'default' clause; expected 'present' or 'none'}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+3{{invalid value for 'default' clause; expected 'present' or 'none'}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc serial default( seq for(;;){} - // expected-error@+4{{expected identifier}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+3{{expected identifier}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc serial default(, seq for(;;){} - // expected-error@+3{{expected '('}} - // expected-error@+2{{expected identifier}} - // expected-warning@+1{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} + // expected-error@+1{{expected identifier}} #pragma acc serial default) for(;;){} - // expected-error@+3{{expected '('}} - // expected-error@+2{{expected identifier}} - // expected-warning@+1{{OpenACC clause 'default' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} + // expected-error@+1{{expected identifier}} #pragma acc serial default), seq for(;;){} @@ -231,87 +218,73 @@ void DefaultClause() { } void IfClause() { - // expected-error@+3{{expected '('}} - // expected-warning@+2{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} // expected-warning@+1{{OpenACC construct 'serial loop' not yet implemented, pragma ignored}} #pragma acc serial loop if for(;;){} - // expected-error@+2{{expected '('}} - // expected-warning@+1{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+1{{expected '('}} #pragma acc serial if seq for(;;){} - // expected-error@+3{{expected '('}} - // expected-warning@+2{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial if, seq for(;;){} - // expected-error@+4{{expected expression}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+3{{expected expression}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc serial if( for(;;){} - // expected-error@+4{{use of undeclared identifier 'seq'}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+3{{use of undeclared identifier 'seq'}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc serial if( seq for(;;){} - // expected-error@+5{{expected expression}} - // expected-error@+4{{use of undeclared 
identifier 'seq'}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+4{{expected expression}} + // expected-error@+3{{use of undeclared identifier 'seq'}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc serial if(, seq for(;;){} - // expected-error@+3{{expected '('}} - // expected-error@+2{{expected identifier}} - // expected-warning@+1{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} + // expected-error@+1{{expected identifier}} #pragma acc serial if) for(;;){} - // expected-error@+3{{expected '('}} - // expected-error@+2{{expected identifier}} - // expected-warning@+1{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} + // expected-error@+1{{expected identifier}} #pragma acc serial if) seq for(;;){} - // expected-error@+3{{expected '('}} - // expected-error@+2{{expected identifier}} - // expected-warning@+1{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} + // expected-error@+1{{expected identifier}} #pragma acc serial if), seq for(;;){} - // expected-error@+2{{expected expression}} - // expected-warning@+1{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+1{{expected expression}} #pragma acc serial if() for(;;){} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial if() seq for(;;){} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial if(), seq for(;;){} - // expected-error@+2{{use of undeclared identifier 'invalid_expr'}} - // expected-warning@+1{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+1{{use of undeclared identifier 'invalid_expr'}} #pragma acc serial if(invalid_expr) for(;;){} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'if' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial if() seq for(;;){} @@ -340,27 +313,24 @@ void SelfClause() { #pragma acc serial loop self, seq for(;;){} - // expected-error@+5{{expected expression}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'self' not yet implemented, clause ignored}} + // expected-error@+4{{expected expression}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'serial loop' not yet implemented, pragma ignored}} #pragma acc serial loop self( for(;;){} - // expected-error@+5{{use of undeclared identifier 'seq'}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'self' not yet implemented, clause ignored}} + // expected-error@+4{{use of undeclared identifier 'seq'}} + // expected-error@+3{{expected ')'}} + // 
expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'serial loop' not yet implemented, pragma ignored}} #pragma acc serial loop self( seq for(;;){} - // expected-error@+6{{expected expression}} - // expected-error@+5{{use of undeclared identifier 'seq'}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'self' not yet implemented, clause ignored}} + // expected-error@+5{{expected expression}} + // expected-error@+4{{use of undeclared identifier 'seq'}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'serial loop' not yet implemented, pragma ignored}} #pragma acc serial loop self(, seq for(;;){} @@ -384,23 +354,20 @@ void SelfClause() { for(;;){} - // expected-error@+4{{expected expression}} - // expected-warning@+3{{OpenACC clause 'self' not yet implemented, clause ignored}} + // expected-error@+3{{expected expression}} // expected-warning@+2{{OpenACC clause 'seq' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC construct 'serial loop' not yet implemented, pragma ignored}} #pragma acc serial loop self(), seq for(;;){} - // expected-error@+5{{expected expression}} // expected-error@+4{{expected expression}} - // expected-warning@+3{{OpenACC clause 'self' not yet implemented, clause ignored}} + // expected-error@+3{{expected expression}} // expected-warning@+2{{OpenACC clause 'seq' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC construct 'serial loop' not yet implemented, pragma ignored}} #pragma acc serial loop self(,), seq for(;;){} - // expected-error@+4{{use of undeclared identifier 'invalid_expr'}} - // expected-warning@+3{{OpenACC clause 'self' not yet implemented, clause ignored}} + // expected-error@+3{{use of undeclared identifier 'invalid_expr'}} // expected-warning@+2{{OpenACC clause 'seq' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC construct 'serial loop' not yet implemented, pragma ignored}} #pragma acc serial loop self(invalid_expr), seq @@ -408,16 +375,14 @@ void SelfClause() { int i, j; - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'self' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc serial self(i > j for(;;){} - // expected-error@+4{{use of undeclared identifier 'seq'}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'self' not yet implemented, clause ignored}} + // expected-error@+3{{use of undeclared identifier 'seq'}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc serial self(i > j, seq for(;;){} @@ -448,14 +413,12 @@ struct HasMembersArray { void SelfUpdate() { struct Members s; - // expected-error@+3{{expected '('}} - // expected-warning@+2{{OpenACC clause 'self' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} // expected-warning@+1{{OpenACC construct 'update' not yet implemented, pragma ignored}} #pragma acc update self for(;;){} - // expected-error@+4{{use of undeclared identifier 'zero'}} - // expected-warning@+3{{OpenACC clause 'self' not yet implemented, clause ignored}} + // expected-error@+3{{use of undeclared identifier 'zero'}} // expected-warning@+2{{OpenACC clause 'seq' not yet implemented, clause ignored}} 
// expected-warning@+1{{OpenACC construct 'update' not yet implemented, pragma ignored}} #pragma acc update self(zero : s.array[s.value : 5], s.value), seq @@ -469,50 +432,42 @@ void SelfUpdate() { } void VarListClauses() { - // expected-error@+2{{expected '('}} - // expected-warning@+1{{OpenACC clause 'copy' not yet implemented, clause ignored}} + // expected-error@+1{{expected '('}} #pragma acc serial copy for(;;){} - // expected-error@+3{{expected '('}} - // expected-warning@+2{{OpenACC clause 'copy' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copy, seq for(;;){} - // expected-error@+3{{expected '('}} - // expected-error@+2{{expected identifier}} - // expected-warning@+1{{OpenACC clause 'copy' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} + // expected-error@+1{{expected identifier}} #pragma acc serial copy) for(;;){} - // expected-error@+3{{expected '('}} - // expected-error@+2{{expected identifier}} - // expected-warning@+1{{OpenACC clause 'copy' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} + // expected-error@+1{{expected identifier}} #pragma acc serial copy), seq for(;;){} - // expected-error@+4{{expected expression}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'copy' not yet implemented, clause ignored}} + // expected-error@+3{{expected expression}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc serial copy( for(;;){} - // expected-error@+4{{expected expression}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'copy' not yet implemented, clause ignored}} + // expected-error@+3{{expected expression}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc serial copy(, seq for(;;){} - // expected-error@+2{{expected expression}} - // expected-warning@+1{{OpenACC clause 'copy' not yet implemented, clause ignored}} + // expected-error@+1{{expected expression}} #pragma acc serial copy() for(;;){} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'copy' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copy(), seq for(;;){} @@ -552,28 +507,24 @@ void VarListClauses() { #pragma acc serial copy(HasMem.MemArr[1:3].array[1:2]), seq for(;;){} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'copy' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copy(HasMem.MemArr[:]), seq for(;;){} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'copy' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copy(HasMem.MemArr[::]), seq for(;;){} - // expected-error@+5{{expected expression}} - // expected-error@+4{{expected ']'}} - // expected-note@+3{{to match this '['}} - // expected-warning@+2{{OpenACC clause 'copy' not yet implemented, clause ignored}} + // 
expected-error@+4{{expected expression}} + // expected-error@+3{{expected ']'}} + // expected-note@+2{{to match this '['}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copy(HasMem.MemArr[: :]), seq for(;;){} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'copy' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copy(HasMem.MemArr[3:]), seq for(;;){} @@ -753,8 +704,7 @@ void VarListClauses() { #pragma acc serial copyout(zero : s.array[s.value : 5], s.value), seq for(;;){} - // expected-error@+3{{use of undeclared identifier 'zero'}} - // expected-warning@+2{{OpenACC clause 'copyout' not yet implemented, clause ignored}} + // expected-error@+2{{use of undeclared identifier 'zero'}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copyout(zero s.array[s.value : 5], s.value), seq for(;;){} @@ -777,8 +727,7 @@ void VarListClauses() { #pragma acc serial copyout(invalid:s.array[s.value : 5], s.value), seq for(;;){} - // expected-error@+3{{use of undeclared identifier 'invalid'}} - // expected-warning@+2{{OpenACC clause 'copyout' not yet implemented, clause ignored}} + // expected-error@+2{{use of undeclared identifier 'invalid'}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copyout(invalid s.array[s.value : 5], s.value), seq for(;;){} @@ -804,8 +753,7 @@ void VarListClauses() { #pragma acc serial create(zero : s.array[s.value : 5], s.value), seq for(;;){} - // expected-error@+3{{use of undeclared identifier 'zero'}} - // expected-warning@+2{{OpenACC clause 'create' not yet implemented, clause ignored}} + // expected-error@+2{{use of undeclared identifier 'zero'}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial create(zero s.array[s.value : 5], s.value), seq for(;;){} @@ -828,8 +776,7 @@ void VarListClauses() { #pragma acc serial create(invalid:s.array[s.value : 5], s.value), seq for(;;){} - // expected-error@+3{{use of undeclared identifier 'invalid'}} - // expected-warning@+2{{OpenACC clause 'create' not yet implemented, clause ignored}} + // expected-error@+2{{use of undeclared identifier 'invalid'}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial create(invalid s.array[s.value : 5], s.value), seq for(;;){} @@ -855,8 +802,7 @@ void VarListClauses() { #pragma acc serial copyin(readonly : s.array[s.value : 5], s.value), seq for(;;){} - // expected-error@+3{{use of undeclared identifier 'readonly'}} - // expected-warning@+2{{OpenACC clause 'copyin' not yet implemented, clause ignored}} + // expected-error@+2{{use of undeclared identifier 'readonly'}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial copyin(readonly s.array[s.value : 5], s.value), seq for(;;){} @@ -879,8 +825,7 @@ void VarListClauses() { #pragma acc serial copyin(invalid:s.array[s.value : 5], s.value), seq for(;;){} - // expected-error@+3{{use of undeclared identifier 'invalid'}} - // expected-warning@+2{{OpenACC clause 'copyin' not yet implemented, clause ignored}} + // expected-error@+2{{use of undeclared identifier 'invalid'}} // expected-warning@+1{{OpenACC clause 'seq' not yet implemented, clause ignored}} #pragma acc serial 
copyin(invalid s.array[s.value : 5], s.value), seq for(;;){} @@ -888,13 +833,11 @@ void VarListClauses() { void ReductionClauseParsing() { char *Begin, *End; - // expected-error@+2{{expected '('}} - // expected-warning@+1{{OpenACC clause 'reduction' not yet implemented, clause ignored}} + // expected-error@+1{{expected '('}} #pragma acc serial reduction for(;;){} - // expected-error@+3{{missing reduction operator, expected '+', '*', 'max', 'min', '&', '|', '^', '&&', or '||', follwed by a ':'}} - // expected-error@+2{{expected expression}} - // expected-warning@+1{{OpenACC clause 'reduction' not yet implemented, clause ignored}} + // expected-error@+2{{missing reduction operator, expected '+', '*', 'max', 'min', '&', '|', '^', '&&', or '||', follwed by a ':'}} + // expected-error@+1{{expected expression}} #pragma acc serial reduction() for(;;){} // expected-error@+2{{missing reduction operator, expected '+', '*', 'max', 'min', '&', '|', '^', '&&', or '||', follwed by a ':'}} @@ -946,24 +889,20 @@ void ReductionClauseParsing() { int returns_int(); void IntExprParsing() { - // expected-error@+2{{expected '('}} - // expected-warning@+1{{OpenACC clause 'vector_length' not yet implemented, clause ignored}} + // expected-error@+1{{expected '('}} #pragma acc parallel vector_length {} - // expected-error@+2{{expected expression}} - // expected-warning@+1{{OpenACC clause 'vector_length' not yet implemented, clause ignored}} + // expected-error@+1{{expected expression}} #pragma acc parallel vector_length() {} - // expected-error@+2{{use of undeclared identifier 'invalid'}} - // expected-warning@+1{{OpenACC clause 'vector_length' not yet implemented, clause ignored}} + // expected-error@+1{{use of undeclared identifier 'invalid'}} #pragma acc parallel vector_length(invalid) {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'vector_length' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel vector_length(5, 4) {} @@ -975,24 +914,20 @@ void IntExprParsing() { #pragma acc parallel vector_length(returns_int()) {} - // expected-error@+2{{expected '('}} - // expected-warning@+1{{OpenACC clause 'num_gangs' not yet implemented, clause ignored}} + // expected-error@+1{{expected '('}} #pragma acc parallel num_gangs {} - // expected-error@+2{{expected expression}} - // expected-warning@+1{{OpenACC clause 'num_gangs' not yet implemented, clause ignored}} + // expected-error@+1{{expected expression}} #pragma acc parallel num_gangs() {} - // expected-error@+2{{use of undeclared identifier 'invalid'}} - // expected-warning@+1{{OpenACC clause 'num_gangs' not yet implemented, clause ignored}} + // expected-error@+1{{use of undeclared identifier 'invalid'}} #pragma acc parallel num_gangs(invalid) {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'num_gangs' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel num_gangs(5, 4) {} @@ -1004,24 +939,20 @@ void IntExprParsing() { #pragma acc parallel num_gangs(returns_int()) {} - // expected-error@+2{{expected '('}} - // expected-warning@+1{{OpenACC clause 'num_workers' not yet implemented, clause ignored}} + // expected-error@+1{{expected '('}} #pragma acc parallel num_workers {} - // expected-error@+2{{expected expression}} - // 
expected-warning@+1{{OpenACC clause 'num_workers' not yet implemented, clause ignored}} + // expected-error@+1{{expected expression}} #pragma acc parallel num_workers() {} - // expected-error@+2{{use of undeclared identifier 'invalid'}} - // expected-warning@+1{{OpenACC clause 'num_workers' not yet implemented, clause ignored}} + // expected-error@+1{{use of undeclared identifier 'invalid'}} #pragma acc parallel num_workers(invalid) {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'num_workers' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel num_workers(5, 4) {} @@ -1033,24 +964,20 @@ void IntExprParsing() { #pragma acc parallel num_workers(returns_int()) {} - // expected-error@+3{{expected '('}} - // expected-warning@+2{{OpenACC clause 'device_num' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} // expected-warning@+1{{OpenACC construct 'init' not yet implemented, pragma ignored}} #pragma acc init device_num - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'device_num' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'init' not yet implemented, pragma ignored}} #pragma acc init device_num() - // expected-error@+3{{use of undeclared identifier 'invalid'}} - // expected-warning@+2{{OpenACC clause 'device_num' not yet implemented, clause ignored}} + // expected-error@+2{{use of undeclared identifier 'invalid'}} // expected-warning@+1{{OpenACC construct 'init' not yet implemented, pragma ignored}} #pragma acc init device_num(invalid) - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'device_num' not yet implemented, clause ignored}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'init' not yet implemented, pragma ignored}} #pragma acc init device_num(5, 4) @@ -1062,24 +989,20 @@ void IntExprParsing() { // expected-warning@+1{{OpenACC construct 'init' not yet implemented, pragma ignored}} #pragma acc init device_num(returns_int()) - // expected-error@+3{{expected '('}} - // expected-warning@+2{{OpenACC clause 'default_async' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} // expected-warning@+1{{OpenACC construct 'set' not yet implemented, pragma ignored}} #pragma acc set default_async - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'default_async' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'set' not yet implemented, pragma ignored}} #pragma acc set default_async() - // expected-error@+3{{use of undeclared identifier 'invalid'}} - // expected-warning@+2{{OpenACC clause 'default_async' not yet implemented, clause ignored}} + // expected-error@+2{{use of undeclared identifier 'invalid'}} // expected-warning@+1{{OpenACC construct 'set' not yet implemented, pragma ignored}} #pragma acc set default_async(invalid) - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'default_async' not yet implemented, clause ignored}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // 
expected-warning@+1{{OpenACC construct 'set' not yet implemented, pragma ignored}} #pragma acc set default_async(5, 4) @@ -1095,42 +1018,35 @@ void IntExprParsing() { // expected-warning@+2{{OpenACC clause 'vector' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop vector - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'vector' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop vector() - // expected-error@+4{{invalid tag 'invalid' on 'vector' clause}} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'vector' not yet implemented, clause ignored}} + // expected-error@+3{{invalid tag 'invalid' on 'vector' clause}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop vector(invalid:) // expected-error@+3{{invalid tag 'invalid' on 'vector' clause}} // expected-warning@+2{{OpenACC clause 'vector' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop vector(invalid:5) - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'vector' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop vector(length:) - // expected-error@+4{{invalid tag 'num' on 'vector' clause}} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'vector' not yet implemented, clause ignored}} + // expected-error@+3{{invalid tag 'num' on 'vector' clause}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop vector(num:) - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'vector' not yet implemented, clause ignored}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop vector(5, 4) - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'vector' not yet implemented, clause ignored}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop vector(length:6,4) - // expected-error@+5{{invalid tag 'num' on 'vector' clause}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'vector' not yet implemented, clause ignored}} + // expected-error@+4{{invalid tag 'num' on 'vector' clause}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop vector(num:6,4) // expected-warning@+2{{OpenACC clause 'vector' not yet implemented, clause ignored}} @@ -1153,42 +1069,35 @@ void IntExprParsing() { // expected-warning@+2{{OpenACC clause 'worker' not yet implemented, clause ignored}} // 
expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop worker - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'worker' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop worker() - // expected-error@+4{{invalid tag 'invalid' on 'worker' clause}} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'worker' not yet implemented, clause ignored}} + // expected-error@+3{{invalid tag 'invalid' on 'worker' clause}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop worker(invalid:) // expected-error@+3{{invalid tag 'invalid' on 'worker' clause}} // expected-warning@+2{{OpenACC clause 'worker' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop worker(invalid:5) - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'worker' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop worker(num:) - // expected-error@+4{{invalid tag 'length' on 'worker' clause}} - // expected-error@+3{{expected expression}} - // expected-warning@+2{{OpenACC clause 'worker' not yet implemented, clause ignored}} + // expected-error@+3{{invalid tag 'length' on 'worker' clause}} + // expected-error@+2{{expected expression}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop worker(length:) - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'worker' not yet implemented, clause ignored}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop worker(5, 4) - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'worker' not yet implemented, clause ignored}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop worker(num:6,4) - // expected-error@+5{{invalid tag 'length' on 'worker' clause}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'worker' not yet implemented, clause ignored}} + // expected-error@+4{{invalid tag 'length' on 'worker' clause}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop worker(length:6,4) // expected-warning@+2{{OpenACC clause 'worker' not yet implemented, clause ignored}} @@ -1211,25 +1120,21 @@ void IntExprParsing() { } void device_type() { - // expected-error@+2{{expected '('}} - // expected-warning@+1{{OpenACC clause 'device_type' not yet implemented, clause ignored}} + // expected-error@+1{{expected '('}} #pragma acc parallel device_type {} - // expected-error@+2{{expected '('}} - // expected-warning@+1{{OpenACC clause 'dtype' not yet 
implemented, clause ignored}} + // expected-error@+1{{expected '('}} #pragma acc parallel dtype {} - // expected-error@+4{{expected identifier}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'device_type' not yet implemented, clause ignored}} + // expected-error@+3{{expected identifier}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel device_type( {} - // expected-error@+4{{expected identifier}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'dtype' not yet implemented, clause ignored}} + // expected-error@+3{{expected identifier}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel dtype( {} @@ -1242,49 +1147,41 @@ void device_type() { #pragma acc parallel dtype() {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'device_type' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel device_type(* {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'dtype' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel dtype(* {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'device_type' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel device_type(ident {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'dtype' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel dtype(ident {} - // expected-error@+4{{expected ','}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'device_type' not yet implemented, clause ignored}} + // expected-error@+3{{expected ','}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel device_type(ident ident2 {} - // expected-error@+4{{expected ','}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'dtype' not yet implemented, clause ignored}} + // expected-error@+3{{expected ','}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel dtype(ident ident2 {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'device_type' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel device_type(ident, ident2 {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'dtype' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel dtype(ident, ident2 {} @@ -1297,25 +1194,21 @@ void device_type() { #pragma acc parallel dtype(ident, ident2,) {} - // 
expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'device_type' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel device_type(*,) {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'dtype' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel dtype(*,) {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'device_type' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel device_type(*,ident) {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'dtype' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel dtype(*,ident) {} @@ -1356,19 +1249,16 @@ void AsyncArgument() { #pragma acc parallel async {} - // expected-error@+2{{expected expression}} - // expected-warning@+1{{OpenACC clause 'async' not yet implemented, clause ignored}} + // expected-error@+1{{expected expression}} #pragma acc parallel async() {} - // expected-error@+2{{use of undeclared identifier 'invalid'}} - // expected-warning@+1{{OpenACC clause 'async' not yet implemented, clause ignored}} + // expected-error@+1{{use of undeclared identifier 'invalid'}} #pragma acc parallel async(invalid) {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'async' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel async(4, 3) {} @@ -1388,15 +1278,13 @@ void AsyncArgument() { void Tile() { int* Foo; - // expected-error@+3{{expected '('}} - // expected-warning@+2{{OpenACC clause 'tile' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop tile for(;;){} - // expected-error@+5{{expected expression}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'tile' not yet implemented, clause ignored}} + // expected-error@+4{{expected expression}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop tile( for(;;){} @@ -1405,10 +1293,9 @@ void Tile() { // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop tile() for(;;){} - // expected-error@+5{{expected expression}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'tile' not yet implemented, clause ignored}} + // expected-error@+4{{expected expression}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop tile(, for(;;){} @@ -1466,10 +1353,9 @@ void Gang() { // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma 
ignored}} #pragma acc loop gang for(;;){} - // expected-error@+5{{expected expression}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'gang' not yet implemented, clause ignored}} + // expected-error@+4{{expected expression}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop gang( for(;;){} @@ -1552,77 +1438,67 @@ void Gang() { #pragma acc loop gang(static:45, 5) for(;;){} - // expected-error@+5{{expected expression}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'gang' not yet implemented, clause ignored}} + // expected-error@+4{{expected expression}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop gang(static:45, for(;;){} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'gang' not yet implemented, clause ignored}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop gang(static:45 for(;;){} - // expected-error@+5{{expected expression}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'gang' not yet implemented, clause ignored}} + // expected-error@+4{{expected expression}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop gang(static:*, for(;;){} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'gang' not yet implemented, clause ignored}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop gang(static:* for(;;){} - // expected-error@+5{{expected expression}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'gang' not yet implemented, clause ignored}} + // expected-error@+4{{expected expression}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop gang(45, for(;;){} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'gang' not yet implemented, clause ignored}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop gang(45 for(;;){} - // expected-error@+5{{expected expression}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'gang' not yet implemented, clause ignored}} + // expected-error@+4{{expected expression}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} 
#pragma acc loop gang(num:45, for(;;){} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'gang' not yet implemented, clause ignored}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop gang(num:45 for(;;){} - // expected-error@+5{{expected expression}} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'gang' not yet implemented, clause ignored}} + // expected-error@+4{{expected expression}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop gang(dim:45, for(;;){} - // expected-error@+4{{expected ')'}} - // expected-note@+3{{to match this '('}} - // expected-warning@+2{{OpenACC clause 'gang' not yet implemented, clause ignored}} + // expected-error@+3{{expected ')'}} + // expected-note@+2{{to match this '('}} // expected-warning@+1{{OpenACC construct 'loop' not yet implemented, pragma ignored}} #pragma acc loop gang(dim:45 for(;;){} @@ -1657,14 +1533,12 @@ void bar(); // Bind Clause Parsing. - // expected-error@+3{{expected '('}} - // expected-warning@+2{{OpenACC clause 'bind' not yet implemented, clause ignored}} + // expected-error@+2{{expected '('}} // expected-warning@+1{{OpenACC construct 'routine' not yet implemented, pragma ignored}} #pragma acc routine bind void BCP1(); - // expected-error@+3{{expected identifier or string literal}} - // expected-warning@+2{{OpenACC clause 'bind' not yet implemented, clause ignored}} + // expected-error@+2{{expected identifier or string literal}} // expected-warning@+1{{OpenACC construct 'routine' not yet implemented, pragma ignored}} #pragma acc routine(BCP1) bind() @@ -1677,7 +1551,6 @@ void BCP2(); // expected-warning@+1{{OpenACC construct 'routine' not yet implemented, pragma ignored}} #pragma acc routine(BCP1) bind(BCP2) - // expected-error@+3{{use of undeclared identifier 'unknown_thing'}} - // expected-warning@+2{{OpenACC clause 'bind' not yet implemented, clause ignored}} + // expected-error@+2{{use of undeclared identifier 'unknown_thing'}} // expected-warning@+1{{OpenACC construct 'routine' not yet implemented, pragma ignored}} #pragma acc routine(BCP1) bind(unknown_thing) diff --git a/clang/test/ParserOpenACC/parse-clauses.cpp b/clang/test/ParserOpenACC/parse-clauses.cpp index 497b1c7bcd0da..09a90726c6939 100644 --- a/clang/test/ParserOpenACC/parse-clauses.cpp +++ b/clang/test/ParserOpenACC/parse-clauses.cpp @@ -56,9 +56,8 @@ void function(); // expected-warning@+2{{OpenACC clause 'bind' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC construct 'routine' not yet implemented, pragma ignored}} #pragma acc routine(use) bind(NS::NSFunc) - // expected-error@+4{{'RecordTy' does not refer to a value}} + // expected-error@+3{{'RecordTy' does not refer to a value}} // expected-note@#RecTy{{declared here}} - // expected-warning@+2{{OpenACC clause 'bind' not yet implemented, clause ignored}} // expected-warning@+1{{OpenACC construct 'routine' not yet implemented, pragma ignored}} #pragma acc routine(use) bind(NS::RecordTy) // expected-error@+4{{'Value' is a private member of 'NS::RecordTy'}} @@ -72,8 +71,7 @@ void function(); // expected-warning@+2{{OpenACC clause 'bind' not yet implemented, clause ignored}} // 
expected-warning@+1{{OpenACC construct 'routine' not yet implemented, pragma ignored}} #pragma acc routine(use) bind(NS::TemplTy) - // expected-error@+3{{no member named 'unknown' in namespace 'NS'}} - // expected-warning@+2{{OpenACC clause 'bind' not yet implemented, clause ignored}} + // expected-error@+2{{no member named 'unknown' in namespace 'NS'}} // expected-warning@+1{{OpenACC construct 'routine' not yet implemented, pragma ignored}} #pragma acc routine(use) bind(NS::unknown) // expected-warning@+2{{OpenACC clause 'bind' not yet implemented, clause ignored}} @@ -88,8 +86,7 @@ void function(); // expected-warning@+1{{OpenACC construct 'routine' not yet implemented, pragma ignored}} #pragma acc routine(use) bind(NS::RecordTy::mem_function) - // expected-error@+3{{string literal with user-defined suffix cannot be used here}} - // expected-warning@+2{{OpenACC clause 'bind' not yet implemented, clause ignored}} + // expected-error@+2{{string literal with user-defined suffix cannot be used here}} // expected-warning@+1{{OpenACC construct 'routine' not yet implemented, pragma ignored}} #pragma acc routine(use) bind("unknown udl"_UDL) diff --git a/clang/test/ParserOpenACC/parse-wait-clause.c b/clang/test/ParserOpenACC/parse-wait-clause.c index cce050d5da984..f3e651de45837 100644 --- a/clang/test/ParserOpenACC/parse-wait-clause.c +++ b/clang/test/ParserOpenACC/parse-wait-clause.c @@ -12,9 +12,8 @@ void func() { #pragma acc parallel wait clause-list {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel wait ( {} @@ -27,45 +26,38 @@ void func() { #pragma acc parallel wait () clause-list {} - // expected-error@+4{{expected expression}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+3{{expected expression}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel wait (devnum: {} - // expected-error@+2{{expected expression}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+1{{expected expression}} #pragma acc parallel wait (devnum:) {} - // expected-error@+3{{expected expression}} - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc parallel wait (devnum:) clause-list {} - // expected-error@+4{{expected ':'}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+3{{expected ':'}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel wait (devnum: i + j {} - // expected-error@+2{{expected ':'}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+1{{expected ':'}} #pragma acc parallel wait (devnum: i + j) {} - // expected-error@+3{{expected ':'}} - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // 
expected-error@+2{{expected ':'}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc parallel wait (devnum: i + j) clause-list {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel wait (queues: {} @@ -78,9 +70,8 @@ void func() { #pragma acc parallel wait (queues:) clause-list {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel wait (devnum: i + j:queues: {} @@ -93,27 +84,23 @@ void func() { #pragma acc parallel wait (devnum: i + j:queues:) clause-list {} - // expected-error@+4{{use of undeclared identifier 'devnum'}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+3{{use of undeclared identifier 'devnum'}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel wait (queues:devnum: i + j {} - // expected-error@+2{{use of undeclared identifier 'devnum'}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+1{{use of undeclared identifier 'devnum'}} #pragma acc parallel wait (queues:devnum: i + j) {} - // expected-error@+3{{use of undeclared identifier 'devnum'}} - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+2{{use of undeclared identifier 'devnum'}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc parallel wait (queues:devnum: i + j) clause-list {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel wait(i, j, 1+1, 3.3 {} @@ -125,34 +112,29 @@ void func() { #pragma acc parallel wait(i, j, 1+1, 3.3) clause-list {} - // expected-error@+4{{expected expression}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+3{{expected expression}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel wait(, {} - // expected-error@+2{{expected expression}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+1{{expected expression}} #pragma acc parallel wait(,) {} - // expected-error@+3{{expected expression}} - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+2{{expected expression}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc parallel wait(,) clause-list {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to 
match this '('}} #pragma acc parallel wait(queues:i, j, 1+1, 3.3 {} - // expected-error@+4{{expected expression}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+3{{expected expression}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel wait(queues:i, j, 1+1, 3.3, {} @@ -165,9 +147,8 @@ void func() { #pragma acc parallel wait(queues:i, j, 1+1, 3.3) clause-list {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel wait(devnum:3:i, j, 1+1, 3.3 {} // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} @@ -178,9 +159,8 @@ void func() { #pragma acc parallel wait(devnum:3:i, j, 1+1, 3.3) clause-list {} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc parallel wait(devnum:3:queues:i, j, 1+1, 3.3 {} // expected-warning@+1{{OpenACC clause 'wait' not yet implemented, clause ignored}} diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index dfc6d18dee504..ec7764bb53818 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -92,6 +92,7 @@ // CHECK-NOT: __riscv_zcd {{.*$}} // CHECK-NOT: __riscv_zce {{.*$}} // CHECK-NOT: __riscv_zcf {{.*$}} +// CHECK-NOT: __riscv_zcmop {{.*$}} // CHECK-NOT: __riscv_zcmp {{.*$}} // CHECK-NOT: __riscv_zcmt {{.*$}} // CHECK-NOT: __riscv_zdinx {{.*$}} @@ -116,6 +117,7 @@ // CHECK-NOT: __riscv_zihintntl {{.*$}} // CHECK-NOT: __riscv_zihintpause {{.*$}} // CHECK-NOT: __riscv_zihpm {{.*$}} +// CHECK-NOT: __riscv_zimop {{.*$}} // CHECK-NOT: __riscv_zk {{.*$}} // CHECK-NOT: __riscv_zkn {{.*$}} // CHECK-NOT: __riscv_zknd {{.*$}} @@ -173,11 +175,9 @@ // CHECK-NOT: __riscv_zaamo {{.*$}} // CHECK-NOT: __riscv_zalasr {{.*$}} // CHECK-NOT: __riscv_zalrsc {{.*$}} -// CHECK-NOT: __riscv_zcmop {{.*$}} // CHECK-NOT: __riscv_zfbfmin {{.*$}} // CHECK-NOT: __riscv_zicfilp {{.*$}} // CHECK-NOT: __riscv_zicfiss {{.*$}} -// CHECK-NOT: __riscv_zimop {{.*$}} // CHECK-NOT: __riscv_ztso {{.*$}} // CHECK-NOT: __riscv_zvfbfmin {{.*$}} // CHECK-NOT: __riscv_zvfbfwma {{.*$}} @@ -830,6 +830,14 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-ZCF-EXT %s // CHECK-ZCF-EXT: __riscv_zcf 1000000{{$}} +// RUN: %clang --target=riscv32-unknown-linux-gnu \ +// RUN: -march=rv32i_zcmop1p0 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZCMOP-EXT %s +// RUN: %clang --target=riscv64-unknown-linux-gnu \ +// RUN: -march=rv64i_zcmop1p0 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZCMOP-EXT %s +// CHECK-ZCMOP-EXT: __riscv_zcmop 1000000{{$}} + // RUN: %clang --target=riscv32-unknown-linux-gnu \ // RUN: -march=rv32izcmp1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-ZCMP-EXT %s @@ -1018,6 +1026,14 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-ZIHPM-EXT %s // CHECK-ZIHPM-EXT: __riscv_zihpm 2000000{{$}} +// RUN: %clang --target=riscv32-unknown-linux-gnu \ +// RUN: -march=rv32i_zimop1p0 -E -dM %s \ +// RUN: -o - | 
FileCheck --check-prefix=CHECK-ZIMOP-EXT %s +// RUN: %clang --target=riscv64-unknown-linux-gnu \ +// RUN: -march=rv64i_zimop1p0 -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-ZIMOP-EXT %s +// CHECK-ZIMOP-EXT: __riscv_zimop 1000000{{$}} + // RUN: %clang --target=riscv32-unknown-linux-gnu \ // RUN: -march=rv32izk1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-ZK-EXT %s @@ -1561,22 +1577,6 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-ZICFILP-EXT %s // CHECK-ZICFILP-EXT: __riscv_zicfilp 4000{{$}} -// RUN: %clang --target=riscv32 -menable-experimental-extensions \ -// RUN: -march=rv32i_zimop0p1 -E -dM %s \ -// RUN: -o - | FileCheck --check-prefix=CHECK-ZIMOP-EXT %s -// RUN: %clang --target=riscv64 -menable-experimental-extensions \ -// RUN: -march=rv64i_zimop0p1 -E -dM %s \ -// RUN: -o - | FileCheck --check-prefix=CHECK-ZIMOP-EXT %s -// CHECK-ZIMOP-EXT: __riscv_zimop 1000{{$}} - -// RUN: %clang --target=riscv32 -menable-experimental-extensions \ -// RUN: -march=rv32i_zcmop0p2 -E -dM %s \ -// RUN: -o - | FileCheck --check-prefix=CHECK-ZCMOP-EXT %s -// RUN: %clang --target=riscv64 -menable-experimental-extensions \ -// RUN: -march=rv64i_zcmop0p2 -E -dM %s \ -// RUN: -o - | FileCheck --check-prefix=CHECK-ZCMOP-EXT %s -// CHECK-ZCMOP-EXT: __riscv_zcmop 2000{{$}} - // RUN: %clang --target=riscv32-unknown-linux-gnu -menable-experimental-extensions \ // RUN: -march=rv32iztso0p1 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-ZTSO-EXT %s diff --git a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c index 55c97c73e8b69..6a1feeb9bf539 100644 --- a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c +++ b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \ -// RUN: -target-feature +sme2 -target-feature +sve2 -target-feature +neon -fsyntax-only -verify %s +// RUN: -target-feature +sme2 -target-feature +sve2 -target-feature +neon -Waarch64-sme-attributes -fsyntax-only -verify %s // REQUIRES: aarch64-registered-target @@ -33,6 +33,7 @@ svuint32_t incompat_sve_sm(svbool_t pg, svuint32_t a, int16_t b) __arm_streaming return __builtin_sve_svld1_gather_u32base_index_u32(pg, a, b); } +// expected-warning@+1 {{passing/returning a VL-dependent argument to/from a __arm_locally_streaming function. The streaming and non-streaming vector lengths may be different}} __arm_locally_streaming svuint32_t incompat_sve_ls(svbool_t pg, svuint32_t a, int64_t b) { // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming function}} return __builtin_sve_svld1_gather_u32base_index_u32(pg, a, b); @@ -48,6 +49,7 @@ svuint32_t incompat_sve2_sm(svbool_t pg, svuint32_t a, int64_t b) __arm_streamin return __builtin_sve_svldnt1_gather_u32base_index_u32(pg, a, b); } +// expected-warning@+1 {{passing/returning a VL-dependent argument to/from a __arm_locally_streaming function. 
The streaming and non-streaming vector lengths may be different}} __arm_locally_streaming svuint32_t incompat_sve2_ls(svbool_t pg, svuint32_t a, int64_t b) { // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming function}} return __builtin_sve_svldnt1_gather_u32base_index_u32(pg, a, b); @@ -68,6 +70,7 @@ svfloat64_t streaming_caller_sve(svbool_t pg, svfloat64_t a, float64_t b) __arm_ return svadd_n_f64_m(pg, a, b); } +// expected-warning@+1 {{passing/returning a VL-dependent argument to/from a __arm_locally_streaming function. The streaming and non-streaming vector lengths may be different}} __arm_locally_streaming svfloat64_t locally_streaming_caller_sve(svbool_t pg, svfloat64_t a, float64_t b) { // expected-no-warning return svadd_n_f64_m(pg, a, b); @@ -83,6 +86,7 @@ svint16_t streaming_caller_sve2(svint16_t op1, svint16_t op2) __arm_streaming { return svmul_lane_s16(op1, op2, 0); } +// expected-warning@+1 {{passing/returning a VL-dependent argument to/from a __arm_locally_streaming function. The streaming and non-streaming vector lengths may be different}} __arm_locally_streaming svint16_t locally_streaming_caller_sve2(svint16_t op1, svint16_t op2) { // expected-no-warning return svmul_lane_s16(op1, op2, 0); diff --git a/clang/test/Sema/aarch64-sme-func-attrs.c b/clang/test/Sema/aarch64-sme-func-attrs.c index bfc8768c3f36e..12de16509ccb8 100644 --- a/clang/test/Sema/aarch64-sme-func-attrs.c +++ b/clang/test/Sema/aarch64-sme-func-attrs.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -fsyntax-only -verify %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -fsyntax-only -verify=expected-cpp -x c++ %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify=expected-cpp -x c++ %s // Valid attributes @@ -496,3 +496,135 @@ void fmv_caller() { just_fine(); incompatible_locally_streaming(); } + +void sme_streaming_with_vl_arg(__SVInt8_t a) __arm_streaming { } + +__SVInt8_t sme_streaming_returns_vl(void) __arm_streaming { __SVInt8_t r; return r; } + +void sme_streaming_compatible_with_vl_arg(__SVInt8_t a) __arm_streaming_compatible { } + +__SVInt8_t sme_streaming_compatible_returns_vl(void) __arm_streaming_compatible { __SVInt8_t r; return r; } + +void sme_no_streaming_with_vl_arg(__SVInt8_t a) { } + +__SVInt8_t sme_no_streaming_returns_vl(void) { __SVInt8_t r; return r; } + +// expected-warning@+2 {{passing/returning a VL-dependent argument to/from a __arm_locally_streaming function. The streaming and non-streaming vector lengths may be different}} +// expected-cpp-warning@+1 {{passing/returning a VL-dependent argument to/from a __arm_locally_streaming function. The streaming and non-streaming vector lengths may be different}} +__arm_locally_streaming void sme_locally_streaming_with_vl_arg(__SVInt8_t a) { } + +// expected-warning@+2 {{passing/returning a VL-dependent argument to/from a __arm_locally_streaming function. The streaming and non-streaming vector lengths may be different}} +// expected-cpp-warning@+1 {{passing/returning a VL-dependent argument to/from a __arm_locally_streaming function. 
The streaming and non-streaming vector lengths may be different}} +__arm_locally_streaming __SVInt8_t sme_locally_streaming_returns_vl(void) { __SVInt8_t r; return r; } + +void sme_no_streaming_calling_streaming_with_vl_args() { + __SVInt8_t a; + // expected-warning@+2 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + // expected-cpp-warning@+1 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + sme_streaming_with_vl_arg(a); +} + +void sme_no_streaming_calling_streaming_with_return_vl() { + // expected-warning@+2 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + // expected-cpp-warning@+1 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + __SVInt8_t r = sme_streaming_returns_vl(); +} + +void sme_streaming_calling_non_streaming_with_vl_args(void) __arm_streaming { + __SVInt8_t a; + // expected-warning@+2 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + // expected-cpp-warning@+1 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + sme_no_streaming_with_vl_arg(a); +} + +void sme_streaming_calling_non_streaming_with_return_vl(void) __arm_streaming { + // expected-warning@+2 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + // expected-cpp-warning@+1 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + __SVInt8_t r = sme_no_streaming_returns_vl(); +} + +void sme_no_streaming_calling_streaming_with_vl_args_param(__SVInt8_t arg, void (*sc)( __SVInt8_t arg) __arm_streaming) { + // expected-warning@+2 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + // expected-cpp-warning@+1 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + sc(arg); +} + +__SVInt8_t sme_no_streaming_calling_streaming_return_vl_param(__SVInt8_t (*s)(void) __arm_streaming) { + // expected-warning@+2 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + // expected-cpp-warning@+1 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + return s(); +} + +void sme_streaming_compatible_calling_streaming_with_vl_args(__SVInt8_t arg) __arm_streaming_compatible { + // expected-warning@+2 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + // expected-cpp-warning@+1 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. 
The streaming and non-streaming vector lengths may be different}} + sme_streaming_with_vl_arg(arg); +} + +void sme_streaming_compatible_calling_sme_streaming_return_vl(void) __arm_streaming_compatible { + // expected-warning@+2 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + // expected-cpp-warning@+1 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + __SVInt8_t r = sme_streaming_returns_vl(); +} + +void sme_streaming_compatible_calling_no_streaming_with_vl_args(__SVInt8_t arg) __arm_streaming_compatible { + // expected-warning@+2 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + // expected-cpp-warning@+1 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + sme_no_streaming_with_vl_arg(arg); +} + +void sme_streaming_compatible_calling_no_sme_streaming_return_vl(void) __arm_streaming_compatible { + // expected-warning@+2 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + // expected-cpp-warning@+1 {{passing a VL-dependent argument to/from a function that has a different streaming-mode. The streaming and non-streaming vector lengths may be different}} + __SVInt8_t r = sme_no_streaming_returns_vl(); +} + +void sme_streaming_calling_streaming(__SVInt8_t arg, void (*s)( __SVInt8_t arg) __arm_streaming) __arm_streaming { + s(arg); +} + +__SVInt8_t sme_streaming_calling_streaming_return_vl(__SVInt8_t (*s)(void) __arm_streaming) __arm_streaming { + return s(); +} + +void sme_streaming_calling_streaming_with_vl_args(__SVInt8_t a) __arm_streaming { + sme_streaming_with_vl_arg(a); +} + +void sme_streaming_calling_streaming_with_return_vl(void) __arm_streaming { + __SVInt8_t r = sme_streaming_returns_vl(); +} + +void sme_streaming_calling_streaming_compatible_with_vl_args(__SVInt8_t a) __arm_streaming { + sme_streaming_compatible_with_vl_arg(a); +} + +void sme_streaming_calling_streaming_compatible_with_return_vl(void) __arm_streaming { + __SVInt8_t r = sme_streaming_compatible_returns_vl(); +} + +void sme_no_streaming_calling_streaming_compatible_with_vl_args() { + __SVInt8_t a; + sme_streaming_compatible_with_vl_arg(a); +} + +void sme_no_streaming_calling_streaming_compatible_with_return_vl() { + __SVInt8_t r = sme_streaming_compatible_returns_vl(); +} + +void sme_no_streaming_calling_non_streaming_compatible_with_vl_args() { + __SVInt8_t a; + sme_no_streaming_with_vl_arg(a); +} + +void sme_no_streaming_calling_non_streaming_compatible_with_return_vl() { + __SVInt8_t r = sme_no_streaming_returns_vl(); +} + +void sme_streaming_compatible_calling_streaming_compatible_with_vl_args(__SVInt8_t arg) __arm_streaming_compatible { + sme_streaming_compatible_with_vl_arg(arg); +} + +void sme_streaming_compatible_calling_streaming_compatible_with_return_vl(void) __arm_streaming_compatible { + __SVInt8_t r = sme_streaming_compatible_returns_vl(); +} diff --git a/clang/test/Sema/tentative-array-decl.c b/clang/test/Sema/tentative-array-decl.c new file mode 100644 index 0000000000000..77ede14c0be2c --- /dev/null +++ b/clang/test/Sema/tentative-array-decl.c @@ -0,0 +1,5 @@ +// RUN: 
%clang_cc1 -verify %s +// RUN: %clang_cc1 -verify=good -Wno-tentative-definition-array %s +// good-no-diagnostics + +int foo[]; // expected-warning {{tentative array definition assumed to have one element}} diff --git a/clang/test/SemaCXX/PR86790.cpp b/clang/test/SemaCXX/PR86790.cpp new file mode 100644 index 0000000000000..09e9bb3505e1b --- /dev/null +++ b/clang/test/SemaCXX/PR86790.cpp @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -verify -std=c++20 -fsyntax-only %s + +enum {A, S, D, F}; +int main() { + using asdf = decltype(A); + using enum asdf; // this line causes the crash + return 0; +} + +namespace N1 { + enum {A, S, D, F}; + constexpr struct T { + using asdf = decltype(A); + using enum asdf; + } t; + + static_assert(t.D == D); + static_assert(T::S == S); +} + +namespace N2 { + enum {A, S, D, F}; + constexpr struct T { + struct { + using asdf = decltype(A); + using enum asdf; + } inner; + } t; + + static_assert(t.inner.D == D); + static_assert(t.D == D); // expected-error {{no member named 'D' in 'N2::T'}} +} diff --git a/clang/test/SemaCXX/attr-no-sanitize.cpp b/clang/test/SemaCXX/attr-no-sanitize.cpp index a464947fe5a34..8951f616ce0f0 100644 --- a/clang/test/SemaCXX/attr-no-sanitize.cpp +++ b/clang/test/SemaCXX/attr-no-sanitize.cpp @@ -16,12 +16,12 @@ int f3() __attribute__((no_sanitize("address"))); // DUMP-LABEL: FunctionDecl {{.*}} f4 // DUMP: NoSanitizeAttr {{.*}} thread -// PRINT: int f4() {{\[\[}}clang::no_sanitize("thread")]] +// PRINT: {{\[\[}}clang::no_sanitize("thread")]] int f4() [[clang::no_sanitize("thread")]] int f4(); // DUMP-LABEL: FunctionDecl {{.*}} f4 // DUMP: NoSanitizeAttr {{.*}} hwaddress -// PRINT: int f4() {{\[\[}}clang::no_sanitize("hwaddress")]] +// PRINT: {{\[\[}}clang::no_sanitize("hwaddress")]] int f4() [[clang::no_sanitize("hwaddress")]] int f4(); // DUMP-LABEL: FunctionDecl {{.*}} f5 @@ -36,5 +36,5 @@ int f6() __attribute__((no_sanitize("unknown"))); // expected-warning{{unknown s // DUMP-LABEL: FunctionDecl {{.*}} f7 // DUMP: NoSanitizeAttr {{.*}} memtag -// PRINT: int f7() {{\[\[}}clang::no_sanitize("memtag")]] +// PRINT: {{\[\[}}clang::no_sanitize("memtag")]] int f7() [[clang::no_sanitize("memtag")]] int f7(); diff --git a/clang/test/SemaCXX/cxx11-attr-print.cpp b/clang/test/SemaCXX/cxx11-attr-print.cpp index c988972aeb1a5..a169d1b4409b4 100644 --- a/clang/test/SemaCXX/cxx11-attr-print.cpp +++ b/clang/test/SemaCXX/cxx11-attr-print.cpp @@ -24,10 +24,10 @@ int d [[deprecated("warning")]]; // CHECK: __attribute__((deprecated("warning", "fixit"))); int e __attribute__((deprecated("warning", "fixit"))); -// CHECK: int cxx11_alignas alignas(4); +// CHECK: alignas(4) int cxx11_alignas; alignas(4) int cxx11_alignas; -// CHECK: int c11_alignas _Alignas(int); +// CHECK: _Alignas(int) int c11_alignas; _Alignas(int) int c11_alignas; // CHECK: int foo() __attribute__((const)); @@ -66,7 +66,7 @@ void f8 (void *, const char *, ...) 
__attribute__ ((format (printf, 2, 3))); // CHECK: int n alignas(4 // CHECK: int p alignas(int // CHECK: __attribute__((pure)) static int f() -// CHECK: static int g() {{\[}}[gnu::pure]] +// CHECK: {{\[}}[gnu::pure]] static int g() template struct S { __attribute__((aligned(4))) int m; alignas(4) int n; @@ -82,7 +82,7 @@ template struct S { // CHECK: int m __attribute__((aligned(4 // CHECK: int n alignas(4 // CHECK: __attribute__((pure)) static int f() -// CHECK: static int g() {{\[}}[gnu::pure]] +// CHECK: {{\[}}[gnu::pure]] static int g() template struct S; // CHECK: using Small2 {{\[}}[gnu::mode(byte)]] = int; diff --git a/clang/test/SemaCXX/cxx2b-deducing-this.cpp b/clang/test/SemaCXX/cxx2b-deducing-this.cpp index b8ddb9ad30003..5f29a955e053c 100644 --- a/clang/test/SemaCXX/cxx2b-deducing-this.cpp +++ b/clang/test/SemaCXX/cxx2b-deducing-this.cpp @@ -200,6 +200,130 @@ void TestMutationInLambda() { [i = 0](this auto){ i++; }(); [i = 0](this const auto&){ i++; }(); // expected-error@-1 {{cannot assign to a variable captured by copy in a non-mutable lambda}} + // expected-note@-2 {{in instantiation of}} + + int x; + const auto l1 = [x](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} + const auto l2 = [=](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} + + const auto l3 = [&x](this auto&) { + const auto l3a = [x](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} + l3a(); // expected-note {{in instantiation of}} + }; + + const auto l4 = [&x](this auto&) { + const auto l4a = [=](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} + l4a(); // expected-note {{in instantiation of}} + }; + + const auto l5 = [x](this auto&) { + const auto l5a = [x](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} + l5a(); // expected-note {{in instantiation of}} + }; + + const auto l6 = [=](this auto&) { + const auto l6a = [=](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} + l6a(); // expected-note {{in instantiation of}} + }; + + const auto l7 = [x](this auto&) { + const auto l7a = [=](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} + l7a(); // expected-note {{in instantiation of}} + }; + + const auto l8 = [=](this auto&) { + const auto l8a = [x](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} + l8a(); // expected-note {{in instantiation of}} + }; + + const auto l9 = [&](this auto&) { + const auto l9a = [x](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} + l9a(); // expected-note {{in instantiation of}} + }; + + const auto l10 = [&](this auto&) { + const auto l10a = [=](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} + l10a(); // expected-note {{in instantiation of}} + }; + + const auto l11 = [x](this auto&) { + const auto l11a = [&x](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} expected-note {{while substituting}} + l11a(); + }; + + const auto l12 = [x](this auto&) { + const auto 
l12a = [&](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} expected-note {{while substituting}} + l12a(); + }; + + const auto l13 = [=](this auto&) { + const auto l13a = [&x](this auto&) { x = 42; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} expected-note {{while substituting}} + l13a(); + }; + + struct S { + int x; + auto f() { + return [*this] (this auto&&) { + x = 42; // expected-error {{read-only variable is not assignable}} + [*this] () mutable { x = 42; } (); + [*this] (this auto&&) { x = 42; } (); + [*this] () { x = 42; } (); // expected-error {{read-only variable is not assignable}} + const auto l = [*this] (this auto&&) { x = 42; }; // expected-error {{read-only variable is not assignable}} + l(); // expected-note {{in instantiation of}} + + struct T { + int x; + auto g() { + return [&] (this auto&&) { + x = 42; + const auto l = [*this] (this auto&&) { x = 42; }; // expected-error {{read-only variable is not assignable}} + l(); // expected-note {{in instantiation of}} + }; + } + }; + + const auto l2 = T{}.g(); + l2(); // expected-note {{in instantiation of}} + }; + } + }; + + const auto l14 = S{}.f(); + + l1(); // expected-note {{in instantiation of}} + l2(); // expected-note {{in instantiation of}} + l3(); // expected-note {{in instantiation of}} + l4(); // expected-note {{in instantiation of}} + l5(); // expected-note {{in instantiation of}} + l6(); // expected-note {{in instantiation of}} + l7(); // expected-note {{in instantiation of}} + l8(); // expected-note {{in instantiation of}} + l9(); // expected-note {{in instantiation of}} + l10(); // expected-note {{in instantiation of}} + l11(); // expected-note {{in instantiation of}} + l12(); // expected-note {{in instantiation of}} + l13(); // expected-note {{in instantiation of}} + l14(); // expected-note 3 {{in instantiation of}} + + { + const auto l1 = [&x](this auto&) { x = 42; }; + const auto l2 = [&](this auto&) { x = 42; }; + l1(); + l2(); + } + + // Check that we don't crash if the lambda has type sugar. 
+ const auto l15 = [=](this auto&&) [[clang::annotate_type("foo")]] [[clang::annotate_type("bar")]] { + return x; + }; + + const auto l16 = [=]() [[clang::annotate_type("foo")]] [[clang::annotate_type("bar")]] { + return x; + }; + + l15(); + l16(); } struct Over_Call_Func_Example { @@ -650,3 +774,67 @@ int bug() { S{}.f(0); } } + +namespace GH84163 { +struct S { + int x; + + auto foo() { + return [*this](this auto&&) { + x = 10; // expected-error {{read-only variable is not assignable}} + }; + } +}; + +int f() { + S s{ 5 }; + const auto l = s.foo(); + l(); // expected-note {{in instantiation of}} + + const auto g = [x = 10](this auto&& self) { x = 20; }; // expected-error {{cannot assign to a variable captured by copy in a non-mutable lambda}} + g(); // expected-note {{in instantiation of}} +} +} + +namespace GH86054 { +template +struct unique_lock { + unique_lock(M&) {} +}; +int f() { + struct mutex {} cursor_guard; + [&cursor_guard](this auto self) { + unique_lock a(cursor_guard); + }(); +} +} + +namespace GH86398 { +struct function {}; // expected-note 2 {{not viable}} +int f() { + function list; + [&list](this auto self) { + list = self; // expected-error {{no viable overloaded '='}} + }(); // expected-note {{in instantiation of}} +} + +struct function2 { + function2& operator=(function2 const&) = delete; // expected-note {{candidate function not viable}} +}; +int g() { + function2 list; + [&list](this auto self) { + list = self; // expected-error {{no viable overloaded '='}} + }(); // expected-note {{in instantiation of}} +} + +struct function3 { + function3& operator=(function3 const&) = delete; // expected-note {{has been explicitly deleted}} +}; +int h() { + function3 list; + [&list](this auto self) { + list = function3{}; // expected-error {{selected deleted operator '='}} + }(); +} +} diff --git a/clang/test/SemaCXX/lambda-expressions.cpp b/clang/test/SemaCXX/lambda-expressions.cpp index 151d74f21d64d..6be338064452e 100644 --- a/clang/test/SemaCXX/lambda-expressions.cpp +++ b/clang/test/SemaCXX/lambda-expressions.cpp @@ -762,3 +762,12 @@ template auto t::operator()(int a) const; // expected-note {{in instantiati } #endif + +namespace GH84473_bug { +void f1() { + int b; + (void) [=] [[gnu::regcall]] () { // expected-warning {{an attribute specifier sequence in this position is a C++23 extension}} + (void) b; + }; +} +} diff --git a/clang/test/SemaCXX/type-traits.cpp b/clang/test/SemaCXX/type-traits.cpp index 14ec17989ec7c..421d3007d27ff 100644 --- a/clang/test/SemaCXX/type-traits.cpp +++ b/clang/test/SemaCXX/type-traits.cpp @@ -740,7 +740,7 @@ void is_bounded_array(int n) { static_assert(!__is_bounded_array(cvoid *)); int t32[n]; - (void)__is_bounded_array(decltype(t32)); // expected-error{{variable length arrays are not supported for '__is_bounded_array'}} + (void)__is_bounded_array(decltype(t32)); // expected-error{{variable length arrays are not supported in '__is_bounded_array'}} } void is_unbounded_array(int n) { @@ -772,7 +772,7 @@ void is_unbounded_array(int n) { static_assert(!__is_unbounded_array(cvoid *)); int t32[n]; - (void)__is_unbounded_array(decltype(t32)); // expected-error{{variable length arrays are not supported for '__is_unbounded_array'}} + (void)__is_unbounded_array(decltype(t32)); // expected-error{{variable length arrays are not supported in '__is_unbounded_array'}} } void is_referenceable() { @@ -1622,7 +1622,7 @@ enum class EnumClassLayout {}; enum EnumForward : int; enum class EnumClassForward; -struct CStructIncomplete; +struct CStructIncomplete; // 
#CStructIncomplete struct CStructNested { int a; @@ -1719,6 +1719,20 @@ struct StructWithAnonUnion3 { } u; }; +struct CStructWithArrayAtTheEnd { + int a; + int b[4]; +}; + +struct CStructWithFMA { + int c; + int d[]; +}; + +struct CStructWithFMA2 { + int e; + int f[]; +}; void is_layout_compatible(int n) { @@ -1741,8 +1755,11 @@ void is_layout_compatible(int n) static_assert(!__is_layout_compatible(unsigned char, signed char)); static_assert(__is_layout_compatible(int[], int[])); static_assert(__is_layout_compatible(int[2], int[2])); - static_assert(!__is_layout_compatible(int[n], int[2])); // FIXME: VLAs should be rejected - static_assert(!__is_layout_compatible(int[n], int[n])); // FIXME: VLAs should be rejected + static_assert(!__is_layout_compatible(int[n], int[2])); + // expected-error@-1 {{variable length arrays are not supported in '__is_layout_compatible'}} + static_assert(!__is_layout_compatible(int[n], int[n])); + // expected-error@-1 {{variable length arrays are not supported in '__is_layout_compatible'}} + // expected-error@-2 {{variable length arrays are not supported in '__is_layout_compatible'}} static_assert(__is_layout_compatible(int&, int&)); static_assert(!__is_layout_compatible(int&, char&)); static_assert(__is_layout_compatible(void(int), void(int))); @@ -1798,15 +1815,28 @@ void is_layout_compatible(int n) static_assert(__is_layout_compatible(EnumLayout, EnumClassLayout)); static_assert(__is_layout_compatible(EnumForward, EnumForward)); static_assert(__is_layout_compatible(EnumForward, EnumClassForward)); - // Layout compatibility for enums might be relaxed in the future. See https://github.com/cplusplus/CWG/issues/39#issuecomment-1184791364 + static_assert(__is_layout_compatible(CStructIncomplete, CStructIncomplete)); + // expected-error@-1 {{incomplete type 'CStructIncomplete' where a complete type is required}} + // expected-note@#CStructIncomplete {{forward declaration of 'CStructIncomplete'}} + // expected-error@-3 {{incomplete type 'CStructIncomplete' where a complete type is required}} + // expected-note@#CStructIncomplete {{forward declaration of 'CStructIncomplete'}} + static_assert(!__is_layout_compatible(CStruct, CStructIncomplete)); + // expected-error@-1 {{incomplete type 'CStructIncomplete' where a complete type is required}} + // expected-note@#CStructIncomplete {{forward declaration of 'CStructIncomplete'}} + static_assert(__is_layout_compatible(CStructIncomplete[2], CStructIncomplete[2])); + // expected-error@-1 {{incomplete type 'CStructIncomplete[2]' where a complete type is required}} + // expected-note@#CStructIncomplete {{forward declaration of 'CStructIncomplete'}} + // expected-error@-3 {{incomplete type 'CStructIncomplete[2]' where a complete type is required}} + // expected-note@#CStructIncomplete {{forward declaration of 'CStructIncomplete'}} + static_assert(__is_layout_compatible(CStructIncomplete[], CStructIncomplete[])); + static_assert(!__is_layout_compatible(CStructWithArrayAtTheEnd, CStructWithFMA)); + static_assert(__is_layout_compatible(CStructWithFMA, CStructWithFMA)); + static_assert(__is_layout_compatible(CStructWithFMA, CStructWithFMA2)); + // Layout compatibility rules for enums might be relaxed in the future. 
See https://github.com/cplusplus/CWG/issues/39#issuecomment-1184791364 static_assert(!__is_layout_compatible(EnumLayout, int)); static_assert(!__is_layout_compatible(EnumClassLayout, int)); static_assert(!__is_layout_compatible(EnumForward, int)); static_assert(!__is_layout_compatible(EnumClassForward, int)); - // FIXME: the following should be rejected (array of unknown bound and void are the only allowed incomplete types) - static_assert(__is_layout_compatible(CStructIncomplete, CStructIncomplete)); - static_assert(!__is_layout_compatible(CStruct, CStructIncomplete)); - static_assert(__is_layout_compatible(CStructIncomplete[2], CStructIncomplete[2])); } void is_signed() diff --git a/clang/test/SemaCXX/warn-unused-but-set-variables-cpp.cpp b/clang/test/SemaCXX/warn-unused-but-set-variables-cpp.cpp index 418baa78aa964..eaedb53bf4726 100644 --- a/clang/test/SemaCXX/warn-unused-but-set-variables-cpp.cpp +++ b/clang/test/SemaCXX/warn-unused-but-set-variables-cpp.cpp @@ -69,3 +69,11 @@ template void f5() { SWarnUnused swu; ++swu; } + +void f6() { + if (int x = 123) {} // expected-warning{{variable 'x' set but not used}} + + while (int x = 123) {} // expected-warning{{variable 'x' set but not used}} + + for (; int x = 123;) {} // expected-warning{{variable 'x' set but not used}} +} diff --git a/clang/test/SemaHLSL/literal_suffixes.hlsl b/clang/test/SemaHLSL/literal_suffixes.hlsl new file mode 100644 index 0000000000000..c8b69590195c1 --- /dev/null +++ b/clang/test/SemaHLSL/literal_suffixes.hlsl @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-library -fnative-half-type -Wconversion -Wno-implicit-float-size-conversion -verify %s + +void literal_assignments() { + half h; + + h = 2.0h; // No conversion, no diagnostic expected. + + // Literal conversions that don't lose precision also don't cause diagnostics. + // Conversion from double (no diagnostic expected) + h = 2.0l; + h = 2.0; + h = 2.0f; + + // Literal assignments with conversions that lose precision produce + // diagnostics under `-Wconversion`. + + // Lose precision on assignment. + h = 3.1415926535897932384626433h; // No diagnostic expected because this isn't a conversion. + + // Lose precision on assignment converting float to half. + h = 3.1415926535897932384626433f; // expected-warning {{implicit conversion loses floating-point precision: 'float' to 'half'}} + + // Lose precision on assignment converting float to half. + h = 3.1415926535897932384626433f * 2.0f; // expected-warning {{implicit conversion loses floating-point precision: 'float' to 'half'}} + + // Lose precision on assignment converting double to half. + h = 3.1415926535897932384626433l; // expected-warning {{implicit conversion loses floating-point precision: 'double' to 'half'}} + + // Lose precision on assignment converting double to half. + h = 3.1415926535897932384626433l * 2.0l; // expected-warning {{implicit conversion loses floating-point precision: 'double' to 'half'}} + + // Literal assignments of values out of the representable range produce + // warnings. + + h = 66000.h; // expected-warning {{magnitude of floating-point constant too large for type 'half'; maximum is 65504}} + h = -66000.h; // expected-warning {{magnitude of floating-point constant too large for type 'half'; maximum is 65504}} + + // The `h` suffix is invalid on integer literals.
+ h = 66000h; // expected-error {{invalid suffix 'h' on integer constant}} +} + +template +struct is_same { + static const bool value = false; +}; + +template +struct is_same { + static const bool value = true; +}; + +// The no-suffix behavior is currently wrong. The behavior in DXC is complicated +// and undocumented. We have a language change planned to address this, and an +// issue tracking: https://github.com/llvm/llvm-project/issues/85714. +_Static_assert(is_same::value, "1.0f literal is double (should be float)"); + +_Static_assert(is_same::value, "1.0h literal is half"); +_Static_assert(is_same::value, "1.0f literal is float"); +_Static_assert(is_same::value, "1.0l literal is double"); diff --git a/clang/test/SemaHLSL/literal_suffixes_no_16bit.hlsl b/clang/test/SemaHLSL/literal_suffixes_no_16bit.hlsl new file mode 100644 index 0000000000000..17ff8bceda592 --- /dev/null +++ b/clang/test/SemaHLSL/literal_suffixes_no_16bit.hlsl @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.2-library -Wconversion -Wno-implicit-float-size-conversion -verify %s + +void literal_assignments() { + half h; + + h = 2.0h; // No conversion, no diagnostic expected. + + // Literal conversions that don't lose precision also don't cause diagnostics. + // Conversion from double (no diagnostic expected) + h = 2.0l; + h = 2.0; + h = 2.0f; + + // Literal assignments with conversions that lose precision produce + // diagnostics under `-Wconversion`. + + // Lose precision on assignment. + h = 3.1415926535897932384626433h; // No diagnostic expected because this isn't a conversion. + + // Lose precision on assignment converting float to half. + h = 3.1415926535897932384626433f; // No diagnostic expected because half and float are the same size. + + // Lose precision on assignment converting float to half. + h = 3.1415926535897932384626433f * 2.0f; // No diagnostic expected because half and float are the same size. + + // Lose precision on assignment converting double to half. + h = 3.1415926535897932384626433l; // expected-warning {{implicit conversion loses floating-point precision: 'double' to 'half'}} + + // Lose precision on assignment converting double to half. + h = 3.1415926535897932384626433l * 2.0l; // expected-warning {{implicit conversion loses floating-point precision: 'double' to 'half'}} + + // Literal assignments of values out of the representable range produce + // warnings. + + h = 66000.h; // No diagnostic expected because half is 32-bit. + h = -66000.h; // No diagnostic expected because half is 32-bit. + + // The `h` suffix is invalid on integer literals. + h = 66000h; // expected-error {{invalid suffix 'h' on integer constant}} +} + +template +struct is_same { + static const bool value = false; +}; + +template +struct is_same { + static const bool value = true; +}; + +// The no-suffix behavior is currently wrong. The behavior in DXC is complicated +// and undocumented. We have a language change planned to address this, and an +// issue tracking: https://github.com/llvm/llvm-project/issues/85714.
+_Static_assert(is_same::value, "1.0f literal is double (should be float)"); + +_Static_assert(is_same::value, "1.0h literal is half"); +_Static_assert(is_same::value, "1.0f literal is float"); +_Static_assert(is_same::value, "1.0l literal is double"); diff --git a/clang/test/SemaTemplate/alias-template-with-lambdas.cpp b/clang/test/SemaTemplate/alias-template-with-lambdas.cpp new file mode 100644 index 0000000000000..ff94031e4d86f --- /dev/null +++ b/clang/test/SemaTemplate/alias-template-with-lambdas.cpp @@ -0,0 +1,105 @@ +// RUN: %clang_cc1 -std=c++2c -fsyntax-only -verify %s +namespace lambda_calls { + +template +concept True = true; + +template +concept False = false; // #False + +template struct S { + template using type = decltype([](U...) {}(U()...)); + template using type2 = decltype([](auto) {}(1)); + template using type3 = decltype([](True auto) {}(1)); + template + using type4 = decltype([](auto... pack) { return sizeof...(pack); }(1, 2)); + + template using type5 = decltype([](False auto...) {}(1)); // #Type5 + + template + using type6 = decltype([] {}.template operator()()); + template + using type7 = decltype([] {}.template operator()()); // #Type7 + + template + using type8 = decltype([]() // #Type8 + requires(sizeof(U) == 32) // #Type8-requirement + {}()); + + template + using type9 = decltype([](U...) {}.template operator()(U()...)); + // https://github.com/llvm/llvm-project/issues/76674 + template + using type10 = decltype([] { return V(); }.template operator()()); + + template using type11 = decltype([] { return U{}; }); +}; + +template using Meow = decltype([] {}.template operator()()); + +template +using MeowMeow = decltype([](U...) {}.template operator()(U()...)); + +// https://github.com/llvm/llvm-project/issues/70601 +template using U = decltype([] {}.template operator()()); + +U foo(); + +void bar() { + using T = S::type; + using T2 = S::type2; + using T3 = S::type3; + using T4 = S::type4; + using T5 = S::type5; // #T5 + // expected-error@#Type5 {{no matching function for call}} + // expected-note@#T5 {{type alias 'type5' requested here}} + // expected-note@#Type5 {{constraints not satisfied [with auto:1 = ]}} + // expected-note@#Type5 {{because 'int' does not satisfy 'False'}} + // expected-note@#False {{because 'false' evaluated to false}} + + using T6 = S::type6; + using T7 = S::type7; // #T7 + // expected-error@#Type7 {{no matching member function for call}} + // expected-note@#T7 {{type alias 'type7' requested here}} + // expected-note@#Type7 {{constraints not satisfied [with $0 = char]}} + // expected-note@#Type7 {{because 'char' does not satisfy 'False'}} + // expected-note@#False {{because 'false' evaluated to false}} + + using T8 = S::type8; // #T8 + // expected-error@#Type8 {{no matching function for call}} + // expected-note@#T8 {{type alias 'type8' requested here}} + // expected-note@#Type8 {{constraints not satisfied}} + // expected-note@#Type8-requirement {{because 'sizeof(char) == 32' (1 == 32) evaluated to false}} + + using T9 = S::type9; + using T10 = S::type10; + using T11 = S::type11; + int x = T11()(); + using T12 = Meow; + using T13 = MeowMeow; + + static_assert(__is_same(T, void)); + static_assert(__is_same(T2, void)); + static_assert(__is_same(T3, void)); + static_assert(__is_same(T4, decltype(sizeof(0)))); + static_assert(__is_same(T6, void)); + static_assert(__is_same(T9, void)); + static_assert(__is_same(T10, int)); + static_assert(__is_same(T12, void)); + static_assert(__is_same(T13, void)); +} + +namespace GH82104 { + +template int Zero 
= 0; + +template +using T14 = decltype([]() { return Zero; }()); + +template using T15 = T14; + +static_assert(__is_same(T15, int)); + +} // namespace GH82104 + +} // namespace lambda_calls diff --git a/clang/test/SemaTemplate/deduction-guide.cpp b/clang/test/SemaTemplate/deduction-guide.cpp index 0caef78fedbfd..58f08aa1eed65 100644 --- a/clang/test/SemaTemplate/deduction-guide.cpp +++ b/clang/test/SemaTemplate/deduction-guide.cpp @@ -248,3 +248,15 @@ G g = {1}; // CHECK: FunctionTemplateDecl // CHECK: |-CXXDeductionGuideDecl {{.*}} implicit 'auto (T) -> G' aggregate // CHECK: `-CXXDeductionGuideDecl {{.*}} implicit used 'auto (int) -> G' implicit_instantiation aggregate + +template +using AG = G; +AG ag = {1}; +// Verify that the aggregate deduction guide for alias templates is built. +// CHECK-LABEL: Dumping +// CHECK: FunctionTemplateDecl +// CHECK: |-CXXDeductionGuideDecl {{.*}} 'auto (type-parameter-0-0) -> G' +// CHECK: `-CXXDeductionGuideDecl {{.*}} 'auto (int) -> G' implicit_instantiation +// CHECK: |-TemplateArgument type 'int' +// CHECK: | `-BuiltinType {{.*}} 'int' +// CHECK: `-ParmVarDecl {{.*}} 'int' diff --git a/clang/test/SemaTemplate/instantiate-pure-virtual-function.cpp b/clang/test/SemaTemplate/instantiate-pure-virtual-function.cpp new file mode 100644 index 0000000000000..caec42b6b77f9 --- /dev/null +++ b/clang/test/SemaTemplate/instantiate-pure-virtual-function.cpp @@ -0,0 +1,67 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -Wundefined-func-template %s + +namespace GH74016 { + template class B { + public: + constexpr void foo(const T &) { bar(1); } + virtual constexpr void bar(unsigned int) = 0; + }; + + template class D : public B { + public: + constexpr void bar(unsigned int) override {} + }; + + void test() { + auto t = D(); + t.foo(0); + } +}; + +namespace call_pure_virtual_function_from_virtual { + template class B { + public: + const void foo(const T &) { B::bar(1); } // expected-warning {{instantiation of function 'call_pure_virtual_function_from_virtual::B::bar' required here, but no definition is available}} + // expected-note@-1 {{add an explicit instantiation declaration to suppress this warning if 'call_pure_virtual_function_from_virtual::B::bar' is explicitly instantiated in another translation unit}} + virtual const void bar(unsigned int) = 0; // expected-note {{forward declaration of template entity is here}} + }; + + template class D : public B { + public: + const void bar(unsigned int) override {} + }; + + void test() { + auto t = D(); + t.foo(0); // expected-note {{in instantiation of member function 'call_pure_virtual_function_from_virtual::B::foo' requested here}} + } +}; + +namespace non_pure_virtual_function { + template class B { + public: + constexpr void foo(const T &) { bar(1); } + + virtual constexpr void bar(unsigned int); // expected-warning {{inline function 'non_pure_virtual_function::B::bar' is not defined}} + // expected-note@-1 {{forward declaration of template entity is here}} + // expected-note@-2 {{forward declaration of template entity is here}} + // expected-note@-3 {{forward declaration of template entity is here}} + }; + + template class D : public B { // expected-warning {{instantiation of function 'non_pure_virtual_function::B::bar' required here, but no definition is available}} +// expected-warning@-1 {{instantiation of function 'non_pure_virtual_function::B::bar' required here, but no definition is available}} +// expected-warning@-2 {{instantiation of function 'non_pure_virtual_function::B::bar' required here, but no 
definition is available}} +// expected-note@-3 {{add an explicit instantiation declaration to suppress this warning if 'non_pure_virtual_function::B::bar' is explicitly instantiated in another translation unit}} +// expected-note@-4 {{add an explicit instantiation declaration to suppress this warning if 'non_pure_virtual_function::B::bar' is explicitly instantiated in another translation unit}} +// expected-note@-5 {{add an explicit instantiation declaration to suppress this warning if 'non_pure_virtual_function::B::bar' is explicitly instantiated in another translation unit}} +// expected-note@-6 {{used here}} + + public: + constexpr void bar(unsigned int) override { } + }; + + void test() { + auto t = D(); + t.foo(0); + } +}; diff --git a/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp b/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp index dcab9bfaeabcb..6977623a0816e 100644 --- a/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp +++ b/clang/test/SemaTemplate/ms-function-specialization-class-scope.cpp @@ -1,7 +1,6 @@ -// RUN: %clang_cc1 -fms-extensions -fsyntax-only -verify %s -// RUN: %clang_cc1 -fms-extensions -fdelayed-template-parsing -fsyntax-only -verify %s +// RUN: %clang_cc1 -fms-extensions -fsyntax-only -Wno-unused-value -verify %s +// RUN: %clang_cc1 -fms-extensions -fdelayed-template-parsing -fsyntax-only -Wno-unused-value -verify %s -// expected-no-diagnostics class A { public: template A(U p) {} @@ -76,3 +75,42 @@ struct S { int f<0>(int); }; } + +namespace UsesThis { + template + struct A { + int x; + + template + static void f(); + + template<> + void f() { + this->x; // expected-error {{invalid use of 'this' outside of a non-static member function}} + x; // expected-error {{invalid use of member 'x' in static member function}} + A::x; // expected-error {{invalid use of member 'x' in static member function}} + +x; // expected-error {{invalid use of member 'x' in static member function}} + +A::x; // expected-error {{invalid use of member 'x' in static member function}} + } + + template + void g(); + + template<> + void g() { + this->x; + x; + A::x; + +x; + +A::x; + } + + template + static auto h() -> A*; + + template<> + auto h() -> decltype(this); // expected-error {{'this' cannot be used in a static member function declaration}} + }; + + template struct A; // expected-note 2{{in instantiation of}} +} diff --git a/clang/tools/clang-installapi/CMakeLists.txt b/clang/tools/clang-installapi/CMakeLists.txt index b90ffc847b155..9c0d9dff7dc7f 100644 --- a/clang/tools/clang-installapi/CMakeLists.txt +++ b/clang/tools/clang-installapi/CMakeLists.txt @@ -1,4 +1,5 @@ set(LLVM_LINK_COMPONENTS + BinaryFormat Support TargetParser TextAPI diff --git a/clang/tools/clang-installapi/ClangInstallAPI.cpp b/clang/tools/clang-installapi/ClangInstallAPI.cpp index 13061cfa36eeb..fd71aaec59435 100644 --- a/clang/tools/clang-installapi/ClangInstallAPI.cpp +++ b/clang/tools/clang-installapi/ClangInstallAPI.cpp @@ -101,6 +101,16 @@ static bool run(ArrayRef Args, const char *ProgName) { if (Diag->hasErrorOccurred()) return EXIT_FAILURE; + if (!Opts.DriverOpts.DylibToVerify.empty()) { + TargetList Targets; + llvm::for_each(Opts.DriverOpts.Targets, + [&](const auto &T) { Targets.push_back(T.first); }); + if (!Ctx.Verifier->verifyBinaryAttrs(Targets, Ctx.BA, Ctx.Reexports, + Opts.LinkerOpts.AllowableClients, + Opts.LinkerOpts.RPaths, Ctx.FT)) + return EXIT_FAILURE; + }; + // Set up compilation. 
std::unique_ptr CI(new CompilerInstance()); CI->setFileManager(FM.get()); @@ -108,9 +118,10 @@ static bool run(ArrayRef Args, const char *ProgName) { if (!CI->hasDiagnostics()) return EXIT_FAILURE; - // Execute and gather AST results. + // Execute, verify and gather AST results. // An invocation is run for each unique target triple and for each header // access level. + Records FrontendRecords; for (const auto &[Targ, Trip] : Opts.DriverOpts.Targets) { Ctx.Verifier->setTarget(Targ); Ctx.Slice = std::make_shared(Trip); @@ -121,6 +132,7 @@ static bool run(ArrayRef Args, const char *ProgName) { InMemoryFileSystem.get(), Opts.getClangFrontendArgs())) return EXIT_FAILURE; } + FrontendRecords.emplace_back(std::move(Ctx.Slice)); } if (Ctx.Verifier->verifyRemainingSymbols() == DylibVerifier::Result::Invalid) @@ -136,10 +148,25 @@ static bool run(ArrayRef Args, const char *ProgName) { // Assign attributes for serialization. InterfaceFile IF(Ctx.Verifier->getExports()); + // Assign attributes that are the same per slice first. for (const auto &TargetInfo : Opts.DriverOpts.Targets) { IF.addTarget(TargetInfo.first); IF.setFromBinaryAttrs(Ctx.BA, TargetInfo.first); } + // Then assign potentially different attributes per slice after. + auto assignLibAttrs = + [&IF]( + const auto &Attrs, + std::function Add) { + for (const auto &Lib : Attrs) + for (const auto &T : IF.targets(Lib.getValue())) + Add(&IF, Lib.getKey(), T); + }; + + assignLibAttrs(Opts.LinkerOpts.AllowableClients, + &InterfaceFile::addAllowableClient); + assignLibAttrs(Opts.LinkerOpts.RPaths, &InterfaceFile::addRPath); + assignLibAttrs(Ctx.Reexports, &InterfaceFile::addReexportedLibrary); // Write output file and perform CI cleanup. if (auto Err = TextAPIWriter::writeToStream(*Out, IF, Ctx.FT)) { diff --git a/clang/tools/clang-installapi/InstallAPIOpts.td b/clang/tools/clang-installapi/InstallAPIOpts.td index 010f2507a1d1f..8b1998c280dd6 100644 --- a/clang/tools/clang-installapi/InstallAPIOpts.td +++ b/clang/tools/clang-installapi/InstallAPIOpts.td @@ -17,11 +17,23 @@ include "llvm/Option/OptParser.td" ///////// // Options -// TextAPI options. +// +/// TextAPI options. +// def filetype : Joined<["--"], "filetype=">, HelpText<"Specify the output file type (tbd-v4 or tbd-v5)">; +def not_for_dyld_shared_cache : Joined<["-"], "not_for_dyld_shared_cache">, + HelpText<"Mark library as shared cache ineligible">; + +// +/// Debugging or logging options. +// +def t: Flag<["-"], "t">, + HelpText<"Logs each dylib loaded for InstallAPI. Useful for debugging problems with search paths where the wrong library is loaded.">; -// Verification options. +// +/// Verification options. +// def verify_against : Separate<["-"], "verify-against">, HelpText<"Verify the specified dynamic library/framework against the headers">; def verify_against_EQ : Joined<["--"], "verify-against=">, Alias; @@ -32,7 +44,9 @@ def demangle : Flag<["--", "-"], "demangle">, def dsym: Joined<["--"], "dsym=">, MetaVarName<"">, HelpText<"Specify dSYM path for enriched diagnostics.">; -// Additional input options.
+//
 def extra_project_header : Separate<["-"], "extra-project-header">,
   MetaVarName<"<path>">,
   HelpText<"Add additional project header location for parsing">;
@@ -75,3 +89,23 @@
 def project_umbrella_header : Separate<["-"], "project-umbrella-header">,
   MetaVarName<"<path>">,
   HelpText<"Specify the project umbrella header location">;
 def project_umbrella_header_EQ : Joined<["--"], "project-umbrella-header=">,
   Alias<project_umbrella_header>;
+
+//
+/// Overridden clang options for different behavior.
+//
+
+// Clang's Xarch does not support options that require arguments, but it is
+// supported for InstallAPI generation.
+def Xarch__ : Joined<["-"], "Xarch_">;
+def allowable_client : Separate<["-"], "allowable_client">,
+  HelpText<"Restricts what can link against the dynamic library being created">;
+def rpath: Separate<["-"], "rpath">,
+  HelpText<"Add path to the runpath search path list for the dynamic library being created.">;
+def reexport_l : Joined<["-"], "reexport-l">,
+  HelpText<"Re-export the specified library">;
+def reexport_library : Separate<["-"], "reexport_library">, MetaVarName<"<path>">,
+  HelpText<"Re-export the specified library">;
+def reexport_framework : Separate<["-"], "reexport_framework">,
+  HelpText<"Re-export the specified framework">;
+
diff --git a/clang/tools/clang-installapi/Options.cpp b/clang/tools/clang-installapi/Options.cpp
index c4f39b7c84174..d1f9118128009 100644
--- a/clang/tools/clang-installapi/Options.cpp
+++ b/clang/tools/clang-installapi/Options.cpp
@@ -7,14 +7,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "Options.h"
+#include "clang/Basic/DiagnosticIDs.h"
 #include "clang/Driver/Driver.h"
-#include "clang/Frontend/FrontendDiagnostic.h"
 #include "clang/InstallAPI/FileList.h"
 #include "clang/InstallAPI/HeaderFile.h"
 #include "clang/InstallAPI/InstallAPIDiagnostic.h"
+#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Support/Program.h"
 #include "llvm/TargetParser/Host.h"
 #include "llvm/TextAPI/DylibReader.h"
+#include "llvm/TextAPI/TextAPIError.h"
+#include "llvm/TextAPI/TextAPIReader.h"
 #include "llvm/TextAPI/TextAPIWriter.h"
 
 using namespace llvm;
@@ -45,9 +48,21 @@ static constexpr const ArrayRef
 /// Create table mapping all options defined in InstallAPIOpts.td.
 static constexpr OptTable::Info InfoTable[] = {
 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS,         \
-               VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES)                   \
-  {PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, Option::KIND##Class,             \
-   PARAM, FLAGS, VISIBILITY, OPT_##GROUP, OPT_##ALIAS, ALIASARGS,              \
+               VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR,     \
+               VALUES)                                                         \
+  {PREFIX,                                                                     \
+   NAME,                                                                       \
+   HELPTEXT,                                                                   \
+   HELPTEXTSFORVARIANTS,                                                       \
+   METAVAR,                                                                    \
+   OPT_##ID,                                                                   \
+   Option::KIND##Class,                                                        \
+   PARAM,                                                                      \
+   FLAGS,                                                                      \
+   VISIBILITY,                                                                 \
+   OPT_##GROUP,                                                                \
+   OPT_##ALIAS,                                                                \
+   ALIASARGS,                                                                  \
    VALUES},
 #include "InstallAPIOpts.inc"
 #undef OPTION
@@ -125,6 +140,56 @@ bool Options::processDriverOptions(InputArgList &Args) {
   return true;
 }
 
+bool Options::processInstallAPIXOptions(InputArgList &Args) {
+  for (arg_iterator It = Args.begin(), End = Args.end(); It != End; ++It) {
+    if ((*It)->getOption().matches(OPT_Xarch__)) {
+      if (!processXarchOption(Args, It))
+        return false;
+    }
+  }
+  // TODO: Add support for all of the X* options installapi supports.
+
+  return true;
+}
+
+bool Options::processXarchOption(InputArgList &Args, arg_iterator Curr) {
+  Arg *CurrArg = *Curr;
+  Architecture Arch = getArchitectureFromName(CurrArg->getValue(0));
+  if (Arch == AK_unknown) {
+    Diags->Report(diag::err_drv_invalid_arch_name)
+        << CurrArg->getAsString(Args);
+    return false;
+  }
+
+  auto NextIt = std::next(Curr);
+  if (NextIt == Args.end()) {
+    Diags->Report(diag::err_drv_missing_argument)
+        << CurrArg->getAsString(Args) << 1;
+    return false;
+  }
+
+  // InstallAPI has a limited understanding of supported Xarch options.
+  // Currently this is restricted to linker inputs.
+  const Arg *NextArg = *NextIt;
+  switch (NextArg->getOption().getID()) {
+  case OPT_allowable_client:
+  case OPT_reexport_l:
+  case OPT_reexport_framework:
+  case OPT_reexport_library:
+  case OPT_rpath:
+    break;
+  default:
+    Diags->Report(diag::err_drv_invalid_argument_to_option)
+        << NextArg->getAsString(Args) << CurrArg->getAsString(Args);
+    return false;
+  }
+
+  ArgToArchMap[NextArg] = Arch;
+  CurrArg->claim();
+
+  return true;
+}
+
 bool Options::processLinkerOptions(InputArgList &Args) {
   // Handle required arguments.
   if (const Arg *A = Args.getLastArg(drv::OPT_install__name))
@@ -141,6 +206,12 @@ bool Options::processLinkerOptions(InputArgList &Args) {
   if (auto *Arg = Args.getLastArg(drv::OPT_compatibility__version))
     LinkerOpts.CompatVersion.parse64(Arg->getValue());
 
+  if (auto *Arg = Args.getLastArg(drv::OPT_umbrella))
+    LinkerOpts.ParentUmbrella = Arg->getValue();
+
   LinkerOpts.IsDylib = Args.hasArg(drv::OPT_dynamiclib);
 
   LinkerOpts.AppExtensionSafe = Args.hasFlag(
@@ -152,12 +223,24 @@ bool Options::processLinkerOptions(InputArgList &Args) {
   if (::getenv("LD_APPLICATION_EXTENSION_SAFE") != nullptr)
     LinkerOpts.AppExtensionSafe = true;
 
+  // Capture library paths.
+  PathSeq LibraryPaths;
+  for (const Arg *A : Args.filtered(drv::OPT_L)) {
+    LibraryPaths.emplace_back(A->getValue());
+    A->claim();
+  }
+
+  if (!LibraryPaths.empty())
+    LinkerOpts.LibPaths = std::move(LibraryPaths);
+
   return true;
 }
 
+// NOTE: Do not claim any arguments, as they will be passed along for CC1
+// invocations.
 bool Options::processFrontendOptions(InputArgList &Args) {
-  // Do not claim any arguments, as they will be passed along for CC1
-  // invocations.
+  // Capture language mode.
   if (auto *A = Args.getLastArgNoClaim(drv::OPT_x)) {
     FEOpts.LangMode = llvm::StringSwitch<clang::Language>(A->getValue())
                           .Case("c", clang::Language::C)
@@ -179,6 +262,54 @@ bool Options::processFrontendOptions(InputArgList &Args) {
     FEOpts.LangMode = clang::Language::ObjCXX;
   }
 
+  // Capture the sysroot.
+  if (const Arg *A = Args.getLastArgNoClaim(drv::OPT_isysroot)) {
+    SmallString<PATH_MAX> Path(A->getValue());
+    FM->makeAbsolutePath(Path);
+    if (!FM->getOptionalDirectoryRef(Path)) {
+      Diags->Report(diag::err_missing_sysroot) << Path;
+      return false;
+    }
+    FEOpts.ISysroot = std::string(Path);
+  } else if (FEOpts.ISysroot.empty()) {
+    // Mirror the clang driver and obtain the isysroot from the SDKROOT
+    // environment variable if it wasn't specified on the command line.
+    if (auto *Env = ::getenv("SDKROOT")) {
+      if (StringRef(Env) != "/" && llvm::sys::path::is_absolute(Env) &&
+          FM->getOptionalFileRef(Env))
+        FEOpts.ISysroot = Env;
+    }
+  }
+
+  // Capture system framework paths.
+  // TODO: Support passing framework paths per platform.
+  for (const Arg *A : Args.filtered(drv::OPT_iframework))
+    FEOpts.SystemFwkPaths.emplace_back(A->getValue());
+
+  // Capture framework paths.
+  PathSeq FrameworkPaths;
+  for (const Arg *A : Args.filtered(drv::OPT_F))
+    FrameworkPaths.emplace_back(A->getValue());
+
+  if (!FrameworkPaths.empty())
+    FEOpts.FwkPaths = std::move(FrameworkPaths);
+
+  // Add default framework/library paths.
+  PathSeq DefaultLibraryPaths = {"/usr/lib", "/usr/local/lib"};
+  PathSeq DefaultFrameworkPaths = {"/Library/Frameworks",
+                                   "/System/Library/Frameworks"};
+
+  for (const StringRef LibPath : DefaultLibraryPaths) {
+    SmallString<PATH_MAX> Path(FEOpts.ISysroot);
+    sys::path::append(Path, LibPath);
+    LinkerOpts.LibPaths.emplace_back(Path.str());
+  }
+  for (const StringRef FwkPath : DefaultFrameworkPaths) {
+    SmallString<PATH_MAX> Path(FEOpts.ISysroot);
+    sys::path::append(Path, FwkPath);
+    FEOpts.SystemFwkPaths.emplace_back(Path.str());
+  }
+
   return true;
 }
 
@@ -212,6 +343,9 @@ Options::processAndFilterOutInstallAPIOptions(ArrayRef<const char *> Args) {
                            MissingArgCount, Visibility());
 
   // Capture InstallAPI only driver options.
+  if (!processInstallAPIXOptions(ParsedArgs))
+    return {};
+
   DriverOpts.Demangle = ParsedArgs.hasArg(OPT_demangle);
 
   if (auto *A = ParsedArgs.getLastArg(OPT_filetype)) {
@@ -244,6 +378,42 @@ Options::processAndFilterOutInstallAPIOptions(ArrayRef<const char *> Args) {
   if (const Arg *A = ParsedArgs.getLastArg(OPT_dsym))
     DriverOpts.DSYMPath = A->getValue();
 
+  DriverOpts.TraceLibraryLocation = ParsedArgs.hasArg(OPT_t);
+
+  // Linker options not handled by the clang driver.
+  LinkerOpts.OSLibNotForSharedCache =
+      ParsedArgs.hasArg(OPT_not_for_dyld_shared_cache);
+
+  for (const Arg *A : ParsedArgs.filtered(OPT_allowable_client)) {
+    LinkerOpts.AllowableClients[A->getValue()] =
+        ArgToArchMap.count(A) ? ArgToArchMap[A] : ArchitectureSet();
+    A->claim();
+  }
+
+  for (const Arg *A : ParsedArgs.filtered(OPT_reexport_l)) {
+    LinkerOpts.ReexportedLibraries[A->getValue()] =
+        ArgToArchMap.count(A) ? ArgToArchMap[A] : ArchitectureSet();
+    A->claim();
+  }
+
+  for (const Arg *A : ParsedArgs.filtered(OPT_reexport_library)) {
+    LinkerOpts.ReexportedLibraryPaths[A->getValue()] =
+        ArgToArchMap.count(A) ? ArgToArchMap[A] : ArchitectureSet();
+    A->claim();
+  }
+
+  for (const Arg *A : ParsedArgs.filtered(OPT_reexport_framework)) {
+    LinkerOpts.ReexportedFrameworks[A->getValue()] =
+        ArgToArchMap.count(A) ? ArgToArchMap[A] : ArchitectureSet();
+    A->claim();
+  }
+
+  for (const Arg *A : ParsedArgs.filtered(OPT_rpath)) {
+    LinkerOpts.RPaths[A->getValue()] =
+        ArgToArchMap.count(A) ? ArgToArchMap[A] : ArchitectureSet();
+    A->claim();
+  }
+
   // Handle exclude & extra header directories or files.
   auto handleAdditionalInputArgs = [&](PathSeq &Headers,
                                        clang::installapi::ID OptID) {
@@ -324,6 +494,22 @@ Options::Options(DiagnosticsEngine &Diag, FileManager *FM,
   if (!processFrontendOptions(ArgList))
     return;
 
+  // After all necessary InstallAPI arguments have been collected, go back and
+  // assign values that were unknown before the clang driver opt table was used.
+  ArchitectureSet AllArchs;
+  llvm::for_each(DriverOpts.Targets,
+                 [&AllArchs](const auto &T) { AllArchs.set(T.first.Arch); });
+  auto assignDefaultLibAttrs = [&AllArchs](LibAttrs &Attrs) {
+    for (StringMapEntry<ArchitectureSet> &Entry : Attrs)
+      if (Entry.getValue().empty())
+        Entry.setValue(AllArchs);
+  };
+  assignDefaultLibAttrs(LinkerOpts.AllowableClients);
+  assignDefaultLibAttrs(LinkerOpts.ReexportedFrameworks);
+  assignDefaultLibAttrs(LinkerOpts.ReexportedLibraries);
+  assignDefaultLibAttrs(LinkerOpts.ReexportedLibraryPaths);
+  assignDefaultLibAttrs(LinkerOpts.RPaths);
+
   /// Force cc1 options that should always be on.
   FrontendArgs = {"-fsyntax-only", "-Wprivate-extern"};
@@ -345,6 +531,89 @@ static StringRef getFrameworkNameFromInstallName(StringRef InstallName) {
   return Match.back();
 }
 
+static Expected<std::unique_ptr<InterfaceFile>>
+getInterfaceFile(const StringRef Filename) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+      MemoryBuffer::getFile(Filename);
+  if (auto Err = BufferOrErr.getError())
+    return errorCodeToError(std::move(Err));
+
+  auto Buffer = std::move(*BufferOrErr);
+  switch (identify_magic(Buffer->getBuffer())) {
+  case file_magic::macho_dynamically_linked_shared_lib:
+    LLVM_FALLTHROUGH;
+  case file_magic::macho_dynamically_linked_shared_lib_stub:
+    LLVM_FALLTHROUGH;
+  case file_magic::macho_universal_binary:
+    return DylibReader::get(Buffer->getMemBufferRef());
+  case file_magic::tapi_file:
+    return TextAPIReader::get(Buffer->getMemBufferRef());
+  default:
+    return make_error<TextAPIError>(TextAPIErrorCode::InvalidInputFormat,
+                                    "unsupported library file format");
+  }
+  llvm_unreachable("unexpected failure in getInterfaceFile");
+}
+
+std::pair<LibAttrs, ReexportedInterfaces> Options::getReexportedLibraries() {
+  LibAttrs Reexports;
+  ReexportedInterfaces ReexportIFs;
+  auto AccumulateReexports = [&](StringRef Path, const ArchitectureSet &Archs) {
+    auto ReexportIFOrErr = getInterfaceFile(Path);
+    if (!ReexportIFOrErr)
+      return false;
+    std::unique_ptr<InterfaceFile> Reexport = std::move(*ReexportIFOrErr);
+    StringRef InstallName = Reexport->getInstallName();
+    assert(!InstallName.empty() && "Parse error for install name");
+    Reexports.insert({InstallName, Archs});
+    ReexportIFs.emplace_back(std::move(*Reexport));
+    return true;
+  };
+
+  // Populate search paths by looking at user paths before system ones.
+  PathSeq FwkSearchPaths(FEOpts.FwkPaths.begin(), FEOpts.FwkPaths.end());
+  // FIXME: System framework paths need to be reset if installapi is invoked
+  // with different platforms.
+  FwkSearchPaths.insert(FwkSearchPaths.end(), FEOpts.SystemFwkPaths.begin(),
+                        FEOpts.SystemFwkPaths.end());
+
+  for (const StringMapEntry<ArchitectureSet> &Lib :
+       LinkerOpts.ReexportedLibraries) {
+    std::string Name = "lib" + Lib.getKey().str() + ".dylib";
+    std::string Path = findLibrary(Name, *FM, {}, LinkerOpts.LibPaths, {});
+    if (Path.empty()) {
+      Diags->Report(diag::err_cannot_find_reexport) << true << Lib.getKey();
+      return {};
+    }
+    if (DriverOpts.TraceLibraryLocation)
+      errs() << Path << "\n";
+
+    AccumulateReexports(Path, Lib.getValue());
+  }
+
+  for (const StringMapEntry<ArchitectureSet> &Lib :
+       LinkerOpts.ReexportedLibraryPaths)
+    AccumulateReexports(Lib.getKey(), Lib.getValue());
+
+  for (const StringMapEntry<ArchitectureSet> &Lib :
+       LinkerOpts.ReexportedFrameworks) {
+    std::string Name = (Lib.getKey() + ".framework/" + Lib.getKey()).str();
+    std::string Path = findLibrary(Name, *FM, FwkSearchPaths, {}, {});
+    if (Path.empty()) {
+      Diags->Report(diag::err_cannot_find_reexport) << false << Lib.getKey();
+      return {};
+    }
+    if (DriverOpts.TraceLibraryLocation)
+      errs() << Path << "\n";
+
+    AccumulateReexports(Path, Lib.getValue());
+  }
+
+  return {std::move(Reexports), std::move(ReexportIFs)};
+}
+
 InstallAPIContext Options::createContext() {
   InstallAPIContext Ctx;
   Ctx.FM = FM;
@@ -357,10 +626,17 @@ InstallAPIContext Options::createContext() {
   Ctx.BA.CurrentVersion = LinkerOpts.CurrentVersion;
   Ctx.BA.CompatVersion = LinkerOpts.CompatVersion;
   Ctx.BA.AppExtensionSafe = LinkerOpts.AppExtensionSafe;
+  Ctx.BA.ParentUmbrella = LinkerOpts.ParentUmbrella;
+  Ctx.BA.OSLibNotForSharedCache = LinkerOpts.OSLibNotForSharedCache;
   Ctx.FT = DriverOpts.OutFT;
   Ctx.OutputLoc = DriverOpts.OutputPath;
   Ctx.LangMode = FEOpts.LangMode;
 
+  auto [Reexports, ReexportedIFs] = getReexportedLibraries();
+  if (Diags->hasErrorOccurred())
+    return Ctx;
+  Ctx.Reexports = Reexports;
+
   // Attempt to find umbrella headers by capturing framework name.
   StringRef FrameworkName;
   if (!LinkerOpts.IsDylib)
@@ -520,13 +796,14 @@ InstallAPIContext Options::createContext() {
   Expected<Records> Slices =
       DylibReader::readFile((*Buffer)->getMemBufferRef(), PO);
   if (auto Err = Slices.takeError()) {
-    Diags->Report(diag::err_cannot_open_file) << DriverOpts.DylibToVerify;
+    Diags->Report(diag::err_cannot_open_file)
+        << DriverOpts.DylibToVerify << std::move(Err);
     return Ctx;
   }
   Ctx.Verifier = std::make_unique<DylibVerifier>(
-      std::move(*Slices), Diags, DriverOpts.VerifyMode, DriverOpts.Demangle,
-      DriverOpts.DSYMPath);
+      std::move(*Slices), std::move(ReexportedIFs), Diags,
+      DriverOpts.VerifyMode, DriverOpts.Demangle, DriverOpts.DSYMPath);
   return Ctx;
 }
diff --git a/clang/tools/clang-installapi/Options.h b/clang/tools/clang-installapi/Options.h
index 82e04b49d1259..6da1469de2c89 100644
--- a/clang/tools/clang-installapi/Options.h
+++ b/clang/tools/clang-installapi/Options.h
@@ -20,7 +20,6 @@
 #include "llvm/Option/Option.h"
 #include "llvm/Support/Program.h"
 #include "llvm/TargetParser/Triple.h"
-#include
 #include
 #include
@@ -81,9 +80,30 @@ struct DriverOptions {
 
   /// \brief Print verbose output.
   bool Verbose = false;
+
+  /// \brief Log libraries loaded.
+  bool TraceLibraryLocation = false;
 };
 
 struct LinkerOptions {
+  /// \brief List of allowable clients to use for the dynamic library.
+  LibAttrs AllowableClients;
+
+  /// \brief List of reexported libraries to use for the dynamic library.
+  LibAttrs ReexportedLibraries;
+
+  /// \brief List of reexported library paths to use for the dynamic library.
+  LibAttrs ReexportedLibraryPaths;
+
+  /// \brief List of reexported frameworks to use for the dynamic library.
+  LibAttrs ReexportedFrameworks;
+
+  /// \brief List of rpaths to use for the dynamic library.
+  LibAttrs RPaths;
+
+  /// \brief Additional library search paths.
+  PathSeq LibPaths;
+
   /// \brief The install name to use for the dynamic library.
   std::string InstallName;
@@ -93,18 +113,34 @@ struct LinkerOptions {
   /// \brief The compatibility version to use for the dynamic library.
   PackedVersion CompatVersion;
 
+  /// \brief Name of the umbrella library.
+  std::string ParentUmbrella;
+
   /// \brief Is application extension safe.
   bool AppExtensionSafe = false;
 
   /// \brief Set if we should scan for a dynamic library and not a framework.
   bool IsDylib = false;
+
+  /// \brief Is an OS library that is not shared cache eligible.
+  bool OSLibNotForSharedCache = false;
 };
 
 struct FrontendOptions {
   /// \brief The language mode to parse headers in.
   Language LangMode = Language::ObjC;
+
+  /// \brief The sysroot to search for SDK headers or libraries.
+  std::string ISysroot;
+
+  /// \brief Additional framework search paths.
+  PathSeq FwkPaths;
+
+  /// \brief Additional SYSTEM framework search paths.
+  PathSeq SystemFwkPaths;
 };
 
+using arg_iterator = llvm::opt::arg_iterator;
+
 class Options {
 private:
   bool processDriverOptions(llvm::opt::InputArgList &Args);
@@ -112,6 +148,8 @@ class Options {
   bool processFrontendOptions(llvm::opt::InputArgList &Args);
   std::vector<const char *>
   processAndFilterOutInstallAPIOptions(ArrayRef<const char *> Args);
+  bool processInstallAPIXOptions(llvm::opt::InputArgList &Args);
+  bool processXarchOption(llvm::opt::InputArgList &Args, arg_iterator Curr);
 
 public:
   /// The various options grouped together.
@@ -136,15 +174,19 @@ class Options {
   bool addFilePaths(llvm::opt::InputArgList &Args, PathSeq &Headers,
                     llvm::opt::OptSpecifier ID);
 
+  std::pair<LibAttrs, ReexportedInterfaces> getReexportedLibraries();
+
   DiagnosticsEngine *Diags;
   FileManager *FM;
   std::vector<std::string> FrontendArgs;
+  llvm::DenseMap<const llvm::opt::Arg *, Architecture> ArgToArchMap;
 };
 
 enum ID {
   OPT_INVALID = 0, // This is not an option ID.
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES) \ + VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, \ + VALUES) \ OPT_##ID, #include "InstallAPIOpts.inc" LastOption diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 63b685f3d1802..ec0ed432c1335 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -1017,6 +1017,7 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); SmallVector CmdArgs{ *ClangPath, + "--no-default-config", "-o", *TempFileOrErr, Args.MakeArgString("--target=" + Triple.getTriple()), diff --git a/clang/tools/scan-build/libexec/ccc-analyzer b/clang/tools/scan-build/libexec/ccc-analyzer index 60796a543fcd0..c5588814e8f0d 100755 --- a/clang/tools/scan-build/libexec/ccc-analyzer +++ b/clang/tools/scan-build/libexec/ccc-analyzer @@ -361,6 +361,7 @@ sub Analyze { my %CompileOptionMap = ( '-nostdinc' => 0, + '-nostdlibinc' => 0, '-include' => 1, '-idirafter' => 1, '-imacros' => 1, diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 35ab7e3b7fe31..acc596fef87b7 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -5317,6 +5317,34 @@ TEST_P(ASTImporterOptionSpecificTestBase, EXPECT_FALSE(ToX); } +TEST_P(ASTImporterOptionSpecificTestBase, VarTemplateDeclInlineWithCXX17) { + Decl *FromTU = getTuDecl( + R"( + struct S { + template static constexpr bool X = true; + }; + )", + Lang_CXX17, "input1.cc"); + Decl *FromTU2 = getTuDecl( + R"( + struct S { + template static constexpr bool X = true; + template void get() { X; } + }; + template U qvariant_cast(const S &v) { return v.get; } + )", + Lang_CXX17, "input2.cc"); + auto *FromX = FirstDeclMatcher().match( + FromTU, varTemplateDecl(hasName("X"))); + auto *ToX = Import(FromX, Lang_CXX17); + ASSERT_TRUE(ToX); + auto *FromX2 = FirstDeclMatcher().match( + FromTU2, varTemplateDecl(hasName("X"))); + auto *ToX2 = Import(FromX2, Lang_CXX17); + EXPECT_TRUE(ToX2); + EXPECT_EQ(ToX, ToX2); +} + TEST_P(ASTImporterOptionSpecificTestBase, VarTemplateParameterDeclContext) { constexpr auto Code = R"( diff --git a/clang/unittests/AST/Interp/CMakeLists.txt b/clang/unittests/AST/Interp/CMakeLists.txt index 8fa5c85064dbc..ea727cdd4412b 100644 --- a/clang/unittests/AST/Interp/CMakeLists.txt +++ b/clang/unittests/AST/Interp/CMakeLists.txt @@ -1,5 +1,6 @@ add_clang_unittest(InterpTests Descriptor.cpp + toAPValue.cpp ) clang_target_link_libraries(InterpTests diff --git a/clang/unittests/AST/Interp/toAPValue.cpp b/clang/unittests/AST/Interp/toAPValue.cpp new file mode 100644 index 0000000000000..d0dfb40d51495 --- /dev/null +++ b/clang/unittests/AST/Interp/toAPValue.cpp @@ -0,0 +1,90 @@ +#include "../../../lib/AST/Interp/Context.h" +#include "../../../lib/AST/Interp/Descriptor.h" +#include "../../../lib/AST/Interp/Program.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Tooling/Tooling.h" +#include "gtest/gtest.h" + +using namespace clang; +using namespace clang::interp; +using namespace clang::ast_matchers; + +/// Test the various toAPValue implementations. 
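A quick orientation before the tests, since their assertions encode it: in the experimental constant interpreter, converting a Pointer to an APValue yields an lvalue carrying a base declaration plus a designator path. For the pointer b in the first test's source string, which holds &d.a[1].z, the expected shape is (a sketch in comments; names come from the test code):

    // Expected APValue for the pointee of 'b', i.e. &d.a[1].z:
    //   A.isLValue() && A.hasLValuePath()
    //   A.getLValueBase()        == the VarDecl for 'd'
    //   A.getLValuePath().size() == 3   // field 'a', index 1, field 'z'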
+TEST(ToAPValue, Pointers) { + constexpr char Code[] = + "struct A { bool a; bool z; };\n" + "struct S {\n" + " A a[3];\n" + "};\n" + "constexpr S d = {{{true, false}, {false, true}, {false, false}}};\n" + "constexpr const bool *b = &d.a[1].z;\n"; + + auto AST = tooling::buildASTFromCodeWithArgs( + Code, {"-fexperimental-new-constant-interpreter"}); + + auto &Ctx = AST->getASTContext().getInterpContext(); + Program &Prog = Ctx.getProgram(); + + auto getDecl = [&](const char *Name) -> const ValueDecl * { + auto Nodes = + match(valueDecl(hasName(Name)).bind("var"), AST->getASTContext()); + assert(Nodes.size() == 1); + const auto *D = Nodes[0].getNodeAs("var"); + assert(D); + return D; + }; + auto getGlobalPtr = [&](const char *Name) -> Pointer { + const VarDecl *D = cast(getDecl(Name)); + return Prog.getPtrGlobal(*Prog.getGlobal(D)); + }; + + const Pointer &GP = getGlobalPtr("b"); + const Pointer &P = GP.deref(); + ASSERT_TRUE(P.isLive()); + APValue A = P.toAPValue(); + ASSERT_TRUE(A.isLValue()); + ASSERT_TRUE(A.hasLValuePath()); + const auto &Path = A.getLValuePath(); + ASSERT_EQ(Path.size(), 3u); + ASSERT_EQ(A.getLValueBase(), getDecl("d")); +} + +TEST(ToAPValue, FunctionPointers) { + constexpr char Code[] = " constexpr bool foo() { return true; }\n" + " constexpr bool (*func)() = foo;\n"; + + auto AST = tooling::buildASTFromCodeWithArgs( + Code, {"-fexperimental-new-constant-interpreter"}); + + auto &Ctx = AST->getASTContext().getInterpContext(); + Program &Prog = Ctx.getProgram(); + + auto getDecl = [&](const char *Name) -> const ValueDecl * { + auto Nodes = + match(valueDecl(hasName(Name)).bind("var"), AST->getASTContext()); + assert(Nodes.size() == 1); + const auto *D = Nodes[0].getNodeAs("var"); + assert(D); + return D; + }; + + auto getGlobalPtr = [&](const char *Name) -> Pointer { + const VarDecl *D = cast(getDecl(Name)); + return Prog.getPtrGlobal(*Prog.getGlobal(D)); + }; + + const Pointer &GP = getGlobalPtr("func"); + const FunctionPointer &FP = GP.deref(); + ASSERT_FALSE(FP.isZero()); + APValue A = FP.toAPValue(); + ASSERT_TRUE(A.hasValue()); + ASSERT_TRUE(A.isLValue()); + ASSERT_TRUE(A.hasLValuePath()); + const auto &Path = A.getLValuePath(); + ASSERT_EQ(Path.size(), 0u); + ASSERT_FALSE(A.getLValueBase().isNull()); + ASSERT_EQ(A.getLValueBase().dyn_cast(), getDecl("foo")); +} diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index 0edc65162fbe3..b76627cb9be63 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -2321,6 +2321,8 @@ TEST_P(ASTMatchersTest, LambdaCaptureTest_BindsToCaptureOfVarDecl) { matches("int main() { int cc; auto f = [=](){ return cc; }; }", matcher)); EXPECT_TRUE( matches("int main() { int cc; auto f = [&](){ return cc; }; }", matcher)); + EXPECT_TRUE(matches( + "void f(int a) { int cc[a]; auto f = [&](){ return cc;}; }", matcher)); } TEST_P(ASTMatchersTest, LambdaCaptureTest_BindsToCaptureWithInitializer) { diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index ec0267da468ab..f312a9e21158a 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -1865,6 +1865,13 @@ TEST_F(FormatTest, UnderstandsMacros) { verifyFormat("MACRO(co_return##something)"); verifyFormat("#define A x:"); + + verifyFormat("#define Foo(Bar) {#Bar}", "#define Foo(Bar) \\\n" + " { \\\n" + " #Bar \\\n" + " }"); + verifyFormat("#define Foo(Bar) {#Bar}", "#define 
Foo(Bar) \\\n" + " { #Bar }"); } TEST_F(FormatTest, ShortBlocksInMacrosDontMergeWithCodeAfterMacro) { @@ -7977,39 +7984,37 @@ TEST_F(FormatTest, AllowAllArgumentsOnNextLineDontAlign) { } TEST_F(FormatTest, BreakFunctionDefinitionParameters) { - FormatStyle Style = getLLVMStyle(); - EXPECT_FALSE(Style.BreakFunctionDefinitionParameters); StringRef Input = "void functionDecl(paramA, paramB, paramC);\n" "void emptyFunctionDefinition() {}\n" "void functionDefinition(int A, int B, int C) {}\n" - "Class::Class(int A, int B) : m_A(A), m_B(B) {}\n"; - verifyFormat(StringRef("void functionDecl(paramA, paramB, paramC);\n" - "void emptyFunctionDefinition() {}\n" - "void functionDefinition(int A, int B, int C) {}\n" - "Class::Class(int A, int B) : m_A(A), m_B(B) {}\n"), - Input, Style); + "Class::Class(int A, int B) : m_A(A), m_B(B) {}"; + verifyFormat(Input); + + FormatStyle Style = getLLVMStyle(); + EXPECT_FALSE(Style.BreakFunctionDefinitionParameters); Style.BreakFunctionDefinitionParameters = true; - verifyFormat(StringRef("void functionDecl(paramA, paramB, paramC);\n" - "void emptyFunctionDefinition() {}\n" - "void functionDefinition(\n" - " int A, int B, int C) {}\n" - "Class::Class(\n" - " int A, int B)\n" - " : m_A(A), m_B(B) {}\n"), + verifyFormat("void functionDecl(paramA, paramB, paramC);\n" + "void emptyFunctionDefinition() {}\n" + "void functionDefinition(\n" + " int A, int B, int C) {}\n" + "Class::Class(\n" + " int A, int B)\n" + " : m_A(A), m_B(B) {}", Input, Style); - // Test the style where all parameters are on their own lines + + // Test the style where all parameters are on their own lines. Style.AllowAllParametersOfDeclarationOnNextLine = false; Style.BinPackParameters = false; - verifyFormat(StringRef("void functionDecl(paramA, paramB, paramC);\n" - "void emptyFunctionDefinition() {}\n" - "void functionDefinition(\n" - " int A,\n" - " int B,\n" - " int C) {}\n" - "Class::Class(\n" - " int A,\n" - " int B)\n" - " : m_A(A), m_B(B) {}\n"), + verifyFormat("void functionDecl(paramA, paramB, paramC);\n" + "void emptyFunctionDefinition() {}\n" + "void functionDefinition(\n" + " int A,\n" + " int B,\n" + " int C) {}\n" + "Class::Class(\n" + " int A,\n" + " int B)\n" + " : m_A(A), m_B(B) {}", Input, Style); } @@ -11035,7 +11040,7 @@ TEST_F(FormatTest, UnderstandsTemplateParameters) { verifyFormat("some_templated_type"); verifyFormat("#define FOO(typeName, realClass) \\\n" - " { #typeName, foo(new foo(#typeName)) }", + " {#typeName, foo(new foo(#typeName))}", getLLVMStyleWithColumns(60)); } @@ -27272,6 +27277,18 @@ TEST_F(FormatTest, PPBranchesInBracedInit) { "};"); } +TEST_F(FormatTest, PPDirectivesAndCommentsInBracedInit) { + verifyFormat("{\n" + " char *a[] = {\n" + " /* abc */ \"abc\",\n" + "#if FOO\n" + " /* xyz */ \"xyz\",\n" + "#endif\n" + " /* last */ \"last\"};\n" + "}", + getLLVMStyleWithColumns(30)); +} + TEST_F(FormatTest, StreamOutputOperator) { verifyFormat("std::cout << \"foo\" << \"bar\" << baz;"); verifyFormat("std::cout << \"foo\\n\"\n" diff --git a/clang/unittests/Format/FormatTestTableGen.cpp b/clang/unittests/Format/FormatTestTableGen.cpp index 8ca6bf97e5a6b..79b6961b00b42 100644 --- a/clang/unittests/Format/FormatTestTableGen.cpp +++ b/clang/unittests/Format/FormatTestTableGen.cpp @@ -72,7 +72,7 @@ TEST_F(FormatTestTableGen, LiteralsAndIdentifiers) { " let 0startID = $TokVarName;\n" " let 0xstartInteger = 0x42;\n" " let someIdentifier = $TokVarName;\n" - "}\n"); + "}"); } TEST_F(FormatTestTableGen, BangOperators) { @@ -101,22 +101,22 @@ TEST_F(FormatTestTableGen, 
BangOperators) { " \"zerozero\",\n" " true: // default\n" " \"positivepositive\");\n" - "}\n"); + "}"); } TEST_F(FormatTestTableGen, Include) { - verifyFormat("include \"test/IncludeFile.h\"\n"); + verifyFormat("include \"test/IncludeFile.h\""); } TEST_F(FormatTestTableGen, Types) { - verifyFormat("def Types : list, bits<3>, list> {}\n"); + verifyFormat("def Types : list, bits<3>, list> {}"); } TEST_F(FormatTestTableGen, SimpleValue1_SingleLiterals) { verifyFormat("def SimpleValue {\n" " let Integer = 42;\n" " let String = \"some string\";\n" - "}\n"); + "}"); } TEST_F(FormatTestTableGen, SimpleValue1_MultilineString) { @@ -129,7 +129,7 @@ TEST_F(FormatTestTableGen, SimpleValue1_MultilineString) { "delimited by \\[{ and }\\]. It can break across lines and the line " "breaks are retained in the string. \n" "(https://llvm.org/docs/TableGen/ProgRef.html#grammar-token-TokCode)}];\n" - "}\n"; + "}"; StringRef DefWithCodeMessedUp = "def SimpleValueCode { let \n" "Code= \n" @@ -139,7 +139,7 @@ TEST_F(FormatTestTableGen, SimpleValue1_MultilineString) { "breaks are retained in the string. \n" "(https://llvm.org/docs/TableGen/ProgRef.html#grammar-token-TokCode)}] \n" " ; \n" - " } \n"; + " } "; verifyFormat(DefWithCode, DefWithCodeMessedUp); } @@ -147,15 +147,15 @@ TEST_F(FormatTestTableGen, SimpleValue2) { verifyFormat("def SimpleValue2 {\n" " let True = true;\n" " let False = false;\n" - "}\n"); + "}"); } TEST_F(FormatTestTableGen, SimpleValue3) { - verifyFormat("class SimpleValue3 { int Question = ?; }\n"); + verifyFormat("class SimpleValue3 { int Question = ?; }"); } TEST_F(FormatTestTableGen, SimpleValue4) { - verifyFormat("def SimpleValue4 { let ValueList = {1, 2, 3}; }\n"); + verifyFormat("def SimpleValue4 { let ValueList = {1, 2, 3}; }"); } TEST_F(FormatTestTableGen, SimpleValue5) { @@ -166,7 +166,7 @@ TEST_F(FormatTestTableGen, SimpleValue5) { " list>;\n" " let SquareBitsListWithType = [ {1, 2},\n" " {3, 4} ]>>;\n" - "}\n"); + "}"); } TEST_F(FormatTestTableGen, SimpleValue6) { @@ -184,15 +184,15 @@ TEST_F(FormatTestTableGen, SimpleValue6) { " );\n" " let DAGArgBang = (!cast(\"Some\") i32:$src1,\n" " i32:$src2);\n" - "}\n"); + "}"); } TEST_F(FormatTestTableGen, SimpleValue7) { - verifyFormat("def SimpleValue7 { let Identifier = SimpleValue; }\n"); + verifyFormat("def SimpleValue7 { let Identifier = SimpleValue; }"); } TEST_F(FormatTestTableGen, SimpleValue8) { - verifyFormat("def SimpleValue8 { let Class = SimpleValue3<3>; }\n"); + verifyFormat("def SimpleValue8 { let Class = SimpleValue3<3>; }"); } TEST_F(FormatTestTableGen, ValueSuffix) { @@ -203,19 +203,18 @@ TEST_F(FormatTestTableGen, ValueSuffix) { " let Slice1 = value[1, ];\n" " let Slice2 = value[4...7, 17, 2...3, 4];\n" " let Field = value.field;\n" - "}\n"); + "}"); } TEST_F(FormatTestTableGen, PasteOperator) { - verifyFormat( - "def Paste#\"Operator\" { string Paste = \"Paste\"#operator; }\n"); + verifyFormat("def Paste#\"Operator\" { string Paste = \"Paste\"#operator; }"); verifyFormat("def [\"Traring\", \"Paste\"]# {\n" " string X = Traring#;\n" " string Y = List<\"Operator\">#;\n" " string Z = [\"Traring\", \"Paste\", \"Traring\", \"Paste\",\n" " \"Traring\", \"Paste\"]#;\n" - "}\n"); + "}"); } TEST_F(FormatTestTableGen, ClassDefinition) { @@ -229,9 +228,9 @@ TEST_F(FormatTestTableGen, ClassDefinition) { " defvar Item6 = 6;\n" " let Item7 = ?;\n" " assert !ge(x, 0), \"Assert7\";\n" - "}\n"); + "}"); - verifyFormat("class FPFormat val> { bits<3> Value = val; }\n"); + verifyFormat("class FPFormat val> { bits<3> Value = val; }"); } 
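A note on the test idiom used throughout this file: the one-argument verifyFormat(Text) asserts that Text is stable under formatting, while the two-argument verifyFormat(Expected, MessedUp) asserts that a deliberately mangled input is formatted into Expected. A hypothetical illustration (the mangled string below is invented for this sketch):

    // Stable: formatting must be a no-op.
    verifyFormat("def NotFP : FPFormat<0>;");
    // Mangled input must be reformatted into the first argument.
    verifyFormat("def NotFP : FPFormat<0>;", "def  NotFP:FPFormat<0> ;");

The two-argument form is exactly what SimpleValue1_MultilineString above uses with DefWithCode versus DefWithCodeMessedUp.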
TEST_F(FormatTestTableGen, Def) { @@ -240,18 +239,18 @@ TEST_F(FormatTestTableGen, Def) { " let Item2{1, 3...4} = {1, 2};\n" " defvar Item3 = (ops nodty:$node1, nodty:$node2);\n" " assert !le(Item2, 0), \"Assert4\";\n" - "}\n"); + "}"); - verifyFormat("class FPFormat val> { bits<3> Value = val; }\n"); + verifyFormat("class FPFormat val> { bits<3> Value = val; }"); - verifyFormat("def NotFP : FPFormat<0>;\n"); + verifyFormat("def NotFP : FPFormat<0>;"); } TEST_F(FormatTestTableGen, Let) { verifyFormat("let x = 1, y = value,\n" " z = !and(!gt(!add(1, 2), !sub(3, 4)), !isa($x)) in {\n" " class Class1 : Parent { let Item1 = z; }\n" - "}\n"); + "}"); } TEST_F(FormatTestTableGen, MultiClass) { @@ -287,25 +286,35 @@ TEST_F(FormatTestTableGen, MultiClass) { " }\n" " }\n" " }\n" - "}\n"); + "}"); +} + +TEST_F(FormatTestTableGen, MultiClassesWithPasteOperator) { + // This is a sensitive example for the handling of the paste operators in + // brace type calculation. + verifyFormat("multiclass MultiClass1 {\n" + " def : Def#x;\n" + " def : Def#y;\n" + "}\n" + "multiclass MultiClass2 { def : Def#x; }"); } TEST_F(FormatTestTableGen, Defm) { - verifyFormat("defm : Multiclass<0>;\n"); + verifyFormat("defm : Multiclass<0>;"); - verifyFormat("defm Defm1 : Multiclass<1>;\n"); + verifyFormat("defm Defm1 : Multiclass<1>;"); } TEST_F(FormatTestTableGen, Defset) { verifyFormat("defset list DefSet1 = {\n" " def Def1 : Class<1>;\n" " def Def2 : Class<2>;\n" - "}\n"); + "}"); } TEST_F(FormatTestTableGen, Defvar) { verifyFormat("defvar DefVar1 = !cond(!ge(!size(PaseOperator.Paste), 1): 1,\n" - " true: 0);\n"); + " true: 0);"); } TEST_F(FormatTestTableGen, ForEach) { @@ -315,21 +324,21 @@ TEST_F(FormatTestTableGen, ForEach) { " (!if(!lt(x, i),\n" " !shl(!mul(x, i), !size(\"string\")),\n" " !size(!strconcat(\"a\", \"b\", \"c\"))))>;\n" - "}\n"); + "}"); } -TEST_F(FormatTestTableGen, Dump) { verifyFormat("dump \"Dump\";\n"); } +TEST_F(FormatTestTableGen, Dump) { verifyFormat("dump \"Dump\";"); } TEST_F(FormatTestTableGen, If) { verifyFormat("if !gt(x, 0) then {\n" " def : IfThen;\n" "} else {\n" " def : IfElse;\n" - "}\n"); + "}"); } TEST_F(FormatTestTableGen, Assert) { - verifyFormat("assert !le(DefVar1, 0), \"Assert1\";\n"); + verifyFormat("assert !le(DefVar1, 0), \"Assert1\";"); } TEST_F(FormatTestTableGen, DAGArgBreakElements) { @@ -339,7 +348,7 @@ TEST_F(FormatTestTableGen, DAGArgBreakElements) { ASSERT_EQ(Style.TableGenBreakInsideDAGArg, FormatStyle::DAS_DontBreak); verifyFormat("def Def : Parent {\n" " let dagarg = (ins a:$src1, aa:$src2, aaa:$src3)\n" - "}\n", + "}", Style); // This option forces to break inside the DAGArg. Style.TableGenBreakInsideDAGArg = FormatStyle::DAS_BreakElements; @@ -347,13 +356,13 @@ TEST_F(FormatTestTableGen, DAGArgBreakElements) { " let dagarg = (ins a:$src1,\n" " aa:$src2,\n" " aaa:$src3);\n" - "}\n", + "}", Style); verifyFormat("def Def : Parent {\n" " let dagarg = (other a:$src1,\n" " aa:$src2,\n" " aaa:$src3);\n" - "}\n", + "}", Style); // Then, limit the DAGArg operator only to "ins". Style.TableGenBreakingDAGArgOperators = {"ins"}; @@ -361,11 +370,11 @@ TEST_F(FormatTestTableGen, DAGArgBreakElements) { " let dagarg = (ins a:$src1,\n" " aa:$src2,\n" " aaa:$src3);\n" - "}\n", + "}", Style); verifyFormat("def Def : Parent {\n" " let dagarg = (other a:$src1, aa:$src2, aaa:$src3)\n" - "}\n", + "}", Style); } @@ -375,7 +384,7 @@ TEST_F(FormatTestTableGen, DAGArgBreakAll) { // By default, the DAGArg does not have a break inside. 
verifyFormat("def Def : Parent {\n" " let dagarg = (ins a:$src1, aa:$src2, aaa:$src3)\n" - "}\n", + "}", Style); // This option forces to break inside the DAGArg. Style.TableGenBreakInsideDAGArg = FormatStyle::DAS_BreakAll; @@ -385,7 +394,7 @@ TEST_F(FormatTestTableGen, DAGArgBreakAll) { " aa:$src2,\n" " aaa:$src3\n" " );\n" - "}\n", + "}", Style); verifyFormat("def Def : Parent {\n" " let dagarg = (other\n" @@ -393,7 +402,7 @@ TEST_F(FormatTestTableGen, DAGArgBreakAll) { " aa:$src2,\n" " aaa:$src3\n" " );\n" - "}\n", + "}", Style); // Then, limit the DAGArg operator only to "ins". Style.TableGenBreakingDAGArgOperators = {"ins"}; @@ -403,11 +412,11 @@ TEST_F(FormatTestTableGen, DAGArgBreakAll) { " aa:$src2,\n" " aaa:$src3\n" " );\n" - "}\n", + "}", Style); verifyFormat("def Def : Parent {\n" " let dagarg = (other a:$src1, aa:$src2, aaa:$src3);\n" - "}\n", + "}", Style); } @@ -422,11 +431,11 @@ TEST_F(FormatTestTableGen, DAGArgAlignment) { " aa:$src2,\n" " aaa:$src3\n" " )\n" - "}\n", + "}", Style); verifyFormat("def Def : Parent {\n" " let dagarg = (not a:$src1, aa:$src2, aaa:$src2)\n" - "}\n", + "}", Style); Style.AlignConsecutiveTableGenBreakingDAGArgColons.Enabled = true; verifyFormat("def Def : Parent {\n" @@ -435,11 +444,11 @@ TEST_F(FormatTestTableGen, DAGArgAlignment) { " aa :$src2,\n" " aaa:$src3\n" " )\n" - "}\n", + "}", Style); verifyFormat("def Def : Parent {\n" " let dagarg = (not a:$src1, aa:$src2, aaa:$src2)\n" - "}\n", + "}", Style); } @@ -448,12 +457,12 @@ TEST_F(FormatTestTableGen, CondOperatorAlignment) { Style.ColumnLimit = 60; verifyFormat("let CondOpe1 = !cond(!eq(size, 1): 1,\n" " !eq(size, 16): 1,\n" - " true: 0);\n", + " true: 0);", Style); Style.AlignConsecutiveTableGenCondOperatorColons.Enabled = true; verifyFormat("let CondOpe1 = !cond(!eq(size, 1) : 1,\n" " !eq(size, 16): 1,\n" - " true : 0);\n", + " true : 0);", Style); } @@ -462,12 +471,12 @@ TEST_F(FormatTestTableGen, DefAlignment) { Style.ColumnLimit = 60; verifyFormat("def Def : Parent {}\n" "def DefDef : Parent {}\n" - "def DefDefDef : Parent {}\n", + "def DefDefDef : Parent {}", Style); Style.AlignConsecutiveTableGenDefinitionColons.Enabled = true; verifyFormat("def Def : Parent {}\n" "def DefDef : Parent {}\n" - "def DefDefDef : Parent {}\n", + "def DefDefDef : Parent {}", Style); } diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 9425647d99980..c3153cf6b16f0 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1769,6 +1769,10 @@ TEST_F(TokenAnnotatorTest, UnderstandsFunctionDeclarationNames) { EXPECT_TOKEN(Tokens[3], tok::identifier, TT_Unknown); EXPECT_TOKEN(Tokens[4], tok::l_paren, TT_FunctionTypeLParen); + Tokens = annotate("void instanceof();"); + ASSERT_EQ(Tokens.size(), 6u); + EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName); + Tokens = annotate("int iso_time(time_t);"); ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName); @@ -1933,14 +1937,20 @@ TEST_F(TokenAnnotatorTest, UnderstandHashInMacro) { " #Bar \\\n" " }"); ASSERT_EQ(Tokens.size(), 11u) << Tokens; - EXPECT_BRACE_KIND(Tokens[6], BK_Block); - EXPECT_BRACE_KIND(Tokens[9], BK_Block); + EXPECT_BRACE_KIND(Tokens[6], BK_BracedInit); + EXPECT_BRACE_KIND(Tokens[9], BK_BracedInit); Tokens = annotate("#define Foo(Bar) \\\n" " { #Bar }"); ASSERT_EQ(Tokens.size(), 11u) << Tokens; - EXPECT_BRACE_KIND(Tokens[6], BK_Block); - EXPECT_BRACE_KIND(Tokens[9], 
BK_Block); + EXPECT_BRACE_KIND(Tokens[6], BK_BracedInit); + EXPECT_BRACE_KIND(Tokens[9], BK_BracedInit); + + Tokens = annotate("#define FOO(typeName, realClass) \\\n" + " {#typeName, foo(new foo(#typeName))}"); + ASSERT_EQ(Tokens.size(), 29u) << Tokens; + EXPECT_BRACE_KIND(Tokens[8], BK_BracedInit); + EXPECT_BRACE_KIND(Tokens[27], BK_BracedInit); } TEST_F(TokenAnnotatorTest, UnderstandsAttributeMacros) { @@ -2809,6 +2819,26 @@ TEST_F(TokenAnnotatorTest, BraceKind) { EXPECT_TOKEN(Tokens[13], tok::l_brace, TT_FunctionLBrace); EXPECT_BRACE_KIND(Tokens[13], BK_Block); EXPECT_BRACE_KIND(Tokens[14], BK_Block); + + Tokens = annotate("{\n" + " char *a[] = {\n" + " /* abc */ \"abc\",\n" + "#if FOO\n" + " /* xyz */ \"xyz\",\n" + "#endif\n" + " /* last */ \"last\"};\n" + "}"); + ASSERT_EQ(Tokens.size(), 25u) << Tokens; + EXPECT_BRACE_KIND(Tokens[0], BK_Block); + EXPECT_BRACE_KIND(Tokens[7], BK_BracedInit); + EXPECT_BRACE_KIND(Tokens[21], BK_BracedInit); + + Tokens = + annotate("#define SCOP_STAT(NAME, DESC) \\\n" + " {\"polly\", #NAME, \"Number of rejected regions: \" DESC}"); + ASSERT_EQ(Tokens.size(), 18u) << Tokens; + EXPECT_BRACE_KIND(Tokens[8], BK_BracedInit); + EXPECT_BRACE_KIND(Tokens[16], BK_BracedInit); } TEST_F(TokenAnnotatorTest, StreamOperator) { diff --git a/clang/unittests/Interpreter/InterpreterExtensionsTest.cpp b/clang/unittests/Interpreter/InterpreterExtensionsTest.cpp index 1ba865a79ed77..b971cd550dc50 100644 --- a/clang/unittests/Interpreter/InterpreterExtensionsTest.cpp +++ b/clang/unittests/Interpreter/InterpreterExtensionsTest.cpp @@ -30,7 +30,7 @@ #include -#if defined(_AIX) +#if defined(_AIX) || defined(__MVS__) #define CLANG_INTERPRETER_PLATFORM_CANNOT_CREATE_LLJIT #endif diff --git a/clang/unittests/Lex/PPDependencyDirectivesTest.cpp b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp index 0c396720ece66..6ff87f720a559 100644 --- a/clang/unittests/Lex/PPDependencyDirectivesTest.cpp +++ b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp @@ -117,6 +117,11 @@ TEST_F(PPDependencyDirectivesTest, MacroGuard) { }; auto PPOpts = std::make_shared(); + PPOpts->DependencyDirectivesForFile = [&](FileEntryRef File) + -> std::optional> { + return getDependencyDirectives(File); + }; + TrivialModuleLoader ModLoader; HeaderSearch HeaderInfo(std::make_shared(), SourceMgr, Diags, LangOpts, Target.get()); @@ -125,12 +130,6 @@ TEST_F(PPDependencyDirectivesTest, MacroGuard) { /*OwnsHeaderSearch =*/false); PP.Initialize(*Target); - PP.setDependencyDirectivesFn( - [&](FileEntryRef File) - -> std::optional> { - return getDependencyDirectives(File); - }); - SmallVector IncludedFiles; PP.addPPCallbacks(std::make_unique(PP, IncludedFiles)); PP.EnterMainSourceFile(); diff --git a/clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp b/clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp index 3aac1f81f523d..238f954d71331 100644 --- a/clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp +++ b/clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp @@ -488,16 +488,21 @@ TEST(CallDescription, NegativeMatchQualifiedNames) { } TEST(CallDescription, MatchBuiltins) { - // Test CDM::CLibrary - a flag that allows matching weird builtins. 
- EXPECT_TRUE(tooling::runToolOnCode( - std::unique_ptr(new CallDescriptionAction<>( - {{{{"memset"}, 3}, false}, {{CDM::CLibrary, {"memset"}, 3}, true}})), - "void foo() {" - " int x;" - " __builtin___memset_chk(&x, 0, sizeof(x)," - " __builtin_object_size(&x, 0));" - "}")); - + // Test the matching modes CDM::CLibrary and CDM::CLibraryMaybeHardened, + // which can recognize builtin variants of C library functions. + { + SCOPED_TRACE("hardened variants of functions"); + EXPECT_TRUE(tooling::runToolOnCode( + std::unique_ptr(new CallDescriptionAction<>( + {{{CDM::Unspecified, {"memset"}, 3}, false}, + {{CDM::CLibrary, {"memset"}, 3}, false}, + {{CDM::CLibraryMaybeHardened, {"memset"}, 3}, true}})), + "void foo() {" + " int x;" + " __builtin___memset_chk(&x, 0, sizeof(x)," + " __builtin_object_size(&x, 0));" + "}")); + } { SCOPED_TRACE("multiple similar builtins"); EXPECT_TRUE(tooling::runToolOnCode( @@ -518,6 +523,35 @@ TEST(CallDescription, MatchBuiltins) { __builtin_wmemcpy(x, y, sizeof(wchar_t)); })")); } + { + SCOPED_TRACE("multiple similar builtins with hardened variant"); + EXPECT_TRUE(tooling::runToolOnCode( + std::unique_ptr(new CallDescriptionAction<>( + {{{CDM::CLibraryMaybeHardened, {"memcpy"}, 3}, false}, + {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3}, true}})), + R"(typedef __typeof(sizeof(int)) size_t; + extern wchar_t *__wmemcpy_chk (wchar_t *__restrict __s1, + const wchar_t *__restrict __s2, + size_t __n, size_t __ns1); + void foo(wchar_t *x, wchar_t *y) { + __wmemcpy_chk(x, y, sizeof(wchar_t), 1234); + })")); + } + { + SCOPED_TRACE( + "multiple similar builtins with hardened variant reversed order"); + EXPECT_TRUE(tooling::runToolOnCode( + std::unique_ptr(new CallDescriptionAction<>( + {{{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3}, true}, + {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3}, false}})), + R"(typedef __typeof(sizeof(int)) size_t; + extern wchar_t *__wmemcpy_chk (wchar_t *__restrict __s1, + const wchar_t *__restrict __s2, + size_t __n, size_t __ns1); + void foo(wchar_t *x, wchar_t *y) { + __wmemcpy_chk(x, y, sizeof(wchar_t), 1234); + })")); + } { SCOPED_TRACE("lookbehind and lookahead mismatches"); EXPECT_TRUE(tooling::runToolOnCode( diff --git a/clang/unittests/Tooling/SourceCodeTest.cpp b/clang/unittests/Tooling/SourceCodeTest.cpp index 3641d2ee453f4..3c24b6220a224 100644 --- a/clang/unittests/Tooling/SourceCodeTest.cpp +++ b/clang/unittests/Tooling/SourceCodeTest.cpp @@ -8,6 +8,7 @@ #include "clang/Tooling/Transformer/SourceCode.h" #include "TestVisitor.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceLocation.h" #include "clang/Lex/Lexer.h" @@ -18,10 +19,12 @@ #include using namespace clang; +using namespace clang::ast_matchers; using llvm::Failed; using llvm::Succeeded; using llvm::ValueIs; +using testing::Optional; using tooling::getAssociatedRange; using tooling::getExtendedRange; using tooling::getExtendedText; @@ -50,6 +53,15 @@ struct CallsVisitor : TestVisitor { std::function OnCall; }; +struct TypeLocVisitor : TestVisitor { + bool VisitTypeLoc(TypeLoc TL) { + OnTypeLoc(TL, Context); + return true; + } + + std::function OnTypeLoc; +}; + // Equality matcher for `clang::CharSourceRange`, which lacks `operator==`. 
MATCHER_P(EqualsRange, R, "") { return arg.isTokenRange() == R.isTokenRange() && @@ -510,6 +522,54 @@ int c = M3(3); Visitor.runOver(Code.code()); } +TEST(SourceCodeTest, InnerNestedTemplate) { + llvm::Annotations Code(R"cpp( + template + struct A {}; + template + struct B {}; + template + struct C {}; + + void f(A$r[[>>]]); + )cpp"); + + TypeLocVisitor Visitor; + Visitor.OnTypeLoc = [&](TypeLoc TL, ASTContext *Context) { + if (TL.getSourceRange().isInvalid()) + return; + + // There are no macros, so every TypeLoc's range should be valid. + auto Range = CharSourceRange::getTokenRange(TL.getSourceRange()); + auto LastTokenRange = CharSourceRange::getTokenRange(TL.getEndLoc()); + EXPECT_TRUE(getFileRangeForEdit(Range, *Context, + /*IncludeMacroExpansion=*/false)) + << TL.getSourceRange().printToString(Context->getSourceManager()); + EXPECT_TRUE(getFileRangeForEdit(LastTokenRange, *Context, + /*IncludeMacroExpansion=*/false)) + << TL.getEndLoc().printToString(Context->getSourceManager()); + + if (auto matches = match( + templateSpecializationTypeLoc( + loc(templateSpecializationType( + hasDeclaration(cxxRecordDecl(hasName("A"))))), + hasTemplateArgumentLoc( + 0, templateArgumentLoc(hasTypeLoc(typeLoc().bind("b"))))), + TL, *Context); + !matches.empty()) { + // A range where the start token is split, but the end token is not. + auto OuterTL = TL; + auto MiddleTL = *matches[0].getNodeAs("b"); + EXPECT_THAT( + getFileRangeForEdit(CharSourceRange::getTokenRange( + MiddleTL.getEndLoc(), OuterTL.getEndLoc()), + *Context, /*IncludeMacroExpansion=*/false), + Optional(EqualsAnnotatedRange(Context, Code.range("r")))); + } + }; + Visitor.runOver(Code.code(), TypeLocVisitor::Lang_CXX11); +} + TEST_P(GetFileRangeForEditTest, EditPartialMacroExpansionShouldFail) { std::string Code = R"cpp( #define BAR 10+ diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index 6322c72f62ef3..e8e726f0053cc 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -1595,10 +1595,10 @@ writePrettyPrintFunction(const Record &R, std::string Variety = Spellings[I].variety(); if (Variety == "GNU") { - Prefix = " __attribute__(("; + Prefix = "__attribute__(("; Suffix = "))"; } else if (Variety == "CXX11" || Variety == "C23") { - Prefix = " [["; + Prefix = "[["; Suffix = "]]"; std::string Namespace = Spellings[I].nameSpace(); if (!Namespace.empty()) { @@ -1606,7 +1606,7 @@ writePrettyPrintFunction(const Record &R, Spelling += "::"; } } else if (Variety == "Declspec") { - Prefix = " __declspec("; + Prefix = "__declspec("; Suffix = ")"; } else if (Variety == "Microsoft") { Prefix = "["; @@ -3328,37 +3328,6 @@ void EmitClangAttrList(RecordKeeper &Records, raw_ostream &OS) { OS << "#undef PRAGMA_SPELLING_ATTR\n"; } -// Emits the enumeration list for attributes. 
-void EmitClangAttrPrintList(const std::string &FieldName, RecordKeeper &Records, - raw_ostream &OS) { - emitSourceFileHeader( - "List of attributes that can be print on the left side of a decl", OS, - Records); - - AttrClassHierarchy Hierarchy(Records); - - std::vector Attrs = Records.getAllDerivedDefinitions("Attr"); - std::vector PragmaAttrs; - bool first = false; - - for (auto *Attr : Attrs) { - if (!Attr->getValueAsBit("ASTNode")) - continue; - - if (!Attr->getValueAsBit(FieldName)) - continue; - - if (!first) { - first = true; - OS << "#define CLANG_ATTR_LIST_" << FieldName; - } - - OS << " \\\n case attr::" << Attr->getName() << ":"; - } - - OS << '\n'; -} - // Emits the enumeration list for attributes. void EmitClangAttrSubjectMatchRuleList(RecordKeeper &Records, raw_ostream &OS) { emitSourceFileHeader( diff --git a/clang/utils/TableGen/ClangOptionDocEmitter.cpp b/clang/utils/TableGen/ClangOptionDocEmitter.cpp index 3fe9890994074..86835611b8421 100644 --- a/clang/utils/TableGen/ClangOptionDocEmitter.cpp +++ b/clang/utils/TableGen/ClangOptionDocEmitter.cpp @@ -359,8 +359,38 @@ void emitOption(const DocumentedOption &Option, const Record *DocInfo, // Emit the description, if we have one. const Record *R = Option.Option; - std::string Description = - getRSTStringWithTextFallback(R, "DocBrief", "HelpText"); + std::string Description; + + // Prefer a program specific help string. + // This is a list of (visibilities, string) pairs. + std::vector VisibilitiesHelp = + R->getValueAsListOfDefs("HelpTextsForVariants"); + for (Record *VisibilityHelp : VisibilitiesHelp) { + // This is a list of visibilities. + ArrayRef Visibilities = + VisibilityHelp->getValueAsListInit("Visibilities")->getValues(); + + // See if any of the program's visibilities are in the list. + for (StringRef DocInfoMask : + DocInfo->getValueAsListOfStrings("VisibilityMask")) { + for (Init *Visibility : Visibilities) { + if (Visibility->getAsUnquotedString() == DocInfoMask) { + // Use the first one we find. + Description = escapeRST(VisibilityHelp->getValueAsString("Text")); + break; + } + } + if (!Description.empty()) + break; + } + + if (!Description.empty()) + break; + } + + // If there's not a program specific string, use the default one. 
+ if (Description.empty()) + Description = getRSTStringWithTextFallback(R, "DocBrief", "HelpText"); if (!isa(R->getValueInit("Values"))) { if (!Description.empty() && Description.back() != '.') diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp index 3a90eee5f1c92..5d7c7f25a685c 100644 --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -927,7 +927,7 @@ class ACLEIntrinsic { llvm::APInt ArgTypeRange = llvm::APInt::getMaxValue(ArgTypeBits).zext(128); llvm::APInt ActualRange = (hi-lo).trunc(64).sext(128); if (ActualRange.ult(ArgTypeRange)) - SemaChecks.push_back("SemaBuiltinConstantArgRange(TheCall, " + Index + + SemaChecks.push_back("BuiltinConstantArgRange(TheCall, " + Index + ", " + signedHexLiteral(lo) + ", " + signedHexLiteral(hi) + ")"); @@ -942,9 +942,8 @@ class ACLEIntrinsic { } Suffix = (Twine(", ") + Arg).str(); } - SemaChecks.push_back((Twine("SemaBuiltinConstantArg") + - IA.ExtraCheckType + "(TheCall, " + Index + - Suffix + ")") + SemaChecks.push_back((Twine("BuiltinConstantArg") + IA.ExtraCheckType + + "(TheCall, " + Index + Suffix + ")") .str()); } diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp index eb211f7ca8cbd..4ce588aeba070 100644 --- a/clang/utils/TableGen/TableGen.cpp +++ b/clang/utils/TableGen/TableGen.cpp @@ -31,8 +31,6 @@ enum ActionType { GenClangAttrSubjectMatchRulesParserStringSwitches, GenClangAttrImpl, GenClangAttrList, - GenClangAttrCanPrintLeftList, - GenClangAttrMustPrintLeftList, GenClangAttrDocTable, GenClangAttrSubjectMatchRuleList, GenClangAttrPCHRead, @@ -135,14 +133,6 @@ cl::opt Action( "Generate clang attribute implementations"), clEnumValN(GenClangAttrList, "gen-clang-attr-list", "Generate a clang attribute list"), - clEnumValN(GenClangAttrCanPrintLeftList, - "gen-clang-attr-can-print-left-list", - "Generate list of attributes that can be printed on left " - "side of a decl"), - clEnumValN(GenClangAttrMustPrintLeftList, - "gen-clang-attr-must-print-left-list", - "Generate list of attributes that must be printed on left " - "side of a decl"), clEnumValN(GenClangAttrDocTable, "gen-clang-attr-doc-table", "Generate a table of attribute documentation"), clEnumValN(GenClangAttrSubjectMatchRuleList, @@ -348,12 +338,6 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenClangAttrList: EmitClangAttrList(Records, OS); break; - case GenClangAttrCanPrintLeftList: - EmitClangAttrPrintList("CanPrintOnLeft", Records, OS); - break; - case GenClangAttrMustPrintLeftList: - EmitClangAttrPrintList("PrintOnLeft", Records, OS); - break; case GenClangAttrDocTable: EmitClangAttrDocTable(Records, OS); break; diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h index a694d077652d9..0376637f79375 100644 --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -47,8 +47,6 @@ void EmitClangAttrSubjectMatchRulesParserStringSwitches( void EmitClangAttrClass(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrImpl(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrList(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); -void EmitClangAttrPrintList(const std::string &FieldName, - llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrSubjectMatchRuleList(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitClangAttrPCHRead(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); diff --git a/clang/www/c_status.html 
b/clang/www/c_status.html
index bc27b20ce6487..411f55447be77 100644
--- a/clang/www/c_status.html
+++ b/clang/www/c_status.html
@@ -203,7 +203,7 @@ (C99 implementation status)
     extended identifiers
     N717
-    Unknown
+    Clang 17
     hexadecimal floating-point constants
@@ -568,7 +568,7 @@ (C11 implementation status)
     Conditional normative status for Annex G
     N1514
-    Unknown
+    Yes (1)
     Creation of complex value
@@ -606,6 +606,9 @@ (C11 implementation status)
     Unknown
+(1): Clang does not implement Annex G, so our conditional support
+conforms by not defining the __STDC_IEC_559_COMPLEX__ macro.
+
 (C17 implementation status)
diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html
index 5e1e03dec1d48..6daa5ad2d1316 100755
--- a/clang/www/cxx_dr_status.html
+++ b/clang/www/cxx_dr_status.html
@@ -2392,7 +2392,7 @@ (C++ defect report implementation status)
     392
     CD1
     Use of full expression lvalue before temporary destruction
-    Unknown
+    Clang 2.8
     393
@@ -2836,7 +2836,7 @@
     466
     CD1
     cv-qualifiers on pseudo-destructor type
-    No
+    Clang 2.8
     467
@@ -3600,7 +3600,7 @@
     593
     NAD
     Falling off the end of a destructor's function-try-block handler
-    Unknown
+    Clang 2.8
     594
@@ -3672,7 +3672,7 @@
     605
     C++11
     Linkage of explicit specializations
-    Unknown
+    Clang 2.7
     606
@@ -3942,7 +3942,7 @@
     650
     CD2
     Order of destruction for temporaries bound to the returned value of a function
-    Unknown
+    Clang 2.8
     651
@@ -3960,7 +3960,7 @@
     653
     CD2
     Copy assignment of unions
-    Unknown
+    Clang 2.7
     654
@@ -3990,7 +3990,7 @@
     658
     CD2
     Defining reinterpret_cast for pointer types
-    Unknown
+    Clang 2.7
     659
@@ -4008,7 +4008,7 @@
     661
     CD1
     Semantics of arithmetic comparisons
-    Unknown
+    Clang 2.7
     662
@@ -4074,7 +4074,7 @@
     672
     CD2
     Sequencing of initialization in new-expressions
-    Unknown
+    Clang 2.7
     673
diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html
index c1d95dadbb27e..130148c7420fa 100755
--- a/clang/www/cxx_status.html
+++ b/clang/www/cxx_status.html
@@ -544,16 +544,8 @@ (C++20 implementation status)
     P0848R3
-    Clang 16 (Partial)
-    Because of other concepts implementation deficits, the __cpp_concepts
-    macro is not yet set to 202002L. Also, the related defect reports DR1496
-    and DR1734 are not yet implemented. Accordingly, deleted special member
-    functions are treated as eligible even though they shouldn't be.
+    Clang 19
     P1616R1
     Clang 10
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 46a6fdf8728ff..b281ac64f5d5c 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -463,9 +463,11 @@ if(APPLE)
   set(DEFAULT_SANITIZER_MIN_OSX_VERSION 10.13)
   set(DARWIN_osx_MIN_VER_FLAG "-mmacosx-version-min")
+
+  string(REGEX MATCH "${DARWIN_osx_MIN_VER_FLAG}=([.0-9]+)"
+         MACOSX_VERSION_MIN_FLAG "${CMAKE_CXX_FLAGS}")
+
   if(NOT SANITIZER_MIN_OSX_VERSION)
-    string(REGEX MATCH "${DARWIN_osx_MIN_VER_FLAG}=([.0-9]+)"
-           MACOSX_VERSION_MIN_FLAG "${CMAKE_CXX_FLAGS}")
     if(MACOSX_VERSION_MIN_FLAG)
       set(MIN_OSX_VERSION "${CMAKE_MATCH_1}")
     elseif(CMAKE_OSX_DEPLOYMENT_TARGET)
diff --git a/compiler-rt/lib/asan/asan_thread.cpp b/compiler-rt/lib/asan/asan_thread.cpp
index 8798968947e82..480a423952e8f 100644
--- a/compiler-rt/lib/asan/asan_thread.cpp
+++ b/compiler-rt/lib/asan/asan_thread.cpp
@@ -44,10 +44,15 @@
 static ThreadRegistry *asan_thread_registry;
 static ThreadArgRetval *thread_data;
 static Mutex mu_for_thread_context;
+// TODO(leonardchan@): It should be possible to make LowLevelAllocator
+// threadsafe and consolidate this one into the GlobalLowLevelAllocator.
+// We should be able to do something similar to what's in
+// sanitizer_stack_store.cpp.
+static LowLevelAllocator allocator_for_thread_context;
 
 static ThreadContextBase *GetAsanThreadContext(u32 tid) {
   Lock lock(&mu_for_thread_context);
-  return new (GetGlobalLowLevelAllocator()) AsanThreadContext(tid);
+  return new (allocator_for_thread_context) AsanThreadContext(tid);
 }
 
 static void InitThreads() {
diff --git a/compiler-rt/lib/hwasan/hwasan_thread_list.h b/compiler-rt/lib/hwasan/hwasan_thread_list.h
index d0eebd1b373a3..369a1c3d6f5fd 100644
--- a/compiler-rt/lib/hwasan/hwasan_thread_list.h
+++ b/compiler-rt/lib/hwasan/hwasan_thread_list.h
@@ -18,7 +18,7 @@
 // * Start of the shadow memory region is aligned to 2**kShadowBaseAlignment.
 // * All stack ring buffers are located within (2**kShadowBaseAlignment)
 //   sized region below and adjacent to the shadow region.
-// * Each ring buffer has a size of (2**N)*4096 where N is in [0, 8), and is
+// * Each ring buffer has a size of (2**N)*4096 where N is in [0, 7), and is
 //   aligned to twice its size. The value of N can be different for each buffer.
 //
 // These constraints guarantee that, given an address A of any element of the
@@ -55,7 +55,10 @@ static uptr RingBufferSize() {
   uptr desired_bytes = flags()->stack_history_size * sizeof(uptr);
   // FIXME: increase the limit to 8 once this bug is fixed:
   // https://bugs.llvm.org/show_bug.cgi?id=39030
-  for (int shift = 1; shift < 7; ++shift) {
+  // Note that we *cannot* do that on Android, as the runtime will indefinitely
+  // have to support code that is compiled with ashr, which only works with
+  // shifts up to 6.
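To make the widened range concrete: the selection loop that follows picks the smallest 2**shift * 4096 ring buffer that covers the requested history, now starting at shift = 0. A worked sketch in comments (the 8-byte uptr and a stack_history_size of 1024 are assumptions for illustration, not values stated in this patch):

    // desired_bytes = 1024 * 8 = 8192
    // shift = 0 -> 4096 bytes: too small
    // shift = 1 -> 8192 bytes: first fit, returned; per the layout rules
    //              above it must also be aligned to twice its size (16384)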
+ for (int shift = 0; shift < 7; ++shift) { uptr size = 4096 * (1ULL << shift); if (size >= desired_bytes) return size; diff --git a/compiler-rt/lib/memprof/tests/CMakeLists.txt b/compiler-rt/lib/memprof/tests/CMakeLists.txt index f812bd1f86ff8..dc19ac5cd49a9 100644 --- a/compiler-rt/lib/memprof/tests/CMakeLists.txt +++ b/compiler-rt/lib/memprof/tests/CMakeLists.txt @@ -41,33 +41,47 @@ if(NOT WIN32) list(APPEND MEMPROF_UNITTEST_LINK_FLAGS -pthread) endif() +set(MEMPROF_UNITTEST_DEPS) +if (TARGET cxx-headers OR HAVE_LIBCXX) + list(APPEND MEMPROF_UNITTEST_DEPS cxx-headers) +endif() + set(MEMPROF_UNITTEST_LINK_LIBRARIES ${COMPILER_RT_UNWINDER_LINK_LIBS} ${SANITIZER_TEST_CXX_LIBRARIES}) -list(APPEND MEMPROF_UNITTEST_LINK_LIBRARIES "dl") - -if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST MEMPROF_SUPPORTED_ARCH) - # MemProf unit tests are only run on the host machine. - set(arch ${COMPILER_RT_DEFAULT_TARGET_ARCH}) +append_list_if(COMPILER_RT_HAS_LIBDL -ldl MEMPROF_UNITTEST_LINK_LIBRARIES) - add_executable(MemProfUnitTests - ${MEMPROF_UNITTESTS} - ${COMPILER_RT_GTEST_SOURCE} - ${COMPILER_RT_GMOCK_SOURCE} - ${MEMPROF_SOURCES} +# Adds memprof tests for each architecture. +macro(add_memprof_tests_for_arch arch) + set(MEMPROF_TEST_RUNTIME_OBJECTS $ $ $ $ - $) - set_target_compile_flags(MemProfUnitTests ${MEMPROF_UNITTEST_CFLAGS}) - set_target_link_flags(MemProfUnitTests ${MEMPROF_UNITTEST_LINK_FLAGS}) - target_link_libraries(MemProfUnitTests ${MEMPROF_UNITTEST_LINK_LIBRARIES}) - - if (TARGET cxx-headers OR HAVE_LIBCXX) - add_dependencies(MemProfUnitTests cxx-headers) - endif() + $ + ) + set(MEMPROF_TEST_RUNTIME RTMemProfTest.${arch}) + add_library(${MEMPROF_TEST_RUNTIME} STATIC ${MEMPROF_TEST_RUNTIME_OBJECTS}) + set_target_properties(${MEMPROF_TEST_RUNTIME} PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + FOLDER "Compiler-RT Runtime tests") + set(MEMPROF_TEST_OBJECTS) + generate_compiler_rt_tests(MEMPROF_TEST_OBJECTS + MemProfUnitTests "MemProf-${arch}-UnitTest" ${arch} + RUNTIME ${MEMPROF_TEST_RUNTIME} + DEPS ${MEMPROF_UNITTEST_DEPS} + SOURCES ${MEMPROF_UNITTESTS} ${MEMPROF_SOURCES} ${COMPILER_RT_GTEST_SOURCE} + COMPILE_DEPS ${MEMPROF_UNIT_TEST_HEADERS} + CFLAGS ${MEMPROF_UNITTEST_CFLAGS} + LINK_FLAGS ${MEMPROF_UNITTEST_LINK_FLAGS} ${MEMPROF_UNITTEST_LINK_LIBRARIES}) +endmacro() - set_target_properties(MemProfUnitTests PROPERTIES - RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) +# MemProf unit tests testsuite. +add_custom_target(MemProfUnitTests) +set_target_properties(MemProfUnitTests PROPERTIES FOLDER "Compiler-RT Tests") +if(COMPILER_RT_CAN_EXECUTE_TESTS AND COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST MEMPROF_SUPPORTED_ARCH) + # MemProf unit tests are only run on the host machine. + foreach(arch ${COMPILER_RT_DEFAULT_TARGET_ARCH}) + add_memprof_tests_for_arch(${arch}) + endforeach() endif() diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.def b/compiler-rt/lib/scudo/standalone/allocator_config.def index c50aadad2d637..9691a007eed5c 100644 --- a/compiler-rt/lib/scudo/standalone/allocator_config.def +++ b/compiler-rt/lib/scudo/standalone/allocator_config.def @@ -87,9 +87,14 @@ PRIMARY_REQUIRED(const s32, MaxReleaseToOsIntervalMs) // PRIMARY_OPTIONAL(TYPE, NAME, DEFAULT) // // Indicates support for offsetting the start of a region by a random number of -// pages. Only used with primary64. +// pages. This is only used if `EnableContiguousRegions` is enabled. 
PRIMARY_OPTIONAL(const bool, EnableRandomOffset, false) +// When `EnableContiguousRegions` is true, all regions will be arranged +// adjacently. This will reduce the fragmentation caused by region allocations +// but may require a huge amount of contiguous pages at initialization. +PRIMARY_OPTIONAL(const bool, EnableContiguousRegions, true) + // PRIMARY_OPTIONAL_TYPE(NAME, DEFAULT) // // Use condition variable to shorten the waiting time for the refilling of diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h index abce4bff321cb..61d57976ae43b 100644 --- a/compiler-rt/lib/scudo/standalone/primary64.h +++ b/compiler-rt/lib/scudo/standalone/primary64.h @@ -117,40 +117,30 @@ template <typename Config> class SizeClassAllocator64 { SmallerBlockReleasePageDelta = PagesInGroup * (1 + MinSizeClass / 16U) / 100; - // Reserve the space required for the Primary. - CHECK(ReservedMemory.create(/*Addr=*/0U, PrimarySize, - "scudo:primary_reserve")); - PrimaryBase = ReservedMemory.getBase(); - DCHECK_NE(PrimaryBase, 0U); - u32 Seed; const u64 Time = getMonotonicTimeFast(); if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))) - Seed = static_cast<u32>(Time ^ (PrimaryBase >> 12)); + Seed = static_cast<u32>(Time ^ (reinterpret_cast<uptr>(&Seed) >> 12)); - for (uptr I = 0; I < NumClasses; I++) { - RegionInfo *Region = getRegionInfo(I); + for (uptr I = 0; I < NumClasses; I++) + getRegionInfo(I)->RandState = getRandomU32(&Seed); - // The actual start of a region is offset by a random number of pages - // when PrimaryEnableRandomOffset is set. - Region->RegionBeg = (PrimaryBase + (I << RegionSizeLog)) + - (Config::getEnableRandomOffset() - ? ((getRandomModN(&Seed, 16) + 1) * PageSize) - : 0); - Region->RandState = getRandomU32(&Seed); - // Releasing small blocks is expensive, set a higher threshold to avoid - // frequent page releases. - if (isSmallBlock(getSizeByClassId(I))) - Region->TryReleaseThreshold = PageSize * SmallerBlockReleasePageDelta; - else - Region->TryReleaseThreshold = PageSize; - Region->ReleaseInfo.LastReleaseAtNs = Time; + if (Config::getEnableContiguousRegions()) { + ReservedMemoryT ReservedMemory = {}; + // Reserve the space required for the Primary. + CHECK(ReservedMemory.create(/*Addr=*/0U, RegionSize * NumClasses, + "scudo:primary_reserve")); + const uptr PrimaryBase = ReservedMemory.getBase(); + + for (uptr I = 0; I < NumClasses; I++) { + MemMapT RegionMemMap = ReservedMemory.dispatch( + PrimaryBase + (I << RegionSizeLog), RegionSize); + RegionInfo *Region = getRegionInfo(I); - Region->MemMapInfo.MemMap = ReservedMemory.dispatch( - PrimaryBase + (I << RegionSizeLog), RegionSize); - CHECK(Region->MemMapInfo.MemMap.isAllocated()); + initRegion(Region, I, RegionMemMap, Config::getEnableRandomOffset()); + } + shuffle(RegionInfoArray, NumClasses, &Seed); } - shuffle(RegionInfoArray, NumClasses, &Seed); // The binding should be done after region shuffling so that it won't bind // the FLLock from the wrong region.
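The contiguous-region arithmetic used above can be made concrete with a small standalone sketch; all names and constants here are illustrative, not Scudo's real API:

```cpp
// Illustrative sketch: with EnableContiguousRegions, one reservation of
// NumClasses * RegionSize bytes is carved into per-class regions, so region
// I begins exactly where region I-1 ends.
#include <cassert>
#include <cstdint>

constexpr unsigned RegionSizeLog = 28;  // assumed example value
constexpr uint64_t RegionSize = 1ULL << RegionSizeLog;
constexpr unsigned NumClasses = 32;     // assumed example value

uint64_t regionBeg(uint64_t PrimaryBase, unsigned ClassId) {
  // Adjacent regions avoid address-space fragmentation between size classes,
  // at the cost of one large upfront contiguous reservation.
  return PrimaryBase + (static_cast<uint64_t>(ClassId) << RegionSizeLog);
}

int main() {
  const uint64_t Base = 0x400000000000ULL;  // pretend reservation base
  assert(regionBeg(Base, 1) - regionBeg(Base, 0) == RegionSize);
  assert(regionBeg(Base, NumClasses - 1) ==
         Base + (NumClasses - 1) * RegionSize);
  return 0;
}
```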
@@ -160,14 +150,17 @@ template class SizeClassAllocator64 { setOption(Option::ReleaseInterval, static_cast(ReleaseToOsInterval)); } - void unmapTestOnly() NO_THREAD_SAFETY_ANALYSIS { + void unmapTestOnly() { for (uptr I = 0; I < NumClasses; I++) { RegionInfo *Region = getRegionInfo(I); + { + ScopedLock ML(Region->MMLock); + MemMapT MemMap = Region->MemMapInfo.MemMap; + if (MemMap.isAllocated()) + MemMap.unmap(MemMap.getBase(), MemMap.getCapacity()); + } *Region = {}; } - if (PrimaryBase) - ReservedMemory.release(); - PrimaryBase = 0U; } // When all blocks are freed, it has to be the same size as `AllocatedUser`. @@ -251,9 +244,10 @@ template class SizeClassAllocator64 { } const bool RegionIsExhausted = Region->Exhausted; - if (!RegionIsExhausted) + if (!RegionIsExhausted) { PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray, MaxBlockCount); + } ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted; break; } @@ -514,7 +508,6 @@ template class SizeClassAllocator64 { private: static const uptr RegionSize = 1UL << RegionSizeLog; static const uptr NumClasses = SizeClassMap::NumClasses; - static const uptr PrimarySize = RegionSize * NumClasses; static const uptr MapSizeIncrement = Config::getMapSizeIncrement(); // Fill at most this number of batches from the newly map'd memory. @@ -570,9 +563,14 @@ template class SizeClassAllocator64 { } uptr getRegionBaseByClassId(uptr ClassId) { - return roundDown(getRegionInfo(ClassId)->RegionBeg - PrimaryBase, - RegionSize) + - PrimaryBase; + RegionInfo *Region = getRegionInfo(ClassId); + Region->MMLock.assertHeld(); + + if (!Config::getEnableContiguousRegions() && + !Region->MemMapInfo.MemMap.isAllocated()) { + return 0U; + } + return Region->MemMapInfo.MemMap.getBase(); } static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) { @@ -602,6 +600,30 @@ template class SizeClassAllocator64 { return BlockSize > PageSize; } + ALWAYS_INLINE void initRegion(RegionInfo *Region, uptr ClassId, + MemMapT MemMap, bool EnableRandomOffset) + REQUIRES(Region->MMLock) { + DCHECK(!Region->MemMapInfo.MemMap.isAllocated()); + DCHECK(MemMap.isAllocated()); + + const uptr PageSize = getPageSizeCached(); + + Region->MemMapInfo.MemMap = MemMap; + + Region->RegionBeg = MemMap.getBase(); + if (EnableRandomOffset) { + Region->RegionBeg += + (getRandomModN(&Region->RandState, 16) + 1) * PageSize; + } + + // Releasing small blocks is expensive, set a higher threshold to avoid + // frequent page releases. 
+ if (isSmallBlock(getSizeByClassId(ClassId))) + Region->TryReleaseThreshold = PageSize * SmallerBlockReleasePageDelta; + else + Region->TryReleaseThreshold = PageSize; + } + void pushBatchClassBlocks(RegionInfo *Region, CompactPtrT *Array, u32 Size) REQUIRES(Region->FLLock) { DCHECK_EQ(Region, getRegionInfo(SizeClassMap::BatchClassId)); @@ -989,9 +1011,26 @@ template class SizeClassAllocator64 { CompactPtrT *ToArray, const u16 MaxBlockCount) REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) { + if (!Config::getEnableContiguousRegions() && + !Region->MemMapInfo.MemMap.isAllocated()) { + ReservedMemoryT ReservedMemory; + if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, RegionSize, + "scudo:primary_reserve", + MAP_ALLOWNOMEM))) { + Printf("Can't reserve pages for size class %zu.\n", + getSizeByClassId(ClassId)); + Region->Exhausted = true; + return 0U; + } + initRegion(Region, ClassId, + ReservedMemory.dispatch(ReservedMemory.getBase(), + ReservedMemory.getCapacity()), + /*EnableRandomOffset=*/false); + } + + DCHECK(Region->MemMapInfo.MemMap.isAllocated()); const uptr Size = getSizeByClassId(ClassId); const u16 MaxCount = CacheT::getMaxCached(Size); - const uptr RegionBeg = Region->RegionBeg; const uptr MappedUser = Region->MemMapInfo.MappedUser; const uptr TotalUserBytes = @@ -1683,10 +1722,6 @@ template class SizeClassAllocator64 { Region->FLLockCV.notifyAll(Region->FLLock); } - // TODO: `PrimaryBase` can be obtained from ReservedMemory. This needs to be - // deprecated. - uptr PrimaryBase = 0; - ReservedMemoryT ReservedMemory = {}; // The minimum size of pushed blocks that we will try to release the pages in // that size class. uptr SmallerBlockReleasePageDelta = 0; diff --git a/compiler-rt/lib/scudo/standalone/report_linux.cpp b/compiler-rt/lib/scudo/standalone/report_linux.cpp index dfddef3324bd6..432f6a0169646 100644 --- a/compiler-rt/lib/scudo/standalone/report_linux.cpp +++ b/compiler-rt/lib/scudo/standalone/report_linux.cpp @@ -25,10 +25,10 @@ namespace scudo { // Fatal internal map() error (potentially OOM related). 
void NORETURN reportMapError(uptr SizeIfOOM) { ScopedString Error; - Error.append("Scudo ERROR: internal map failure"); - if (SizeIfOOM) { - Error.append(" (NO MEMORY) requesting %zuKB", SizeIfOOM >> 10); - } + Error.append("Scudo ERROR: internal map failure (error desc=%s)", + strerror(errno)); + if (SizeIfOOM) + Error.append(" requesting %zuKB", SizeIfOOM >> 10); Error.append("\n"); reportRawError(Error.data()); } diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp index 683ce3e596596..1cf3bb51db0e7 100644 --- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp @@ -90,6 +90,7 @@ template struct TestConfig3 { static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX; typedef scudo::uptr CompactPtrT; static const scudo::uptr CompactPtrScale = 0; + static const bool EnableContiguousRegions = false; static const bool EnableRandomOffset = true; static const scudo::uptr MapSizeIncrement = 1UL << 18; }; diff --git a/compiler-rt/lib/scudo/standalone/tests/report_test.cpp b/compiler-rt/lib/scudo/standalone/tests/report_test.cpp index 92f1ee813036c..6c46243053d9e 100644 --- a/compiler-rt/lib/scudo/standalone/tests/report_test.cpp +++ b/compiler-rt/lib/scudo/standalone/tests/report_test.cpp @@ -53,3 +53,28 @@ TEST(ScudoReportDeathTest, CSpecific) { EXPECT_DEATH(scudo::reportInvalidAlignedAllocAlignment(123, 456), "Scudo ERROR.*123.*456"); } + +#if SCUDO_LINUX || SCUDO_TRUSTY || SCUDO_ANDROID +#include "report_linux.h" + +#include +#include + +TEST(ScudoReportDeathTest, Linux) { + errno = ENOMEM; + EXPECT_DEATH(scudo::reportMapError(), + "Scudo ERROR:.*internal map failure \\(error desc=.*\\)"); + errno = ENOMEM; + EXPECT_DEATH(scudo::reportMapError(1024U), + "Scudo ERROR:.*internal map failure \\(error desc=.*\\) " + "requesting 1KB"); + errno = ENOMEM; + EXPECT_DEATH(scudo::reportUnmapError(0x1000U, 100U), + "Scudo ERROR:.*internal unmap failure \\(error desc=.*\\) Addr " + "0x1000 Size 100"); + errno = ENOMEM; + EXPECT_DEATH(scudo::reportProtectError(0x1000U, 100U, PROT_READ), + "Scudo ERROR:.*internal protect failure \\(error desc=.*\\) " + "Addr 0x1000 Size 100 Prot 1"); +} +#endif diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp index 0f16507d5d88f..27d01653f088d 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp +++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp @@ -555,13 +555,11 @@ static void handleImplicitConversion(ImplicitConversionData *Data, ReportOptions Opts, ValueHandle Src, ValueHandle Dst) { SourceLocation Loc = Data->Loc.acquire(); - ErrorType ET = ErrorType::GenericUB; - const TypeDescriptor &SrcTy = Data->FromType; const TypeDescriptor &DstTy = Data->ToType; - bool SrcSigned = SrcTy.isSignedIntegerTy(); bool DstSigned = DstTy.isSignedIntegerTy(); + ErrorType ET = ErrorType::GenericUB; switch (Data->Kind) { case ICCK_IntegerTruncation: { // Legacy, no longer used. @@ -594,14 +592,23 @@ static void handleImplicitConversion(ImplicitConversionData *Data, ScopedReport R(Opts, Loc, ET); + // In the case we have a bitfield, we want to explicitly say so in the + // error message. // FIXME: is it possible to dump the values as hex with fixed width? - - Diag(Loc, DL_Error, ET, - "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " - "type %4 changed the value to %5 (%6-bit, %7signed)") - << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() - << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) - << DstTy.getIntegerBitWidth() << (DstSigned ? "" : "un"); + if (Data->BitfieldBits) + Diag(Loc, DL_Error, ET, + "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " + "type %4 changed the value to %5 (%6-bit bitfield, %7signed)") + << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() + << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) + << Data->BitfieldBits << (DstSigned ? "" : "un"); + else + Diag(Loc, DL_Error, ET, + "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " + "type %4 changed the value to %5 (%6-bit, %7signed)") + << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() + << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) + << DstTy.getIntegerBitWidth() << (DstSigned ? "" : "un"); } void __ubsan::__ubsan_handle_implicit_conversion(ImplicitConversionData *Data, diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.h b/compiler-rt/lib/ubsan/ubsan_handlers.h index 3bd5046de3d7d..bae661a56833d 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.h +++ b/compiler-rt/lib/ubsan/ubsan_handlers.h @@ -147,6 +147,7 @@ struct ImplicitConversionData { const TypeDescriptor &FromType; const TypeDescriptor &ToType; /* ImplicitConversionCheckKind */ unsigned char Kind; + unsigned int BitfieldBits; }; /// \brief Implicit conversion that changed the value. diff --git a/compiler-rt/test/fuzzer/lit.cfg.py b/compiler-rt/test/fuzzer/lit.cfg.py index 4e203236b1670..9084254b3b15c 100644 --- a/compiler-rt/test/fuzzer/lit.cfg.py +++ b/compiler-rt/test/fuzzer/lit.cfg.py @@ -98,6 +98,11 @@ def generate_compiler_cmd(is_cpp=True, fuzzer_enabled=True, msan_enabled=False): if "windows" in config.available_features: extra_cmd = extra_cmd + " -D_DISABLE_VECTOR_ANNOTATION -D_DISABLE_STRING_ANNOTATION" + if "darwin" in config.available_features and getattr( + config, "darwin_linker_version", None + ): + extra_cmd = extra_cmd + " -mlinker-version=" + config.darwin_linker_version + return " ".join( [ compiler_cmd, diff --git a/compiler-rt/test/ubsan/TestCases/ImplicitConversion/bitfield-conversion.c b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/bitfield-conversion.c new file mode 100644 index 0000000000000..3b359f071a619 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/ImplicitConversion/bitfield-conversion.c @@ -0,0 +1,649 @@ +// RUN: %clang -x c -fsanitize=implicit-bitfield-conversion -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-bitfield-conversion -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-bitfield-conversion -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-bitfield-conversion -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clang -x c -fsanitize=implicit-conversion -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK + +// RUN: %clangxx -x c++ -fsanitize=implicit-bitfield-conversion -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clangxx -x c++ -fsanitize=implicit-bitfield-conversion -O1 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clangxx -x c++ -fsanitize=implicit-bitfield-conversion -O2 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clangxx -x c++ -fsanitize=implicit-bitfield-conversion -O3 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK +// RUN: %clangxx -x c++
-fsanitize=implicit-conversion -O0 %s -o %t && %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK + +#include +#include + +#define UINT4_MIN 0 +#define UINT4_MAX (1 << 4) - 1 +#define UINT5_MIN 0 +#define UINT5_MAX (1 << 5) - 1 +#define INT7_MIN -(1 << 6) +#define INT7_MAX (1 << 6) - 1 + +typedef struct _X { + uint8_t a : 4; + uint32_t b : 5; + int8_t c : 7; + int32_t d : 16; + uint8_t e : 8; + uint16_t f : 16; + uint32_t g : 32; + int8_t h : 8; + int16_t i : 16; + int32_t j : 32; + uint32_t k : 1; + int32_t l : 1; + bool m : 1; +} X; + +void test_a() { + X x; + uint32_t min = UINT4_MIN; + uint32_t max = UINT4_MAX; + + uint8_t v8 = max + 1; + uint16_t v16 = (UINT8_MAX + 1) + (max + 1); + uint32_t v32 = (UINT8_MAX + 1) + (max + 1); + + // Assignment + x.a = v8; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint8_t' (aka 'unsigned char') of value 16 (8-bit, unsigned) to type 'uint8_t' (aka 'unsigned char') changed the value to 0 (4-bit bitfield, unsigned) + x.a = v16; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint16_t' (aka 'unsigned short') of value 272 (16-bit, unsigned) to type 'uint8_t' (aka 'unsigned char') changed the value to 0 (4-bit bitfield, unsigned) + x.a = v32; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 272 (32-bit, unsigned) to type 'uint8_t' (aka 'unsigned char') changed the value to 0 (4-bit bitfield, unsigned) + + // PrePostIncDec + x.a = min; + x.a--; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'uint8_t' (aka 'unsigned char') changed the value to 15 (4-bit bitfield, unsigned) + x.a = min; + --x.a; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'uint8_t' (aka 'unsigned char') changed the value to 15 (4-bit bitfield, unsigned) + + x.a = max; + x.a++; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int' of value 16 (32-bit, signed) to type 'uint8_t' (aka 'unsigned char') changed the value to 0 (4-bit bitfield, unsigned) + x.a = max; + ++x.a; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 16 (32-bit, signed) to type 'uint8_t' (aka 'unsigned char') changed the value to 0 (4-bit bitfield, unsigned) + + x.a = min + 1; + x.a++; + x.a = min + 1; + ++x.a; + + x.a = min + 1; + x.a--; + x.a = min + 1; + --x.a; + + x.a = max - 1; + x.a++; + x.a = max - 1; + ++x.a; + + x.a = max - 1; + x.a--; + x.a = max - 1; + --x.a; + + // Compound assignment + x.a = 0; + x.a += max; + x.a = 0; + x.a += (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 16 (32-bit, unsigned) to type 'uint8_t' (aka 'unsigned char') changed the value to 0 (4-bit bitfield, unsigned) + + x.a = max; + x.a -= max; + x.a = max; + x.a -= (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 4294967295 (32-bit, unsigned) to type 'uint8_t' (aka 'unsigned char') changed the value to 15 (4-bit bitfield, unsigned) + + x.a = 1; + x.a *= max; + x.a = 1; + x.a *= (max + 1); + // CHECK: 
{{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 16 (32-bit, unsigned) to type 'uint8_t' (aka 'unsigned char') changed the value to 0 (4-bit bitfield, unsigned) +} + +void test_b() { + X x; + uint32_t min = UINT5_MIN; + uint32_t max = UINT5_MAX; + + uint8_t v8 = max + 1; + uint16_t v16 = max + 1; + uint32_t v32 = max + 1; + + // Assignment + x.b = v8; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint8_t' (aka 'unsigned char') of value 32 (8-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 0 (5-bit bitfield, unsigned) + x.b = v16; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint16_t' (aka 'unsigned short') of value 32 (16-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 0 (5-bit bitfield, unsigned) + x.b = v32; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 32 (32-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 0 (5-bit bitfield, unsigned) + + // PrePostIncDec + x.b = min; + x.b--; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 4294967295 (32-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 31 (5-bit bitfield, unsigned) + x.b = min; + --x.b; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 4294967295 (32-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 31 (5-bit bitfield, unsigned) + + x.b = max; + x.b++; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 32 (32-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 0 (5-bit bitfield, unsigned) + x.b = max; + ++x.b; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 32 (32-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 0 (5-bit bitfield, unsigned) + + x.b = min + 1; + x.b++; + x.b = min + 1; + ++x.b; + + x.b = min + 1; + x.b--; + x.b = min + 1; + --x.b; + + x.b = max - 1; + x.b++; + x.b = max - 1; + ++x.b; + + x.b = max - 1; + x.b--; + x.b = max - 1; + --x.b; + + // Compound assignment + x.b = 0; + x.b += max; + x.b = 0; + x.b += (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 32 (32-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 0 (5-bit bitfield, unsigned) + + x.b = max; + x.b -= max; + x.b = max; + x.b -= (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 4294967295 (32-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 31 (5-bit bitfield, unsigned) + + x.b = 1; + x.b *= max; + x.b = 1; + x.b *= (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 32 (32-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 0 (5-bit bitfield, unsigned) +} + 
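The "changed the value to ..." numbers that the CHECK lines above and below expect follow directly from truncating the source value to the bitfield width and, for signed fields, sign-extending the result. A small illustrative helper, not part of this test, that reproduces them:

```cpp
// Sketch: compute the value an N-bit two's-complement bitfield stores after
// an out-of-range assignment, matching the CHECK lines in this test.
#include <cassert>
#include <cstdint>

int64_t storedBitfieldValue(uint64_t V, unsigned Bits, bool IsSigned) {
  const uint64_t Mask = (Bits == 64) ? ~0ULL : ((1ULL << Bits) - 1);
  uint64_t T = V & Mask;                      // truncate to the field width
  if (IsSigned && Bits < 64 && (T >> (Bits - 1)) != 0)
    T |= ~Mask;                               // sign bit set: sign-extend
  return static_cast<int64_t>(T);
}

int main() {
  assert(storedBitfieldValue(16, 4, false) == 0);   // test_a: x.a = 16 -> 0
  assert(storedBitfieldValue(32, 5, false) == 0);   // test_b: x.b = 32 -> 0
  assert(storedBitfieldValue(64, 7, true) == -64);  // test_c: x.c = 64 -> -64
  return 0;
}
```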
+void test_c() { + X x; + int32_t min = INT7_MIN; + int32_t max = INT7_MAX; + + uint8_t v8 = max + 1; + uint16_t v16 = (UINT8_MAX + 1) + (max + 1); + uint32_t v32 = (UINT8_MAX + 1) + (max + 1); + + // Assignment + x.c = v8; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint8_t' (aka 'unsigned char') of value 64 (8-bit, unsigned) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to -64 (7-bit bitfield, signed) + x.c = v16; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint16_t' (aka 'unsigned short') of value 320 (16-bit, unsigned) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to -64 (7-bit bitfield, signed) + x.c = v32; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 320 (32-bit, unsigned) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to -64 (7-bit bitfield, signed) + + // PrePostIncDec + x.c = min; + x.c--; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int' of value -65 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to 63 (7-bit bitfield, signed) + x.c = min; + --x.c; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -65 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to 63 (7-bit bitfield, signed) + + x.c = max; + x.c++; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int' of value 64 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to -64 (7-bit bitfield, signed) + x.c = max; + ++x.c; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 64 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to -64 (7-bit bitfield, signed) + + x.c = min + 1; + x.c++; + x.c = min + 1; + ++x.c; + + x.c = min + 1; + x.c--; + x.c = min + 1; + --x.c; + + x.c = max - 1; + x.c++; + x.c = max - 1; + ++x.c; + + x.c = max - 1; + x.c--; + x.c = max - 1; + --x.c; + + // Compound assignment + x.c = 0; + x.c += max; + x.c = 0; + x.c += (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int' of value 64 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to -64 (7-bit bitfield, signed) + + x.c = 0; + x.c -= (-min); + x.c = 0; + x.c -= (-min + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int' of value -65 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to 63 (7-bit bitfield, signed) + + x.c = 1; + x.c *= max; + x.c = 1; + x.c *= (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int' of value 64 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to -64 (7-bit bitfield, signed) +} + +void test_d() { + X x; + int32_t min = INT16_MIN; + int32_t max = INT16_MAX; + + uint32_t v32 = max + 1; + + // Assignment + x.d = v32; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 32768 (32-bit, unsigned) to type 'int32_t' (aka 'int') changed the value to -32768 (16-bit bitfield, signed) + + 
// PrePostIncDec + x.d = min; + x.d--; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int32_t' (aka 'int') of value -32769 (32-bit, signed) to type 'int32_t' (aka 'int') changed the value to 32767 (16-bit bitfield, signed) + x.d = min; + --x.d; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int32_t' (aka 'int') of value -32769 (32-bit, signed) to type 'int32_t' (aka 'int') changed the value to 32767 (16-bit bitfield, signed) + + x.d = max; + x.d++; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int32_t' (aka 'int') of value 32768 (32-bit, signed) to type 'int32_t' (aka 'int') changed the value to -32768 (16-bit bitfield, signed) + x.d = max; + ++x.d; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int32_t' (aka 'int') of value 32768 (32-bit, signed) to type 'int32_t' (aka 'int') changed the value to -32768 (16-bit bitfield, signed) + + x.d = min + 1; + x.d++; + x.d = min + 1; + ++x.d; + + x.d = min + 1; + x.d--; + x.d = min + 1; + --x.d; + + x.d = max - 1; + x.d++; + x.d = max - 1; + ++x.d; + + x.d = max - 1; + x.d--; + x.d = max - 1; + --x.d; + + // Compound assignment + x.d = 0; + x.d += max; + x.d = 0; + x.d += (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int' of value 32768 (32-bit, signed) to type 'int32_t' (aka 'int') changed the value to -32768 (16-bit bitfield, signed) + + x.d = 0; + x.d -= (-min); + x.d = 0; + x.d -= (-min + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int' of value -32769 (32-bit, signed) to type 'int32_t' (aka 'int') changed the value to 32767 (16-bit bitfield, signed) + + x.d = 1; + x.d *= max; + x.d = 1; + x.d *= (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int' of value 32768 (32-bit, signed) to type 'int32_t' (aka 'int') changed the value to -32768 (16-bit bitfield, signed) +} + +void test_e() { + X x; + uint32_t min = 0; + uint32_t max = UINT8_MAX; + + uint16_t v16 = max + 1; + uint32_t v32 = max + 1; + + // Assignment + x.e = v16; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint16_t' (aka 'unsigned short') of value 256 (16-bit, unsigned) to type 'uint8_t' (aka 'unsigned char') changed the value to 0 (8-bit bitfield, unsigned) + x.e = v32; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 256 (32-bit, unsigned) to type 'uint8_t' (aka 'unsigned char') changed the value to 0 (8-bit bitfield, unsigned) + + // PrePostIncDec + x.e = min; + x.e--; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'uint8_t' (aka 'unsigned char') changed the value to 255 (8-bit bitfield, unsigned) + x.e = min; + --x.e; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'uint8_t' (aka 'unsigned char') changed the value to 255 (8-bit bitfield, unsigned) + x.e = min + 1; + x.e--; + + x.e = max; + x.e++; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'uint8_t' 
(aka 'unsigned char') changed the value to 0 (8-bit bitfield, unsigned) + x.e = max; + ++x.e; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 256 (32-bit, signed) to type 'uint8_t' (aka 'unsigned char') changed the value to 0 (8-bit bitfield, unsigned) + x.e = max - 1; + x.e++; + + // Compound assignment + x.e = 0; + x.e += max; + x.e = 0; + x.e += (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 256 (32-bit, unsigned) to type 'uint8_t' (aka 'unsigned char') changed the value to 0 (8-bit bitfield, unsigned) + + x.e = max; + x.e -= max; + x.e = max; + x.e -= (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 4294967295 (32-bit, unsigned) to type 'uint8_t' (aka 'unsigned char') changed the value to 255 (8-bit bitfield, unsigned) +} + +void test_f() { + X x; + uint32_t min = 0; + uint32_t max = UINT16_MAX; + + uint32_t v32 = max + 1; + + // Assignment + x.f = v32; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 65536 (32-bit, unsigned) to type 'uint16_t' (aka 'unsigned short') changed the value to 0 (16-bit bitfield, unsigned) + + // PrePostIncDec + x.f = min; + x.f--; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'uint16_t' (aka 'unsigned short') changed the value to 65535 (16-bit bitfield, unsigned) + x.f = min; + --x.f; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -1 (32-bit, signed) to type 'uint16_t' (aka 'unsigned short') changed the value to 65535 (16-bit bitfield, unsigned) + x.f = min + 1; + x.f--; + + x.f = max; + x.f++; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int' of value 65536 (32-bit, signed) to type 'uint16_t' (aka 'unsigned short') changed the value to 0 (16-bit bitfield, unsigned) + x.f = max; + ++x.f; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 65536 (32-bit, signed) to type 'uint16_t' (aka 'unsigned short') changed the value to 0 (16-bit bitfield, unsigned) + x.f = max - 1; + x.f++; + + // Compound assignment + x.f = 0; + x.f += max; + x.f = 0; + x.f += (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 65536 (32-bit, unsigned) to type 'uint16_t' (aka 'unsigned short') changed the value to 0 (16-bit bitfield, unsigned) + + x.f = max; + x.f -= max; + x.f = max; + x.f -= (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 4294967295 (32-bit, unsigned) to type 'uint16_t' (aka 'unsigned short') changed the value to 65535 (16-bit bitfield, unsigned) +} + +void test_g() { + X x; + uint64_t min = 0; + uint64_t max = UINT32_MAX; + + uint64_t v64 = max + 1; + + // Assignment + x.g = v64; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint64_t' (aka 'unsigned long{{( long)?}}') of value 4294967296 (64-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 0 
(32-bit bitfield, unsigned) + + // PrePostIncDec + x.g = min; + x.g--; + x.g = min; + --x.g; + x.g = min + 1; + x.g--; + + x.g = max; + x.g++; + x.g = max; + ++x.g; + x.g = max - 1; + x.g++; + + // Compound assignment + x.g = 0; + x.g += max; + x.g = 0; + x.g += (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint64_t' (aka 'unsigned long{{( long)?}}') of value 4294967296 (64-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 0 (32-bit bitfield, unsigned) + + x.g = max; + x.g -= max; + x.g = max; + x.g -= (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint64_t' (aka 'unsigned long{{( long)?}}') of value 18446744073709551615 (64-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 4294967295 (32-bit bitfield, unsigned) +} + +void test_h() { + X x; + int32_t min = INT8_MIN; + int32_t max = INT8_MAX; + + int16_t v16 = max + 1; + int32_t v32 = max + 1; + + // Assignment + x.h = v16; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int16_t' (aka 'short') of value 128 (16-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to -128 (8-bit bitfield, signed) + x.h = v32; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int32_t' (aka 'int') of value 128 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to -128 (8-bit bitfield, signed) + + // PrePostIncDec + x.h = min; + x.h--; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to 127 (8-bit bitfield, signed) + x.h = min; + --x.h; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to 127 (8-bit bitfield, signed) + x.h = min + 1; + x.h--; + + x.h = max; + x.h++; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to -128 (8-bit bitfield, signed) + x.h = max; + ++x.h; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to -128 (8-bit bitfield, signed) + x.h = max - 1; + x.h++; + + // Compound assignment + x.h = 0; + x.h += max; + x.h = 0; + x.h += (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int' of value 128 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to -128 (8-bit bitfield, signed) + + x.h = 0; + x.h -= (-min); + x.h = 0; + x.h -= (-min + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int' of value -129 (32-bit, signed) to type 'int8_t' (aka '{{(signed )?}}char') changed the value to 127 (8-bit bitfield, signed) +} + +void test_i() { + X x; + int32_t min = INT16_MIN; + int32_t max = INT16_MAX; + + int32_t v32 = max + 1; + + // Assignment + x.i = v32; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int32_t' (aka 'int') of value 32768 
(32-bit, signed) to type 'int16_t' (aka 'short') changed the value to -32768 (16-bit bitfield, signed) + + // PrePostIncDec + x.i = min; + x.i--; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int' of value -32769 (32-bit, signed) to type 'int16_t' (aka 'short') changed the value to 32767 (16-bit bitfield, signed) + x.i = min; + --x.i; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value -32769 (32-bit, signed) to type 'int16_t' (aka 'short') changed the value to 32767 (16-bit bitfield, signed) + x.i = min + 1; + x.i--; + + x.i = max; + x.i++; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int' of value 32768 (32-bit, signed) to type 'int16_t' (aka 'short') changed the value to -32768 (16-bit bitfield, signed) + x.i = max; + ++x.i; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:3: runtime error: implicit conversion from type 'int' of value 32768 (32-bit, signed) to type 'int16_t' (aka 'short') changed the value to -32768 (16-bit bitfield, signed) + x.i = max - 1; + x.i++; + + // Compound assignment + x.i = 0; + x.i += max; + x.i = 0; + x.i += (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int' of value 32768 (32-bit, signed) to type 'int16_t' (aka 'short') changed the value to -32768 (16-bit bitfield, signed) + + x.i = 0; + x.i -= (-min); + x.i = 0; + x.i -= (-min + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int' of value -32769 (32-bit, signed) to type 'int16_t' (aka 'short') changed the value to 32767 (16-bit bitfield, signed) +} + +void test_j() { + X x; + int64_t min = INT32_MIN; + int64_t max = INT32_MAX; + + int64_t v64 = max + 1; + + // Assignment + x.j = v64; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int64_t' (aka 'long{{( long)?}}') of value 2147483648 (64-bit, signed) to type 'int32_t' (aka 'int') changed the value to -2147483648 (32-bit bitfield, signed) + + // PrePostIncDec + x.j = min; + x.j--; + x.j = min; + --x.j; + x.j = min + 1; + x.j--; + + x.j = max; + x.j++; + x.j = max; + ++x.j; + x.j = max - 1; + x.j++; + + // Compound assignment + x.j = 0; + x.j += max; + x.j = 0; + x.j += (max + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int64_t' (aka 'long{{( long)?}}') of value 2147483648 (64-bit, signed) to type 'int32_t' (aka 'int') changed the value to -2147483648 (32-bit bitfield, signed) + + x.j = 0; + x.j -= (-min); + x.j = 0; + x.j -= (-min + 1); + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int64_t' (aka 'long{{( long)?}}') of value -2147483649 (64-bit, signed) to type 'int32_t' (aka 'int') changed the value to 2147483647 (32-bit bitfield, signed) +} + +void test_k_l() { + X x; + int32_t one = 1; + int32_t neg_one = -1; + + // k + uint8_t v8 = 2; + x.k = v8; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint8_t' (aka 'unsigned char') of value 2 (8-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 0 (1-bit bitfield, unsigned) + x.k = one; + x.k = neg_one; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int32_t' (aka 'int') of value -1 (32-bit, signed) to type 'uint32_t' 
(aka 'unsigned int') changed the value to 1 (1-bit bitfield, unsigned) + + x.k = 0; + x.k--; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 4294967295 (32-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 1 (1-bit bitfield, unsigned) + x.k = 1; + x.k--; + + x.k = 1; + x.k++; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'uint32_t' (aka 'unsigned int') of value 2 (32-bit, unsigned) to type 'uint32_t' (aka 'unsigned int') changed the value to 0 (1-bit bitfield, unsigned) + x.k = 0; + x.k++; + + // l + x.l = v8; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'uint8_t' (aka 'unsigned char') of value 2 (8-bit, unsigned) to type 'int32_t' (aka 'int') changed the value to 0 (1-bit bitfield, signed) + x.l = one; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:7: runtime error: implicit conversion from type 'int32_t' (aka 'int') of value 1 (32-bit, signed) to type 'int32_t' (aka 'int') changed the value to -1 (1-bit bitfield, signed) + x.l = neg_one; + + x.l = 0; + x.l--; + x.l = -1; + x.l--; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int32_t' (aka 'int') of value -2 (32-bit, signed) to type 'int32_t' (aka 'int') changed the value to 0 (1-bit bitfield, signed) + + x.l = 0; + x.l++; + // CHECK: {{.*}}bitfield-conversion.c:[[@LINE-1]]:6: runtime error: implicit conversion from type 'int32_t' (aka 'int') of value 1 (32-bit, signed) to type 'int32_t' (aka 'int') changed the value to -1 (1-bit bitfield, signed) + x.l = -1; + x.l++; +} + +void test_m() { + X x; + + uint8_t v8 = 2; + x.m = v8; +} + +int main() { + test_a(); + test_b(); + test_c(); + test_d(); + test_e(); + test_f(); + test_g(); + test_h(); + test_i(); + test_j(); + test_k_l(); + test_m(); + return 0; +} diff --git a/flang/.gitignore b/flang/.gitignore index 4da4ee1178ba2..508e70c1e1eaf 100644 --- a/flang/.gitignore +++ b/flang/.gitignore @@ -5,7 +5,6 @@ build root tags TAGS -*.o .nfs* *.sw? *~ diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index 71141e5efac48..c8e75024823f2 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -81,12 +81,13 @@ if (FLANG_STANDALONE_BUILD) mark_as_advanced(LLVM_ENABLE_ASSERTIONS) endif() - # We need a pre-built/installed version of LLVM. - find_package(LLVM REQUIRED HINTS "${LLVM_CMAKE_DIR}") # If the user specifies a relative path to LLVM_DIR, the calls to include # LLVM modules fail. Append the absolute path to LLVM_DIR instead. - get_filename_component(LLVM_DIR_ABSOLUTE ${LLVM_DIR} REALPATH) + get_filename_component(LLVM_DIR_ABSOLUTE ${LLVM_DIR} + REALPATH BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}) list(APPEND CMAKE_MODULE_PATH ${LLVM_DIR_ABSOLUTE}) + # We need a pre-built/installed version of LLVM. 
+ find_package(LLVM REQUIRED HINTS "${LLVM_DIR_ABSOLUTE}") # Users might specify a path to CLANG_DIR that's: # * a full path, or @@ -97,7 +98,7 @@ if (FLANG_STANDALONE_BUILD) CLANG_DIR_ABSOLUTE ${CLANG_DIR} REALPATH - ${CMAKE_CURRENT_SOURCE_DIR}) + BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}) list(APPEND CMAKE_MODULE_PATH ${CLANG_DIR_ABSOLUTE}) # TODO: Remove when libclangDriver is lifted out of Clang @@ -124,13 +125,14 @@ if (FLANG_STANDALONE_BUILD) include(AddClang) include(TableGen) - find_package(MLIR REQUIRED CONFIG) - # Use SYSTEM for the same reasons as for LLVM includes - include_directories(SYSTEM ${MLIR_INCLUDE_DIRS}) # If the user specifies a relative path to MLIR_DIR, the calls to include # MLIR modules fail. Append the absolute path to MLIR_DIR instead. - get_filename_component(MLIR_DIR_ABSOLUTE ${MLIR_DIR} REALPATH) + get_filename_component(MLIR_DIR_ABSOLUTE ${MLIR_DIR} + REALPATH BASE_DIR ${CMAKE_CURRENT_BINARY_DIR}) list(APPEND CMAKE_MODULE_PATH ${MLIR_DIR_ABSOLUTE}) + find_package(MLIR REQUIRED CONFIG HINTS ${MLIR_DIR_ABSOLUTE}) + # Use SYSTEM for the same reasons as for LLVM includes + include_directories(SYSTEM ${MLIR_INCLUDE_DIRS}) include(AddMLIR) find_program(MLIR_TABLEGEN_EXE "mlir-tblgen" ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) diff --git a/flang/cmake/modules/AddFlangOffloadRuntime.cmake b/flang/cmake/modules/AddFlangOffloadRuntime.cmake new file mode 100644 index 0000000000000..6fb6213e90fc4 --- /dev/null +++ b/flang/cmake/modules/AddFlangOffloadRuntime.cmake @@ -0,0 +1,132 @@ +option(FLANG_EXPERIMENTAL_CUDA_RUNTIME + "Compile Fortran runtime as CUDA sources (experimental)" OFF + ) + +set(FLANG_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation") + +set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING + "Compile Fortran runtime as OpenMP target offload sources (experimental). Valid options are 'off', 'host_device', 'nohost'") + +set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING + "List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')") + +macro(enable_cuda_compilation files) + if (FLANG_EXPERIMENTAL_CUDA_RUNTIME) + if (BUILD_SHARED_LIBS) + message(FATAL_ERROR + "BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime" + ) + endif() + + enable_language(CUDA) + + # TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION + # work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION. + set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) + + # Treat all supported sources as CUDA files. + set_source_files_properties(${files} PROPERTIES LANGUAGE CUDA) + set(CUDA_COMPILE_OPTIONS) + if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang") + # Allow varargs. + set(CUDA_COMPILE_OPTIONS + -Xclang -fcuda-allow-variadic-functions + ) + endif() + if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA") + set(CUDA_COMPILE_OPTIONS + --expt-relaxed-constexpr + # Disable these warnings: + # 'long double' is treated as 'double' in device code + -Xcudafe --diag_suppress=20208 + -Xcudafe --display_error_number + ) + endif() + set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS + "${CUDA_COMPILE_OPTIONS}" + ) + + if (EXISTS "${FLANG_LIBCUDACXX_PATH}/include") + # When using libcudacxx header files, we have to use them + # for all files of the F18 runtime.
+ include_directories(AFTER ${FLANG_LIBCUDACXX_PATH}/include) + add_compile_definitions(RT_USE_LIBCUDACXX=1) + endif() + endif() +endmacro() + +macro(enable_omp_offload_compilation files) + if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off") + # 'host_device' build only works with Clang compiler currently. + # The build is done with the CMAKE_C/CXX_COMPILER, i.e. it does not use + # the in-tree built Clang. We may have a mode that would use the in-tree + # built Clang. + # + # 'nohost' is supposed to produce an LLVM Bitcode library, + # and it has to be done with a C/C++ compiler producing LLVM Bitcode + # compatible with the LLVM toolchain version distributed with the Flang + # compiler. + # In general, the in-tree built Clang should be used for 'nohost' build. + # Note that 'nohost' build does not produce the host version of Flang + # runtime library, so there will be two separate distributable objects. + # 'nohost' build is a TODO. + + if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "host_device") + message(FATAL_ERROR "Unsupported OpenMP offload build of Flang runtime") + endif() + if (BUILD_SHARED_LIBS) + message(FATAL_ERROR + "BUILD_SHARED_LIBS is not supported for OpenMP offload build of Fortran runtime" + ) + endif() + + if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND + "${CMAKE_C_COMPILER_ID}" MATCHES "Clang") + + set(all_amdgpu_architectures + "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906" + "gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030" + "gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036" + "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151" + ) + set(all_nvptx_architectures + "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62" + "sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90" + ) + set(all_gpu_architectures + "${all_amdgpu_architectures};${all_nvptx_architectures}" + ) + # TODO: support auto detection on the build system. + if (FLANG_OMP_DEVICE_ARCHITECTURES STREQUAL "all") + set(FLANG_OMP_DEVICE_ARCHITECTURES ${all_gpu_architectures}) + endif() + list(REMOVE_DUPLICATES FLANG_OMP_DEVICE_ARCHITECTURES) + + string(REPLACE ";" "," compile_for_architectures + "${FLANG_OMP_DEVICE_ARCHITECTURES}" + ) + + set(OMP_COMPILE_OPTIONS + -fopenmp + -fvisibility=hidden + -fopenmp-cuda-mode + --offload-arch=${compile_for_architectures} + # Force LTO for the device part. + -foffload-lto + ) + set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS + "${OMP_COMPILE_OPTIONS}" + ) + + # Enable "declare target" in the source code. + set_source_files_properties(${files} + PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD + ) + else() + message(FATAL_ERROR + "Flang runtime build is not supported for these compilers:\n" + "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}\n" + "CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}") + endif() + endif() +endmacro() diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md index 697bd131c04c7..9030207d9bda5 100644 --- a/flang/docs/Extensions.md +++ b/flang/docs/Extensions.md @@ -193,7 +193,9 @@ end converted. BOZ literals are interpreted as default INTEGER only when they appear as the first items of array constructors with no explicit type. Otherwise, they generally cannot be used if the type would - not be known (e.g., `IAND(X'1',X'2')`). + not be known (e.g., `IAND(X'1',X'2')` or as arguments of `DIM`, `MOD`, + `MODULO`, and `SIGN`). Note that while other compilers may accept such usages, + the type resolution of such BOZ literal usages is highly non-portable.
* BOZ literals can also be used as REAL values in some contexts where the type is unambiguous, such as initializations of REAL parameters. * EQUIVALENCE of numeric and character sequences (a ubiquitous extension), @@ -306,9 +308,10 @@ end enforce it and the constraint is not necessary for a correct implementation. * A label may follow a semicolon in fixed form source. -* A scalar logical dummy argument to a `BIND(C)` procedure does - not have to have `KIND=C_BOOL` since it can be converted to/from - `_Bool` without loss of information. +* A logical dummy argument to a `BIND(C)` procedure, or a logical + component of a `BIND(C)` derived type, does not have to have + `KIND=C_BOOL` since it can be converted to/from `_Bool` without + loss of information. * The character length of the `SOURCE=` or `MOLD=` in `ALLOCATE` may be distinct from the constant character length, if any, of an allocated object. @@ -344,6 +347,10 @@ end * A `NAMELIST` input group may begin with either `&` or `$`. * A comma in a fixed-width numeric input field terminates the field rather than signaling an invalid character error. +* Arguments to the intrinsic functions `MAX` and `MIN` are converted + when necessary to the type of the result. + An `OPTIONAL`, `POINTER`, or `ALLOCATABLE` argument after + the first two cannot be converted, as it may not be present. ### Extensions supported when enabled by options diff --git a/flang/docs/GettingStarted.md b/flang/docs/GettingStarted.md index 043804e5a1228..1c85a6754b155 100644 --- a/flang/docs/GettingStarted.md +++ b/flang/docs/GettingStarted.md @@ -304,6 +304,16 @@ Clang-like device linking pipeline. The same set of CMake variables works for the Flang in-tree build. +### Build options + +One may provide optional CMake variables to customize the build. Available options: + +* `-DFLANG_RUNTIME_F128_MATH_LIB=libquadmath`: enables the build of the + `FortranFloat128Math` library that provides `REAL(16)` math APIs + for intrinsics such as `SIN`, `COS`, etc. GCC `libquadmath`'s header file + `quadmath.h` must be available to the build compiler. + [More details](Real16MathSupport.md). + ## Supported C++ compilers Flang is written in C++17. diff --git a/flang/docs/Real16MathSupport.md b/flang/docs/Real16MathSupport.md new file mode 100644 index 0000000000000..21482c7be21af --- /dev/null +++ b/flang/docs/Real16MathSupport.md @@ -0,0 +1,38 @@ + + +# Flang support for REAL(16) math intrinsics + +To support most `REAL(16)` (i.e. 128-bit float) math intrinsics, Flang relies +on third-party libraries providing the implementation. + +The `-DFLANG_RUNTIME_F128_MATH_LIB=libquadmath` CMake option can be used +to build the `FortranFloat128Math` library, which has unresolved references +to the GCC `libquadmath` library. A Flang driver built with this option +will automatically link the `FortranFloat128Math` and `libquadmath` libraries +into any Fortran program. This implies that the `libquadmath` library +has to be available in the standard library paths, so that the linker +can find it. Installing `libquadmath` into the Flang project +distribution is currently not automated in CMake. + +Testing shows that `libquadmath` versions before GCC-9.3.0 have +accuracy issues, so it is recommended to distribute the Flang +package with later versions of `libquadmath`. + +Care must be taken by the distributors of a Flang package built +with `REAL(16)` support via `libquadmath` because of its licensing +under the GNU Library General Public License. Moreover, static linking +of `libquadmath` into Flang users' programs may imply some +restrictions/requirements. This document is not intended to give +any legal advice on distributing such a Flang compiler. + +A Flang compiler targeting systems with `LDBL_MANT_DIG == 113` +may provide `REAL(16)` math support without a `libquadmath` +dependency, using standard `libc` APIs for the `long double` +data type. It is not recommended to use the above CMake option +when building Flang compilers for such targets. diff --git a/flang/docs/index.md b/flang/docs/index.md index ed749f565ff1b..4a0b145df10b0 100644 --- a/flang/docs/index.md +++ b/flang/docs/index.md @@ -85,6 +85,7 @@ on how to get in touch with us and to learn more about the current status. Semantics f2018-grammar.md fstack-arrays + Real16MathSupport ``` # Indices and tables diff --git a/flang/include/flang/Common/Fortran.h b/flang/include/flang/Common/Fortran.h index ac1973fdff667..2a53452a2774f 100644 --- a/flang/include/flang/Common/Fortran.h +++ b/flang/include/flang/Common/Fortran.h @@ -85,7 +85,8 @@ static constexpr int maxRank{15}; ENUM_CLASS(CUDASubprogramAttrs, Host, Device, HostDevice, Global, Grid_Global) // CUDA data attributes; mutually exclusive -ENUM_CLASS(CUDADataAttr, Constant, Device, Managed, Pinned, Shared, Texture) +ENUM_CLASS( + CUDADataAttr, Constant, Device, Managed, Pinned, Shared, Texture, Unified) // OpenACC device types ENUM_CLASS( diff --git a/flang/include/flang/Common/windows-include.h b/flang/include/flang/Common/windows-include.h new file mode 100644 index 0000000000000..75ef4974251ff --- /dev/null +++ b/flang/include/flang/Common/windows-include.h @@ -0,0 +1,25 @@ +//===-- include/flang/Common/windows-include.h ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Wrapper around windows.h that works around the name conflicts.
diff --git a/flang/include/flang/Common/windows-include.h b/flang/include/flang/Common/windows-include.h
new file mode 100644
index 0000000000000..75ef4974251ff
--- /dev/null
+++ b/flang/include/flang/Common/windows-include.h
@@ -0,0 +1,25 @@
+//===-- include/flang/Common/windows-include.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Wrapper around windows.h that works around the name conflicts.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_COMMON_WINDOWS_INCLUDE_H_
+#define FORTRAN_COMMON_WINDOWS_INCLUDE_H_
+
+#ifdef _WIN32
+
+#define WIN32_LEAN_AND_MEAN
+#define NOMINMAX
+
+#include <windows.h>
+
+#endif // _WIN32
+
+#endif // FORTRAN_COMMON_WINDOWS_INCLUDE_H_
diff --git a/flang/include/flang/Evaluate/common.h b/flang/include/flang/Evaluate/common.h
index d04c901929e74..c2c7711c4684e 100644
--- a/flang/include/flang/Evaluate/common.h
+++ b/flang/include/flang/Evaluate/common.h
@@ -256,9 +256,11 @@ class FoldingContext {
   const common::LanguageFeatureControl &languageFeatures() const {
     return languageFeatures_;
   }
-  bool inModuleFile() const { return inModuleFile_; }
-  FoldingContext &set_inModuleFile(bool yes = true) {
-    inModuleFile_ = yes;
+  std::optional<parser::CharBlock> moduleFileName() const {
+    return moduleFileName_;
+  }
+  FoldingContext &set_moduleFileName(std::optional<parser::CharBlock> n) {
+    moduleFileName_ = n;
     return *this;
   }
@@ -288,7 +290,7 @@ class FoldingContext {
   const IntrinsicProcTable &intrinsics_;
   const TargetCharacteristics &targetCharacteristics_;
   const semantics::DerivedTypeSpec *pdtInstance_{nullptr};
-  bool inModuleFile_{false};
+  std::optional<parser::CharBlock> moduleFileName_;
   std::map impliedDos_;
   const common::LanguageFeatureControl &languageFeatures_;
   std::set &tempNames_;
diff --git a/flang/include/flang/Evaluate/integer.h b/flang/include/flang/Evaluate/integer.h
index 7395645701265..b62e2bcb90f2f 100644
--- a/flang/include/flang/Evaluate/integer.h
+++ b/flang/include/flang/Evaluate/integer.h
@@ -27,7 +27,7 @@
 #include
 #include

-// Some environments, viz. glibc 2.17, allow the macro HUGE
+// Some environments, viz. glibc 2.17 and *BSD, allow the macro HUGE
 // to leak out of <math.h>.
 #undef HUGE
diff --git a/flang/include/flang/Evaluate/real.h b/flang/include/flang/Evaluate/real.h
index b7af0ff6b431c..6f2466c9da677 100644
--- a/flang/include/flang/Evaluate/real.h
+++ b/flang/include/flang/Evaluate/real.h
@@ -18,7 +18,7 @@
 #include
 #include

-// Some environments, viz. glibc 2.17, allow the macro HUGE
+// Some environments, viz. glibc 2.17 and *BSD, allow the macro HUGE
 // to leak out of <math.h>.
 #undef HUGE
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index 8c872a0579c8e..ca14c144af2d6 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -19,6 +19,7 @@
 #include "flang/Evaluate/type.h"
 #include "flang/Parser/message.h"
 #include "flang/Semantics/attr.h"
+#include "flang/Semantics/scope.h"
 #include "flang/Semantics/symbol.h"
 #include
 #include
@@ -1226,18 +1227,53 @@ bool CheckForCoindexedObject(parser::ContextualMessages &,
     const std::optional<ActualArgument> &, const std::string &procName,
     const std::string &argName);

-/// Check if any of the symbols part of the expression has a cuda data
-/// attribute.
-inline bool HasCUDAAttrs(const Expr<SomeType> &expr) {
-  for (const Symbol &sym : CollectSymbols(expr)) {
-    if (const auto *details =
-            sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
-      if (details->cudaDataAttr()) {
-        return true;
-      }
-    }
-  }
-  return false;
+// Get the number of distinct symbols with a CUDA data attribute in the
+// expression.
+template <typename A> inline int GetNbOfCUDASymbols(const A &expr) {
+  semantics::UnorderedSymbolSet symbols;
+  for (const Symbol &sym : CollectSymbols(expr)) {
+    if (const auto *details =
+            sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
+      if (details->cudaDataAttr()) {
+        symbols.insert(sym);
+      }
+    }
+  }
+  return symbols.size();
+}
+
+// Check if any of the symbols in the expression has a CUDA data attribute.
+template <typename A> inline bool HasCUDAAttrs(const A &expr) {
+  return GetNbOfCUDASymbols(expr) > 0;
+}
+
+/// Check if the expression is a mix of host and device variables that
+/// requires an implicit data transfer.
+inline bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) {
+  unsigned hostSymbols{0};
+  unsigned deviceSymbols{0};
+  for (const Symbol &sym : CollectSymbols(expr)) {
+    if (const auto *details =
+            sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
+      if (details->cudaDataAttr()) {
+        ++deviceSymbols;
+      } else {
+        if (sym.owner().IsDerivedType()) {
+          if (const auto *details =
+                  sym.owner()
+                      .GetSymbol()
+                      ->GetUltimate()
+                      .detailsIf<semantics::ObjectEntityDetails>()) {
+            if (details->cudaDataAttr()) {
+              ++deviceSymbols;
+            }
+          }
+        }
+        ++hostSymbols;
+      }
+    }
+  }
+  return hostSymbols > 0 && deviceSymbols > 0;
 }
 } // namespace Fortran::evaluate
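The distinct-count in `GetNbOfCUDASymbols` matters because an expression may reference the same symbol several times. A self-contained editorial analogue of that deduplication (the `Symbol` struct and names here are hypothetical stand-ins, not flang's real types):

```cpp
#include <cassert>
#include <string>
#include <unordered_set>
#include <vector>

// Stand-in for a flang symbol; hasCudaAttr models cudaDataAttr().
struct Symbol {
  std::string name;
  bool hasCudaAttr;
};

// Stand-in for CollectSymbols(expr): symbols in source order, with repeats.
// Deduplicate before counting, as GetNbOfCUDASymbols does with its set.
int countDistinctCudaSymbols(const std::vector<Symbol> &collected) {
  std::unordered_set<std::string> distinct;
  for (const Symbol &sym : collected)
    if (sym.hasCudaAttr)
      distinct.insert(sym.name);
  return static_cast<int>(distinct.size());
}

int main() {
  // "a_dev = a_dev + b": a_dev appears twice but must count once.
  std::vector<Symbol> syms{{"a_dev", true}, {"a_dev", true}, {"b", false}};
  assert(countDistinctCudaSymbols(syms) == 1);
}
```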
diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
index 940866b25d2fe..e4c954159f71b 100644
--- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h
@@ -164,9 +164,22 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener {
   mlir::Value createNullConstant(mlir::Location loc, mlir::Type ptrType = {});

   /// Create an integer constant of type \p type and value \p i.
+  /// Should not be used with negative values and integer types wider
+  /// than 64 bits.
   mlir::Value createIntegerConstant(mlir::Location loc, mlir::Type integerType,
                                     std::int64_t i);

+  /// Create an integer of \p integerType where all the bits have been set to
+  /// ones. Safe to use regardless of integerType bitwidth.
+  mlir::Value createAllOnesInteger(mlir::Location loc, mlir::Type integerType);
+
+  /// Create a -1 constant of \p integerType. Safe to use regardless of
+  /// integerType bitwidth.
+  mlir::Value createMinusOneInteger(mlir::Location loc,
+                                    mlir::Type integerType) {
+    return createAllOnesInteger(loc, integerType);
+  }
+
   /// Create a real constant from an integer value.
   mlir::Value createRealConstant(mlir::Location loc, mlir::Type realType,
                                 llvm::APFloat::integerPart val);
diff --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
index f29e44504acb6..3266ea3aa7fdc 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
@@ -155,13 +155,7 @@ bool valueMayHaveFirAttributes(mlir::Value value,
 bool anyFuncArgsHaveAttr(mlir::func::FuncOp func, llvm::StringRef attr);

 /// Unwrap integer constant from an mlir::Value.
-inline std::optional getIntIfConstant(mlir::Value value) { - if (auto *definingOp = value.getDefiningOp()) - if (auto cst = mlir::dyn_cast(definingOp)) - if (auto intAttr = cst.getValue().dyn_cast()) - return intAttr.getInt(); - return {}; -} +std::optional getIntIfConstant(mlir::Value value); static constexpr llvm::StringRef getAdaptToByRefAttrName() { return "adapt.valuebyref"; diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h index 7a8a34c25ce95..2b4fa50e0e421 100644 --- a/flang/include/flang/Optimizer/Support/Utils.h +++ b/flang/include/flang/Optimizer/Support/Utils.h @@ -158,6 +158,9 @@ getCUDADataAttribute(mlir::MLIRContext *mlirContext, case Fortran::common::CUDADataAttr::Texture: // Obsolete attribute return {}; + case Fortran::common::CUDADataAttr::Unified: + attr = fir::CUDADataAttribute::Unified; + break; } return fir::CUDADataAttributeAttr::get(mlirContext, attr); } diff --git a/flang/include/flang/Parser/char-block.h b/flang/include/flang/Parser/char-block.h index acd8aee98bf8d..38f4f7b82e1ea 100644 --- a/flang/include/flang/Parser/char-block.h +++ b/flang/include/flang/Parser/char-block.h @@ -132,17 +132,20 @@ class CharBlock { // "memcmp" in glibc has "nonnull" attributes on the input pointers. // Avoid passing null pointers, since it would result in an undefined // behavior. - if (size() == 0) + if (size() == 0) { return that.size() == 0 ? 0 : -1; - if (that.size() == 0) + } else if (that.size() == 0) { return 1; - std::size_t bytes{std::min(size(), that.size())}; - int cmp{std::memcmp(static_cast(begin()), - static_cast(that.begin()), bytes)}; - if (cmp != 0) { - return cmp; + } else { + std::size_t bytes{std::min(size(), that.size())}; + int cmp{std::memcmp(static_cast(begin()), + static_cast(that.begin()), bytes)}; + if (cmp != 0) { + return cmp; + } else { + return size() < that.size() ? -1 : size() > that.size(); + } } - return size() < that.size() ? 
-1 : size() > that.size(); } int Compare(const char *that) const { diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 26b2e5f4e34b0..574a95cf22afd 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -991,7 +991,7 @@ struct ComponentArraySpec { // access-spec | ALLOCATABLE | // CODIMENSION lbracket coarray-spec rbracket | // CONTIGUOUS | DIMENSION ( component-array-spec ) | POINTER | -// (CUDA) CONSTANT | DEVICE | MANAGED | PINNED | SHARED | TEXTURE +// (CUDA) CONSTANT | DEVICE | MANAGED | PINNED | SHARED | TEXTURE | UNIFIED EMPTY_CLASS(Allocatable); EMPTY_CLASS(Pointer); EMPTY_CLASS(Contiguous); @@ -1097,7 +1097,8 @@ struct ProcComponentDefStmt { // R736 component-def-stmt -> data-component-def-stmt | proc-component-def-stmt struct ComponentDefStmt { UNION_CLASS_BOILERPLATE(ComponentDefStmt); - std::variant, ErrorRecovery // , TypeParamDefStmt -- PGI accidental extension, not enabled > u; diff --git a/flang/runtime/freestanding-tools.h b/flang/include/flang/Runtime/freestanding-tools.h similarity index 81% rename from flang/runtime/freestanding-tools.h rename to flang/include/flang/Runtime/freestanding-tools.h index 451bf13b9fa6d..e94cb0a6c938c 100644 --- a/flang/runtime/freestanding-tools.h +++ b/flang/include/flang/Runtime/freestanding-tools.h @@ -1,4 +1,4 @@ -//===-- runtime/freestanding-tools.h ----------------------------*- C++ -*-===// +//===-- include/flang/Runtime/freestanding-tools.h --------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -12,6 +12,7 @@ #include "flang/Common/api-attrs.h" #include "flang/Runtime/c-or-cpp.h" #include +#include #include // The file defines a set of utilities/classes that might be @@ -52,6 +53,16 @@ #define STD_STRCPY_UNSUPPORTED 1 #endif +#if !defined(STD_STRCMP_UNSUPPORTED) && \ + (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__) +#define STD_STRCMP_UNSUPPORTED 1 +#endif + +#if !defined(STD_TOUPPER_UNSUPPORTED) && \ + (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__) +#define STD_TOUPPER_UNSUPPORTED 1 +#endif + namespace Fortran::runtime { #if STD_FILL_N_UNSUPPORTED @@ -176,5 +187,32 @@ static inline RT_API_ATTRS char *strcpy(char *dest, const char *src) { using std::strcpy; #endif // !STD_STRCPY_UNSUPPORTED +#if STD_STRCMP_UNSUPPORTED +// Provides alternative implementation for std::strcmp(), if +// it is not supported. +static inline RT_API_ATTRS int strcmp(const char *lhs, const char *rhs) { + while (*lhs != '\0' && *lhs == *rhs) { + ++lhs; + ++rhs; + } + return static_cast(*lhs) - static_cast(*rhs); +} +#else // !STD_STRCMP_UNSUPPORTED +using std::strcmp; +#endif // !STD_STRCMP_UNSUPPORTED + +#if STD_TOUPPER_UNSUPPORTED +// Provides alternative implementation for std::toupper(), if +// it is not supported. 
+static inline RT_API_ATTRS int toupper(int ch) { + if (ch >= 'a' && ch <= 'z') { + return ch - 'a' + 'A'; + } + return ch; +} +#else // !STD_TOUPPER_UNSUPPORTED +using std::toupper; +#endif // !STD_TOUPPER_UNSUPPORTED + } // namespace Fortran::runtime #endif // FORTRAN_RUNTIME_FREESTANDING_TOOLS_H_ diff --git a/flang/include/flang/Runtime/io-api.h b/flang/include/flang/Runtime/io-api.h index 1b6c4f5d6a65c..328afc715a3f1 100644 --- a/flang/include/flang/Runtime/io-api.h +++ b/flang/include/flang/Runtime/io-api.h @@ -92,18 +92,18 @@ constexpr std::size_t RecommendedInternalIoScratchAreaBytes( // Internal I/O to/from character arrays &/or non-default-kind character // requires a descriptor, which is copied. -Cookie IONAME(BeginInternalArrayListOutput)(const Descriptor &, +Cookie IODECL(BeginInternalArrayListOutput)(const Descriptor &, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInternalArrayListInput)(const Descriptor &, +Cookie IODECL(BeginInternalArrayListInput)(const Descriptor &, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInternalArrayFormattedOutput)(const Descriptor &, +Cookie IODECL(BeginInternalArrayFormattedOutput)(const Descriptor &, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor = nullptr, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInternalArrayFormattedInput)(const Descriptor &, +Cookie IODECL(BeginInternalArrayFormattedInput)(const Descriptor &, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor = nullptr, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, @@ -111,20 +111,20 @@ Cookie IONAME(BeginInternalArrayFormattedInput)(const Descriptor &, // Internal I/O to/from a default-kind character scalar can avoid a // descriptor. -Cookie IONAME(BeginInternalListOutput)(char *internal, +Cookie IODECL(BeginInternalListOutput)(char *internal, std::size_t internalLength, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInternalListInput)(const char *internal, +Cookie IODECL(BeginInternalListInput)(const char *internal, std::size_t internalLength, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInternalFormattedOutput)(char *internal, +Cookie IODECL(BeginInternalFormattedOutput)(char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor = nullptr, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInternalFormattedInput)(const char *internal, +Cookie IODECL(BeginInternalFormattedInput)(const char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor = nullptr, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, @@ -139,63 +139,63 @@ Cookie IONAME(BeginInternalFormattedInput)(const char *internal, // If handleError is false, and the unit number is out of range, the program // will be terminated. 
Otherwise, if unit is out of range, a nonzero Iostat // code is returned and ioMsg is set if it is not a nullptr. -enum Iostat IONAME(CheckUnitNumberInRange64)(std::int64_t unit, +enum Iostat IODECL(CheckUnitNumberInRange64)(std::int64_t unit, bool handleError, char *ioMsg = nullptr, std::size_t ioMsgLength = 0, const char *sourceFile = nullptr, int sourceLine = 0); -enum Iostat IONAME(CheckUnitNumberInRange128)(common::int128_t unit, +enum Iostat IODECL(CheckUnitNumberInRange128)(common::int128_t unit, bool handleError, char *ioMsg = nullptr, std::size_t ioMsgLength = 0, const char *sourceFile = nullptr, int sourceLine = 0); // External synchronous I/O initiation Cookie IODECL(BeginExternalListOutput)(ExternalUnit = DefaultOutputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginExternalListInput)(ExternalUnit = DefaultInputUnit, +Cookie IODECL(BeginExternalListInput)(ExternalUnit = DefaultInputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginExternalFormattedOutput)(const char *format, std::size_t, +Cookie IODECL(BeginExternalFormattedOutput)(const char *format, std::size_t, const Descriptor *formatDescriptor = nullptr, ExternalUnit = DefaultOutputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginExternalFormattedInput)(const char *format, std::size_t, +Cookie IODECL(BeginExternalFormattedInput)(const char *format, std::size_t, const Descriptor *formatDescriptor = nullptr, ExternalUnit = DefaultInputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginUnformattedOutput)(ExternalUnit = DefaultOutputUnit, +Cookie IODECL(BeginUnformattedOutput)(ExternalUnit = DefaultOutputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginUnformattedInput)(ExternalUnit = DefaultInputUnit, +Cookie IODECL(BeginUnformattedInput)(ExternalUnit = DefaultInputUnit, const char *sourceFile = nullptr, int sourceLine = 0); // WAIT(ID=) -Cookie IONAME(BeginWait)(ExternalUnit, AsynchronousId, +Cookie IODECL(BeginWait)(ExternalUnit, AsynchronousId, const char *sourceFile = nullptr, int sourceLine = 0); // WAIT(no ID=) -Cookie IONAME(BeginWaitAll)( +Cookie IODECL(BeginWaitAll)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); // Other I/O statements -Cookie IONAME(BeginClose)( +Cookie IODECL(BeginClose)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginFlush)( +Cookie IODECL(BeginFlush)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginBackspace)( +Cookie IODECL(BeginBackspace)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginEndfile)( +Cookie IODECL(BeginEndfile)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginRewind)( +Cookie IODECL(BeginRewind)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); // OPEN(UNIT=) and OPEN(NEWUNIT=) have distinct interfaces. -Cookie IONAME(BeginOpenUnit)( +Cookie IODECL(BeginOpenUnit)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginOpenNewUnit)( +Cookie IODECL(BeginOpenNewUnit)( const char *sourceFile = nullptr, int sourceLine = 0); // The variant forms of INQUIRE() statements have distinct interfaces. // BeginInquireIoLength() is basically a no-op output statement. 
-Cookie IONAME(BeginInquireUnit)(
+Cookie IODECL(BeginInquireUnit)(
     ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0);
-Cookie IONAME(BeginInquireFile)(const char *, std::size_t,
+Cookie IODECL(BeginInquireFile)(const char *, std::size_t,
     const char *sourceFile = nullptr, int sourceLine = 0);
-Cookie IONAME(BeginInquireIoLength)(
+Cookie IODECL(BeginInquireIoLength)(
     const char *sourceFile = nullptr, int sourceLine = 0);

 // If an I/O statement has any IOSTAT=, ERR=, END=, or EOR= specifiers,
@@ -214,33 +214,33 @@ Cookie IONAME(BeginInquireIoLength)(
 //   }
 // }
 // if (EndIoStatement(cookie) == FORTRAN_RUNTIME_IOSTAT_END) goto label666;
-void IONAME(EnableHandlers)(Cookie, bool hasIoStat = false, bool hasErr = false,
+void IODECL(EnableHandlers)(Cookie, bool hasIoStat = false, bool hasErr = false,
     bool hasEnd = false, bool hasEor = false, bool hasIoMsg = false);

 // ASYNCHRONOUS='YES' or 'NO' on READ/WRITE/OPEN
 // Use GetAsynchronousId() to handle ID=.
-bool IONAME(SetAsynchronous)(Cookie, const char *, std::size_t);
+bool IODECL(SetAsynchronous)(Cookie, const char *, std::size_t);

 // Control list options. These return false on an error that the
 // Begin...() call has specified will be handled by the caller.
 // The interfaces that pass a default-kind CHARACTER argument
 // are limited to passing specific case-insensitive keyword values.
 // ADVANCE=YES, NO
-bool IONAME(SetAdvance)(Cookie, const char *, std::size_t);
+bool IODECL(SetAdvance)(Cookie, const char *, std::size_t);
 // BLANK=NULL, ZERO
-bool IONAME(SetBlank)(Cookie, const char *, std::size_t);
+bool IODECL(SetBlank)(Cookie, const char *, std::size_t);
 // DECIMAL=COMMA, POINT
-bool IONAME(SetDecimal)(Cookie, const char *, std::size_t);
+bool IODECL(SetDecimal)(Cookie, const char *, std::size_t);
 // DELIM=APOSTROPHE, QUOTE, NONE
-bool IONAME(SetDelim)(Cookie, const char *, std::size_t);
+bool IODECL(SetDelim)(Cookie, const char *, std::size_t);
 // PAD=YES, NO
-bool IONAME(SetPad)(Cookie, const char *, std::size_t);
-bool IONAME(SetPos)(Cookie, std::int64_t);
-bool IONAME(SetRec)(Cookie, std::int64_t);
+bool IODECL(SetPad)(Cookie, const char *, std::size_t);
+bool IODECL(SetPos)(Cookie, std::int64_t);
+bool IODECL(SetRec)(Cookie, std::int64_t);
 // ROUND=UP, DOWN, ZERO, NEAREST, COMPATIBLE, PROCESSOR_DEFINED
-bool IONAME(SetRound)(Cookie, const char *, std::size_t);
+bool IODECL(SetRound)(Cookie, const char *, std::size_t);
 // SIGN=PLUS, SUPPRESS, PROCESSOR_DEFINED
-bool IONAME(SetSign)(Cookie, const char *, std::size_t);
+bool IODECL(SetSign)(Cookie, const char *, std::size_t);

 // Data item transfer for modes other than NAMELIST:
 // Any data object that can be passed as an actual argument without the
@@ -256,34 +256,34 @@ bool IONAME(SetSign)(Cookie, const char *, std::size_t);
 // Once the statement has encountered an error, all following items will be
 // ignored and also return false; but compiled code should check for errors
 // and avoid the following items when they might crash.
-bool IONAME(OutputDescriptor)(Cookie, const Descriptor &); -bool IONAME(InputDescriptor)(Cookie, const Descriptor &); +bool IODECL(OutputDescriptor)(Cookie, const Descriptor &); +bool IODECL(InputDescriptor)(Cookie, const Descriptor &); // Formatted (including list directed) I/O data items -bool IONAME(OutputInteger8)(Cookie, std::int8_t); -bool IONAME(OutputInteger16)(Cookie, std::int16_t); +bool IODECL(OutputInteger8)(Cookie, std::int8_t); +bool IODECL(OutputInteger16)(Cookie, std::int16_t); bool IODECL(OutputInteger32)(Cookie, std::int32_t); -bool IONAME(OutputInteger64)(Cookie, std::int64_t); -bool IONAME(OutputInteger128)(Cookie, common::int128_t); -bool IONAME(InputInteger)(Cookie, std::int64_t &, int kind = 8); -bool IONAME(OutputReal32)(Cookie, float); -bool IONAME(InputReal32)(Cookie, float &); -bool IONAME(OutputReal64)(Cookie, double); -bool IONAME(InputReal64)(Cookie, double &); -bool IONAME(OutputComplex32)(Cookie, float, float); -bool IONAME(InputComplex32)(Cookie, float[2]); -bool IONAME(OutputComplex64)(Cookie, double, double); -bool IONAME(InputComplex64)(Cookie, double[2]); -bool IONAME(OutputCharacter)(Cookie, const char *, std::size_t, int kind = 1); -bool IONAME(OutputAscii)(Cookie, const char *, std::size_t); -bool IONAME(InputCharacter)(Cookie, char *, std::size_t, int kind = 1); -bool IONAME(InputAscii)(Cookie, char *, std::size_t); -bool IONAME(OutputLogical)(Cookie, bool); -bool IONAME(InputLogical)(Cookie, bool &); +bool IODECL(OutputInteger64)(Cookie, std::int64_t); +bool IODECL(OutputInteger128)(Cookie, common::int128_t); +bool IODECL(InputInteger)(Cookie, std::int64_t &, int kind = 8); +bool IODECL(OutputReal32)(Cookie, float); +bool IODECL(InputReal32)(Cookie, float &); +bool IODECL(OutputReal64)(Cookie, double); +bool IODECL(InputReal64)(Cookie, double &); +bool IODECL(OutputComplex32)(Cookie, float, float); +bool IODECL(InputComplex32)(Cookie, float[2]); +bool IODECL(OutputComplex64)(Cookie, double, double); +bool IODECL(InputComplex64)(Cookie, double[2]); +bool IODECL(OutputCharacter)(Cookie, const char *, std::size_t, int kind = 1); +bool IODECL(OutputAscii)(Cookie, const char *, std::size_t); +bool IODECL(InputCharacter)(Cookie, char *, std::size_t, int kind = 1); +bool IODECL(InputAscii)(Cookie, char *, std::size_t); +bool IODECL(OutputLogical)(Cookie, bool); +bool IODECL(InputLogical)(Cookie, bool &); // NAMELIST I/O must be the only data item in an (otherwise) // list-directed I/O statement. -bool IONAME(OutputNamelist)(Cookie, const NamelistGroup &); -bool IONAME(InputNamelist)(Cookie, const NamelistGroup &); +bool IODECL(OutputNamelist)(Cookie, const NamelistGroup &); +bool IODECL(InputNamelist)(Cookie, const NamelistGroup &); // When an I/O list item has a derived type with a specific defined // I/O subroutine of the appropriate generic kind for the active @@ -294,9 +294,9 @@ bool IONAME(InputNamelist)(Cookie, const NamelistGroup &); // made such a generic interface inaccessible), these data item transfer // APIs enable the I/O runtime to make the right calls to defined I/O // subroutines. 
-bool IONAME(OutputDerivedType)( +bool IODECL(OutputDerivedType)( Cookie, const Descriptor &, const NonTbpDefinedIoTable *); -bool IONAME(InputDerivedType)( +bool IODECL(InputDerivedType)( Cookie, const Descriptor &, const NonTbpDefinedIoTable *); // Additional specifier interfaces for the connection-list of @@ -304,56 +304,56 @@ bool IONAME(InputDerivedType)( // SetDelim(), GetIoMsg(), SetPad(), SetRound(), SetSign(), // & SetAsynchronous() are also acceptable for OPEN. // ACCESS=SEQUENTIAL, DIRECT, STREAM -bool IONAME(SetAccess)(Cookie, const char *, std::size_t); +bool IODECL(SetAccess)(Cookie, const char *, std::size_t); // ACTION=READ, WRITE, or READWRITE -bool IONAME(SetAction)(Cookie, const char *, std::size_t); +bool IODECL(SetAction)(Cookie, const char *, std::size_t); // CARRIAGECONTROL=LIST, FORTRAN, NONE -bool IONAME(SetCarriagecontrol)(Cookie, const char *, std::size_t); +bool IODECL(SetCarriagecontrol)(Cookie, const char *, std::size_t); // CONVERT=NATIVE, LITTLE_ENDIAN, BIG_ENDIAN, or SWAP -bool IONAME(SetConvert)(Cookie, const char *, std::size_t); +bool IODECL(SetConvert)(Cookie, const char *, std::size_t); // ENCODING=UTF-8, DEFAULT -bool IONAME(SetEncoding)(Cookie, const char *, std::size_t); +bool IODECL(SetEncoding)(Cookie, const char *, std::size_t); // FORM=FORMATTED, UNFORMATTED -bool IONAME(SetForm)(Cookie, const char *, std::size_t); +bool IODECL(SetForm)(Cookie, const char *, std::size_t); // POSITION=ASIS, REWIND, APPEND -bool IONAME(SetPosition)(Cookie, const char *, std::size_t); -bool IONAME(SetRecl)(Cookie, std::size_t); // RECL= +bool IODECL(SetPosition)(Cookie, const char *, std::size_t); +bool IODECL(SetRecl)(Cookie, std::size_t); // RECL= // STATUS can be set during an OPEN or CLOSE statement. // For OPEN: STATUS=OLD, NEW, SCRATCH, REPLACE, UNKNOWN // For CLOSE: STATUS=KEEP, DELETE -bool IONAME(SetStatus)(Cookie, const char *, std::size_t); +bool IODECL(SetStatus)(Cookie, const char *, std::size_t); -bool IONAME(SetFile)(Cookie, const char *, std::size_t chars); +bool IODECL(SetFile)(Cookie, const char *, std::size_t chars); // Acquires the runtime-created unit number for OPEN(NEWUNIT=) -bool IONAME(GetNewUnit)(Cookie, int &, int kind = 4); +bool IODECL(GetNewUnit)(Cookie, int &, int kind = 4); // READ(SIZE=), after all input items -std::size_t IONAME(GetSize)(Cookie); +std::size_t IODECL(GetSize)(Cookie); // INQUIRE(IOLENGTH=), after all output items -std::size_t IONAME(GetIoLength)(Cookie); +std::size_t IODECL(GetIoLength)(Cookie); // GetIoMsg() does not modify its argument unless an error or // end-of-record/file condition is present. -void IONAME(GetIoMsg)(Cookie, char *, std::size_t); // IOMSG= +void IODECL(GetIoMsg)(Cookie, char *, std::size_t); // IOMSG= // Defines ID= on READ/WRITE(ASYNCHRONOUS='YES') -AsynchronousId IONAME(GetAsynchronousId)(Cookie); +AsynchronousId IODECL(GetAsynchronousId)(Cookie); // INQUIRE() specifiers are mostly identified by their NUL-terminated // case-insensitive names. 
// ACCESS, ACTION, ASYNCHRONOUS, BLANK, CONVERT, DECIMAL, DELIM, DIRECT, // ENCODING, FORM, FORMATTED, NAME, PAD, POSITION, READ, READWRITE, ROUND, // SEQUENTIAL, SIGN, STREAM, UNFORMATTED, WRITE: -bool IONAME(InquireCharacter)(Cookie, InquiryKeywordHash, char *, std::size_t); +bool IODECL(InquireCharacter)(Cookie, InquiryKeywordHash, char *, std::size_t); // EXIST, NAMED, OPENED, and PENDING (without ID): -bool IONAME(InquireLogical)(Cookie, InquiryKeywordHash, bool &); +bool IODECL(InquireLogical)(Cookie, InquiryKeywordHash, bool &); // PENDING with ID -bool IONAME(InquirePendingId)(Cookie, AsynchronousId, bool &); +bool IODECL(InquirePendingId)(Cookie, AsynchronousId, bool &); // NEXTREC, NUMBER, POS, RECL, SIZE -bool IONAME(InquireInteger64)( +bool IODECL(InquireInteger64)( Cookie, InquiryKeywordHash, std::int64_t &, int kind = 8); // This function must be called to end an I/O statement, and its diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index f0eb82eebefa3..da10969ebc702 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -212,6 +212,16 @@ inline bool IsCUDADeviceContext(const Scope *scope) { return false; } +inline bool HasCUDAAttr(const Symbol &sym) { + if (const auto *details{ + sym.GetUltimate().detailsIf()}) { + if (details->cudaDataAttr()) { + return true; + } + } + return false; +} + const Scope *FindCUDADeviceContext(const Scope *); std::optional GetCUDADataAttr(const Symbol *); diff --git a/flang/lib/Decimal/CMakeLists.txt b/flang/lib/Decimal/CMakeLists.txt index 2f6caa22e1562..3d562b8e3ce1e 100644 --- a/flang/lib/Decimal/CMakeLists.txt +++ b/flang/lib/Decimal/CMakeLists.txt @@ -49,11 +49,17 @@ endif() # avoid an unwanted dependency on libstdc++.so. add_definitions(-U_GLIBCXX_ASSERTIONS) -add_flang_library(FortranDecimal INSTALL_WITH_TOOLCHAIN +set(sources binary-to-decimal.cpp decimal-to-binary.cpp ) +include(AddFlangOffloadRuntime) +enable_cuda_compilation("${sources}") +enable_omp_offload_compilation("${sources}") + +add_flang_library(FortranDecimal INSTALL_WITH_TOOLCHAIN ${sources}) + if (DEFINED MSVC) set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreaded) add_flang_library(FortranDecimal.static INSTALL_WITH_TOOLCHAIN @@ -77,4 +83,4 @@ if (DEFINED MSVC) ) add_dependencies(FortranDecimal FortranDecimal.static FortranDecimal.dynamic FortranDecimal.static_dbg FortranDecimal.dynamic_dbg) -endif() \ No newline at end of file +endif() diff --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h index 2143d1d9b3f77..6ce8ae7925c15 100644 --- a/flang/lib/Decimal/big-radix-floating-point.h +++ b/flang/lib/Decimal/big-radix-floating-point.h @@ -30,6 +30,10 @@ #include #include +// Some environments, viz. glibc 2.17, allow the macro HUGE +// to leak out of . +#undef HUGE + namespace Fortran::decimal { static constexpr std::uint64_t TenToThe(int power) { @@ -64,15 +68,15 @@ template class BigRadixFloatingPointNumber { static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix}; public: - explicit BigRadixFloatingPointNumber( + explicit RT_API_ATTRS BigRadixFloatingPointNumber( enum FortranRounding rounding = RoundNearest) : rounding_{rounding} {} // Converts a binary floating point value. 
- explicit BigRadixFloatingPointNumber( + explicit RT_API_ATTRS BigRadixFloatingPointNumber( Real, enum FortranRounding = RoundNearest); - BigRadixFloatingPointNumber &SetToZero() { + RT_API_ATTRS BigRadixFloatingPointNumber &SetToZero() { isNegative_ = false; digits_ = 0; exponent_ = 0; @@ -80,14 +84,14 @@ template class BigRadixFloatingPointNumber { } // Converts decimal floating-point to binary. - ConversionToBinaryResult ConvertToBinary(); + RT_API_ATTRS ConversionToBinaryResult ConvertToBinary(); // Parses and converts to binary. Handles leading spaces, // "NaN", & optionally-signed "Inf". Does not skip internal // spaces. // The argument is a reference to a pointer that is left // pointing to the first character that wasn't parsed. - ConversionToBinaryResult ConvertToBinary( + RT_API_ATTRS ConversionToBinaryResult ConvertToBinary( const char *&, const char *end = nullptr); // Formats a decimal floating-point number to a user buffer. @@ -96,7 +100,7 @@ template class BigRadixFloatingPointNumber { // after the last digit; the effective decimal exponent is // returned as part of the result structure so that it can be // formatted by the client. - ConversionToDecimalResult ConvertToDecimal( + RT_API_ATTRS ConversionToDecimalResult ConvertToDecimal( char *, std::size_t, enum DecimalConversionFlags, int digits) const; // Discard decimal digits not needed to distinguish this value @@ -108,13 +112,14 @@ template class BigRadixFloatingPointNumber { // This minimization necessarily assumes that the value will be // emitted and read back into the same (or less precise) format // with default rounding to the nearest value. - void Minimize( + RT_API_ATTRS void Minimize( BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more); template STREAM &Dump(STREAM &) const; private: - BigRadixFloatingPointNumber(const BigRadixFloatingPointNumber &that) + RT_API_ATTRS BigRadixFloatingPointNumber( + const BigRadixFloatingPointNumber &that) : digits_{that.digits_}, exponent_{that.exponent_}, isNegative_{that.isNegative_}, rounding_{that.rounding_} { for (int j{0}; j < digits_; ++j) { @@ -122,7 +127,7 @@ template class BigRadixFloatingPointNumber { } } - bool IsZero() const { + RT_API_ATTRS bool IsZero() const { // Don't assume normalization. for (int j{0}; j < digits_; ++j) { if (digit_[j] != 0) { @@ -136,13 +141,13 @@ template class BigRadixFloatingPointNumber { // (When this happens during decimal-to-binary conversion, // there are more digits in the input string than can be // represented precisely.) - bool IsFull() const { + RT_API_ATTRS bool IsFull() const { return digits_ == digitLimit_ && digit_[digits_ - 1] >= radix / 10; } // Sets *this to an unsigned integer value. // Returns any remainder. 
- template UINT SetTo(UINT n) { + template RT_API_ATTRS UINT SetTo(UINT n) { static_assert( std::is_same_v || std::is_unsigned_v); SetToZero(); @@ -169,7 +174,7 @@ template class BigRadixFloatingPointNumber { } } - int RemoveLeastOrderZeroDigits() { + RT_API_ATTRS int RemoveLeastOrderZeroDigits() { int remove{0}; if (digits_ > 0 && digit_[0] == 0) { while (remove < digits_ && digit_[remove] == 0) { @@ -193,25 +198,25 @@ template class BigRadixFloatingPointNumber { return remove; } - void RemoveLeadingZeroDigits() { + RT_API_ATTRS void RemoveLeadingZeroDigits() { while (digits_ > 0 && digit_[digits_ - 1] == 0) { --digits_; } } - void Normalize() { + RT_API_ATTRS void Normalize() { RemoveLeadingZeroDigits(); exponent_ += RemoveLeastOrderZeroDigits() * log10Radix; } // This limited divisibility test only works for even divisors of the radix, // which is fine since it's only ever used with 2 and 5. - template bool IsDivisibleBy() const { + template RT_API_ATTRS bool IsDivisibleBy() const { static_assert(N > 1 && radix % N == 0, "bad modulus"); return digits_ == 0 || (digit_[0] % N) == 0; } - template int DivideBy() { + template RT_API_ATTRS int DivideBy() { Digit remainder{0}; for (int j{digits_ - 1}; j >= 0; --j) { Digit q{digit_[j] / DIVISOR}; @@ -222,7 +227,7 @@ template class BigRadixFloatingPointNumber { return remainder; } - void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix + RT_API_ATTRS void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix Digit remainder{0}; auto mask{(Digit{1} << twoPow) - 1}; auto coeff{radix >> twoPow}; @@ -234,7 +239,7 @@ template class BigRadixFloatingPointNumber { } // Returns true on overflow - bool DivideByPowerOfTwoInPlace(int twoPow) { + RT_API_ATTRS bool DivideByPowerOfTwoInPlace(int twoPow) { if (digits_ > 0) { while (twoPow > 0) { int chunk{twoPow > log10Radix ? 
log10Radix : twoPow}; @@ -264,7 +269,7 @@ template class BigRadixFloatingPointNumber { return false; // no overflow } - int AddCarry(int position = 0, int carry = 1) { + RT_API_ATTRS int AddCarry(int position = 0, int carry = 1) { for (; position < digits_; ++position) { Digit v{digit_[position] + carry}; if (v < radix) { @@ -286,13 +291,13 @@ template class BigRadixFloatingPointNumber { return carry; } - void Decrement() { + RT_API_ATTRS void Decrement() { for (int j{0}; digit_[j]-- == 0; ++j) { digit_[j] = radix - 1; } } - template int MultiplyByHelper(int carry = 0) { + template RT_API_ATTRS int MultiplyByHelper(int carry = 0) { for (int j{0}; j < digits_; ++j) { auto v{N * digit_[j] + carry}; carry = v / radix; @@ -301,7 +306,7 @@ template class BigRadixFloatingPointNumber { return carry; } - template int MultiplyBy(int carry = 0) { + template RT_API_ATTRS int MultiplyBy(int carry = 0) { if (int newCarry{MultiplyByHelper(carry)}) { return AddCarry(digits_, newCarry); } else { @@ -309,7 +314,7 @@ template class BigRadixFloatingPointNumber { } } - template int MultiplyWithoutNormalization() { + template RT_API_ATTRS int MultiplyWithoutNormalization() { if (int carry{MultiplyByHelper(0)}) { if (digits_ < digitLimit_) { digit_[digits_++] = carry; @@ -322,9 +327,9 @@ template class BigRadixFloatingPointNumber { } } - void LoseLeastSignificantDigit(); // with rounding + RT_API_ATTRS void LoseLeastSignificantDigit(); // with rounding - void PushCarry(int carry) { + RT_API_ATTRS void PushCarry(int carry) { if (digits_ == maxDigits && RemoveLeastOrderZeroDigits() == 0) { LoseLeastSignificantDigit(); digit_[digits_ - 1] += carry; @@ -336,18 +341,20 @@ template class BigRadixFloatingPointNumber { // Adds another number and then divides by two. // Assumes same exponent and sign. // Returns true when the result has effectively been rounded down. - bool Mean(const BigRadixFloatingPointNumber &); + RT_API_ATTRS bool Mean(const BigRadixFloatingPointNumber &); // Parses a floating-point number; leaves the pointer reference // argument pointing at the next character after what was recognized. // The "end" argument can be left null if the caller is sure that the // string is properly terminated with an addressable character that // can't be in a valid floating-point character. 
- bool ParseNumber(const char *&, bool &inexact, const char *end); + RT_API_ATTRS bool ParseNumber(const char *&, bool &inexact, const char *end); using Raw = typename Real::RawType; - constexpr Raw SignBit() const { return Raw{isNegative_} << (Real::bits - 1); } - constexpr Raw Infinity() const { + constexpr RT_API_ATTRS Raw SignBit() const { + return Raw{isNegative_} << (Real::bits - 1); + } + constexpr RT_API_ATTRS Raw Infinity() const { Raw result{static_cast(Real::maxExponent)}; result <<= Real::significandBits; result |= SignBit(); @@ -356,7 +363,7 @@ template class BigRadixFloatingPointNumber { } return result; } - constexpr Raw NaN(bool isQuiet = true) { + constexpr RT_API_ATTRS Raw NaN(bool isQuiet = true) { Raw result{Real::maxExponent}; result <<= Real::significandBits; result |= SignBit(); @@ -369,7 +376,7 @@ template class BigRadixFloatingPointNumber { } return result; } - constexpr Raw HUGE() const { + constexpr RT_API_ATTRS Raw HUGE() const { Raw result{static_cast(Real::maxExponent)}; result <<= Real::significandBits; result |= SignBit(); diff --git a/flang/lib/Decimal/binary-to-decimal.cpp b/flang/lib/Decimal/binary-to-decimal.cpp index 55fc548a6979b..b64865e95df24 100644 --- a/flang/lib/Decimal/binary-to-decimal.cpp +++ b/flang/lib/Decimal/binary-to-decimal.cpp @@ -336,6 +336,8 @@ template ConversionToDecimalResult ConvertToDecimal<113>(char *, std::size_t, BinaryFloatingPointNumber<113>); extern "C" { +RT_EXT_API_GROUP_BEGIN + ConversionToDecimalResult ConvertFloatToDecimal(char *buffer, std::size_t size, enum DecimalConversionFlags flags, int digits, enum FortranRounding rounding, float x) { @@ -365,7 +367,9 @@ ConversionToDecimalResult ConvertLongDoubleToDecimal(char *buffer, rounding, Fortran::decimal::BinaryFloatingPointNumber<113>(x)); } #endif -} + +RT_EXT_API_GROUP_END +} // extern "C" template template diff --git a/flang/lib/Decimal/decimal-to-binary.cpp b/flang/lib/Decimal/decimal-to-binary.cpp index c5cdb72e355f6..94c5177423739 100644 --- a/flang/lib/Decimal/decimal-to-binary.cpp +++ b/flang/lib/Decimal/decimal-to-binary.cpp @@ -11,11 +11,15 @@ #include "flang/Common/leading-zero-bit-count.h" #include "flang/Decimal/binary-floating-point.h" #include "flang/Decimal/decimal.h" +#include "flang/Runtime/freestanding-tools.h" #include #include -#include #include +// Some environments, viz. glibc 2.17 and *BSD, allow the macro HUGE +// to leak out of . +#undef HUGE + namespace Fortran::decimal { template @@ -191,12 +195,12 @@ template class IntermediateFloat { static constexpr IntType topBit{IntType{1} << (precision - 1)}; static constexpr IntType mask{topBit + (topBit - 1)}; - IntermediateFloat() {} + RT_API_ATTRS IntermediateFloat() {} IntermediateFloat(const IntermediateFloat &) = default; // Assumes that exponent_ is valid on entry, and may increment it. // Returns the number of guard_ bits that have been determined. 
- template bool SetTo(UINT n) { + template RT_API_ATTRS bool SetTo(UINT n) { static constexpr int nBits{CHAR_BIT * sizeof n}; if constexpr (precision >= nBits) { value_ = n; @@ -218,14 +222,14 @@ template class IntermediateFloat { } } - void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; } - bool IsFull() const { return value_ >= topBit; } - void AdjustExponent(int by) { exponent_ += by; } - void SetGuard(int g) { + RT_API_ATTRS void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; } + RT_API_ATTRS bool IsFull() const { return value_ >= topBit; } + RT_API_ATTRS void AdjustExponent(int by) { exponent_ += by; } + RT_API_ATTRS void SetGuard(int g) { guard_ |= (static_cast(g & 6) << (guardBits - 3)) | (g & 1); } - ConversionToBinaryResult ToBinary( + RT_API_ATTRS ConversionToBinaryResult ToBinary( bool isNegative, FortranRounding) const; private: @@ -241,7 +245,7 @@ template class IntermediateFloat { // The standard says that these overflow cases round to "representable" // numbers, and some popular compilers interpret that to mean +/-HUGE() // rather than +/-Inf. -static inline constexpr bool RoundOverflowToHuge( +static inline RT_API_ATTRS constexpr bool RoundOverflowToHuge( enum FortranRounding rounding, bool isNegative) { return rounding == RoundToZero || (!isNegative && rounding == RoundDown) || (isNegative && rounding == RoundUp); @@ -468,8 +472,8 @@ BigRadixFloatingPointNumber::ConvertToBinary( ++q; } } - if ((!limit || limit >= q + 3) && toupper(q[0]) == 'N' && - toupper(q[1]) == 'A' && toupper(q[2]) == 'N') { + if ((!limit || limit >= q + 3) && runtime::toupper(q[0]) == 'N' && + runtime::toupper(q[1]) == 'A' && runtime::toupper(q[2]) == 'N') { // NaN p = q + 3; bool isQuiet{true}; @@ -493,11 +497,11 @@ BigRadixFloatingPointNumber::ConvertToBinary( } return {Real{NaN(isQuiet)}}; } else { // Inf? 
-    if ((!limit || limit >= q + 3) && toupper(q[0]) == 'I' &&
-        toupper(q[1]) == 'N' && toupper(q[2]) == 'F') {
-      if ((!limit || limit >= q + 8) && toupper(q[3]) == 'I' &&
-          toupper(q[4]) == 'N' && toupper(q[5]) == 'I' &&
-          toupper(q[6]) == 'T' && toupper(q[7]) == 'Y') {
+    if ((!limit || limit >= q + 3) && runtime::toupper(q[0]) == 'I' &&
+        runtime::toupper(q[1]) == 'N' && runtime::toupper(q[2]) == 'F') {
+      if ((!limit || limit >= q + 8) && runtime::toupper(q[3]) == 'I' &&
+          runtime::toupper(q[4]) == 'N' && runtime::toupper(q[5]) == 'I' &&
+          runtime::toupper(q[6]) == 'T' && runtime::toupper(q[7]) == 'Y') {
         p = q + 8;
       } else {
         p = q + 3;
       }
@@ -531,6 +535,8 @@ template ConversionToBinaryResult<113> ConvertToBinary<113>(
     const char *&, enum FortranRounding, const char *end);

 extern "C" {
+RT_EXT_API_GROUP_BEGIN
+
 enum ConversionResultFlags ConvertDecimalToFloat(
     const char **p, float *f, enum FortranRounding rounding) {
   auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)};
@@ -552,5 +558,7 @@ enum ConversionResultFlags ConvertDecimalToLongDouble(
       reinterpret_cast(&result.binary), sizeof *ld);
   return result.flags;
 }
-}
+
+RT_EXT_API_GROUP_END
+} // extern "C"
 } // namespace Fortran::decimal
diff --git a/flang/lib/Evaluate/check-expression.cpp b/flang/lib/Evaluate/check-expression.cpp
index 7d721399072ca..0e14aa0957294 100644
--- a/flang/lib/Evaluate/check-expression.cpp
+++ b/flang/lib/Evaluate/check-expression.cpp
@@ -478,6 +478,14 @@ std::optional<Expr<SomeType>> NonPointerInitializationExpr(const Symbol &symbol,
       return {std::move(folded)};
     }
   } else if (IsNamedConstant(symbol)) {
+    if (symbol.name() == "numeric_storage_size" &&
+        symbol.owner().IsModule() &&
+        DEREF(symbol.owner().symbol()).name() == "iso_fortran_env") {
+      // Very special case: numeric_storage_size is not folded until
+      // it is read from the iso_fortran_env module file, as its value
+      // depends on compilation options.
+      return {std::move(folded)};
+    }
     context.messages().Say(
         "Value of named constant '%s' (%s) cannot be computed as a constant value"_err_en_US,
         symbol.name(), folded.AsFortran());
diff --git a/flang/lib/Evaluate/fold-implementation.h b/flang/lib/Evaluate/fold-implementation.h
index 9dd8c3843465d..34f79f9e6f25b 100644
--- a/flang/lib/Evaluate/fold-implementation.h
+++ b/flang/lib/Evaluate/fold-implementation.h
@@ -39,7 +39,7 @@
 #include
 #include

-// Some environments, viz. glibc 2.17, allow the macro HUGE
+// Some environments, viz. glibc 2.17 and *BSD, allow the macro HUGE
 // to leak out of <math.h>.
 #undef HUGE

@@ -1969,7 +1969,7 @@ Expr<T> FoldOperation(FoldingContext &context, Divide<T> &&x) {
   // NaN, and Inf respectively.
bool isCanonicalNaNOrInf{false}; if constexpr (T::category == TypeCategory::Real) { - if (folded->second.IsZero() && context.inModuleFile()) { + if (folded->second.IsZero() && context.moduleFileName().has_value()) { using IntType = typename T::Scalar::Word; auto intNumerator{folded->first.template ToInteger()}; isCanonicalNaNOrInf = intNumerator.flags == RealFlags{} && diff --git a/flang/lib/Evaluate/fold-integer.cpp b/flang/lib/Evaluate/fold-integer.cpp index 25ae4831ab208..0a6ff12049f30 100644 --- a/flang/lib/Evaluate/fold-integer.cpp +++ b/flang/lib/Evaluate/fold-integer.cpp @@ -1302,6 +1302,24 @@ Expr> FoldIntrinsicFunction( return FoldSum(context, std::move(funcRef)); } else if (name == "ubound") { return UBOUND(context, std::move(funcRef)); + } else if (name == "__builtin_numeric_storage_size") { + if (!context.moduleFileName()) { + // Don't fold this reference until it appears in the module file + // for ISO_FORTRAN_ENV -- the value depends on the compiler options + // that might be in force. + } else { + auto intBytes{ + context.targetCharacteristics().GetByteSize(TypeCategory::Integer, + context.defaults().GetDefaultKind(TypeCategory::Integer))}; + auto realBytes{ + context.targetCharacteristics().GetByteSize(TypeCategory::Real, + context.defaults().GetDefaultKind(TypeCategory::Real))}; + if (intBytes != realBytes) { + context.messages().Say(*context.moduleFileName(), + "NUMERIC_STORAGE_SIZE from ISO_FORTRAN_ENV is not well-defined when default INTEGER and REAL are not consistent due to compiler options"_warn_en_US); + } + return Expr{8 * std::min(intBytes, realBytes)}; + } } return Expr{std::move(funcRef)}; } diff --git a/flang/lib/Evaluate/fold-reduction.h b/flang/lib/Evaluate/fold-reduction.h index 1ee957c0faebd..c84d35734ab5a 100644 --- a/flang/lib/Evaluate/fold-reduction.h +++ b/flang/lib/Evaluate/fold-reduction.h @@ -182,25 +182,27 @@ static Constant DoReduction(const Constant &array, ConstantSubscript &maskDimAt{maskAt[*dim - 1]}; ConstantSubscript maskDimLbound{maskDimAt}; for (auto n{GetSize(resultShape)}; n-- > 0; - IncrementSubscripts(at, array.shape()), - IncrementSubscripts(maskAt, mask.shape())) { - dimAt = dimLbound; - maskDimAt = maskDimLbound; + array.IncrementSubscripts(at), mask.IncrementSubscripts(maskAt)) { elements.push_back(identity); - bool firstUnmasked{true}; - for (ConstantSubscript j{0}; j < dimExtent; ++j, ++dimAt, ++maskDimAt) { - if (mask.At(maskAt).IsTrue()) { - accumulator(elements.back(), at, firstUnmasked); - firstUnmasked = false; + if (dimExtent > 0) { + dimAt = dimLbound; + maskDimAt = maskDimLbound; + bool firstUnmasked{true}; + for (ConstantSubscript j{0}; j < dimExtent; ++j, ++dimAt, ++maskDimAt) { + if (mask.At(maskAt).IsTrue()) { + accumulator(elements.back(), at, firstUnmasked); + firstUnmasked = false; + } } + --dimAt, --maskDimAt; } accumulator.Done(elements.back()); } } else { // no DIM=, result is scalar elements.push_back(identity); bool firstUnmasked{true}; - for (auto n{array.size()}; n-- > 0; IncrementSubscripts(at, array.shape()), - IncrementSubscripts(maskAt, mask.shape())) { + for (auto n{array.size()}; n-- > 0; + array.IncrementSubscripts(at), mask.IncrementSubscripts(maskAt)) { if (mask.At(maskAt).IsTrue()) { accumulator(elements.back(), at, firstUnmasked); firstUnmasked = false; diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index 9b98d22cc58e5..7226d69f6391c 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -903,6 +903,8 @@ static const 
IntrinsicInterface genericIntrinsicFunction[]{
         {"back", AnyLogical, Rank::elemental, Optionality::optional},
         DefaultingKIND},
     KINDInt},
+    {"__builtin_compiler_options", {}, DefaultChar},
+    {"__builtin_compiler_version", {}, DefaultChar},
     {"__builtin_fma", {{"f1", SameReal}, {"f2", SameReal}, {"f3", SameReal}},
         SameReal},
     {"__builtin_ieee_is_nan", {{"a", AnyFloating}}, DefaultLogical},
@@ -941,8 +943,7 @@ static const IntrinsicInterface genericIntrinsicFunction[]{
     {"__builtin_ieee_support_underflow_control",
         {{"x", AnyReal, Rank::elemental, Optionality::optional}},
         DefaultLogical},
-    {"__builtin_compiler_options", {}, DefaultChar},
-    {"__builtin_compiler_version", {}, DefaultChar},
+    {"__builtin_numeric_storage_size", {}, DefaultInt},
 };

 // TODO: Coarray intrinsic functions
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 3557ea93e1384..42e78fc96e444 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -379,6 +379,9 @@ class AllocateStmtHelper {
   }

   void lowerAllocation(const Allocation &alloc) {
+    if (Fortran::semantics::HasCUDAAttr(alloc.getSymbol()))
+      TODO(loc, "Allocation of variable with CUDA attributes");
+
     fir::MutableBoxValue boxAddr =
         genMutableBoxValue(converter, loc, alloc.getAllocObj());

@@ -588,13 +591,15 @@ class AllocateStmtHelper {
       TODO(loc, "coarray: allocation of a coarray object");
     // Set the length of the allocated object if it has one. Otherwise, get
     // the length from the source for the deferred length parameter.
-    if (lenParams.empty() && box.isCharacter() &&
-        !box.hasNonDeferredLenParams())
+    const bool isDeferredLengthCharacter =
+        box.isCharacter() && !box.hasNonDeferredLenParams();
+    if (lenParams.empty() && isDeferredLengthCharacter)
       lenParams.push_back(fir::factory::readCharLen(builder, loc, exv));
     if (!isSource || alloc.type.IsPolymorphic())
       genRuntimeAllocateApplyMold(builder, loc, box, exv,
                                   alloc.getSymbol().Rank());
-    genSetDeferredLengthParameters(alloc, box);
+    if (isDeferredLengthCharacter)
+      genSetDeferredLengthParameters(alloc, box);
     genAllocateObjectBounds(alloc, box);
     mlir::Value stat;
     if (isSource)
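Stepping back to the `__builtin_numeric_storage_size` folding shown above: the arithmetic reduces to 8 times the smaller of the default `INTEGER` and default `REAL` byte sizes, with a warning when compiler options make the two disagree. A self-contained editorial sketch of that computation (the hypothetical integer inputs stand in for `TargetCharacteristics::GetByteSize`):

```cpp
#include <algorithm>
#include <cstdio>

// Mirrors the fold-integer.cpp logic above: NUMERIC_STORAGE_SIZE is
// 8 * min(default INTEGER bytes, default REAL bytes); the value is only
// well-defined when the two byte sizes agree.
int numericStorageSize(int intBytes, int realBytes, bool *consistent) {
  *consistent = (intBytes == realBytes);
  return 8 * std::min(intBytes, realBytes);
}

int main() {
  bool ok;
  std::printf("%d\n", numericStorageSize(4, 4, &ok)); // 32, consistent
  std::printf("%d\n", numericStorageSize(8, 4, &ok)); // 32, warns: not consistent
}
```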
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 5bba0978617c7..47bd6ace4e4b5 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3710,16 +3710,18 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     return false;
   }

-  static void genCUDADataTransfer(fir::FirOpBuilder &builder,
-                                  mlir::Location loc, bool lhsIsDevice,
-                                  hlfir::Entity &lhs, bool rhsIsDevice,
-                                  hlfir::Entity &rhs) {
+  void genCUDADataTransfer(fir::FirOpBuilder &builder, mlir::Location loc,
+                           const Fortran::evaluate::Assignment &assign,
+                           hlfir::Entity &lhs, hlfir::Entity &rhs) {
+    bool lhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.lhs);
+    bool rhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.rhs);
     if (rhs.isBoxAddressOrValue() || lhs.isBoxAddressOrValue())
       TODO(loc, "CUDA data transfer with descriptors");
+
+    // device = host
     if (lhsIsDevice && !rhsIsDevice) {
       auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
           builder.getContext(), fir::CUDADataTransferKind::HostDevice);
-      // device = host
       if (!rhs.isVariable()) {
         auto associate = hlfir::genAssociateExpr(
             loc, builder, rhs, rhs.getType(), ".cuf_host_tmp");
@@ -3732,7 +3734,73 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       }
       return;
     }
-    TODO(loc, "Assignement with CUDA Fortran variables");
+
+    // host = device
+    if (!lhsIsDevice && rhsIsDevice) {
+      auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
+          builder.getContext(), fir::CUDADataTransferKind::DeviceHost);
+      if (!rhs.isVariable()) {
+        // evaluateRhs loads the scalar. Look for the memory reference to be
+        // used in the transfer.
+        if (mlir::isa_and_nonnull<fir::LoadOp>(rhs.getDefiningOp())) {
+          auto loadOp = mlir::dyn_cast<fir::LoadOp>(rhs.getDefiningOp());
+          builder.create(loc, loadOp.getMemref(), lhs,
+                         transferKindAttr);
+          return;
+        }
+      } else {
+        builder.create(loc, rhs, lhs,
+                       transferKindAttr);
+      }
+      return;
+    }
+
+    // device = device
+    if (lhsIsDevice && rhsIsDevice) {
+      assert(rhs.isVariable() && "CUDA Fortran assignment rhs is not legal");
+      auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
+          builder.getContext(), fir::CUDADataTransferKind::DeviceDevice);
+      builder.create(loc, rhs, lhs, transferKindAttr);
+      return;
+    }
+    llvm_unreachable("Unhandled CUDA data transfer");
+  }
+
+  llvm::SmallVector<mlir::Value>
+  genCUDAImplicitDataTransfer(fir::FirOpBuilder &builder, mlir::Location loc,
+                              const Fortran::evaluate::Assignment &assign) {
+    llvm::SmallVector<mlir::Value> temps;
+    localSymbols.pushScope();
+    auto transferKindAttr = fir::CUDADataTransferKindAttr::get(
+        builder.getContext(), fir::CUDADataTransferKind::DeviceHost);
+    [[maybe_unused]] unsigned nbDeviceResidentObject = 0;
+    for (const Fortran::semantics::Symbol &sym :
+         Fortran::evaluate::CollectSymbols(assign.rhs)) {
+      if (const auto *details =
+              sym.GetUltimate()
+                  .detailsIf<Fortran::semantics::ObjectEntityDetails>()) {
+        if (details->cudaDataAttr()) {
+          if (sym.owner().IsDerivedType() && IsAllocatable(sym.GetUltimate()))
+            TODO(loc, "Device resident allocatable derived-type component");
+          // TODO: This should probably be checked in semantics and given a
+          // proper error.
+          assert(
+              nbDeviceResidentObject <= 1 &&
+              "Only one reference to the device resident object is supported");
+          auto addr = getSymbolAddress(sym);
+          hlfir::Entity entity{addr};
+          auto [temp, cleanup] =
+              hlfir::createTempFromMold(loc, builder, entity);
+          auto needCleanup = fir::getIntIfConstant(cleanup);
+          if (needCleanup && *needCleanup)
+            temps.push_back(temp);
+          addSymbol(sym, temp, /*forced=*/true);
+          builder.create(loc, addr, temp,
+                         transferKindAttr);
+          ++nbDeviceResidentObject;
+        }
+      }
+    }
+    return temps;
+  }

   void genDataAssignment(
@@ -3741,8 +3809,13 @@
     mlir::Location loc = getCurrentLocation();
     fir::FirOpBuilder &builder = getFirOpBuilder();

-    bool lhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.lhs);
-    bool rhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.rhs);
+    bool isCUDATransfer = Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
+                          Fortran::evaluate::HasCUDAAttrs(assign.rhs);
+    bool hasCUDAImplicitTransfer =
+        Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs);
+    llvm::SmallVector<mlir::Value> implicitTemps;
+    if (hasCUDAImplicitTransfer)
+      implicitTemps = genCUDAImplicitDataTransfer(builder, loc, assign);

     // Gather some information about the assignment that will impact how it is
     // lowered.
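The three explicit cases in `genCUDADataTransfer` above map the device-ness of each side of the assignment onto a transfer kind. A self-contained editorial sketch of that dispatch, with stand-in enums (the real code uses `fir::CUDADataTransferKindAttr`, as shown above):

```cpp
#include <cassert>

// Stand-ins for the FIR attribute values used above.
enum class Side { Host, Device };
enum class TransferKind { None, HostDevice, DeviceHost, DeviceDevice };

// Mirrors the lhsIsDevice/rhsIsDevice dispatch: the destination is the
// lhs of the assignment, the source is the rhs.
TransferKind transferKindFor(Side lhs, Side rhs) {
  if (lhs == Side::Device && rhs == Side::Host)
    return TransferKind::HostDevice; // device = host
  if (lhs == Side::Host && rhs == Side::Device)
    return TransferKind::DeviceHost; // host = device
  if (lhs == Side::Device && rhs == Side::Device)
    return TransferKind::DeviceDevice; // device = device
  return TransferKind::None; // plain host assignment, no transfer op
}

int main() {
  assert(transferKindFor(Side::Device, Side::Host) == TransferKind::HostDevice);
  assert(transferKindFor(Side::Host, Side::Device) == TransferKind::DeviceHost);
}
```

The implicit path handled by `genCUDAImplicitDataTransfer` covers the remaining mixed case: a host assignment whose right-hand side references a device-resident object, which first gets copied into a host temporary.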
@@ -3800,12 +3873,16 @@ class FirConverter : public Fortran::lower::AbstractConverter { Fortran::lower::StatementContext localStmtCtx; hlfir::Entity rhs = evaluateRhs(localStmtCtx); hlfir::Entity lhs = evaluateLhs(localStmtCtx); - if (lhsIsDevice || rhsIsDevice) { - genCUDADataTransfer(builder, loc, lhsIsDevice, lhs, rhsIsDevice, rhs); - } else { + if (isCUDATransfer && !hasCUDAImplicitTransfer) + genCUDADataTransfer(builder, loc, assign, lhs, rhs); + else builder.create(loc, rhs, lhs, isWholeAllocatableAssignment, keepLhsLengthInAllocatableAssignment); + if (hasCUDAImplicitTransfer) { + localSymbols.popScope(); + for (mlir::Value temp : implicitTemps) + builder.create(loc, temp); } return; } diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index f59c784cff6f9..01473f7f6cd72 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -1469,7 +1469,7 @@ static void lowerExplicitLowerBounds( /// CFI_desc_t requirements in 18.5.3 point 5.). static mlir::Value getAssumedSizeExtent(mlir::Location loc, fir::FirOpBuilder &builder) { - return builder.createIntegerConstant(loc, builder.getIndexType(), -1); + return builder.createMinusOneInteger(loc, builder.getIndexType()); } /// Lower explicit extents into \p result if this is an explicit-shape or diff --git a/flang/lib/Lower/DirectivesCommon.h b/flang/lib/Lower/DirectivesCommon.h index 6daa72b84d90d..3ebf3fd965da1 100644 --- a/flang/lib/Lower/DirectivesCommon.h +++ b/flang/lib/Lower/DirectivesCommon.h @@ -744,7 +744,7 @@ genBoundsOpsFromBox(fir::FirOpBuilder &builder, mlir::Location loc, // Box is not present. Populate bound values with default values. llvm::SmallVector boundValues; mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0); - mlir::Value mOne = builder.createIntegerConstant(loc, idxTy, -1); + mlir::Value mOne = builder.createMinusOneInteger(loc, idxTy); for (unsigned dim = 0; dim < dataExv.rank(); ++dim) { boundValues.push_back(zero); // lb boundValues.push_back(mOne); // ub diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index c1c94119fd908..0453c01522779 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -13,7 +13,9 @@ #include "ReductionProcessor.h" #include "flang/Lower/AbstractConverter.h" +#include "flang/Lower/ConvertType.h" #include "flang/Lower/SymbolMap.h" +#include "flang/Optimizer/Builder/Complex.h" #include "flang/Optimizer/Builder/HLFIRTools.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIRType.h" @@ -131,7 +133,7 @@ ReductionProcessor::getReductionInitValue(mlir::Location loc, mlir::Type type, fir::FirOpBuilder &builder) { type = fir::unwrapRefType(type); if (!fir::isa_integer(type) && !fir::isa_real(type) && - !mlir::isa(type)) + !fir::isa_complex(type) && !mlir::isa(type)) TODO(loc, "Reduction of some types is not supported"); switch (redId) { case ReductionIdentifier::MAX: { @@ -175,6 +177,16 @@ ReductionProcessor::getReductionInitValue(mlir::Location loc, mlir::Type type, case ReductionIdentifier::OR: case ReductionIdentifier::EQV: case ReductionIdentifier::NEQV: + if (auto cplxTy = mlir::dyn_cast(type)) { + mlir::Type realTy = + Fortran::lower::convertReal(builder.getContext(), cplxTy.getFKind()); + mlir::Value initRe = builder.createRealConstant( + loc, realTy, getOperationIdentity(redId, loc)); + mlir::Value initIm = builder.createRealConstant(loc, realTy, 0); + + return 
fir::factory::Complex{builder, loc}.createComplex(type, initRe, + initIm); + } if (type.isa()) return builder.create( loc, type, @@ -229,13 +241,13 @@ mlir::Value ReductionProcessor::createScalarCombiner( break; case ReductionIdentifier::ADD: reductionOp = - getReductionOperation( - builder, type, loc, op1, op2); + getReductionOperation(builder, type, loc, op1, op2); break; case ReductionIdentifier::MULTIPLY: reductionOp = - getReductionOperation( - builder, type, loc, op1, op2); + getReductionOperation(builder, type, loc, op1, op2); break; case ReductionIdentifier::AND: { mlir::Value op1I1 = builder.createConvert(loc, builder.getI1Type(), op1); diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.h b/flang/lib/Lower/OpenMP/ReductionProcessor.h index ee2732547fc28..8b116a4c52041 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.h +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.h @@ -97,6 +97,10 @@ class ReductionProcessor { fir::FirOpBuilder &builder); template + static mlir::Value getReductionOperation(fir::FirOpBuilder &builder, + mlir::Type type, mlir::Location loc, + mlir::Value op1, mlir::Value op2); + template static mlir::Value getReductionOperation(fir::FirOpBuilder &builder, mlir::Type type, mlir::Location loc, mlir::Value op1, mlir::Value op2); @@ -136,12 +140,26 @@ ReductionProcessor::getReductionOperation(fir::FirOpBuilder &builder, mlir::Value op1, mlir::Value op2) { type = fir::unwrapRefType(type); assert(type.isIntOrIndexOrFloat() && - "only integer and float types are currently supported"); + "only integer, float and complex types are currently supported"); if (type.isIntOrIndex()) return builder.create(loc, op1, op2); return builder.create(loc, op1, op2); } +template +mlir::Value +ReductionProcessor::getReductionOperation(fir::FirOpBuilder &builder, + mlir::Type type, mlir::Location loc, + mlir::Value op1, mlir::Value op2) { + assert((type.isIntOrIndexOrFloat() || fir::isa_complex(type)) && + "only integer, float and complex types are currently supported"); + if (type.isIntOrIndex()) + return builder.create(loc, op1, op2); + if (fir::isa_real(type)) + return builder.create(loc, op1, op2); + return builder.create(loc, op1, op2); +} + } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Lower/Runtime.cpp b/flang/lib/Lower/Runtime.cpp index e7695929623f6..3474832bdb225 100644 --- a/flang/lib/Lower/Runtime.cpp +++ b/flang/lib/Lower/Runtime.cpp @@ -55,6 +55,8 @@ static void genUnreachable(fir::FirOpBuilder &builder, mlir::Location loc) { void Fortran::lower::genStopStatement( Fortran::lower::AbstractConverter &converter, const Fortran::parser::StopStmt &stmt) { + const bool isError = std::get(stmt.t) == + Fortran::parser::StopStmt::Kind::ErrorStop; fir::FirOpBuilder &builder = converter.getFirOpBuilder(); mlir::Location loc = converter.getCurrentLocation(); Fortran::lower::StatementContext stmtCtx; @@ -94,13 +96,12 @@ void Fortran::lower::genStopStatement( } else { callee = fir::runtime::getRuntimeFunc(loc, builder); calleeType = callee.getFunctionType(); - operands.push_back( - builder.createIntegerConstant(loc, calleeType.getInput(0), 0)); + // Default values are as advised in F'2023 11.4 p2. + operands.push_back(builder.createIntegerConstant( + loc, calleeType.getInput(0), isError ?
1 : 0)); } // Second operand indicates ERROR STOP - bool isError = std::get(stmt.t) == - Fortran::parser::StopStmt::Kind::ErrorStop; operands.push_back(builder.createIntegerConstant( loc, calleeType.getInput(operands.size()), isError)); diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index e4362b2f9e694..b09da4929a8a2 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -128,9 +128,21 @@ mlir::Value fir::FirOpBuilder::createNullConstant(mlir::Location loc, mlir::Value fir::FirOpBuilder::createIntegerConstant(mlir::Location loc, mlir::Type ty, std::int64_t cst) { + assert((cst >= 0 || mlir::isa(ty) || + mlir::cast(ty).getWidth() <= 64) && + "must use APInt"); return create(loc, ty, getIntegerAttr(ty, cst)); } +mlir::Value fir::FirOpBuilder::createAllOnesInteger(mlir::Location loc, + mlir::Type ty) { + if (mlir::isa(ty)) + return createIntegerConstant(loc, ty, -1); + llvm::APInt allOnes = + llvm::APInt::getAllOnes(mlir::cast(ty).getWidth()); + return create(loc, ty, getIntegerAttr(ty, allOnes)); +} + mlir::Value fir::FirOpBuilder::createRealConstant(mlir::Location loc, mlir::Type fltTy, llvm::APFloat::integerPart val) { diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 5f6de9439b4bc..4ee7258004fa7 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -3621,7 +3621,7 @@ mlir::Value IntrinsicLibrary::genIbclr(mlir::Type resultType, assert(args.size() == 2); mlir::Value pos = builder.createConvert(loc, resultType, args[1]); mlir::Value one = builder.createIntegerConstant(loc, resultType, 1); - mlir::Value ones = builder.createIntegerConstant(loc, resultType, -1); + mlir::Value ones = builder.createAllOnesInteger(loc, resultType); auto mask = builder.create(loc, one, pos); auto res = builder.create(loc, ones, mask); return builder.create(loc, args[0], res); @@ -3645,7 +3645,7 @@ mlir::Value IntrinsicLibrary::genIbits(mlir::Type resultType, loc, resultType, resultType.cast().getWidth()); auto shiftCount = builder.create(loc, bitSize, len); mlir::Value zero = builder.createIntegerConstant(loc, resultType, 0); - mlir::Value ones = builder.createIntegerConstant(loc, resultType, -1); + mlir::Value ones = builder.createAllOnesInteger(loc, resultType); auto mask = builder.create(loc, ones, shiftCount); auto res1 = builder.create(loc, args[0], pos); auto res2 = builder.create(loc, res1, mask); @@ -3805,7 +3805,8 @@ mlir::Value IntrinsicLibrary::genIeeeClass(mlir::Type resultType, assert(args.size() == 1); mlir::Value realVal = args[0]; mlir::FloatType realType = realVal.getType().dyn_cast(); - mlir::Type intType = builder.getIntegerType(realType.getWidth()); + const unsigned intWidth = realType.getWidth(); + mlir::Type intType = builder.getIntegerType(intWidth); mlir::Value intVal = builder.create(loc, intType, realVal); llvm::StringRef tableName = RTNAME_STRING(IeeeClassTable); @@ -3816,41 +3817,25 @@ mlir::Value IntrinsicLibrary::genIeeeClass(mlir::Type resultType, auto createIntegerConstant = [&](uint64_t k) { return builder.createIntegerConstant(loc, intType, k); }; + auto createIntegerConstantAPI = [&](const llvm::APInt &apInt) { + return builder.create( + loc, intType, builder.getIntegerAttr(intType, apInt)); + }; auto getMasksAndShifts = [&](uint64_t totalSize, uint64_t exponentSize, uint64_t significandSize, bool hasExplicitBit = false) { assert(1 + exponentSize +
significandSize == totalSize && "invalid floating point fields"); - constexpr uint64_t one = 1; // type promotion uint64_t lowSignificandSize = significandSize - hasExplicitBit - 1; signShift = createIntegerConstant(totalSize - 1 - hasExplicitBit - 4); highSignificandShift = createIntegerConstant(lowSignificandSize); - if (totalSize <= 64) { - exponentMask = - createIntegerConstant(((one << exponentSize) - 1) << significandSize); - lowSignificandMask = - createIntegerConstant((one << lowSignificandSize) - 1); - return; - } - // Mlir can't directly build large constants. Build them in steps. - // The folded end result is the same. - mlir::Value sixtyfour = createIntegerConstant(64); - exponentMask = createIntegerConstant(((one << exponentSize) - 1) - << (significandSize - 64)); - exponentMask = - builder.create(loc, exponentMask, sixtyfour); - if (lowSignificandSize <= 64) { - lowSignificandMask = - createIntegerConstant((one << lowSignificandSize) - 1); - return; - } - mlir::Value ones = createIntegerConstant(0xffffffffffffffff); - lowSignificandMask = - createIntegerConstant((one << (lowSignificandSize - 64)) - 1); - lowSignificandMask = - builder.create(loc, lowSignificandMask, sixtyfour); - lowSignificandMask = - builder.create(loc, lowSignificandMask, ones); + llvm::APInt exponentMaskAPI = + llvm::APInt::getBitsSet(intWidth, /*lo=*/significandSize, + /*hi=*/significandSize + exponentSize); + exponentMask = createIntegerConstantAPI(exponentMaskAPI); + llvm::APInt lowSignificandMaskAPI = + llvm::APInt::getLowBitsSet(intWidth, lowSignificandSize); + lowSignificandMask = createIntegerConstantAPI(lowSignificandMaskAPI); }; switch (realType.getWidth()) { case 16: @@ -4318,7 +4303,7 @@ mlir::Value IntrinsicLibrary::genIeeeLogb(mlir::Type resultType, // X is zero -- result is -infinity builder.setInsertionPointToStart(&outerIfOp.getThenRegion().front()); genRaiseExcept(_FORTRAN_RUNTIME_IEEE_DIVIDE_BY_ZERO); - mlir::Value ones = builder.createIntegerConstant(loc, intType, -1); + mlir::Value ones = builder.createAllOnesInteger(loc, intType); mlir::Value result = builder.create( loc, ones, builder.createIntegerConstant(loc, intType, @@ -4937,7 +4922,7 @@ mlir::Value IntrinsicLibrary::genIshftc(mlir::Type resultType, mlir::Value size = args[2] ? 
builder.createConvert(loc, resultType, args[2]) : bitSize; mlir::Value zero = builder.createIntegerConstant(loc, resultType, 0); - mlir::Value ones = builder.createIntegerConstant(loc, resultType, -1); + mlir::Value ones = builder.createAllOnesInteger(loc, resultType); mlir::Value absShift = genAbs(resultType, {shift}); auto elseSize = builder.create(loc, size, absShift); auto shiftIsZero = builder.create( @@ -5073,7 +5058,7 @@ mlir::Value IntrinsicLibrary::genMask(mlir::Type resultType, assert(args.size() == 2); mlir::Value zero = builder.createIntegerConstant(loc, resultType, 0); - mlir::Value ones = builder.createIntegerConstant(loc, resultType, -1); + mlir::Value ones = builder.createAllOnesInteger(loc, resultType); mlir::Value bitSize = builder.createIntegerConstant( loc, resultType, resultType.getIntOrFloatBitWidth()); mlir::Value bitsToSet = builder.createConvert(loc, resultType, args[0]); @@ -5206,7 +5191,7 @@ mlir::Value IntrinsicLibrary::genMergeBits(mlir::Type resultType, mlir::Value i = builder.createConvert(loc, resultType, args[0]); mlir::Value j = builder.createConvert(loc, resultType, args[1]); mlir::Value mask = builder.createConvert(loc, resultType, args[2]); - mlir::Value ones = builder.createIntegerConstant(loc, resultType, -1); + mlir::Value ones = builder.createAllOnesInteger(loc, resultType); // MERGE_BITS(I, J, MASK) = IOR(IAND(I, MASK), IAND(J, NOT(MASK))) mlir::Value notMask = builder.create(loc, mask, ones); @@ -5353,7 +5338,7 @@ void IntrinsicLibrary::genMvbits(llvm::ArrayRef args) { auto to = builder.create(loc, resultType, toAddr); mlir::Value topos = builder.createConvert(loc, resultType, unbox(args[4])); mlir::Value zero = builder.createIntegerConstant(loc, resultType, 0); - mlir::Value ones = builder.createIntegerConstant(loc, resultType, -1); + mlir::Value ones = builder.createAllOnesInteger(loc, resultType); mlir::Value bitSize = builder.createIntegerConstant( loc, resultType, resultType.cast().getWidth()); auto shiftCount = builder.create(loc, bitSize, len); @@ -5432,7 +5417,7 @@ IntrinsicLibrary::genNorm2(mlir::Type resultType, mlir::Value IntrinsicLibrary::genNot(mlir::Type resultType, llvm::ArrayRef args) { assert(args.size() == 1); - mlir::Value allOnes = builder.createIntegerConstant(loc, resultType, -1); + mlir::Value allOnes = builder.createAllOnesInteger(loc, resultType); return builder.create(loc, args[0], allOnes); } @@ -5875,7 +5860,7 @@ mlir::Value IntrinsicLibrary::genShiftA(mlir::Type resultType, // the shift amount is equal to the element size. // So if SHIFT is equal to the bit width then it is handled as a special case. 
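// (Hedged aside, not in the patch: SHIFTA(i, bit_size(i)) must yield 0 for
// non-negative i and -1 for negative i, while an arithmetic shift by the full
// bit width is not well defined at the arith-dialect level, hence the
// valueIsNeg/specialRes selection below.)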
mlir::Value zero = builder.createIntegerConstant(loc, resultType, 0); - mlir::Value minusOne = builder.createIntegerConstant(loc, resultType, -1); + mlir::Value minusOne = builder.createMinusOneInteger(loc, resultType); mlir::Value valueIsNeg = builder.create( loc, mlir::arith::CmpIPredicate::slt, args[0], zero); mlir::Value specialRes = diff --git a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp index e588b19dded4f..160118e2c050a 100644 --- a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp @@ -2120,7 +2120,7 @@ PPCIntrinsicLibrary::genVecPerm(mlir::Type resultType, if (isNativeVecElemOrderOnLE()) { auto i8Ty{mlir::IntegerType::get(context, 8)}; auto v8Ty{mlir::VectorType::get(16, i8Ty)}; - auto negOne{builder.createIntegerConstant(loc, i8Ty, -1)}; + auto negOne{builder.createMinusOneInteger(loc, i8Ty)}; auto vNegOne{ builder.create(loc, v8Ty, negOne)}; @@ -2209,7 +2209,7 @@ PPCIntrinsicLibrary::genVecSel(mlir::Type resultType, auto vargs{convertVecArgs(builder, loc, vecTyInfos, argBases)}; auto i8Ty{mlir::IntegerType::get(builder.getContext(), 8)}; - auto negOne{builder.createIntegerConstant(loc, i8Ty, -1)}; + auto negOne{builder.createMinusOneInteger(loc, i8Ty)}; // construct a constant <16 x i8> vector with value -1 for bitcast auto bcVecTy{mlir::VectorType::get(16, i8Ty)}; diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 06ce84f1543a3..d909bda89cdeb 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -92,27 +92,10 @@ static mlir::Block *createBlock(mlir::ConversionPatternRewriter &rewriter, mlir::Region::iterator(insertBefore)); } -/// Extract constant from a value if it is a result of one of the -/// ConstantOp operations, otherwise, return std::nullopt. -static std::optional getIfConstantIntValue(mlir::Value val) { - if (!val || !val.dyn_cast()) - return {}; - - mlir::Operation *defop = val.getDefiningOp(); - - if (auto constOp = mlir::dyn_cast(defop)) - return constOp.value(); - if (auto llConstOp = mlir::dyn_cast(defop)) - if (auto attr = llConstOp.getValue().dyn_cast()) - return attr.getValue().getSExtValue(); - - return {}; -} - /// Extract constant from a value that must be the result of one of the /// ConstantOp operations. static int64_t getConstantIntValue(mlir::Value val) { - if (auto constVal = getIfConstantIntValue(val)) + if (auto constVal = fir::getIntIfConstant(val)) return *constVal; fir::emitFatalError(val.getLoc(), "must be a constant"); } @@ -664,7 +647,7 @@ struct ConvertOpConversion : public fir::FIROpConversion { << " -> " << toTy; // Do folding for constant inputs. - if (auto constVal = getIfConstantIntValue(op0)) { + if (auto constVal = fir::getIntIfConstant(op0)) { mlir::Value normVal = genConstantIndex(loc, toTy, rewriter, *constVal ? 
1 : 0); rewriter.replaceOp(convert, normVal); diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index dba2c30d1851b..8ab74103cb6a8 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -3834,6 +3834,18 @@ bool fir::anyFuncArgsHaveAttr(mlir::func::FuncOp func, llvm::StringRef attr) { return false; } +std::optional fir::getIntIfConstant(mlir::Value value) { + if (auto *definingOp = value.getDefiningOp()) { + if (auto cst = mlir::dyn_cast(definingOp)) + if (auto intAttr = cst.getValue().dyn_cast()) + return intAttr.getInt(); + if (auto llConstOp = mlir::dyn_cast(definingOp)) + if (auto attr = llConstOp.getValue().dyn_cast()) + return attr.getValue().getSExtValue(); + } + return {}; +} + mlir::Type fir::applyPathToType(mlir::Type eleTy, mlir::ValueRange path) { for (auto i = path.begin(), end = path.end(); eleTy && i < end;) { eleTy = llvm::TypeSwitch(eleTy) diff --git a/flang/lib/Optimizer/Transforms/AddDebugFoundation.cpp b/flang/lib/Optimizer/Transforms/AddDebugFoundation.cpp index 7a6f58066722d..678fbf6a7d23b 100644 --- a/flang/lib/Optimizer/Transforms/AddDebugFoundation.cpp +++ b/flang/lib/Optimizer/Transforms/AddDebugFoundation.cpp @@ -65,7 +65,7 @@ void AddDebugFoundationPass::runOnOperation() { mlir::LLVM::DIFileAttr fileAttr = getFileAttr(inputFilePath); mlir::StringAttr producer = mlir::StringAttr::get(context, "Flang"); mlir::LLVM::DICompileUnitAttr cuAttr = mlir::LLVM::DICompileUnitAttr::get( - context, mlir::DistinctAttr::create(mlir::UnitAttr::get(context)), + mlir::DistinctAttr::create(mlir::UnitAttr::get(context)), llvm::dwarf::getLanguage("DW_LANG_Fortran95"), fileAttr, producer, /*isOptimized=*/false, mlir::LLVM::DIEmissionKind::LineTablesOnly); diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp index 21185694227d9..2bdb8e38db95d 100644 --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -437,7 +437,8 @@ TYPE_PARSER(construct(name, maybe("=" >> scalarIntConstantExpr))) TYPE_PARSER(recovery( withMessage("expected component definition"_err_en_US, first(construct(Parser{}), - construct(Parser{}))), + construct(Parser{}), + construct(indirect(compilerDirective)))), construct(inStmtErrorRecovery))) // R737 data-component-def-stmt -> @@ -702,13 +703,15 @@ TYPE_PARSER(construct(accessSpec) || extension( construct(Parser{}))) -// CUDA-data-attr -> CONSTANT | DEVICE | MANAGED | PINNED | SHARED | TEXTURE +// CUDA-data-attr -> +// CONSTANT | DEVICE | MANAGED | PINNED | SHARED | TEXTURE | UNIFIED TYPE_PARSER("CONSTANT" >> pure(common::CUDADataAttr::Constant) || "DEVICE" >> pure(common::CUDADataAttr::Device) || "MANAGED" >> pure(common::CUDADataAttr::Managed) || "PINNED" >> pure(common::CUDADataAttr::Pinned) || "SHARED" >> pure(common::CUDADataAttr::Shared) || - "TEXTURE" >> pure(common::CUDADataAttr::Texture)) + "TEXTURE" >> pure(common::CUDADataAttr::Texture) || + "UNIFIED" >> pure(common::CUDADataAttr::Unified)) // R804 object-name -> name constexpr auto objectName{name}; diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 51a16ee155fab..bd2f755855172 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -1466,6 +1466,29 @@ static void CheckImage_Index(evaluate::ActualArguments &arguments, } } +// Ensure that any optional argument that might be absent at run time +// does not require data conversion. 
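+// (Hedged illustration, not part of the change: given INTEGER(8) :: a, b and
+// INTEGER(4), OPTIONAL :: j, a reference MAX(a, b, j) is now diagnosed,
+// because converting j to the INTEGER(8) result type would require its value
+// even when j is absent.)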
+static void CheckMaxMin(const characteristics::Procedure &proc, + evaluate::ActualArguments &arguments, + parser::ContextualMessages &messages) { + if (proc.functionResult) { + if (const auto *typeAndShape{proc.functionResult->GetTypeAndShape()}) { + for (std::size_t j{2}; j < arguments.size(); ++j) { + if (arguments[j]) { + if (const auto *expr{arguments[j]->UnwrapExpr()}; + expr && evaluate::MayBePassedAsAbsentOptional(*expr)) { + if (auto thisType{expr->GetType()}; + thisType && *thisType != typeAndShape->type()) { + messages.Say(arguments[j]->sourceLocation(), + "An actual argument to MAX/MIN requiring data conversion may not be OPTIONAL, POINTER, or ALLOCATABLE"_err_en_US); + } + } + } + } + } + } +} + // MOVE_ALLOC (F'2023 16.9.147) static void CheckMove_Alloc(evaluate::ActualArguments &arguments, parser::ContextualMessages &messages) { @@ -1733,13 +1756,15 @@ static void CheckTransfer(evaluate::ActualArguments &arguments, } } -static void CheckSpecificIntrinsic(evaluate::ActualArguments &arguments, - SemanticsContext &context, const Scope *scope, - const evaluate::SpecificIntrinsic &intrinsic) { +static void CheckSpecificIntrinsic(const characteristics::Procedure &proc, + evaluate::ActualArguments &arguments, SemanticsContext &context, + const Scope *scope, const evaluate::SpecificIntrinsic &intrinsic) { if (intrinsic.name == "associated") { CheckAssociated(arguments, context, scope); } else if (intrinsic.name == "image_index") { CheckImage_Index(arguments, context.foldingContext().messages()); + } else if (intrinsic.name == "max" || intrinsic.name == "min") { + CheckMaxMin(proc, arguments, context.foldingContext().messages()); } else if (intrinsic.name == "move_alloc") { CheckMove_Alloc(arguments, context.foldingContext().messages()); } else if (intrinsic.name == "present") { @@ -1790,7 +1815,7 @@ static parser::Messages CheckExplicitInterface( CheckElementalConformance(messages, proc, actuals, foldingContext); } if (intrinsic) { - CheckSpecificIntrinsic(actuals, context, scope, *intrinsic); + CheckSpecificIntrinsic(proc, actuals, context, scope, *intrinsic); } return buffer; } diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp index c0c6ff4c1a2ba..e0a796972441b 100644 --- a/flang/lib/Semantics/check-cuda.cpp +++ b/flang/lib/Semantics/check-cuda.cpp @@ -9,12 +9,14 @@ #include "check-cuda.h" #include "flang/Common/template.h" #include "flang/Evaluate/fold.h" +#include "flang/Evaluate/tools.h" #include "flang/Evaluate/traverse.h" #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" #include "flang/Parser/tools.h" #include "flang/Semantics/expression.h" #include "flang/Semantics/symbol.h" +#include "flang/Semantics/tools.h" // Once labeled DO constructs have been canonicalized and their parse subtrees // transformed into parser::DoConstructs, scan the parser::Blocks of the program @@ -275,9 +277,73 @@ template class DeviceContextChecker { }, ec.u); } + template + static const SEEK *GetIOControl(const A &stmt) { + for (const auto &spec : stmt.controls) { + if (const auto *result{std::get_if(&spec.u)}) { + return result; + } + } + return nullptr; + } + template static bool IsInternalIO(const A &stmt) { + if (stmt.iounit.has_value()) { + return std::holds_alternative(stmt.iounit->u); + } + if (auto *unit{GetIOControl(stmt)}) { + return std::holds_alternative(unit->u); + } + return false; + } + void WarnOnIoStmt(const parser::CharBlock &source) { + context_.Say( + source, "I/O statement might not be supported on 
device"_warn_en_US); + } + template + void WarnIfNotInternal(const A &stmt, const parser::CharBlock &source) { + if (!IsInternalIO(stmt)) { + WarnOnIoStmt(source); + } + } void Check(const parser::ActionStmt &stmt, const parser::CharBlock &source) { common::visit( common::visitors{ + [&](const common::Indirection &) {}, + [&](const common::Indirection &x) { + if (x.value().format) { // Formatted write to '*' or '6' + if (std::holds_alternative( + x.value().format->u)) { + if (x.value().iounit) { + if (std::holds_alternative( + x.value().iounit->u)) { + return; + } + } + } + } + WarnIfNotInternal(x.value(), source); + }, + [&](const common::Indirection &x) { + WarnOnIoStmt(source); + }, + [&](const common::Indirection &x) { + WarnOnIoStmt(source); + }, + [&](const common::Indirection &x) { + WarnOnIoStmt(source); + }, + [&](const common::Indirection &x) { + WarnIfNotInternal(x.value(), source); + }, + [&](const common::Indirection &x) { + WarnOnIoStmt(source); + }, + [&](const common::Indirection &x) { + WarnOnIoStmt(source); + }, + [&](const common::Indirection &x) { + WarnOnIoStmt(source); + }, [&](const auto &x) { if (auto msg{ActionStmtChecker::WhyNotOk(x)}) { context_.Say(source, std::move(*msg)); @@ -413,4 +479,18 @@ void CUDAChecker::Enter(const parser::CUFKernelDoConstruct &x) { } } +void CUDAChecker::Enter(const parser::AssignmentStmt &x) { + const evaluate::Assignment *assign{semantics::GetAssignment(x)}; + int nbLhs{evaluate::GetNbOfCUDASymbols(assign->lhs)}; + int nbRhs{evaluate::GetNbOfCUDASymbols(assign->rhs)}; + auto lhsLoc{std::get(x.t).GetSource()}; + + // device to host transfer with more than one device object on the rhs is not + // legal. + if (nbLhs == 0 && nbRhs > 1) { + context_.Say(lhsLoc, + "More than one reference to a CUDA object on the right hand side of the assigment"_err_en_US); + } +} + } // namespace Fortran::semantics diff --git a/flang/lib/Semantics/check-cuda.h b/flang/lib/Semantics/check-cuda.h index d863795f16a7c..aa0cb46360bef 100644 --- a/flang/lib/Semantics/check-cuda.h +++ b/flang/lib/Semantics/check-cuda.h @@ -17,6 +17,7 @@ struct Program; class Messages; struct Name; class CharBlock; +struct AssignmentStmt; struct ExecutionPartConstruct; struct ExecutableConstruct; struct ActionStmt; @@ -38,6 +39,7 @@ class CUDAChecker : public virtual BaseChecker { void Enter(const parser::FunctionSubprogram &); void Enter(const parser::SeparateModuleSubprogram &); void Enter(const parser::CUFKernelDoConstruct &); + void Enter(const parser::AssignmentStmt &); private: SemanticsContext &context_; diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index dec8fee774c5b..824f1b6053ca3 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -948,6 +948,11 @@ void CheckHelper::CheckObjectEntity( "Component '%s' with ATTRIBUTES(DEVICE) must also be allocatable"_err_en_US, symbol.name()); } + if (IsAssumedSizeArray(symbol)) { + messages_.Say( + "Object '%s' with ATTRIBUTES(DEVICE) may not be assumed size"_err_en_US, + symbol.name()); + } break; case common::CUDADataAttr::Managed: if (!IsAutomatic(symbol) && !IsAllocatable(symbol) && @@ -983,6 +988,13 @@ void CheckHelper::CheckObjectEntity( symbol.name()); } break; + case common::CUDADataAttr::Unified: + if ((!subpDetails || inDeviceSubprogram) && !isComponent) { + messages_.Say( + "Object '%s' with ATTRIBUTES(UNIFIED) must be declared in a host subprogram"_err_en_US, + symbol.name()); + } + break; case 
common::CUDADataAttr::Texture: messages_.Say( "ATTRIBUTES(TEXTURE) is obsolete and no longer supported"_err_en_US); diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index 5d0d210fa3487..4a531c3c0f99f 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -1458,11 +1458,11 @@ Scope *ModFileReader::Read(SourceName name, std::optional isIntrinsic, parentScope = ancestor; } // Process declarations from the module file - bool wasInModuleFile{context_.foldingContext().inModuleFile()}; - context_.foldingContext().set_inModuleFile(true); + auto wasModuleFileName{context_.foldingContext().moduleFileName()}; + context_.foldingContext().set_moduleFileName(name); GetModuleDependences(context_.moduleDependences(), sourceFile->content()); ResolveNames(context_, parseTree, topScope); - context_.foldingContext().set_inModuleFile(wasInModuleFile); + context_.foldingContext().set_moduleFileName(wasModuleFileName); if (!moduleSymbol) { // Submodule symbols' storage are owned by their parents' scopes, // but their names are not in their parents' dictionaries -- we diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 2e88a2daff2c0..f0198cb792280 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -175,7 +175,9 @@ class BaseVisitor { } } - bool InModuleFile() const { return GetFoldingContext().inModuleFile(); } + bool InModuleFile() const { + return GetFoldingContext().moduleFileName().has_value(); + } // Make a placeholder symbol for a Name that otherwise wouldn't have one. // It is not in any scope and always has MiscDetails. @@ -1635,6 +1637,8 @@ class ResolveNamesVisitor : public virtual ScopeHandler, void FinishDerivedTypeInstantiation(Scope &); void ResolveExecutionParts(const ProgramTree &); void UseCUDABuiltinNames(); + void HandleDerivedTypesInImplicitStmts(const parser::ImplicitPart &, + const std::list &); }; // ImplicitRules implementation @@ -2035,6 +2039,7 @@ bool ImplicitRulesVisitor::Pre(const parser::ImplicitSpec &) { } void ImplicitRulesVisitor::Post(const parser::ImplicitSpec &) { + set_allowForwardReferenceToDerivedType(false); EndDeclTypeSpec(); } @@ -2250,7 +2255,7 @@ void ScopeHandler::SayWithDecl( const parser::Name &name, Symbol &symbol, MessageFixedText &&msg) { bool isFatal{msg.IsFatal()}; Say(name, std::move(msg), symbol.name()) - .Attach(Message{name.source, + .Attach(Message{symbol.name(), symbol.test(Symbol::Flag::Implicit) ? "Implicit declaration of '%s'"_en_US : "Declaration of '%s'"_en_US, @@ -7840,7 +7845,7 @@ const parser::Name *DeclarationVisitor::FindComponent( auto &symbol{base->symbol->GetUltimate()}; if (!symbol.has() && !ConvertToObjectEntity(symbol)) { SayWithDecl(*base, symbol, - "'%s' is an invalid base for a component reference"_err_en_US); + "'%s' is not an object and may not be used as the base of a component reference or type parameter inquiry"_err_en_US); return nullptr; } auto *type{symbol.GetType()}; @@ -8329,6 +8334,67 @@ static bool NeedsExplicitType(const Symbol &symbol) { } } +void ResolveNamesVisitor::HandleDerivedTypesInImplicitStmts( + const parser::ImplicitPart &implicitPart, + const std::list &decls) { + // Detect derived type definitions and create symbols for them now if + // they appear in IMPLICIT statements so that these forward-looking + // references will not be ambiguous with host associations. 
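+ // A hedged example of the pattern this enables (names hypothetical):
+ //   implicit type(t)(a-z)
+ //   type :: t
+ //   end type t
+ // Creating the forward DerivedTypeDetails symbol for 't' here keeps the
+ // IMPLICIT reference from binding to a host-associated 't' declared in an
+ // enclosing scope.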
+ std::set implicitDerivedTypes; + for (const auto &ipStmt : implicitPart.v) { + if (const auto *impl{std::get_if< + parser::Statement>>( + &ipStmt.u)}) { + if (const auto *specs{std::get_if>( + &impl->statement.value().u)}) { + for (const auto &spec : *specs) { + const auto &declTypeSpec{ + std::get(spec.t)}; + if (const auto *dtSpec{common::visit( + common::visitors{ + [](const parser::DeclarationTypeSpec::Type &x) { + return &x.derived; + }, + [](const parser::DeclarationTypeSpec::Class &x) { + return &x.derived; + }, + [](const auto &) -> const parser::DerivedTypeSpec * { + return nullptr; + }}, + declTypeSpec.u)}) { + implicitDerivedTypes.emplace( + std::get(dtSpec->t).source); + } + } + } + } + } + if (!implicitDerivedTypes.empty()) { + for (const auto &decl : decls) { + if (const auto *spec{ + std::get_if(&decl.u)}) { + if (const auto *dtDef{ + std::get_if>( + &spec->u)}) { + const parser::DerivedTypeStmt &dtStmt{ + std::get>( + dtDef->value().t) + .statement}; + const parser::Name &name{std::get(dtStmt.t)}; + if (implicitDerivedTypes.find(name.source) != + implicitDerivedTypes.end() && + !FindInScope(name)) { + DerivedTypeDetails details; + details.set_isForwardReferenced(true); + Resolve(name, MakeSymbol(name, std::move(details))); + implicitDerivedTypes.erase(name.source); + } + } + } + } + } +} + bool ResolveNamesVisitor::Pre(const parser::SpecificationPart &x) { const auto &[accDecls, ompDecls, compilerDirectives, useStmts, importStmts, implicitPart, decls] = x.t; @@ -8347,6 +8413,7 @@ bool ResolveNamesVisitor::Pre(const parser::SpecificationPart &x) { ClearUseOnly(); ClearModuleUses(); Walk(importStmts); + HandleDerivedTypesInImplicitStmts(implicitPart, decls); Walk(implicitPart); for (const auto &decl : decls) { if (const auto *spec{ @@ -8525,7 +8592,9 @@ void ResolveNamesVisitor::AnalyzeStmtFunctionStmt( Symbol *symbol{name.symbol}; auto *details{symbol ? symbol->detailsIf() : nullptr}; if (!details || !symbol->scope() || - &symbol->scope()->parent() != &currScope()) { + &symbol->scope()->parent() != &currScope() || details->isInterface() || + details->isDummy() || details->entryScope() || + details->moduleInterface() || symbol->test(Symbol::Flag::Subroutine)) { return; // error recovery } // Resolve the symbols on the RHS of the statement function. diff --git a/flang/module/iso_fortran_env.f90 b/flang/module/iso_fortran_env.f90 index 23e22e1f64de6..6ca98e518aeac 100644 --- a/flang/module/iso_fortran_env.f90 +++ b/flang/module/iso_fortran_env.f90 @@ -6,8 +6,7 @@ ! !===------------------------------------------------------------------------===! -! See Fortran 2018, clause 16.10.2 -! TODO: These are placeholder values so that some tests can be run. +! See Fortran 2023, subclause 16.10.2 include '../include/flang/Runtime/magic-numbers.h' @@ -24,27 +23,20 @@ module iso_fortran_env compiler_version => __builtin_compiler_version implicit none - - ! Set PRIVATE by default to explicitly only export what is meant - ! to be exported by this MODULE. private public :: event_type, notify_type, lock_type, team_type, & atomic_int_kind, atomic_logical_kind, compiler_options, & compiler_version - - ! TODO: Use PACK([x],test) in place of the array constructor idiom - ! [(x, integer::j=1,COUNT([test]))] below once PACK() can be folded. 
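! (Hedged aside, not part of the module: PACK([x], test) yields [x] when test
! is true and a zero-sized array otherwise, so concatenating the pack() calls
! below keeps exactly the supported kinds, as the removed COUNT() idiom did.)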
- integer, parameter :: & selectedASCII = selected_char_kind('ASCII'), & selectedUCS_2 = selected_char_kind('UCS-2'), & selectedUnicode = selected_char_kind('ISO_10646') integer, parameter, public :: character_kinds(*) = [ & - [(selectedASCII, integer :: j=1, count([selectedASCII >= 0]))], & - [(selectedUCS_2, integer :: j=1, count([selectedUCS_2 >= 0]))], & - [(selectedUnicode, integer :: j=1, count([selectedUnicode >= 0]))]] + pack([selectedASCII], selectedASCII >= 0), & + pack([selectedUCS_2], selectedUCS_2 >= 0), & + pack([selectedUnicode], selectedUnicode >= 0)] integer, parameter :: & selectedInt8 = selected_int_kind(2), & @@ -76,19 +68,18 @@ module iso_fortran_env integer, parameter, public :: integer_kinds(*) = [ & selected_int_kind(0), & - ((selected_int_kind(k), & - integer :: j=1, count([selected_int_kind(k) >= 0 .and. & - selected_int_kind(k) /= & - selected_int_kind(k-1)])), & - integer :: k=1, 39)] + [(pack([selected_int_kind(k)], & + selected_int_kind(k) >= 0 .and. & + selected_int_kind(k) /= selected_int_kind(k-1)), & + integer :: k=1, 39)]] integer, parameter, public :: & logical8 = int8, logical16 = int16, logical32 = int32, logical64 = int64 integer, parameter, public :: logical_kinds(*) = [ & - [(logical8, integer :: j=1, count([logical8 >= 0]))], & - [(logical16, integer :: j=1, count([logical16 >= 0]))], & - [(logical32, integer :: j=1, count([logical32 >= 0]))], & - [(logical64, integer :: j=1, count([logical64 >= 0]))]] + pack([logical8], logical8 >= 0), & + pack([logical16], logical16 >= 0), & + pack([logical32], logical32 >= 0), & + pack([logical64], logical64 >= 0)] integer, parameter :: & selectedReal16 = selected_real_kind(3, 4), & ! IEEE half @@ -129,35 +120,40 @@ module iso_fortran_env digits(real(0,kind=safeReal128)) == 113) integer, parameter, public :: real_kinds(*) = [ & - [(real16, integer :: j=1, count([real16 >= 0]))], & - [(bfloat16, integer :: j=1, count([bfloat16 >= 0]))], & - [(real32, integer :: j=1, count([real32 >= 0]))], & - [(real64, integer :: j=1, count([real64 >= 0]))], & - [(real80, integer :: j=1, count([real80 >= 0]))], & - [(real64x2, integer :: j=1, count([real64x2 >= 0]))], & - [(real128, integer :: j=1, count([real128 >= 0]))]] - - integer, parameter, public :: current_team = -1, initial_team = -2, parent_team = -3 - - integer, parameter, public :: output_unit = FORTRAN_DEFAULT_OUTPUT_UNIT - integer, parameter, public :: input_unit = FORTRAN_DEFAULT_INPUT_UNIT - integer, parameter, public :: error_unit = FORTRAN_ERROR_UNIT - integer, parameter, public :: iostat_end = FORTRAN_RUNTIME_IOSTAT_END - integer, parameter, public :: iostat_eor = FORTRAN_RUNTIME_IOSTAT_EOR - integer, parameter, public :: iostat_inquire_internal_unit = & - FORTRAN_RUNTIME_IOSTAT_INQUIRE_INTERNAL_UNIT + pack([real16], real16 >= 0), & + pack([bfloat16], bfloat16 >= 0), & + pack([real32], real32 >= 0), & + pack([real64], real64 >= 0), & + pack([real80], real80 >= 0), & + pack([real64x2], real64x2 >= 0), & + pack([real128], real128 >= 0)] + + integer, parameter, public :: current_team = -1, & + initial_team = -2, & + parent_team = -3 integer, parameter, public :: character_storage_size = 8 integer, parameter, public :: file_storage_size = 8 - integer, parameter, public :: numeric_storage_size = 32 - integer, parameter, public :: stat_failed_image = FORTRAN_RUNTIME_STAT_FAILED_IMAGE - integer, parameter, public :: stat_locked = FORTRAN_RUNTIME_STAT_LOCKED - integer, parameter, public :: & - stat_locked_other_image = FORTRAN_RUNTIME_STAT_LOCKED_OTHER_IMAGE - 
integer, parameter, public :: stat_stopped_image = FORTRAN_RUNTIME_STAT_STOPPED_IMAGE - integer, parameter, public :: stat_unlocked = FORTRAN_RUNTIME_STAT_UNLOCKED + intrinsic :: __builtin_numeric_storage_size + ! This value depends on any -fdefault-integer-N and -fdefault-real-N + ! compiler options that are active when the module file is read. + integer, parameter, public :: numeric_storage_size = & + __builtin_numeric_storage_size() + + ! From Runtime/magic-numbers.h: integer, parameter, public :: & + output_unit = FORTRAN_DEFAULT_OUTPUT_UNIT, & + input_unit = FORTRAN_DEFAULT_INPUT_UNIT, & + error_unit = FORTRAN_ERROR_UNIT, & + iostat_end = FORTRAN_RUNTIME_IOSTAT_END, & + iostat_eor = FORTRAN_RUNTIME_IOSTAT_EOR, & + iostat_inquire_internal_unit = FORTRAN_RUNTIME_IOSTAT_INQUIRE_INTERNAL_UNIT, & + stat_failed_image = FORTRAN_RUNTIME_STAT_FAILED_IMAGE, & + stat_locked = FORTRAN_RUNTIME_STAT_LOCKED, & + stat_locked_other_image = FORTRAN_RUNTIME_STAT_LOCKED_OTHER_IMAGE, & + stat_stopped_image = FORTRAN_RUNTIME_STAT_STOPPED_IMAGE, & + stat_unlocked = FORTRAN_RUNTIME_STAT_UNLOCKED, & stat_unlocked_failed_image = FORTRAN_RUNTIME_STAT_UNLOCKED_FAILED_IMAGE end module iso_fortran_env diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt index c0e4cff698e3c..2a65a22ab674c 100644 --- a/flang/runtime/CMakeLists.txt +++ b/flang/runtime/CMakeLists.txt @@ -171,10 +171,7 @@ set(sources utf.cpp ) -option(FLANG_EXPERIMENTAL_CUDA_RUNTIME - "Compile Fortran runtime as CUDA sources (experimental)" OFF - ) -set(FLANG_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation") +include(AddFlangOffloadRuntime) # List of files that are buildable for all devices. set(supported_files @@ -227,128 +224,8 @@ set(supported_files utf.cpp ) -if (FLANG_EXPERIMENTAL_CUDA_RUNTIME) - if (BUILD_SHARED_LIBS) - message(FATAL_ERROR - "BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime" - ) - endif() - - enable_language(CUDA) - - # TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION - # work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION. - set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) - - # Treat all supported sources as CUDA files. - set_source_files_properties(${supported_files} PROPERTIES LANGUAGE CUDA) - set(CUDA_COMPILE_OPTIONS) - if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang") - # Allow varargs. - set(CUDA_COMPILE_OPTIONS - -Xclang -fcuda-allow-variadic-functions - ) - endif() - if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA") - set(CUDA_COMPILE_OPTIONS - --expt-relaxed-constexpr - # Disable these warnings: - # 'long double' is treated as 'double' in device code - -Xcudafe --diag_suppress=20208 - -Xcudafe --display_error_number - ) - endif() - set_source_files_properties(${supported_files} PROPERTIES COMPILE_OPTIONS - "${CUDA_COMPILE_OPTIONS}" - ) - - if (EXISTS "${FLANG_LIBCUDACXX_PATH}/include") - # When using libcudacxx headers files, we have to use them - # for all files of F18 runtime. - include_directories(AFTER ${FLANG_LIBCUDACXX_PATH}/include) - add_compile_definitions(RT_USE_LIBCUDACXX=1) - endif() -endif() - -set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING - "Compile Fortran runtime as OpenMP target offload sources (experimental). Valid options are 'off', 'host_device', 'nohost'") - -set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING - "List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 
'gfx1103;sm_90')") - -if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off") - # 'host_device' build only works with Clang compiler currently. - # The build is done with the CMAKE_C/CXX_COMPILER, i.e. it does not use - # the in-tree built Clang. We may have a mode that would use the in-tree - # built Clang. - # - # 'nohost' is supposed to produce an LLVM Bitcode library, - # and it has to be done with a C/C++ compiler producing LLVM Bitcode - # compatible with the LLVM toolchain version distributed with the Flang - # compiler. - # In general, the in-tree built Clang should be used for 'nohost' build. - # Note that 'nohost' build does not produce the host version of Flang - # runtime library, so there will be two separate distributable objects. - # 'nohost' build is a TODO. - - if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "host_device") - message(FATAL_ERROR "Unsupported OpenMP offload build of Flang runtime") - endif() - if (BUILD_SHARED_LIBS) - message(FATAL_ERROR - "BUILD_SHARED_LIBS is not supported for OpenMP offload build of Fortran runtime" - ) - endif() - - if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND - "${CMAKE_C_COMPILER_ID}" MATCHES "Clang") - - set(all_amdgpu_architectures - "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906" - "gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030" - "gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036" - "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151" - ) - set(all_nvptx_architectures - "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62" - "sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90" - ) - set(all_gpu_architectures - "${all_amdgpu_architectures};${all_nvptx_architectures}" - ) - # TODO: support auto detection on the build system. - if (FLANG_OMP_DEVICE_ARCHITECTURES STREQUAL "all") - set(FLANG_OMP_DEVICE_ARCHITECTURES ${all_gpu_architectures}) - endif() - list(REMOVE_DUPLICATES FLANG_OMP_DEVICE_ARCHITECTURES) - - string(REPLACE ";" "," compile_for_architectures - "${FLANG_OMP_DEVICE_ARCHITECTURES}" - ) - - set(OMP_COMPILE_OPTIONS - -fopenmp - -fvisibility=hidden - -fopenmp-cuda-mode - --offload-arch=${compile_for_architectures} - # Force LTO for the device part. - -foffload-lto - ) - set_source_files_properties(${supported_files} PROPERTIES COMPILE_OPTIONS - "${OMP_COMPILE_OPTIONS}" - ) - - # Enable "declare target" in the source code. 
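# (Hedged note, not part of the diff: the CUDA and OpenMP offload logic being
# removed here is expected to live on in the AddFlangOffloadRuntime module
# included above, so this CMakeLists now only calls
# enable_cuda_compilation("${supported_files}") and
# enable_omp_offload_compilation("${supported_files}").)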
- set_source_files_properties(${supported_files} - PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD - ) - else() - message(FATAL_ERROR - "Flang runtime build is not supported for these compilers:\n" - "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}\n" - "CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}") - endif() -endif() +enable_cuda_compilation("${supported_files}") +enable_omp_offload_compilation("${supported_files}") if (NOT TARGET FortranFloat128Math) # If FortranFloat128Math is not defined, then we are not building diff --git a/flang/runtime/buffer.h b/flang/runtime/buffer.h index ca1baea12efaf..41a1abb1b2d90 100644 --- a/flang/runtime/buffer.h +++ b/flang/runtime/buffer.h @@ -11,8 +11,8 @@ #ifndef FORTRAN_RUNTIME_BUFFER_H_ #define FORTRAN_RUNTIME_BUFFER_H_ -#include "freestanding-tools.h" #include "io-error.h" +#include "flang/Runtime/freestanding-tools.h" #include "flang/Runtime/memory.h" #include #include diff --git a/flang/runtime/command.cpp b/flang/runtime/command.cpp index fabfe601688bb..b573c5dfd797b 100644 --- a/flang/runtime/command.cpp +++ b/flang/runtime/command.cpp @@ -16,9 +16,7 @@ #include #ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN -#define NOMINMAX -#include +#include "flang/Common/windows-include.h" // On Windows GetCurrentProcessId returns a DWORD aka uint32_t #include diff --git a/flang/runtime/descriptor-io.cpp b/flang/runtime/descriptor-io.cpp index 93df51cf22d3f..380ad425d925f 100644 --- a/flang/runtime/descriptor-io.cpp +++ b/flang/runtime/descriptor-io.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "descriptor-io.h" -#include "freestanding-tools.h" #include "flang/Common/restorer.h" +#include "flang/Runtime/freestanding-tools.h" namespace Fortran::runtime::io::descr { RT_OFFLOAD_API_GROUP_BEGIN diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp index 935b7c299b256..37989bbcee0ab 100644 --- a/flang/runtime/edit-input.cpp +++ b/flang/runtime/edit-input.cpp @@ -7,12 +7,12 @@ //===----------------------------------------------------------------------===// #include "edit-input.h" -#include "freestanding-tools.h" #include "namelist.h" #include "utf.h" #include "flang/Common/optional.h" #include "flang/Common/real.h" #include "flang/Common/uint128.h" +#include "flang/Runtime/freestanding-tools.h" #include #include diff --git a/flang/runtime/environment.cpp b/flang/runtime/environment.cpp index b74067a377774..b2c9665a28df2 100644 --- a/flang/runtime/environment.cpp +++ b/flang/runtime/environment.cpp @@ -49,6 +49,7 @@ static void SetEnvironmentDefaults(const EnvironmentDefaultList *envDefaults) { } } +RT_OFFLOAD_API_GROUP_BEGIN Fortran::common::optional GetConvertFromString( const char *x, std::size_t n) { static const char *keywords[]{ @@ -68,6 +69,7 @@ Fortran::common::optional GetConvertFromString( return Fortran::common::nullopt; } } +RT_OFFLOAD_API_GROUP_END void ExecutionEnvironment::Configure(int ac, const char *av[], const char *env[], const EnvironmentDefaultList *envDefaults) { diff --git a/flang/runtime/environment.h b/flang/runtime/environment.h index 6c56993fb1d6e..b8b9f10e4e57f 100644 --- a/flang/runtime/environment.h +++ b/flang/runtime/environment.h @@ -31,7 +31,7 @@ RT_OFFLOAD_VAR_GROUP_END // External unformatted I/O data conversions enum class Convert { Unknown, Native, LittleEndian, BigEndian, Swap }; -Fortran::common::optional GetConvertFromString( +RT_API_ATTRS Fortran::common::optional GetConvertFromString( const char *, std::size_t); struct ExecutionEnvironment { 
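(Reviewer note, hedged: RT_API_ATTRS on GetConvertFromString, together with the RT_OFFLOAD_API_GROUP markers around its definition, lets the offload-device runtime parse CONVERT= specifiers too. In Fortran terms, the strings it classifies into the Convert enum above come from unformatted OPEN statements such as this sketch with hypothetical names.)

    open(10, file='data.bin', form='unformatted', convert='BIG_ENDIAN')
    read(10) payload  ! input is converted to native byte order as needed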
diff --git a/flang/runtime/execute.cpp b/flang/runtime/execute.cpp index c84930c5c3287..0f5bc5059e21d 100644 --- a/flang/runtime/execute.cpp +++ b/flang/runtime/execute.cpp @@ -16,9 +16,7 @@ #include #include #ifdef _WIN32 -#define LEAN_AND_MEAN -#define NOMINMAX -#include +#include "flang/Common/windows-include.h" #else #include #include diff --git a/flang/runtime/file.cpp b/flang/runtime/file.cpp index 67764f1f56262..acd5d33d4bb87 100644 --- a/flang/runtime/file.cpp +++ b/flang/runtime/file.cpp @@ -17,9 +17,8 @@ #include #include #ifdef _WIN32 -#define NOMINMAX +#include "flang/Common/windows-include.h" #include -#include #else #include #endif diff --git a/flang/runtime/format.h b/flang/runtime/format.h index f57cf92044871..5329f2482d3e4 100644 --- a/flang/runtime/format.h +++ b/flang/runtime/format.h @@ -12,11 +12,11 @@ #define FORTRAN_RUNTIME_FORMAT_H_ #include "environment.h" -#include "freestanding-tools.h" #include "io-error.h" #include "flang/Common/Fortran.h" #include "flang/Common/optional.h" #include "flang/Decimal/decimal.h" +#include "flang/Runtime/freestanding-tools.h" #include namespace Fortran::runtime { diff --git a/flang/runtime/internal-unit.cpp b/flang/runtime/internal-unit.cpp index 35766306ccefb..4097ea659edd4 100644 --- a/flang/runtime/internal-unit.cpp +++ b/flang/runtime/internal-unit.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "internal-unit.h" -#include "freestanding-tools.h" #include "io-error.h" #include "flang/Runtime/descriptor.h" +#include "flang/Runtime/freestanding-tools.h" #include #include diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index 3a86c9fa7375e..ccb5b576451dd 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -25,8 +25,9 @@ #include namespace Fortran::runtime::io { +RT_EXT_API_GROUP_BEGIN -const char *InquiryKeywordHashDecode( +RT_API_ATTRS const char *InquiryKeywordHashDecode( char *buffer, std::size_t n, InquiryKeywordHash hash) { if (n < 1) { return nullptr; @@ -44,7 +45,7 @@ const char *InquiryKeywordHashDecode( } template -Cookie BeginInternalArrayListIO(const Descriptor &descriptor, +RT_API_ATTRS Cookie BeginInternalArrayListIO(const Descriptor &descriptor, void ** /*scratchArea*/, std::size_t /*scratchBytes*/, const char *sourceFile, int sourceLine) { Terminator oom{sourceFile, sourceLine}; @@ -54,14 +55,14 @@ Cookie BeginInternalArrayListIO(const Descriptor &descriptor, ->ioStatementState(); } -Cookie IONAME(BeginInternalArrayListOutput)(const Descriptor &descriptor, +Cookie IODEF(BeginInternalArrayListOutput)(const Descriptor &descriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { return BeginInternalArrayListIO( descriptor, scratchArea, scratchBytes, sourceFile, sourceLine); } -Cookie IONAME(BeginInternalArrayListInput)(const Descriptor &descriptor, +Cookie IODEF(BeginInternalArrayListInput)(const Descriptor &descriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { return BeginInternalArrayListIO( @@ -69,7 +70,7 @@ Cookie IONAME(BeginInternalArrayListInput)(const Descriptor &descriptor, } template -Cookie BeginInternalArrayFormattedIO(const Descriptor &descriptor, +RT_API_ATTRS Cookie BeginInternalArrayFormattedIO(const Descriptor &descriptor, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void ** /*scratchArea*/, std::size_t /*scratchBytes*/, const char *sourceFile, int sourceLine) { @@ -80,7 +81,7 @@ Cookie 
BeginInternalArrayFormattedIO(const Descriptor &descriptor, ->ioStatementState(); } -Cookie IONAME(BeginInternalArrayFormattedOutput)(const Descriptor &descriptor, +Cookie IODEF(BeginInternalArrayFormattedOutput)(const Descriptor &descriptor, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { @@ -89,7 +90,7 @@ Cookie IONAME(BeginInternalArrayFormattedOutput)(const Descriptor &descriptor, sourceLine); } -Cookie IONAME(BeginInternalArrayFormattedInput)(const Descriptor &descriptor, +Cookie IODEF(BeginInternalArrayFormattedInput)(const Descriptor &descriptor, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { @@ -110,14 +111,14 @@ RT_API_ATTRS Cookie BeginInternalListIO( ->ioStatementState(); } -Cookie IONAME(BeginInternalListOutput)(char *internal, +Cookie IODEF(BeginInternalListOutput)(char *internal, std::size_t internalLength, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { return BeginInternalListIO(internal, internalLength, scratchArea, scratchBytes, sourceFile, sourceLine); } -Cookie IONAME(BeginInternalListInput)(const char *internal, +Cookie IODEF(BeginInternalListInput)(const char *internal, std::size_t internalLength, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { return BeginInternalListIO(internal, internalLength, @@ -125,7 +126,7 @@ Cookie IONAME(BeginInternalListInput)(const char *internal, } template -Cookie BeginInternalFormattedIO( +RT_API_ATTRS Cookie BeginInternalFormattedIO( std::conditional_t *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void ** /*scratchArea*/, @@ -138,7 +139,7 @@ Cookie BeginInternalFormattedIO( ->ioStatementState(); } -Cookie IONAME(BeginInternalFormattedOutput)(char *internal, +Cookie IODEF(BeginInternalFormattedOutput)(char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { @@ -147,7 +148,7 @@ Cookie IONAME(BeginInternalFormattedOutput)(char *internal, sourceFile, sourceLine); } -Cookie IONAME(BeginInternalFormattedInput)(const char *internal, +Cookie IODEF(BeginInternalFormattedInput)(const char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { @@ -227,24 +228,22 @@ RT_API_ATTRS Cookie BeginExternalListIO( } } -RT_EXT_API_GROUP_BEGIN Cookie IODEF(BeginExternalListOutput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalListIO( unitNumber, sourceFile, sourceLine); } -RT_EXT_API_GROUP_END -Cookie IONAME(BeginExternalListInput)( +Cookie IODEF(BeginExternalListInput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalListIO( unitNumber, sourceFile, sourceLine); } template -Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength, - const Descriptor *formatDescriptor, ExternalUnit unitNumber, - const char *sourceFile, int sourceLine) { +RT_API_ATTRS Cookie BeginExternalFormattedIO(const char *format, + std::size_t formatLength, const Descriptor 
*formatDescriptor, + ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; Cookie errorCookie{nullptr}; ExternalFileUnit *unit{GetOrCreateUnit( @@ -286,14 +285,14 @@ Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength, } } -Cookie IONAME(BeginExternalFormattedOutput)(const char *format, +Cookie IODEF(BeginExternalFormattedOutput)(const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalFormattedIO(format, formatLength, formatDescriptor, unitNumber, sourceFile, sourceLine); } -Cookie IONAME(BeginExternalFormattedInput)(const char *format, +Cookie IODEF(BeginExternalFormattedInput)(const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalFormattedIO(format, formatLength, @@ -301,7 +300,7 @@ Cookie IONAME(BeginExternalFormattedInput)(const char *format, } template -Cookie BeginUnformattedIO( +RT_API_ATTRS Cookie BeginUnformattedIO( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; Cookie errorCookie{nullptr}; @@ -352,19 +351,19 @@ Cookie BeginUnformattedIO( } } -Cookie IONAME(BeginUnformattedOutput)( +Cookie IODEF(BeginUnformattedOutput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginUnformattedIO( unitNumber, sourceFile, sourceLine); } -Cookie IONAME(BeginUnformattedInput)( +Cookie IODEF(BeginUnformattedInput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginUnformattedIO( unitNumber, sourceFile, sourceLine); } -Cookie IONAME(BeginOpenUnit)( // OPEN(without NEWUNIT=) +Cookie IODEF(BeginOpenUnit)( // OPEN(without NEWUNIT=) ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; bool wasExtant{false}; @@ -384,7 +383,7 @@ Cookie IONAME(BeginOpenUnit)( // OPEN(without NEWUNIT=) } } -Cookie IONAME(BeginOpenNewUnit)( // OPEN(NEWUNIT=j) +Cookie IODEF(BeginOpenNewUnit)( // OPEN(NEWUNIT=j) const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; ExternalFileUnit &unit{ @@ -394,7 +393,7 @@ Cookie IONAME(BeginOpenNewUnit)( // OPEN(NEWUNIT=j) sourceLine); } -Cookie IONAME(BeginWait)(ExternalUnit unitNumber, AsynchronousId id, +Cookie IODEF(BeginWait)(ExternalUnit unitNumber, AsynchronousId id, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -410,12 +409,12 @@ Cookie IONAME(BeginWait)(ExternalUnit unitNumber, AsynchronousId id, terminator, unitNumber, id == 0 ? 
IostatOk : IostatBadWaitUnit); } } -Cookie IONAME(BeginWaitAll)( +Cookie IODEF(BeginWaitAll)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return IONAME(BeginWait)(unitNumber, 0 /*no ID=*/, sourceFile, sourceLine); } -Cookie IONAME(BeginClose)( +Cookie IODEF(BeginClose)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -434,7 +433,7 @@ Cookie IONAME(BeginClose)( } } -Cookie IONAME(BeginFlush)( +Cookie IODEF(BeginFlush)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -452,7 +451,7 @@ Cookie IONAME(BeginFlush)( } } -Cookie IONAME(BeginBackspace)( +Cookie IODEF(BeginBackspace)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -470,7 +469,7 @@ Cookie IONAME(BeginBackspace)( } } -Cookie IONAME(BeginEndfile)( +Cookie IODEF(BeginEndfile)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; Cookie errorCookie{nullptr}; @@ -490,7 +489,7 @@ Cookie IONAME(BeginEndfile)( } } -Cookie IONAME(BeginRewind)( +Cookie IODEF(BeginRewind)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; Cookie errorCookie{nullptr}; @@ -510,7 +509,7 @@ Cookie IONAME(BeginRewind)( } } -Cookie IONAME(BeginInquireUnit)( +Cookie IODEF(BeginInquireUnit)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -530,14 +529,14 @@ Cookie IONAME(BeginInquireUnit)( } } -Cookie IONAME(BeginInquireFile)(const char *path, std::size_t pathLength, +Cookie IODEF(BeginInquireFile)(const char *path, std::size_t pathLength, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; auto trimmed{SaveDefaultCharacter( path, TrimTrailingSpaces(path, pathLength), terminator)}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp( - trimmed.get(), std::strlen(trimmed.get()))}) { + trimmed.get(), Fortran::runtime::strlen(trimmed.get()))}) { // INQUIRE(FILE=) to a connected unit if (ChildIo * child{unit->GetChildIo()}) { return &child->BeginIoStatement( @@ -554,7 +553,7 @@ Cookie IONAME(BeginInquireFile)(const char *path, std::size_t pathLength, } } -Cookie IONAME(BeginInquireIoLength)(const char *sourceFile, int sourceLine) { +Cookie IODEF(BeginInquireIoLength)(const char *sourceFile, int sourceLine) { Terminator oom{sourceFile, sourceLine}; return &New{oom}(sourceFile, sourceLine) .release() @@ -563,7 +562,7 @@ Cookie IONAME(BeginInquireIoLength)(const char *sourceFile, int sourceLine) { // Control list items -void IONAME(EnableHandlers)(Cookie cookie, bool hasIoStat, bool hasErr, +void IODEF(EnableHandlers)(Cookie cookie, bool hasIoStat, bool hasErr, bool hasEnd, bool hasEor, bool hasIoMsg) { IoErrorHandler &handler{cookie->GetIoErrorHandler()}; if (hasIoStat) { @@ -583,8 +582,8 @@ void IONAME(EnableHandlers)(Cookie cookie, bool hasIoStat, bool hasErr, } } -static bool YesOrNo(const char *keyword, std::size_t length, const char *what, - IoErrorHandler &handler) { +static RT_API_ATTRS bool YesOrNo(const char *keyword, std::size_t length, + 
const char *what, IoErrorHandler &handler) { static const char *keywords[]{"YES", "NO", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { case 0: @@ -598,8 +597,7 @@ static bool YesOrNo(const char *keyword, std::size_t length, const char *what, } } -bool IONAME(SetAdvance)( - Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetAdvance)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; bool nonAdvancing{!YesOrNo(keyword, length, "ADVANCE", handler)}; @@ -616,7 +614,7 @@ bool IONAME(SetAdvance)( return !handler.InError(); } -bool IONAME(SetBlank)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetBlank)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{"NULL", "ZERO", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { @@ -633,8 +631,7 @@ bool IONAME(SetBlank)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IONAME(SetDecimal)( - Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetDecimal)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{"COMMA", "POINT", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { @@ -651,7 +648,7 @@ bool IONAME(SetDecimal)( } } -bool IONAME(SetDelim)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetDelim)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{"APOSTROPHE", "QUOTE", "NONE", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { @@ -671,14 +668,14 @@ bool IONAME(SetDelim)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IONAME(SetPad)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetPad)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; io.mutableModes().pad = YesOrNo(keyword, length, "PAD", handler); return !handler.InError(); } -bool IONAME(SetPos)(Cookie cookie, std::int64_t pos) { +bool IODEF(SetPos)(Cookie cookie, std::int64_t pos) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (auto *unit{io.GetExternalFileUnit()}) { @@ -689,7 +686,7 @@ bool IONAME(SetPos)(Cookie cookie, std::int64_t pos) { return false; } -bool IONAME(SetRec)(Cookie cookie, std::int64_t rec) { +bool IODEF(SetRec)(Cookie cookie, std::int64_t rec) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (auto *unit{io.GetExternalFileUnit()}) { @@ -705,7 +702,7 @@ bool IONAME(SetRec)(Cookie cookie, std::int64_t rec) { return true; } -bool IONAME(SetRound)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetRound)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{"UP", "DOWN", "ZERO", "NEAREST", "COMPATIBLE", "PROCESSOR_DEFINED", nullptr}; @@ -735,7 +732,7 @@ bool IONAME(SetRound)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IONAME(SetSign)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetSign)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{ "PLUS", "SUPPRESS", "PROCESSOR_DEFINED", nullptr}; @@ -754,7 +751,7 @@ bool IONAME(SetSign)(Cookie cookie, const 
char *keyword, std::size_t length) { } } -bool IONAME(SetAccess)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetAccess)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -790,7 +787,7 @@ bool IONAME(SetAccess)(Cookie cookie, const char *keyword, std::size_t length) { return true; } -bool IONAME(SetAction)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetAction)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -832,7 +829,7 @@ bool IONAME(SetAction)(Cookie cookie, const char *keyword, std::size_t length) { return true; } -bool IONAME(SetAsynchronous)( +bool IODEF(SetAsynchronous)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; @@ -859,7 +856,7 @@ bool IONAME(SetAsynchronous)( return !handler.InError(); } -bool IONAME(SetCarriagecontrol)( +bool IODEF(SetCarriagecontrol)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; @@ -891,8 +888,7 @@ bool IONAME(SetCarriagecontrol)( } } -bool IONAME(SetConvert)( - Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetConvert)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -916,7 +912,7 @@ bool IONAME(SetConvert)( } } -bool IONAME(SetEncoding)( +bool IODEF(SetEncoding)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; @@ -948,7 +944,7 @@ bool IONAME(SetEncoding)( return true; } -bool IONAME(SetForm)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetForm)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -976,7 +972,7 @@ bool IONAME(SetForm)(Cookie cookie, const char *keyword, std::size_t length) { return true; } -bool IONAME(SetPosition)( +bool IODEF(SetPosition)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; @@ -1009,7 +1005,7 @@ bool IONAME(SetPosition)( return true; } -bool IONAME(SetRecl)(Cookie cookie, std::size_t n) { +bool IODEF(SetRecl)(Cookie cookie, std::size_t n) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -1036,7 +1032,7 @@ bool IONAME(SetRecl)(Cookie cookie, std::size_t n) { } } -bool IONAME(SetStatus)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetStatus)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; if (auto *open{io.get_if()}) { if (open->completedOperation()) { @@ -1090,7 +1086,7 @@ bool IONAME(SetStatus)(Cookie cookie, const char *keyword, std::size_t length) { "SetStatus() called when not in an OPEN or CLOSE statement"); } -bool IONAME(SetFile)(Cookie cookie, const char *path, std::size_t chars) { +bool IODEF(SetFile)(Cookie cookie, const char *path, std::size_t chars) { IoStatementState &io{*cookie}; if (auto *open{io.get_if()}) { if (open->completedOperation()) { @@ -1107,7 +1103,7 @@ bool IONAME(SetFile)(Cookie cookie, const char *path, std::size_t chars) { return false; } -bool IONAME(GetNewUnit)(Cookie cookie, int &unit, int kind) { +bool IODEF(GetNewUnit)(Cookie cookie, int &unit, int kind) { IoStatementState &io{*cookie}; auto 
*open{io.get_if()}; if (!open) { @@ -1135,15 +1131,15 @@ bool IONAME(GetNewUnit)(Cookie cookie, int &unit, int kind) { // Data transfers -bool IONAME(OutputDescriptor)(Cookie cookie, const Descriptor &descriptor) { +bool IODEF(OutputDescriptor)(Cookie cookie, const Descriptor &descriptor) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputDescriptor)(Cookie cookie, const Descriptor &descriptor) { +bool IODEF(InputDescriptor)(Cookie cookie, const Descriptor &descriptor) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputInteger8)(Cookie cookie, std::int8_t n) { +bool IODEF(OutputInteger8)(Cookie cookie, std::int8_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger8")) { return false; } @@ -1154,7 +1150,7 @@ bool IONAME(OutputInteger8)(Cookie cookie, std::int8_t n) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputInteger16)(Cookie cookie, std::int16_t n) { +bool IODEF(OutputInteger16)(Cookie cookie, std::int16_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger16")) { return false; } @@ -1165,7 +1161,6 @@ bool IONAME(OutputInteger16)(Cookie cookie, std::int16_t n) { return descr::DescriptorIO(*cookie, descriptor); } -RT_EXT_API_GROUP_BEGIN bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger32")) { return false; @@ -1176,9 +1171,8 @@ bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) { TypeCategory::Integer, 4, reinterpret_cast(&n), 0); return descr::DescriptorIO(*cookie, descriptor); } -RT_EXT_API_GROUP_END -bool IONAME(OutputInteger64)(Cookie cookie, std::int64_t n) { +bool IODEF(OutputInteger64)(Cookie cookie, std::int64_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger64")) { return false; } @@ -1190,7 +1184,7 @@ bool IONAME(OutputInteger64)(Cookie cookie, std::int64_t n) { } #ifdef __SIZEOF_INT128__ -bool IONAME(OutputInteger128)(Cookie cookie, common::int128_t n) { +bool IODEF(OutputInteger128)(Cookie cookie, common::int128_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger128")) { return false; } @@ -1202,7 +1196,7 @@ bool IONAME(OutputInteger128)(Cookie cookie, common::int128_t n) { } #endif -bool IONAME(InputInteger)(Cookie cookie, std::int64_t &n, int kind) { +bool IODEF(InputInteger)(Cookie cookie, std::int64_t &n, int kind) { if (!cookie->CheckFormattedStmtType("InputInteger")) { return false; } @@ -1213,7 +1207,7 @@ bool IONAME(InputInteger)(Cookie cookie, std::int64_t &n, int kind) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputReal32)(Cookie cookie, float x) { +bool IODEF(OutputReal32)(Cookie cookie, float x) { if (!cookie->CheckFormattedStmtType("OutputReal32")) { return false; } @@ -1223,7 +1217,7 @@ bool IONAME(OutputReal32)(Cookie cookie, float x) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputReal64)(Cookie cookie, double x) { +bool IODEF(OutputReal64)(Cookie cookie, double x) { if (!cookie->CheckFormattedStmtType("OutputReal64")) { return false; } @@ -1233,7 +1227,7 @@ bool IONAME(OutputReal64)(Cookie cookie, double x) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputReal32)(Cookie cookie, float &x) { +bool IODEF(InputReal32)(Cookie cookie, float &x) { if (!cookie->CheckFormattedStmtType("InputReal32")) { return false; } @@ -1243,7 +1237,7 @@ bool IONAME(InputReal32)(Cookie cookie, float &x) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputReal64)(Cookie cookie, double &x) { +bool IODEF(InputReal64)(Cookie cookie, double 
&x) { if (!cookie->CheckFormattedStmtType("InputReal64")) { return false; } @@ -1253,7 +1247,7 @@ bool IONAME(InputReal64)(Cookie cookie, double &x) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputComplex32)(Cookie cookie, float r, float i) { +bool IODEF(OutputComplex32)(Cookie cookie, float r, float i) { if (!cookie->CheckFormattedStmtType("OutputComplex32")) { return false; } @@ -1265,7 +1259,7 @@ bool IONAME(OutputComplex32)(Cookie cookie, float r, float i) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputComplex64)(Cookie cookie, double r, double i) { +bool IODEF(OutputComplex64)(Cookie cookie, double r, double i) { if (!cookie->CheckFormattedStmtType("OutputComplex64")) { return false; } @@ -1277,7 +1271,7 @@ bool IONAME(OutputComplex64)(Cookie cookie, double r, double i) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputComplex32)(Cookie cookie, float z[2]) { +bool IODEF(InputComplex32)(Cookie cookie, float z[2]) { if (!cookie->CheckFormattedStmtType("InputComplex32")) { return false; } @@ -1288,7 +1282,7 @@ bool IONAME(InputComplex32)(Cookie cookie, float z[2]) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputComplex64)(Cookie cookie, double z[2]) { +bool IODEF(InputComplex64)(Cookie cookie, double z[2]) { if (!cookie->CheckFormattedStmtType("InputComplex64")) { return false; } @@ -1299,7 +1293,7 @@ bool IONAME(InputComplex64)(Cookie cookie, double z[2]) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputCharacter)( +bool IODEF(OutputCharacter)( Cookie cookie, const char *x, std::size_t length, int kind) { if (!cookie->CheckFormattedStmtType("OutputCharacter")) { return false; @@ -1311,11 +1305,11 @@ bool IONAME(OutputCharacter)( return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputAscii)(Cookie cookie, const char *x, std::size_t length) { +bool IODEF(OutputAscii)(Cookie cookie, const char *x, std::size_t length) { return IONAME(OutputCharacter(cookie, x, length, 1)); } -bool IONAME(InputCharacter)( +bool IODEF(InputCharacter)( Cookie cookie, char *x, std::size_t length, int kind) { if (!cookie->CheckFormattedStmtType("InputCharacter")) { return false; @@ -1326,11 +1320,11 @@ bool IONAME(InputCharacter)( return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputAscii)(Cookie cookie, char *x, std::size_t length) { +bool IODEF(InputAscii)(Cookie cookie, char *x, std::size_t length) { return IONAME(InputCharacter)(cookie, x, length, 1); } -bool IONAME(OutputLogical)(Cookie cookie, bool truth) { +bool IODEF(OutputLogical)(Cookie cookie, bool truth) { if (!cookie->CheckFormattedStmtType("OutputLogical")) { return false; } @@ -1341,7 +1335,7 @@ bool IONAME(OutputLogical)(Cookie cookie, bool truth) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputLogical)(Cookie cookie, bool &truth) { +bool IODEF(InputLogical)(Cookie cookie, bool &truth) { if (!cookie->CheckFormattedStmtType("InputLogical")) { return false; } @@ -1352,17 +1346,17 @@ bool IONAME(InputLogical)(Cookie cookie, bool &truth) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputDerivedType)(Cookie cookie, const Descriptor &descriptor, +bool IODEF(OutputDerivedType)(Cookie cookie, const Descriptor &descriptor, const NonTbpDefinedIoTable *table) { return descr::DescriptorIO(*cookie, descriptor, table); } -bool IONAME(InputDerivedType)(Cookie cookie, const Descriptor &descriptor, +bool IODEF(InputDerivedType)(Cookie cookie, const Descriptor &descriptor, 
const NonTbpDefinedIoTable *table) { return descr::DescriptorIO(*cookie, descriptor, table); } -std::size_t IONAME(GetSize)(Cookie cookie) { +std::size_t IODEF(GetSize)(Cookie cookie) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (!handler.InError()) { @@ -1379,7 +1373,7 @@ std::size_t IONAME(GetSize)(Cookie cookie) { return 0; } -std::size_t IONAME(GetIoLength)(Cookie cookie) { +std::size_t IODEF(GetIoLength)(Cookie cookie) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (!handler.InError()) { @@ -1395,7 +1389,7 @@ std::size_t IONAME(GetIoLength)(Cookie cookie) { return 0; } -void IONAME(GetIoMsg)(Cookie cookie, char *msg, std::size_t length) { +void IODEF(GetIoMsg)(Cookie cookie, char *msg, std::size_t length) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (!handler.InError()) { @@ -1406,7 +1400,7 @@ void IONAME(GetIoMsg)(Cookie cookie, char *msg, std::size_t length) { } } -AsynchronousId IONAME(GetAsynchronousId)(Cookie cookie) { +AsynchronousId IODEF(GetAsynchronousId)(Cookie cookie) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (auto *ext{io.get_if()}) { @@ -1419,24 +1413,24 @@ AsynchronousId IONAME(GetAsynchronousId)(Cookie cookie) { return 0; } -bool IONAME(InquireCharacter)(Cookie cookie, InquiryKeywordHash inquiry, +bool IODEF(InquireCharacter)(Cookie cookie, InquiryKeywordHash inquiry, char *result, std::size_t length) { IoStatementState &io{*cookie}; return io.Inquire(inquiry, result, length); } -bool IONAME(InquireLogical)( +bool IODEF(InquireLogical)( Cookie cookie, InquiryKeywordHash inquiry, bool &result) { IoStatementState &io{*cookie}; return io.Inquire(inquiry, result); } -bool IONAME(InquirePendingId)(Cookie cookie, AsynchronousId id, bool &result) { +bool IODEF(InquirePendingId)(Cookie cookie, AsynchronousId id, bool &result) { IoStatementState &io{*cookie}; return io.Inquire(HashInquiryKeyword("PENDING"), id, result); } -bool IONAME(InquireInteger64)( +bool IODEF(InquireInteger64)( Cookie cookie, InquiryKeywordHash inquiry, std::int64_t &result, int kind) { IoStatementState &io{*cookie}; std::int64_t n{0}; // safe "undefined" value @@ -1452,17 +1446,15 @@ bool IONAME(InquireInteger64)( return false; } -RT_EXT_API_GROUP_BEGIN enum Iostat IODEF(EndIoStatement)(Cookie cookie) { IoStatementState &io{*cookie}; return static_cast(io.EndIoStatement()); } -RT_EXT_API_GROUP_END template -static enum Iostat CheckUnitNumberInRangeImpl(INT unit, bool handleError, - char *ioMsg, std::size_t ioMsgLength, const char *sourceFile, - int sourceLine) { +static RT_API_ATTRS enum Iostat CheckUnitNumberInRangeImpl(INT unit, + bool handleError, char *ioMsg, std::size_t ioMsgLength, + const char *sourceFile, int sourceLine) { static_assert(sizeof(INT) >= sizeof(ExternalUnit), "only intended to be used when the INT to ExternalUnit conversion is " "narrowing"); @@ -1494,15 +1486,15 @@ static enum Iostat CheckUnitNumberInRangeImpl(INT unit, bool handleError, return IostatOk; } -enum Iostat IONAME(CheckUnitNumberInRange64)(std::int64_t unit, - bool handleError, char *ioMsg, std::size_t ioMsgLength, - const char *sourceFile, int sourceLine) { +enum Iostat IODEF(CheckUnitNumberInRange64)(std::int64_t unit, bool handleError, + char *ioMsg, std::size_t ioMsgLength, const char *sourceFile, + int sourceLine) { return CheckUnitNumberInRangeImpl( unit, handleError, ioMsg, ioMsgLength, sourceFile, sourceLine); } #ifdef __SIZEOF_INT128__ -enum Iostat 
IONAME(CheckUnitNumberInRange128)(common::int128_t unit, +enum Iostat IODEF(CheckUnitNumberInRange128)(common::int128_t unit, bool handleError, char *ioMsg, std::size_t ioMsgLength, const char *sourceFile, int sourceLine) { return CheckUnitNumberInRangeImpl( @@ -1525,3 +1517,5 @@ void std::__libcpp_verbose_abort(char const *format, ...) { std::abort(); } #endif + +RT_EXT_API_GROUP_END diff --git a/flang/runtime/io-error.cpp b/flang/runtime/io-error.cpp index b006b82f62249..7a90966f81047 100644 --- a/flang/runtime/io-error.cpp +++ b/flang/runtime/io-error.cpp @@ -109,8 +109,6 @@ void IoErrorHandler::SignalPendingError() { SignalError(error); } -RT_OFFLOAD_API_GROUP_END - void IoErrorHandler::SignalErrno() { SignalError(errno); } bool IoErrorHandler::GetIoMsg(char *buffer, std::size_t bufferLength) { @@ -127,7 +125,10 @@ bool IoErrorHandler::GetIoMsg(char *buffer, std::size_t bufferLength) { // in LLVM v9.0.1 with inadequate modification for Fortran, // since rectified. bool ok{false}; -#if HAVE_STRERROR_R +#if defined(RT_DEVICE_COMPILATION) + // strerror_r is not available on device. + msg = "errno description is not available on device"; +#elif HAVE_STRERROR_R // strerror_r is thread-safe. #if defined(__GLIBC__) && defined(_GNU_SOURCE) // glibc defines its own incompatible version of strerror_r @@ -157,4 +158,6 @@ bool IoErrorHandler::GetIoMsg(char *buffer, std::size_t bufferLength) { return false; } } + +RT_OFFLOAD_API_GROUP_END } // namespace Fortran::runtime::io diff --git a/flang/runtime/io-error.h b/flang/runtime/io-error.h index 0fe11c9185c0a..426573e2faf00 100644 --- a/flang/runtime/io-error.h +++ b/flang/runtime/io-error.h @@ -61,7 +61,7 @@ class IoErrorHandler : public Terminator { RT_API_ATTRS void SignalPendingError(); RT_API_ATTRS int GetIoStat() const { return ioStat_; } - bool GetIoMsg(char *, std::size_t); + RT_API_ATTRS bool GetIoMsg(char *, std::size_t); private: enum Flag : std::uint8_t { diff --git a/flang/runtime/lock.h b/flang/runtime/lock.h index 9f27a8295c468..46ca28703a45b 100644 --- a/flang/runtime/lock.h +++ b/flang/runtime/lock.h @@ -25,9 +25,7 @@ #if USE_PTHREADS #include #elif defined(_WIN32) -// Do not define macros for "min" and "max" -#define NOMINMAX -#include +#include "flang/Common/windows-include.h" #else #include #endif diff --git a/flang/runtime/memory.cpp b/flang/runtime/memory.cpp index de6c4c72fdac1..c7068ad6479a1 100644 --- a/flang/runtime/memory.cpp +++ b/flang/runtime/memory.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "flang/Runtime/memory.h" -#include "freestanding-tools.h" #include "terminator.h" #include "tools.h" +#include "flang/Runtime/freestanding-tools.h" #include namespace Fortran::runtime { diff --git a/flang/runtime/namelist.cpp b/flang/runtime/namelist.cpp index b502d41a8d5c8..b9eed2101ecfc 100644 --- a/flang/runtime/namelist.cpp +++ b/flang/runtime/namelist.cpp @@ -17,16 +17,20 @@ namespace Fortran::runtime::io { +RT_VAR_GROUP_BEGIN // Max size of a group, symbol or component identifier that can appear in // NAMELIST input, plus a byte for NUL termination. -static constexpr std::size_t nameBufferSize{201}; +static constexpr RT_CONST_VAR_ATTRS std::size_t nameBufferSize{201}; +RT_VAR_GROUP_END -static inline char32_t GetComma(IoStatementState &io) { +RT_OFFLOAD_API_GROUP_BEGIN + +static inline RT_API_ATTRS char32_t GetComma(IoStatementState &io) { return io.mutableModes().editingFlags & decimalComma ? 
char32_t{';'} : char32_t{','}; } -bool IONAME(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { +bool IODEF(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { IoStatementState &io{*cookie}; io.CheckFormattedStmtType("OutputNamelist"); io.mutableModes().inNamelist = true; @@ -40,7 +44,8 @@ bool IONAME(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { if ((connection.NeedAdvance(prefixLen) && !(io.AdvanceRecord() && EmitAscii(io, " ", 1))) || !EmitAscii(io, prefix, prefixLen) || - (connection.NeedAdvance(std::strlen(str) + (suffix != ' ')) && + (connection.NeedAdvance( + Fortran::runtime::strlen(str) + (suffix != ' ')) && !(io.AdvanceRecord() && EmitAscii(io, " ", 1)))) { return false; } @@ -84,20 +89,20 @@ bool IONAME(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { return EmitUpperCase("/", 1, "", ' '); } -static constexpr bool IsLegalIdStart(char32_t ch) { +static constexpr RT_API_ATTRS bool IsLegalIdStart(char32_t ch) { return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' || ch == '@'; } -static constexpr bool IsLegalIdChar(char32_t ch) { +static constexpr RT_API_ATTRS bool IsLegalIdChar(char32_t ch) { return IsLegalIdStart(ch) || (ch >= '0' && ch <= '9'); } -static constexpr char NormalizeIdChar(char32_t ch) { +static constexpr RT_API_ATTRS char NormalizeIdChar(char32_t ch) { return static_cast(ch >= 'A' && ch <= 'Z' ? ch - 'A' + 'a' : ch); } -static bool GetLowerCaseName( +static RT_API_ATTRS bool GetLowerCaseName( IoStatementState &io, char buffer[], std::size_t maxLength) { std::size_t byteLength{0}; if (auto ch{io.GetNextNonBlank(byteLength)}) { @@ -119,7 +124,7 @@ static bool GetLowerCaseName( return false; } -static Fortran::common::optional GetSubscriptValue( +static RT_API_ATTRS Fortran::common::optional GetSubscriptValue( IoStatementState &io) { Fortran::common::optional value; std::size_t byteCount{0}; @@ -152,8 +157,8 @@ static Fortran::common::optional GetSubscriptValue( return value; } -static bool HandleSubscripts(IoStatementState &io, Descriptor &desc, - const Descriptor &source, const char *name) { +static RT_API_ATTRS bool HandleSubscripts(IoStatementState &io, + Descriptor &desc, const Descriptor &source, const char *name) { IoErrorHandler &handler{io.GetIoErrorHandler()}; // Allow for blanks in subscripts; they're nonstandard, but not // ambiguous within the parentheses. @@ -252,7 +257,7 @@ static bool HandleSubscripts(IoStatementState &io, Descriptor &desc, return false; } -static void StorageSequenceExtension( +static RT_API_ATTRS void StorageSequenceExtension( Descriptor &desc, const Descriptor &source) { // Support the near-universal extension of NAMELIST input into a // designatable storage sequence identified by its initial scalar array @@ -274,7 +279,7 @@ static void StorageSequenceExtension( } } -static bool HandleSubstring( +static RT_API_ATTRS bool HandleSubstring( IoStatementState &io, Descriptor &desc, const char *name) { IoErrorHandler &handler{io.GetIoErrorHandler()}; auto pair{desc.type().GetCategoryAndKind()}; @@ -335,7 +340,7 @@ static bool HandleSubstring( return false; } -static bool HandleComponent(IoStatementState &io, Descriptor &desc, +static RT_API_ATTRS bool HandleComponent(IoStatementState &io, Descriptor &desc, const Descriptor &source, const char *name) { IoErrorHandler &handler{io.GetIoErrorHandler()}; char compName[nameBufferSize]; @@ -344,7 +349,8 @@ static bool HandleComponent(IoStatementState &io, Descriptor &desc, if (const typeInfo::DerivedType * type{addendum ? 
addendum->derivedType() : nullptr}) { if (const typeInfo::Component * - comp{type->FindDataComponent(compName, std::strlen(compName))}) { + comp{type->FindDataComponent( + compName, Fortran::runtime::strlen(compName))}) { bool createdDesc{false}; if (comp->rank() > 0 && source.rank() > 0) { // If base and component are both arrays, the component name @@ -408,7 +414,7 @@ static bool HandleComponent(IoStatementState &io, Descriptor &desc, // Advance to the terminal '/' of a namelist group or leading '&'/'$' // of the next. -static void SkipNamelistGroup(IoStatementState &io) { +static RT_API_ATTRS void SkipNamelistGroup(IoStatementState &io) { std::size_t byteCount{0}; while (auto ch{io.GetNextNonBlank(byteCount)}) { io.HandleRelativePosition(byteCount); @@ -431,7 +437,7 @@ static void SkipNamelistGroup(IoStatementState &io) { } } -bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { +bool IODEF(InputNamelist)(Cookie cookie, const NamelistGroup &group) { IoStatementState &io{*cookie}; io.CheckFormattedStmtType("InputNamelist"); io.mutableModes().inNamelist = true; @@ -470,7 +476,7 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { handler.SignalError("NAMELIST input group has no name"); return false; } - if (std::strcmp(group.groupName, name) == 0) { + if (Fortran::runtime::strcmp(group.groupName, name) == 0) { break; // found it } SkipNamelistGroup(io); @@ -489,7 +495,7 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { } std::size_t itemIndex{0}; for (; itemIndex < group.items; ++itemIndex) { - if (std::strcmp(name, group.item[itemIndex].name) == 0) { + if (Fortran::runtime::strcmp(name, group.item[itemIndex].name) == 0) { break; } } @@ -590,8 +596,6 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { return true; } -RT_OFFLOAD_API_GROUP_BEGIN - bool IsNamelistNameOrSlash(IoStatementState &io) { if (auto *listInput{ io.get_if>()}) { diff --git a/flang/runtime/tools.h b/flang/runtime/tools.h index 5d7d99c08179d..52049c511f13e 100644 --- a/flang/runtime/tools.h +++ b/flang/runtime/tools.h @@ -9,12 +9,12 @@ #ifndef FORTRAN_RUNTIME_TOOLS_H_ #define FORTRAN_RUNTIME_TOOLS_H_ -#include "freestanding-tools.h" #include "stat.h" #include "terminator.h" #include "flang/Common/optional.h" #include "flang/Runtime/cpp-type.h" #include "flang/Runtime/descriptor.h" +#include "flang/Runtime/freestanding-tools.h" #include "flang/Runtime/memory.h" #include #include diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-as b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-as new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-as @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.bfd b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.bfd new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ 
b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.bfd @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.gold b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.gold new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.gold @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-as b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-as new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-as @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.bfd b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.bfd new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.bfd @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.gold b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.gold new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.gold @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/as b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/as new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/as @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.bfd b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.bfd new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.bfd @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.gold b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.gold new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.gold @@ -0,0 +1 @@ +#!/bin/true diff --git 
a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/lib/.keep b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/lib/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/crtbegin.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/crtbegin.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtbegin.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtbegin.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtbeginT.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtbeginT.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtfastmath.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtfastmath.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtbegin.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtbegin.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtbeginT.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtbeginT.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtfastmath.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtfastmath.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/as b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/as new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/as @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.bfd b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.bfd new file mode 100755 index 0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.bfd @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.gold b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.gold new file mode 100755 index 
0000000000000..b23e55619b2ff --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.gold @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.lld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.lld new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/lib/.keep b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/lib/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 index bf3660d57cbb4..48f48f5384fdc 100644 --- a/flang/test/Driver/driver-help-hidden.f90 +++ b/flang/test/Driver/driver-help-hidden.f90 @@ -81,7 +81,7 @@ ! CHECK-NEXT: -fopenmp-targets= ! CHECK-NEXT: Specify comma-separated list of triples OpenMP offloading targets to be supported ! CHECK-NEXT: -fopenmp-version= -! CHECK-NEXT: Set OpenMP version (e.g. 45 for OpenMP 4.5, 51 for OpenMP 5.1). Default value is 51 for Clang +! CHECK-NEXT: Set OpenMP version (e.g. 45 for OpenMP 4.5, 51 for OpenMP 5.1). Default value is 11 for Flang ! CHECK-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! CHECK-NEXT: -foptimization-record-file= ! CHECK-NEXT: Specify the output name of the file containing the optimization remarks. Implies -fsave-optimization-record. On Darwin platforms, this cannot be used with multiple -arch options. @@ -104,6 +104,9 @@ ! CHECK-NEXT: -fversion-loops-for-stride ! CHECK-NEXT: Create unit-strided versions of loops ! CHECK-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. +! CHECK-NEXT: --gcc-install-dir= +! CHECK-NEXT: Use GCC installation in the specified directory. The directory ends with path components like 'lib{,32,64}/gcc{,-cross}/$triple/$version'. Note: executables (e.g. ld) used by the compiler are not overridden by the selected GCC installation +! CHECK-NEXT: --gcc-toolchain= Specify a directory where Clang can find 'include' and 'lib{,32,64}/gcc{,-cross}/$triple/$version'. Clang will use the GCC installation with the largest version ! CHECK-NEXT: -gline-directives-only Emit debug line info directives only ! CHECK-NEXT: -gline-tables-only Emit debug line number tables only ! CHECK-NEXT: -gpulibc Link the LLVM C Library for GPUs diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index b4280a454e312..38f74395a678a 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -69,7 +69,7 @@ ! HELP-NEXT: -fopenmp-targets= ! HELP-NEXT: Specify comma-separated list of triples OpenMP offloading targets to be supported ! HELP-NEXT: -fopenmp-version= -! HELP-NEXT: Set OpenMP version (e.g. 45 for OpenMP 4.5, 51 for OpenMP 5.1). Default value is 51 for Clang +! HELP-NEXT: Set OpenMP version (e.g. 45 for OpenMP 4.5, 51 for OpenMP 5.1). Default value is 11 for Flang ! HELP-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code. ! HELP-NEXT: -foptimization-record-file= ! HELP-NEXT: Specify the output name of the file containing the optimization remarks. Implies -fsave-optimization-record. On Darwin platforms, this cannot be used with multiple -arch options. @@ -92,6 +92,9 @@ ! HELP-NEXT: -fversion-loops-for-stride ! HELP-NEXT: Create unit-strided versions of loops ! HELP-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. +! 
HELP-NEXT: --gcc-install-dir=
+! HELP-NEXT: Use GCC installation in the specified directory. The directory ends with path components like 'lib{,32,64}/gcc{,-cross}/$triple/$version'. Note: executables (e.g. ld) used by the compiler are not overridden by the selected GCC installation
+! HELP-NEXT: --gcc-toolchain= Specify a directory where Clang can find 'include' and 'lib{,32,64}/gcc{,-cross}/$triple/$version'. Clang will use the GCC installation with the largest version
 ! HELP-NEXT: -gline-directives-only Emit debug line info directives only
 ! HELP-NEXT: -gline-tables-only Emit debug line number tables only
 ! HELP-NEXT: -gpulibc Link the LLVM C Library for GPUs
@@ -223,7 +226,7 @@
 ! HELP-FC1-NEXT: Generate code only for an OpenMP target device.
 ! HELP-FC1-NEXT: -fopenmp-target-debug Enable debugging in the OpenMP offloading device RTL
 ! HELP-FC1-NEXT: -fopenmp-version=
-! HELP-FC1-NEXT: Set OpenMP version (e.g. 45 for OpenMP 4.5, 51 for OpenMP 5.1). Default value is 51 for Clang
+! HELP-FC1-NEXT: Set OpenMP version (e.g. 45 for OpenMP 4.5, 51 for OpenMP 5.1). Default value is 11 for Flang
 ! HELP-FC1-NEXT: -fopenmp Parse OpenMP pragmas and generate parallel code.
 ! HELP-FC1-NEXT: -fpass-plugin= Load pass plugin from a dynamic shared object file (only with new pass manager).
 ! HELP-FC1-NEXT: -fppc-native-vector-element-order
diff --git a/flang/test/Driver/fopenmp.f90 b/flang/test/Driver/fopenmp.f90
new file mode 100644
index 0000000000000..c71d34dc9e7e0
--- /dev/null
+++ b/flang/test/Driver/fopenmp.f90
@@ -0,0 +1,61 @@
+! RUN: %flang -target x86_64-linux-gnu -fopenmp=libomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FC1-OPENMP
+! RUN: %flang -target x86_64-linux-gnu -fopenmp=libgomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FC1-NO-OPENMP
+! RUN: %flang -target x86_64-linux-gnu -fopenmp=libiomp5 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FC1-OPENMP
+! RUN: %flang -target x86_64-apple-darwin -fopenmp=libomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FC1-OPENMP
+! RUN: %flang -target x86_64-apple-darwin -fopenmp=libgomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FC1-NO-OPENMP
+! RUN: %flang -target x86_64-apple-darwin -fopenmp=libiomp5 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FC1-OPENMP
+! RUN: %flang -target x86_64-freebsd -fopenmp=libomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FC1-OPENMP
+! RUN: %flang -target x86_64-freebsd -fopenmp=libgomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FC1-NO-OPENMP
+! RUN: %flang -target x86_64-freebsd -fopenmp=libiomp5 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FC1-OPENMP
+! RUN: %flang -target x86_64-windows-gnu -fopenmp=libomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FC1-OPENMP
+! RUN: %flang -target x86_64-windows-gnu -fopenmp=libgomp -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FC1-NO-OPENMP --check-prefix=CHECK-WARNING
+! RUN: %flang -target x86_64-windows-gnu -fopenmp=libiomp5 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FC1-OPENMP
+
+! CHECK-FC1-OPENMP: "-fc1"
+! CHECK-FC1-OPENMP: "-fopenmp"
+!
+! CHECK-WARNING: warning: The library '-fopenmp=={{.*}}' is not supported, openmp is not be enabled
+! CHECK-FC1-NO-OPENMP: "-fc1"
+! CHECK-FC1-NO-OPENMP-NOT: "-fopenmp"
+!
+! RUN: %flang -target x86_64-linux-gnu -fopenmp=libomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-OMP
+! RUN: %flang -target x86_64-linux-gnu -fopenmp=libgomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-GOMP --check-prefix=CHECK-LD-GOMP-RT
+! RUN: %flang -target x86_64-linux-gnu -fopenmp=libiomp5 %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-IOMP5
+!
+! RUN: %flang -target x86_64-darwin -fopenmp=libomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-OMP
+! RUN: %flang -target x86_64-darwin -fopenmp=libgomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-GOMP --check-prefix=CHECK-LD-GOMP-NO-RT
+! RUN: %flang -target x86_64-darwin -fopenmp=libiomp5 %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-IOMP5
+!
+! RUN: %flang -target x86_64-freebsd -fopenmp=libomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-OMP
+! RUN: %flang -target x86_64-freebsd -fopenmp=libgomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-GOMP --check-prefix=CHECK-LD-GOMP-NO-RT
+! RUN: %flang -target x86_64-freebsd -fopenmp=libiomp5 %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-IOMP5
+!
+! RUN: %flang -target x86_64-windows-gnu -fopenmp=libomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-OMP
+! RUN: %flang -target x86_64-windows-gnu -fopenmp=libgomp %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-GOMP --check-prefix=CHECK-LD-GOMP-NO-RT
+! RUN: %flang -target x86_64-windows-gnu -fopenmp=libiomp5 %s -o %t -### 2>&1 | FileCheck %s --check-prefix=CHECK-LD-IOMP5MD
+!
+! CHECK-LD-OMP: "{{.*}}ld{{(.exe)?}}"
+! CHECK-LD-OMP: "-lomp"
+!
+! CHECK-LD-GOMP: "{{.*}}ld{{(.exe)?}}"
+! CHECK-LD-GOMP: "-lgomp"
+! CHECK-LD-GOMP-RT: "-lrt"
+! CHECK-LD-GOMP-NO-RT-NOT: "-lrt"
+!
+! CHECK-LD-IOMP5: "{{.*}}ld{{(.exe)?}}"
+! CHECK-LD-IOMP5: "-liomp5"
+!
+! CHECK-LD-IOMP5MD: "{{.*}}ld{{(.exe)?}}"
+! CHECK-LD-IOMP5MD: "-liomp5md"
+!
+! We'd like to check that the default is sane, but until we have the ability
+! to *always* semantically analyze OpenMP without always generating runtime
+! calls (in the event of an unsupported runtime), we don't have a good way to
+! test the CC1 invocation. Instead, just ensure we do eventually link *some*
+! OpenMP runtime.
+!
+! CHECK-LD-ANY: "{{.*}}ld{{(.exe)?}}"
+! CHECK-LD-ANY: "-l{{(omp|gomp|iomp5)}}"
+!
+! CHECK-LD-ANYMD: "{{.*}}ld{{(.exe)?}}"
+! CHECK-LD-ANYMD: "-l{{(omp|gomp|iomp5md)}}"
diff --git a/flang/test/Driver/gcc-toolchain-install-dir.f90 b/flang/test/Driver/gcc-toolchain-install-dir.f90
new file mode 100644
index 0000000000000..5a073b0c51712
--- /dev/null
+++ b/flang/test/Driver/gcc-toolchain-install-dir.f90
@@ -0,0 +1,21 @@
+!! Test that --gcc-toolchain and --gcc-install-dir options are working as expected.
+!! It does not test cross-compiling (--sysroot), so crtbegin.o, libgcc/compiler-rt, libc, libFortranRuntime, etc. are not supposed to be affected.
+!! PREFIX is captured twice because the driver escapes backslashes (occurring in Windows paths) in the -### output, but not on the "Selected GCC installation:" line.
+
+! RUN: %flang 2>&1 -### -v -o %t %s -no-integrated-as -fuse-ld=ld --target=i386-unknown-linux-gnu --gcc-install-dir=%S/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0 | FileCheck %s --check-prefix=CHECK-I386
+! RUN: %flang 2>&1 -### -v -o %t %s -no-integrated-as -fuse-ld=ld --target=i386-unknown-linux-gnu --gcc-toolchain=%S/Inputs/basic_cross_linux_tree/usr | FileCheck %s --check-prefix=CHECK-I386
+! CHECK-I386: Selected GCC installation: [[PREFIX:[^"]+]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0
+! CHECK-I386: "-fc1" "-triple" "i386-unknown-linux-gnu"
+! CHECK-I386: "[[PREFIX:[^"]+]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/../../../../i386-unknown-linux-gnu/bin{{/|\\\\}}as"
CHECK-I386: "[[PREFIX:[^"]+]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/../../../../i386-unknown-linux-gnu/bin{{/|\\\\}}as" +! CHECK-I386: "[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/../../../../i386-unknown-linux-gnu/bin{{/|\\\\}}ld" {{.*}} "-m" "elf_i386" +! CHECK-I386-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0" +! CHECK-I386-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/../../../../i386-unknown-linux-gnu/lib" + +! RUN: %flang 2>&1 -### -v -o %t %s -no-integrated-as -fuse-ld=ld --target=x86_64-unknown-linux-gnu --gcc-install-dir=%S/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0 | FileCheck %s --check-prefix=CHECK-X86-64 +! RUN: %flang 2>&1 -### -v -o %t %s -no-integrated-as -fuse-ld=ld --target=x86_64-unknown-linux-gnu --gcc-toolchain=%S/Inputs/basic_cross_linux_tree/usr | FileCheck %s --check-prefix=CHECK-X86-64 +! CHECK-X86-64: Selected GCC installation: [[PREFIX:[^"]+]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0 +! CHECK-X86-64: "-fc1" "-triple" "x86_64-unknown-linux-gnu" +! CHECK-X86-64: "[[PREFIX:[^"]+]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/../../../../x86_64-unknown-linux-gnu/bin{{/|\\\\}}as" "--64" +! CHECK-X86-64: "[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/../../../../x86_64-unknown-linux-gnu/bin{{/|\\\\}}ld" {{.*}} "-m" "elf_x86_64" +! CHECK-X86-64-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0" +! CHECK-X86-64-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/../../../../x86_64-unknown-linux-gnu/lib" diff --git a/flang/test/Driver/msvc-dependent-lib-flags.f90 b/flang/test/Driver/msvc-dependent-lib-flags.f90 index 7c1f962e339f9..643dbe9e949cb 100644 --- a/flang/test/Driver/msvc-dependent-lib-flags.f90 +++ b/flang/test/Driver/msvc-dependent-lib-flags.f90 @@ -4,7 +4,7 @@ ! RUN: %flang -### --target=aarch64-windows-msvc -fms-runtime-lib=dll_dbg %S/Inputs/hello.f90 -v 2>&1 | FileCheck %s --check-prefixes=MSVC-DLL-DEBUG ! MSVC: -fc1 -! MSVC-SAME: --dependent-lib=clang_rt.builtins-aarch64.lib +! MSVC-SAME: --dependent-lib=clang_rt.builtins.lib ! MSVC-SAME: -D_MT ! MSVC-SAME: --dependent-lib=libcmt ! MSVC-SAME: --dependent-lib=Fortran_main.static.lib @@ -12,7 +12,7 @@ ! MSVC-SAME: --dependent-lib=FortranDecimal.static.lib ! MSVC-DEBUG: -fc1 -! MSVC-DEBUG-SAME: --dependent-lib=clang_rt.builtins-aarch64.lib +! MSVC-DEBUG-SAME: --dependent-lib=clang_rt.builtins.lib ! MSVC-DEBUG-SAME: -D_MT ! MSVC-DEBUG-SAME: -D_DEBUG ! MSVC-DEBUG-SAME: --dependent-lib=libcmtd @@ -21,7 +21,7 @@ ! MSVC-DEBUG-SAME: --dependent-lib=FortranDecimal.static_dbg.lib ! MSVC-DLL: -fc1 -! MSVC-DLL-SAME: --dependent-lib=clang_rt.builtins-aarch64.lib +! MSVC-DLL-SAME: --dependent-lib=clang_rt.builtins.lib ! MSVC-DLL-SAME: -D_MT ! MSVC-DLL-SAME: -D_DLL ! MSVC-DLL-SAME: --dependent-lib=msvcrt @@ -30,7 +30,7 @@ ! MSVC-DLL-SAME: --dependent-lib=FortranDecimal.dynamic.lib ! MSVC-DLL-DEBUG: -fc1 -! MSVC-DLL-DEBUG-SAME: --dependent-lib=clang_rt.builtins-aarch64.lib +! MSVC-DLL-DEBUG-SAME: --dependent-lib=clang_rt.builtins.lib ! MSVC-DLL-DEBUG-SAME: -D_MT ! MSVC-DLL-DEBUG-SAME: -D_DEBUG ! 
MSVC-DLL-DEBUG-SAME: -D_DLL diff --git a/flang/test/Driver/omp-driver-offload.f90 b/flang/test/Driver/omp-driver-offload.f90 index 9b62699030c68..7e9a73627cd75 100644 --- a/flang/test/Driver/omp-driver-offload.f90 +++ b/flang/test/Driver/omp-driver-offload.f90 @@ -57,9 +57,14 @@ ! RUN: --target=aarch64-unknown-linux-gnu \ ! RUN: | FileCheck %s --check-prefix=OPENMP-OFFLOAD-ARGS ! OPENMP-OFFLOAD-ARGS: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu" {{.*}} "-fopenmp" {{.*}}.f90" -! OPENMP-OFFLOAD-ARGS-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fopenmp" {{.*}} "-fopenmp-host-ir-file-path" "{{.*}}.bc" "-fopenmp-is-target-device" {{.*}}.f90" +! OPENMP-OFFLOAD-ARGS-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "amdgcn-amd-amdhsa" +! OPENMP-OFFLOAD-ARGS-SAME: "-fopenmp" +! OPENMP-OFFLOAD-ARGS-SAME: "-fopenmp-host-ir-file-path" "{{.*}}.bc" "-fopenmp-is-target-device" +! OPENMP-OFFLOAD-ARGS-SAME: {{.*}}.f90" ! OPENMP-OFFLOAD-ARGS: "{{[^"]*}}clang-offload-packager{{.*}}" {{.*}} "--image=file={{.*}}.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=openmp" -! OPENMP-OFFLOAD-ARGS-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu" {{.*}} "-fopenmp" {{.*}} "-fembed-offload-object={{.*}}.out" {{.*}}.bc" +! OPENMP-OFFLOAD-ARGS-NEXT: "{{[^"]*}}flang-new" "-fc1" "-triple" "aarch64-unknown-linux-gnu" +! OPENMP-OFFLOAD-ARGS-SAME: "-fopenmp" +! OPENMP-OFFLOAD-ARGS-SAME: "-fembed-offload-object={{.*}}.out" {{.*}}.bc" ! Test -fopenmp with offload for RTL Flag Options ! RUN: %flang -### %s -o %t 2>&1 \ diff --git a/flang/test/Evaluate/folding32.f90 b/flang/test/Evaluate/folding32.f90 new file mode 100644 index 0000000000000..e4c8b26ca8fdc --- /dev/null +++ b/flang/test/Evaluate/folding32.f90 @@ -0,0 +1,6 @@ +! RUN: %python %S/test_folding.py %s %flang_fc1 +! Fold NORM2 reduction of array with non-default lower bound +module m + real, parameter :: a(2:3) = 0.0 + logical, parameter :: test1 = norm2(a) == 0. +end diff --git a/flang/test/Lower/CUDA/cuda-data-attribute.cuf b/flang/test/Lower/CUDA/cuda-data-attribute.cuf index 94aa62352c2a0..937c981bddd36 100644 --- a/flang/test/Lower/CUDA/cuda-data-attribute.cuf +++ b/flang/test/Lower/CUDA/cuda-data-attribute.cuf @@ -19,16 +19,20 @@ subroutine local_var_attrs real, device :: rd real, allocatable, managed :: rm real, allocatable, pinned :: rp + real, unified :: ru end subroutine ! CHECK-LABEL: func.func @_QMcuda_varPlocal_var_attrs() ! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) ! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %{{.*}}:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFlocal_var_attrsErd"} : (!fir.ref) -> !fir.ref ! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFlocal_var_attrsErm"} : (!fir.ref>>) -> !fir.ref>> ! 
FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFlocal_var_attrsErp"} : (!fir.ref>>) -> !fir.ref>> +! FIR: %{{.*}} = fir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFlocal_var_attrsEru"} : (!fir.ref) -> !fir.ref subroutine dummy_arg_device(dd) real, device :: dd @@ -51,4 +55,11 @@ end subroutine ! CHECK-SAME: %[[ARG0:.*]]: !fir.ref>> {fir.bindc_name = "dp", fir.cuda_attr = #fir.cuda}) { ! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] {cuda_attr = #fir.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMcuda_varFdummy_arg_pinnedEdp"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +subroutine dummy_arg_unified(du) + real, unified :: du +end subroutine +! CHECK-LABEL: func.func @_QMcuda_varPdummy_arg_unified( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref {fir.bindc_name = "du", fir.cuda_attr = #fir.cuda}) +! CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] {cuda_attr = #fir.cuda, uniq_name = "_QMcuda_varFdummy_arg_unifiedEdu"} : (!fir.ref) -> (!fir.ref, !fir.ref) + end module diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index 54226b8623e6a..4ebd736315bcb 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -2,6 +2,12 @@ ! Test CUDA Fortran data transfer using assignment statements. +module mod1 + type :: t1 + integer :: i + end type +end + subroutine sub1() integer, device :: m integer, device :: adev(10) @@ -55,3 +61,61 @@ end ! CHECK: %[[ASSOC:.*]]:3 = hlfir.associate %[[ELEMENTAL]](%{{.*}}) {uniq_name = ".cuf_host_tmp"} : (!hlfir.expr<10xi32>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>, i1) ! CHECK: fir.cuda_data_transfer %[[ASSOC]]#0 to %[[ADEV]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> ! CHECK: hlfir.end_associate %[[ASSOC]]#1, %[[ASSOC]]#2 : !fir.ref>, i1 + +subroutine sub2() + integer, device :: m + integer, device :: adev(10), bdev(10) + integer :: i, ahost(10), bhost(10) + + ahost = adev + + i = m + + ahost(1:5) = adev(1:5) + + bdev = adev + + ! Implicit data transfer of adev before evaluation. + bhost = ahost + adev + +end + +! CHECK-LABEL: func.func @_QPsub2() +! CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda, uniq_name = "_QFsub2Eadev"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub2Eahost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[BDEV:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {cuda_attr = #fir.cuda, uniq_name = "_QFsub2Ebdev"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub2Ebhost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsub2Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[M:.*]]:2 = hlfir.declare %{{.*}} {cuda_attr = #fir.cuda, uniq_name = "_QFsub2Em"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: fir.cuda_data_transfer %[[ADEV]]#0 to %[[AHOST]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> +! CHECK: fir.cuda_data_transfer %[[M]]#0 to %[[I]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref, !fir.ref + +! CHECK: %[[DES_ADEV:.*]] = hlfir.designate %[[ADEV]]#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.ref>, index, index, index, !fir.shape<1>) -> !fir.ref> +! 
CHECK: %[[DES_AHOST:.*]] = hlfir.designate %[[AHOST]]#0 (%{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} : (!fir.ref>, index, index, index, !fir.shape<1>) -> !fir.ref> +! CHECK: fir.cuda_data_transfer %[[DES_ADEV]] to %[[DES_AHOST]] {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> + +! CHECK: fir.cuda_data_transfer %[[ADEV]]#0 to %[[BDEV]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> + +! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<10xi32> {bindc_name = ".tmp", uniq_name = ""} +! CHECK: %[[DECL_TEMP:.*]]:2 = hlfir.declare %[[TEMP]](%{{.*}}) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.heap>, !fir.heap>) +! CHECK: %[[ADEV_TEMP:.*]]:2 = hlfir.declare %21#0 {cuda_attr = #fir.cuda, uniq_name = "_QFsub2Eadev"} : (!fir.heap>) -> (!fir.heap>, !fir.heap>) +! CHECK: fir.cuda_data_transfer %[[ADEV]]#1 to %[[DECL_TEMP]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.heap> +! CHECK: %[[ELEMENTAL:.*]] = hlfir.elemental %{{.*}} unordered : (!fir.shape<1>) -> !hlfir.expr<10xi32> +! CHECK: hlfir.assign %[[ELEMENTAL]] to %[[BHOST]]#0 : !hlfir.expr<10xi32>, !fir.ref> +! CHECK: fir.freemem %[[DECL_TEMP]]#0 : !fir.heap> + +subroutine sub3() + use mod1 + type(t1), device :: t + integer :: ahost(10), bhost(10) + + bhost = ahost + t%i +end + +! CHECK-LABEL: func.func @_QPsub3() +! CHECK: %[[TMP:.*]] = fir.alloca !fir.type<_QMmod1Tt1{i:i32}> {bindc_name = ".tmp"} +! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Eahost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[BHOST:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub3Ebhost"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[T:.*]]:2 = hlfir.declare %7 {cuda_attr = #fir.cuda, uniq_name = "_QFsub3Et"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: %[[TMP_DECL:.*]]:2 = hlfir.declare %0 {uniq_name = ".tmp"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +! CHECK: fir.cuda_data_transfer %[[T]]#1 to %[[TMP_DECL]]#0 {transfer_kind = #fir.cuda_transfer} : !fir.ref>, !fir.ref> diff --git a/flang/test/Lower/Intrinsics/maskl.f90 b/flang/test/Lower/Intrinsics/maskl.f90 index fab0122f6be74..ea77df480d525 100644 --- a/flang/test/Lower/Intrinsics/maskl.f90 +++ b/flang/test/Lower/Intrinsics/maskl.f90 @@ -1,17 +1,18 @@ -! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s +! RUN: bbc -emit-fir %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s ! CHECK-LABEL: maskl_test -! CHECK-SAME: %[[A:.*]]: !fir.ref{{.*}}, %[[B:.*]]: !fir.ref{{.*}} subroutine maskl_test(a, b) integer :: a integer :: b + ! CHECK-DAG: %[[BITS:.*]] = arith.constant 32 : i32 + ! CHECK-DAG: %[[C__1:.*]] = arith.constant -1 : i32 + ! CHECK-DAG: %[[C__0:.*]] = arith.constant 0 : i32 + ! CHECK: %[[A:.*]] = fir.declare %{{.*}}Ea + ! CHECK: %[[B:.*]] = fir.declare %{{.*}}Eb ! CHECK: %[[A_VAL:.*]] = fir.load %[[A]] : !fir.ref b = maskl(a) - ! CHECK: %[[C__0:.*]] = arith.constant 0 : i32 - ! CHECK: %[[C__1:.*]] = arith.constant -1 : i32 - ! CHECK: %[[BITS:.*]] = arith.constant 32 : i32 ! CHECK: %[[LEN:.*]] = arith.subi %[[BITS]], %[[A_VAL]] : i32 ! CHECK: %[[SHIFT:.*]] = arith.shli %[[C__1]], %[[LEN]] : i32 ! CHECK: %[[IS0:.*]] = arith.cmpi eq, %[[A_VAL]], %[[C__0]] : i32 @@ -20,16 +21,17 @@ subroutine maskl_test(a, b) end subroutine maskl_test ! CHECK-LABEL: maskl1_test -! 
CHECK-SAME: %[[A:.*]]: !fir.ref{{.*}}, %[[B:.*]]: !fir.ref{{.*}} subroutine maskl1_test(a, b) integer :: a integer(kind=1) :: b + ! CHECK-DAG: %[[BITS:.*]] = arith.constant 8 : i8 + ! CHECK-DAG: %[[C__1:.*]] = arith.constant -1 : i8 + ! CHECK-DAG: %[[C__0:.*]] = arith.constant 0 : i8 + ! CHECK: %[[A:.*]] = fir.declare %{{.*}}Ea + ! CHECK: %[[B:.*]] = fir.declare %{{.*}}Eb ! CHECK: %[[A_VAL:.*]] = fir.load %[[A]] : !fir.ref b = maskl(a, 1) - ! CHECK: %[[C__0:.*]] = arith.constant 0 : i8 - ! CHECK: %[[C__1:.*]] = arith.constant -1 : i8 - ! CHECK: %[[BITS:.*]] = arith.constant 8 : i8 ! CHECK: %[[A_CONV:.*]] = fir.convert %[[A_VAL]] : (i32) -> i8 ! CHECK: %[[LEN:.*]] = arith.subi %[[BITS]], %[[A_CONV]] : i8 ! CHECK: %[[SHIFT:.*]] = arith.shli %[[C__1]], %[[LEN]] : i8 @@ -39,16 +41,17 @@ subroutine maskl1_test(a, b) end subroutine maskl1_test ! CHECK-LABEL: maskl2_test -! CHECK-SAME: %[[A:.*]]: !fir.ref{{.*}}, %[[B:.*]]: !fir.ref{{.*}} subroutine maskl2_test(a, b) integer :: a integer(kind=2) :: b + ! CHECK-DAG: %[[BITS:.*]] = arith.constant 16 : i16 + ! CHECK-DAG: %[[C__1:.*]] = arith.constant -1 : i16 + ! CHECK-DAG: %[[C__0:.*]] = arith.constant 0 : i16 + ! CHECK: %[[A:.*]] = fir.declare %{{.*}}Ea + ! CHECK: %[[B:.*]] = fir.declare %{{.*}}Eb ! CHECK: %[[A_VAL:.*]] = fir.load %[[A]] : !fir.ref b = maskl(a, 2) - ! CHECK: %[[C__0:.*]] = arith.constant 0 : i16 - ! CHECK: %[[C__1:.*]] = arith.constant -1 : i16 - ! CHECK: %[[BITS:.*]] = arith.constant 16 : i16 ! CHECK: %[[A_CONV:.*]] = fir.convert %[[A_VAL]] : (i32) -> i16 ! CHECK: %[[LEN:.*]] = arith.subi %[[BITS]], %[[A_CONV]] : i16 ! CHECK: %[[SHIFT:.*]] = arith.shli %[[C__1]], %[[LEN]] : i16 @@ -58,16 +61,17 @@ subroutine maskl2_test(a, b) end subroutine maskl2_test ! CHECK-LABEL: maskl4_test -! CHECK-SAME: %[[A:.*]]: !fir.ref{{.*}}, %[[B:.*]]: !fir.ref{{.*}} subroutine maskl4_test(a, b) integer :: a integer(kind=4) :: b + ! CHECK-DAG: %[[BITS:.*]] = arith.constant 32 : i32 + ! CHECK-DAG: %[[C__1:.*]] = arith.constant -1 : i32 + ! CHECK-DAG: %[[C__0:.*]] = arith.constant 0 : i32 + ! CHECK: %[[A:.*]] = fir.declare %{{.*}}Ea + ! CHECK: %[[B:.*]] = fir.declare %{{.*}}Eb ! CHECK: %[[A_VAL:.*]] = fir.load %[[A]] : !fir.ref b = maskl(a, 4) - ! CHECK: %[[C__0:.*]] = arith.constant 0 : i32 - ! CHECK: %[[C__1:.*]] = arith.constant -1 : i32 - ! CHECK: %[[BITS:.*]] = arith.constant 32 : i32 ! CHECK: %[[LEN:.*]] = arith.subi %[[BITS]], %[[A_VAL]] : i32 ! CHECK: %[[SHIFT:.*]] = arith.shli %[[C__1]], %[[LEN]] : i32 ! CHECK: %[[IS0:.*]] = arith.cmpi eq, %[[A_VAL]], %[[C__0]] : i32 @@ -76,16 +80,17 @@ subroutine maskl4_test(a, b) end subroutine maskl4_test ! CHECK-LABEL: maskl8_test -! CHECK-SAME: %[[A:.*]]: !fir.ref{{.*}}, %[[B:.*]]: !fir.ref{{.*}} subroutine maskl8_test(a, b) integer :: a integer(kind=8) :: b + ! CHECK-DAG: %[[BITS:.*]] = arith.constant 64 : i64 + ! CHECK-DAG: %[[C__1:.*]] = arith.constant -1 : i64 + ! CHECK-DAG: %[[C__0:.*]] = arith.constant 0 : i64 + ! CHECK: %[[A:.*]] = fir.declare %{{.*}}Ea + ! CHECK: %[[B:.*]] = fir.declare %{{.*}}Eb ! CHECK: %[[A_VAL:.*]] = fir.load %[[A]] : !fir.ref b = maskl(a, 8) - ! CHECK: %[[C__0:.*]] = arith.constant 0 : i64 - ! CHECK: %[[C__1:.*]] = arith.constant -1 : i64 - ! CHECK: %[[BITS:.*]] = arith.constant 64 : i64 ! CHECK: %[[A_CONV:.*]] = fir.convert %[[A_VAL]] : (i32) -> i64 ! CHECK: %[[LEN:.*]] = arith.subi %[[BITS]], %[[A_CONV]] : i64 ! CHECK: %[[SHIFT:.*]] = arith.shli %[[C__1]], %[[LEN]] : i64 @@ -94,8 +99,21 @@ subroutine maskl8_test(a, b) ! 
CHECK: fir.store %[[RESULT]] to %[[B]] : !fir.ref end subroutine maskl8_test -! TODO: Code containing 128-bit integer literals current breaks. This is -! probably related to the issue linked below. When that is fixed, a test -! for kind=16 should be added here. -! -! https://github.com/llvm/llvm-project/issues/56446 +subroutine maskl16_test(a, b) + integer :: a + integer(16) :: b + ! CHECK-DAG: %[[BITS:.*]] = arith.constant 128 : i128 + ! CHECK-DAG: %[[C__1:.*]] = arith.constant -1 : i128 + ! CHECK-DAG: %[[C__0:.*]] = arith.constant 0 : i128 + ! CHECK: %[[A:.*]] = fir.declare %{{.*}}Ea + ! CHECK: %[[B:.*]] = fir.declare %{{.*}}Eb + + ! CHECK: %[[A_VAL:.*]] = fir.load %[[A]] : !fir.ref + b = maskl(a, 16) + ! CHECK: %[[A_CONV:.*]] = fir.convert %[[A_VAL]] : (i32) -> i128 + ! CHECK: %[[LEN:.*]] = arith.subi %[[BITS]], %[[A_CONV]] : i128 + ! CHECK: %[[SHIFT:.*]] = arith.shli %[[C__1]], %[[LEN]] : i128 + ! CHECK: %[[IS0:.*]] = arith.cmpi eq, %[[A_CONV]], %[[C__0]] : i128 + ! CHECK: %[[RESULT:.*]] = arith.select %[[IS0]], %[[C__0]], %[[SHIFT]] : i128 + ! CHECK: fir.store %[[RESULT]] to %[[B]] : !fir.ref +end subroutine diff --git a/flang/test/Lower/Intrinsics/maskr.f90 b/flang/test/Lower/Intrinsics/maskr.f90 index 85077a19541c5..8c87da2a5829c 100644 --- a/flang/test/Lower/Intrinsics/maskr.f90 +++ b/flang/test/Lower/Intrinsics/maskr.f90 @@ -1,17 +1,18 @@ -! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s -! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s +! RUN: bbc -emit-fir %s -o - | FileCheck %s +! RUN: %flang_fc1 -emit-fir %s -o - | FileCheck %s ! CHECK-LABEL: maskr_test -! CHECK-SAME: %[[A:.*]]: !fir.ref{{.*}}, %[[B:.*]]: !fir.ref{{.*}} subroutine maskr_test(a, b) integer :: a integer :: b + ! CHECK-DAG: %[[BITS:.*]] = arith.constant 32 : i32 + ! CHECK-DAG: %[[C__1:.*]] = arith.constant -1 : i32 + ! CHECK-DAG: %[[C__0:.*]] = arith.constant 0 : i32 + ! CHECK: %[[A:.*]] = fir.declare %{{.*}}Ea + ! CHECK: %[[B:.*]] = fir.declare %{{.*}}Eb ! CHECK: %[[A_VAL:.*]] = fir.load %[[A]] : !fir.ref b = maskr(a) - ! CHECK: %[[C__0:.*]] = arith.constant 0 : i32 - ! CHECK: %[[C__1:.*]] = arith.constant -1 : i32 - ! CHECK: %[[BITS:.*]] = arith.constant 32 : i32 ! CHECK: %[[LEN:.*]] = arith.subi %[[BITS]], %[[A_VAL]] : i32 ! CHECK: %[[SHIFT:.*]] = arith.shrui %[[C__1]], %[[LEN]] : i32 ! CHECK: %[[IS0:.*]] = arith.cmpi eq, %[[A_VAL]], %[[C__0]] : i32 @@ -20,16 +21,17 @@ subroutine maskr_test(a, b) end subroutine maskr_test ! CHECK-LABEL: maskr1_test -! CHECK-SAME: %[[A:.*]]: !fir.ref{{.*}}, %[[B:.*]]: !fir.ref{{.*}} subroutine maskr1_test(a, b) integer :: a integer(kind=1) :: b + ! CHECK-DAG: %[[BITS:.*]] = arith.constant 8 : i8 + ! CHECK-DAG: %[[C__1:.*]] = arith.constant -1 : i8 + ! CHECK-DAG: %[[C__0:.*]] = arith.constant 0 : i8 + ! CHECK: %[[A:.*]] = fir.declare %{{.*}}Ea + ! CHECK: %[[B:.*]] = fir.declare %{{.*}}Eb ! CHECK: %[[A_VAL:.*]] = fir.load %[[A]] : !fir.ref b = maskr(a, 1) - ! CHECK: %[[C__0:.*]] = arith.constant 0 : i8 - ! CHECK: %[[C__1:.*]] = arith.constant -1 : i8 - ! CHECK: %[[BITS:.*]] = arith.constant 8 : i8 ! CHECK: %[[A_CONV:.*]] = fir.convert %[[A_VAL]] : (i32) -> i8 ! CHECK: %[[LEN:.*]] = arith.subi %[[BITS]], %[[A_CONV]] : i8 ! CHECK: %[[SHIFT:.*]] = arith.shrui %[[C__1]], %[[LEN]] : i8 @@ -39,16 +41,17 @@ subroutine maskr1_test(a, b) end subroutine maskr1_test ! CHECK-LABEL: maskr2_test -! CHECK-SAME: %[[A:.*]]: !fir.ref{{.*}}, %[[B:.*]]: !fir.ref{{.*}} subroutine maskr2_test(a, b) integer :: a integer(kind=2) :: b + ! 
CHECK-DAG: %[[BITS:.*]] = arith.constant 16 : i16 + ! CHECK-DAG: %[[C__1:.*]] = arith.constant -1 : i16 + ! CHECK-DAG: %[[C__0:.*]] = arith.constant 0 : i16 + ! CHECK: %[[A:.*]] = fir.declare %{{.*}}Ea + ! CHECK: %[[B:.*]] = fir.declare %{{.*}}Eb ! CHECK: %[[A_VAL:.*]] = fir.load %[[A]] : !fir.ref b = maskr(a, 2) - ! CHECK: %[[C__0:.*]] = arith.constant 0 : i16 - ! CHECK: %[[C__1:.*]] = arith.constant -1 : i16 - ! CHECK: %[[BITS:.*]] = arith.constant 16 : i16 ! CHECK: %[[A_CONV:.*]] = fir.convert %[[A_VAL]] : (i32) -> i16 ! CHECK: %[[LEN:.*]] = arith.subi %[[BITS]], %[[A_CONV]] : i16 ! CHECK: %[[SHIFT:.*]] = arith.shrui %[[C__1]], %[[LEN]] : i16 @@ -58,16 +61,17 @@ subroutine maskr2_test(a, b) end subroutine maskr2_test ! CHECK-LABEL: maskr4_test -! CHECK-SAME: %[[A:.*]]: !fir.ref{{.*}}, %[[B:.*]]: !fir.ref{{.*}} subroutine maskr4_test(a, b) integer :: a integer(kind=4) :: b + ! CHECK-DAG: %[[BITS:.*]] = arith.constant 32 : i32 + ! CHECK-DAG: %[[C__1:.*]] = arith.constant -1 : i32 + ! CHECK-DAG: %[[C__0:.*]] = arith.constant 0 : i32 + ! CHECK: %[[A:.*]] = fir.declare %{{.*}}Ea + ! CHECK: %[[B:.*]] = fir.declare %{{.*}}Eb ! CHECK: %[[A_VAL:.*]] = fir.load %[[A]] : !fir.ref b = maskr(a, 4) - ! CHECK: %[[C__0:.*]] = arith.constant 0 : i32 - ! CHECK: %[[C__1:.*]] = arith.constant -1 : i32 - ! CHECK: %[[BITS:.*]] = arith.constant 32 : i32 ! CHECK: %[[LEN:.*]] = arith.subi %[[BITS]], %[[A_VAL]] : i32 ! CHECK: %[[SHIFT:.*]] = arith.shrui %[[C__1]], %[[LEN]] : i32 ! CHECK: %[[IS0:.*]] = arith.cmpi eq, %[[A_VAL]], %[[C__0]] : i32 @@ -76,16 +80,17 @@ subroutine maskr4_test(a, b) end subroutine maskr4_test ! CHECK-LABEL: maskr8_test -! CHECK-SAME: %[[A:.*]]: !fir.ref{{.*}}, %[[B:.*]]: !fir.ref{{.*}} subroutine maskr8_test(a, b) integer :: a integer(kind=8) :: b + ! CHECK-DAG: %[[BITS:.*]] = arith.constant 64 : i64 + ! CHECK-DAG: %[[C__1:.*]] = arith.constant -1 : i64 + ! CHECK-DAG: %[[C__0:.*]] = arith.constant 0 : i64 + ! CHECK: %[[A:.*]] = fir.declare %{{.*}}Ea + ! CHECK: %[[B:.*]] = fir.declare %{{.*}}Eb ! CHECK: %[[A_VAL:.*]] = fir.load %[[A]] : !fir.ref b = maskr(a, 8) - ! CHECK: %[[C__0:.*]] = arith.constant 0 : i64 - ! CHECK: %[[C__1:.*]] = arith.constant -1 : i64 - ! CHECK: %[[BITS:.*]] = arith.constant 64 : i64 ! CHECK: %[[A_CONV:.*]] = fir.convert %[[A_VAL]] : (i32) -> i64 ! CHECK: %[[LEN:.*]] = arith.subi %[[BITS]], %[[A_CONV]] : i64 ! CHECK: %[[SHIFT:.*]] = arith.shrui %[[C__1]], %[[LEN]] : i64 @@ -94,8 +99,21 @@ subroutine maskr8_test(a, b) ! CHECK: fir.store %[[RESULT]] to %[[B]] : !fir.ref end subroutine maskr8_test -! TODO: Code containing 128-bit integer literals current breaks. This is -! probably related to the issue linked below. When that is fixed, a test -! for kind=16 should be added here. -! -! https://github.com/llvm/llvm-project/issues/56446 +subroutine maskr16_test(a, b) + integer :: a + integer(16) :: b + ! CHECK-DAG: %[[BITS:.*]] = arith.constant 128 : i128 + ! CHECK-DAG: %[[C__1:.*]] = arith.constant -1 : i128 + ! CHECK-DAG: %[[C__0:.*]] = arith.constant 0 : i128 + ! CHECK: %[[A:.*]] = fir.declare %{{.*}}Ea + ! CHECK: %[[B:.*]] = fir.declare %{{.*}}Eb + + ! CHECK: %[[A_VAL:.*]] = fir.load %[[A]] : !fir.ref + b = maskr(a, 16) + ! CHECK: %[[A_CONV:.*]] = fir.convert %[[A_VAL]] : (i32) -> i128 + ! CHECK: %[[LEN:.*]] = arith.subi %[[BITS]], %[[A_CONV]] : i128 + ! CHECK: %[[SHIFT:.*]] = arith.shrui %[[C__1]], %[[LEN]] : i128 + ! CHECK: %[[IS0:.*]] = arith.cmpi eq, %[[A_CONV]], %[[C__0]] : i128 + ! CHECK: %[[RESULT:.*]] = arith.select %[[IS0]], %[[C__0]], %[[SHIFT]] : i128 + ! 
CHECK: fir.store %[[RESULT]] to %[[B]] : !fir.ref +end subroutine diff --git a/flang/test/Lower/OpenMP/parallel-reduction-complex-mul.f90 b/flang/test/Lower/OpenMP/parallel-reduction-complex-mul.f90 new file mode 100644 index 0000000000000..376defb823581 --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-reduction-complex-mul.f90 @@ -0,0 +1,50 @@ +! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +!CHECK-LABEL: omp.declare_reduction +!CHECK-SAME: @[[RED_NAME:.*]] : !fir.complex<8> init { +!CHECK: ^bb0(%{{.*}}: !fir.complex<8>): +!CHECK: %[[C0_1:.*]] = arith.constant 1.000000e+00 : f64 +!CHECK: %[[C0_2:.*]] = arith.constant 0.000000e+00 : f64 +!CHECK: %[[UNDEF:.*]] = fir.undefined !fir.complex<8> +!CHECK: %[[RES_1:.*]] = fir.insert_value %[[UNDEF]], %[[C0_1]], [0 : index] +!CHECK: %[[RES_2:.*]] = fir.insert_value %[[RES_1]], %[[C0_2]], [1 : index] +!CHECK: omp.yield(%[[RES_2]] : !fir.complex<8>) +!CHECK: } combiner { +!CHECK: ^bb0(%[[ARG0:.*]]: !fir.complex<8>, %[[ARG1:.*]]: !fir.complex<8>): +!CHECK: %[[RES:.*]] = fir.mulc %[[ARG0]], %[[ARG1]] {{.*}}: !fir.complex<8> +!CHECK: omp.yield(%[[RES]] : !fir.complex<8>) +!CHECK: } + +!CHECK-LABEL: func.func @_QPsimple_complex_mul +!CHECK: %[[CREF:.*]] = fir.alloca !fir.complex<8> {bindc_name = "c", {{.*}}} +!CHECK: %[[C_DECL:.*]]:2 = hlfir.declare %[[CREF]] {uniq_name = "_QFsimple_complex_mulEc"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[C_START_RE:.*]] = arith.constant 0.000000e+00 : f64 +!CHECK: %[[C_START_IM:.*]] = arith.constant 0.000000e+00 : f64 +!CHECK: %[[UNDEF_1:.*]] = fir.undefined !fir.complex<8> +!CHECK: %[[VAL_1:.*]] = fir.insert_value %[[UNDEF_1]], %[[C_START_RE]], [0 : index] +!CHECK: %[[VAL_2:.*]] = fir.insert_value %[[VAL_1]], %[[C_START_IM]], [1 : index] +!CHECK: hlfir.assign %[[VAL_2]] to %[[C_DECL]]#0 : !fir.complex<8>, !fir.ref> +!CHECK: omp.parallel reduction(@[[RED_NAME]] %[[C_DECL]]#0 -> %[[PRV:.+]] : !fir.ref>) { +!CHECK: %[[P_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[LPRV:.+]] = fir.load %[[P_DECL]]#0 : !fir.ref> +!CHECK: %[[C_INCR_RE:.*]] = arith.constant 1.000000e+00 : f64 +!CHECK: %[[C_INCR_IM:.*]] = arith.constant -2.000000e+00 : f64 +!CHECK: %[[UNDEF_2:.*]] = fir.undefined !fir.complex<8> +!CHECK: %[[INCR_1:.*]] = fir.insert_value %[[UNDEF_2]], %[[C_INCR_RE]], [0 : index] +!CHECK: %[[INCR_2:.*]] = fir.insert_value %[[INCR_1]], %[[C_INCR_IM]], [1 : index] +!CHECK: %[[RES:.+]] = fir.mulc %[[LPRV]], %[[INCR_2]] {{.*}} : !fir.complex<8> +!CHECK: hlfir.assign %[[RES]] to %[[P_DECL]]#0 : !fir.complex<8>, !fir.ref> +!CHECK: omp.terminator +!CHECK: } +!CHECK: return +subroutine simple_complex_mul + complex(8) :: c + c = 0 + + !$omp parallel reduction(*:c) + c = c * cmplx(1, -2) + !$omp end parallel + + print *, c +end subroutine diff --git a/flang/test/Lower/OpenMP/parallel-reduction-complex.f90 b/flang/test/Lower/OpenMP/parallel-reduction-complex.f90 new file mode 100644 index 0000000000000..bc5a6b475e256 --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-reduction-complex.f90 @@ -0,0 +1,50 @@ +! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s +! 
RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +!CHECK-LABEL: omp.declare_reduction +!CHECK-SAME: @[[RED_NAME:.*]] : !fir.complex<8> init { +!CHECK: ^bb0(%{{.*}}: !fir.complex<8>): +!CHECK: %[[C0_1:.*]] = arith.constant 0.000000e+00 : f64 +!CHECK: %[[C0_2:.*]] = arith.constant 0.000000e+00 : f64 +!CHECK: %[[UNDEF:.*]] = fir.undefined !fir.complex<8> +!CHECK: %[[RES_1:.*]] = fir.insert_value %[[UNDEF]], %[[C0_1]], [0 : index] +!CHECK: %[[RES_2:.*]] = fir.insert_value %[[RES_1]], %[[C0_2]], [1 : index] +!CHECK: omp.yield(%[[RES_2]] : !fir.complex<8>) +!CHECK: } combiner { +!CHECK: ^bb0(%[[ARG0:.*]]: !fir.complex<8>, %[[ARG1:.*]]: !fir.complex<8>): +!CHECK: %[[RES:.*]] = fir.addc %[[ARG0]], %[[ARG1]] {{.*}}: !fir.complex<8> +!CHECK: omp.yield(%[[RES]] : !fir.complex<8>) +!CHECK: } + +!CHECK-LABEL: func.func @_QPsimple_complex_add +!CHECK: %[[CREF:.*]] = fir.alloca !fir.complex<8> {bindc_name = "c", {{.*}}} +!CHECK: %[[C_DECL:.*]]:2 = hlfir.declare %[[CREF]] {uniq_name = "_QFsimple_complex_addEc"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[C_START_RE:.*]] = arith.constant 0.000000e+00 : f64 +!CHECK: %[[C_START_IM:.*]] = arith.constant 0.000000e+00 : f64 +!CHECK: %[[UNDEF_1:.*]] = fir.undefined !fir.complex<8> +!CHECK: %[[VAL_1:.*]] = fir.insert_value %[[UNDEF_1]], %[[C_START_RE]], [0 : index] +!CHECK: %[[VAL_2:.*]] = fir.insert_value %[[VAL_1]], %[[C_START_IM]], [1 : index] +!CHECK: hlfir.assign %[[VAL_2]] to %[[C_DECL]]#0 : !fir.complex<8>, !fir.ref> +!CHECK: omp.parallel reduction(@[[RED_NAME]] %[[C_DECL]]#0 -> %[[PRV:.+]] : !fir.ref>) { +!CHECK: %[[P_DECL:.+]]:2 = hlfir.declare %[[PRV]] {{.*}} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) +!CHECK: %[[LPRV:.+]] = fir.load %[[P_DECL]]#0 : !fir.ref> +!CHECK: %[[C_INCR_RE:.*]] = arith.constant 1.000000e+00 : f64 +!CHECK: %[[C_INCR_IM:.*]] = arith.constant 0.000000e+00 : f64 +!CHECK: %[[UNDEF_2:.*]] = fir.undefined !fir.complex<8> +!CHECK: %[[INCR_1:.*]] = fir.insert_value %[[UNDEF_2]], %[[C_INCR_RE]], [0 : index] +!CHECK: %[[INCR_2:.*]] = fir.insert_value %[[INCR_1]], %[[C_INCR_IM]], [1 : index] +!CHECK: %[[RES:.+]] = fir.addc %[[LPRV]], %[[INCR_2]] {{.*}} : !fir.complex<8> +!CHECK: hlfir.assign %[[RES]] to %[[P_DECL]]#0 : !fir.complex<8>, !fir.ref> +!CHECK: omp.terminator +!CHECK: } +!CHECK: return +subroutine simple_complex_add + complex(8) :: c + c = 0 + + !$omp parallel reduction(+:c) + c = c + 1 + !$omp end parallel + + print *, c +end subroutine diff --git a/flang/test/Lower/allocate-source-allocatables-2.f90 b/flang/test/Lower/allocate-source-allocatables-2.f90 new file mode 100644 index 0000000000000..39b9f04a5f67a --- /dev/null +++ b/flang/test/Lower/allocate-source-allocatables-2.f90 @@ -0,0 +1,49 @@ +! RUN: bbc -emit-hlfir %s -o - | FileCheck %s +! Test lowering of the extension of SOURCE allocation (the non-deferred length +! of a character allocate-object need not match the SOURCE length; truncation +! and padding are performed instead, as in assignments). + +subroutine test() +! CHECK-LABEL: func.func @_QPtest() { +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %{{.*}} {{.*}}Ec_deferred +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %{{.*}} typeparams %[[VAL_6:.*]] {{.*}}Ec_longer +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %{{.*}} typeparams %[[VAL_11:.*]] {{.*}}Ec_shorter +! CHECK: %[[VAL_17:.*]]:2 = hlfir.declare %{{.*}} typeparams %[[VAL_16:.*]] {{{.*}}Ec_source + character(5) :: c_source = "hello" + character(2), allocatable :: c_shorter + character(:), allocatable :: c_deferred + character(7), allocatable :: c_longer +! 
CHECK: %[[VAL_18:.*]] = arith.constant false +! CHECK: %[[VAL_22:.*]] = fir.embox %[[VAL_17]]#1 : (!fir.ref>) -> !fir.box> + +! CHECK: %[[VAL_23:.*]] = fir.convert %[[VAL_14]]#1 : (!fir.ref>>>) -> !fir.ref> +! CHECK: %[[VAL_24:.*]] = fir.convert %[[VAL_22]] : (!fir.box>) -> !fir.box +! CHECK: %[[VAL_26:.*]] = fir.call @_FortranAAllocatableAllocateSource(%[[VAL_23]], %[[VAL_24]], %[[VAL_18]] + +! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.ref>>>) -> !fir.ref> +! CHECK: %[[VAL_28:.*]] = fir.convert %[[VAL_16]] : (index) -> i64 +! CHECK: %[[VAL_29:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_30:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_31:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_32:.*]] = fir.call @_FortranAAllocatableInitCharacterForAllocate(%[[VAL_27]], %[[VAL_28]], %[[VAL_29]], %[[VAL_30]], %[[VAL_31]] +! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.ref>>>) -> !fir.ref> +! CHECK: %[[VAL_34:.*]] = fir.convert %[[VAL_22]] : (!fir.box>) -> !fir.box +! CHECK: %[[VAL_36:.*]] = fir.call @_FortranAAllocatableAllocateSource(%[[VAL_33]], %[[VAL_34]], %[[VAL_18]], + +! CHECK-NOT: AllocatableInitCharacterForAllocate +! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_9]]#1 : (!fir.ref>>>) -> !fir.ref> +! CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_22]] : (!fir.box>) -> !fir.box +! CHECK: %[[VAL_40:.*]] = fir.call @_FortranAAllocatableAllocateSource(%[[VAL_37]], %[[VAL_38]], %[[VAL_18]], + allocate(c_shorter, c_deferred, c_longer, source=c_source) + +! Expect at runtime: +! ZZheZZ +! ZZhelloZZ +! ZZhello ZZ + write(*,"('ZZ',A,'ZZ')") c_shorter + write(*,"('ZZ',A,'ZZ')") c_deferred + write(*,"('ZZ',A,'ZZ')") c_longer +end subroutine + + call test() +end diff --git a/flang/test/Lower/stop-statement.f90 b/flang/test/Lower/stop-statement.f90 index bc94a7ee23a61..cf0665cf5dbd1 100644 --- a/flang/test/Lower/stop-statement.f90 +++ b/flang/test/Lower/stop-statement.f90 @@ -21,10 +21,10 @@ subroutine stop_code() ! CHECK-LABEL: stop_error subroutine stop_error() error stop - ! CHECK-DAG: %[[c0:.*]] = arith.constant 0 : i32 + ! CHECK-DAG: %[[c_1:.*]] = arith.constant 1 : i32 ! CHECK-DAG: %[[true:.*]] = arith.constant true ! CHECK-DAG: %[[false:.*]] = arith.constant false - ! CHECK: fir.call @_Fortran{{.*}}StopStatement(%[[c0]], %[[true]], %[[false]]) + ! CHECK: fir.call @_Fortran{{.*}}StopStatement(%[[c_1]], %[[true]], %[[false]]) ! CHECK-NEXT: fir.unreachable end subroutine diff --git a/flang/test/Parser/compiler-directives.f90 b/flang/test/Parser/compiler-directives.f90 index 67e8d5b292aa0..d4c99ae12f14e 100644 --- a/flang/test/Parser/compiler-directives.f90 +++ b/flang/test/Parser/compiler-directives.f90 @@ -23,4 +23,8 @@ module m !dir$ optimize : 1 !dir$ loop count (10000) !dir$ loop count (1, 500, 5000, 10000) + type stuff + real(8), allocatable :: d(:) + !dir$ align : 1024 :: d + end type stuff end diff --git a/flang/test/Semantics/cuf03.cuf b/flang/test/Semantics/cuf03.cuf index 41bfbb7678136..574add9faaade 100644 --- a/flang/test/Semantics/cuf03.cuf +++ b/flang/test/Semantics/cuf03.cuf @@ -1,6 +1,12 @@ ! RUN: %python %S/test_errors.py %s %flang_fc1 ! Exercise CUDA data attribute checks module m + type :: t1 + integer :: i + end type + type :: t2 + real, unified :: r(10) ! ok + end type real, constant :: mc ! ok real, constant :: mci = 1. ! 
ok !ERROR: Object 'mcl' with ATTRIBUTES(CONSTANT) may not be allocatable, pointer, or target @@ -48,14 +54,20 @@ module m real, texture, pointer :: mt !ERROR: 'bigint' has intrinsic type 'INTEGER(16)' that is not available on the device integer(16), device :: bigint + !ERROR: Object 'um' with ATTRIBUTES(UNIFIED) must be declared in a host subprogram + real, unified :: um + contains attributes(device) subroutine devsubr(n,da) integer, intent(in) :: n - real, device :: da(*) ! ok + !ERROR: Object 'da' with ATTRIBUTES(DEVICE) may not be assumed size + real, device :: da(*) real, managed :: ma(n) ! ok !WARNING: Pointer 'dp' may not be associated in a device subprogram real, device, pointer :: dp real, constant :: rc ! ok + !ERROR: Object 'u' with ATTRIBUTES(UNIFIED) must be declared in a host subprogram + real, unified :: u end subroutine subroutine host() @@ -69,4 +81,10 @@ module m rs = 1 ! ok end subroutine + subroutine host2() + real, unified :: ru ! ok + type(t1), unified :: tu ! ok + type(t2) :: t ! ok + end subroutine + end module diff --git a/flang/test/Semantics/cuf09.cuf b/flang/test/Semantics/cuf09.cuf index 4bc93132044fd..d2d4d239815e4 100644 --- a/flang/test/Semantics/cuf09.cuf +++ b/flang/test/Semantics/cuf09.cuf @@ -7,6 +7,14 @@ module m do k=1,10 end do end + attributes(device) subroutine devsub2 + real, device :: x(10) + print*,'from device' + print '(f10.5)', (x(ivar), ivar = 1, 10) + write(*,*), "Hello world from device!" + !WARNING: I/O statement might not be supported on device + write(12,'(10F4.1)'), x + end end program main diff --git a/flang/test/Semantics/cuf11.cuf b/flang/test/Semantics/cuf11.cuf new file mode 100644 index 0000000000000..96108e2b24556 --- /dev/null +++ b/flang/test/Semantics/cuf11.cuf @@ -0,0 +1,12 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 + +subroutine sub1() + real, device :: adev(10), bdev(10) + real :: ahost(10) + +!ERROR: More than one reference to a CUDA object on the right hand side of the assigment + ahost = adev + bdev + + ahost = adev + adev + +end subroutine diff --git a/flang/test/Semantics/intrinsics04.f90 b/flang/test/Semantics/intrinsics04.f90 new file mode 100644 index 0000000000000..a7d646e5c016e --- /dev/null +++ b/flang/test/Semantics/intrinsics04.f90 @@ -0,0 +1,25 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! A potentially absent actual argument cannot require data type conversion. +subroutine s(o,a,p) + integer(2), intent(in), optional :: o + integer(2), intent(in), allocatable :: a + integer(2), intent(in), pointer :: p + !ERROR: An actual argument to MAX/MIN requiring data conversion may not be OPTIONAL, POINTER, or ALLOCATABLE + print *, max(1, 2, o) + !ERROR: An actual argument to MAX/MIN requiring data conversion may not be OPTIONAL, POINTER, or ALLOCATABLE + print *, max(1, 2, a) + !ERROR: An actual argument to MAX/MIN requiring data conversion may not be OPTIONAL, POINTER, or ALLOCATABLE + print *, max(1, 2, p) + !ERROR: An actual argument to MAX/MIN requiring data conversion may not be OPTIONAL, POINTER, or ALLOCATABLE + print *, min(1, 2, o) + !ERROR: An actual argument to MAX/MIN requiring data conversion may not be OPTIONAL, POINTER, or ALLOCATABLE + print *, min(1, 2, a) + !ERROR: An actual argument to MAX/MIN requiring data conversion may not be OPTIONAL, POINTER, or ALLOCATABLE + print *, min(1, 2, p) + print *, max(1_2, 2_2, o) ! ok + print *, max(1_2, 2_2, a) ! ok + print *, max(1_2, 2_2, p) ! ok + print *, min(1_2, 2_2, o) ! ok + print *, min(1_2, 2_2, a) ! ok + print *, min(1_2, 2_2, p) ! 
ok +end diff --git a/flang/test/Semantics/numeric_storage_size.f90 b/flang/test/Semantics/numeric_storage_size.f90 new file mode 100644 index 0000000000000..720297c0feb30 --- /dev/null +++ b/flang/test/Semantics/numeric_storage_size.f90 @@ -0,0 +1,40 @@ +! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s --check-prefix=CHECK +! RUN: %flang_fc1 -fdebug-unparse -fdefault-integer-8 %s 2>&1 | FileCheck %s --check-prefix=CHECK-I8 +! RUN: %flang_fc1 -fdebug-unparse %s -fdefault-real-8 2>&1 | FileCheck %s --check-prefix=CHECK-R8 +! RUN: %flang_fc1 -fdebug-unparse %s -fdefault-integer-8 -fdefault-real-8 2>&1 | FileCheck %s --check-prefix=CHECK-I8-R8 + +use iso_fortran_env + +!CHECK-NOT: warning +!CHECK: nss = 32_4 +!CHECK-I8: warning: NUMERIC_STORAGE_SIZE from ISO_FORTRAN_ENV is not well-defined when default INTEGER and REAL are not consistent due to compiler options +!CHECK-I8: nss = 32_4 +!CHECK-R8: warning: NUMERIC_STORAGE_SIZE from ISO_FORTRAN_ENV is not well-defined when default INTEGER and REAL are not consistent due to compiler options +!CHECK-R8: nss = 32_4 +!CHECK-I8-R8: nss = 64_4 +integer, parameter :: nss = numeric_storage_size + +!CHECK: iss = 32_4 +!CHECK-I8: iss = 64_8 +!CHECK-R8: iss = 32_4 +!CHECK-I8-R8: iss = 64_8 +integer, parameter :: iss = storage_size(1) + +!CHECK: rss = 32_4 +!CHECK-I8: rss = 32_8 +!CHECK-R8: rss = 64_4 +!CHECK-I8-R8: rss = 64_8 +integer, parameter :: rss = storage_size(1.) + +!CHECK: zss = 64_4 +!CHECK-I8: zss = 64_8 +!CHECK-R8: zss = 128_4 +!CHECK-I8-R8: zss = 128_8 +integer, parameter :: zss = storage_size((1.,0.)) + +!CHECK: lss = 32_4 +!CHECK-I8: lss = 64_8 +!CHECK-R8: lss = 32_4 +!CHECK-I8-R8: lss = 64_8 +integer, parameter :: lss = storage_size(.true.) +end diff --git a/flang/test/Semantics/resolve21.f90 b/flang/test/Semantics/resolve21.f90 index 3be7602b539d2..76f83d554fc27 100644 --- a/flang/test/Semantics/resolve21.f90 +++ b/flang/test/Semantics/resolve21.f90 @@ -16,15 +16,15 @@ subroutine s1 external :: w !ERROR: 'z' is not an object of derived type; it is implicitly typed i = z%i - !ERROR: 's1' is an invalid base for a component reference + !ERROR: 's1' is not an object and may not be used as the base of a component reference or type parameter inquiry i = s1%i !ERROR: 'j' is not an object of derived type i = j%i !ERROR: Component 'j' not found in derived type 't' i = x%j - !ERROR: 'v' is an invalid base for a component reference + !ERROR: 'v' is not an object and may not be used as the base of a component reference or type parameter inquiry i = v%i - !ERROR: 'w' is an invalid base for a component reference + !ERROR: 'w' is not an object and may not be used as the base of a component reference or type parameter inquiry i = w%i i = x%i !OK end subroutine diff --git a/flang/test/Semantics/resolve29.f90 b/flang/test/Semantics/resolve29.f90 index 3e6a8a0ba6976..c6a9b036c5828 100644 --- a/flang/test/Semantics/resolve29.f90 +++ b/flang/test/Semantics/resolve29.f90 @@ -3,6 +3,7 @@ module m1 type t1 end type type t3 + integer t3c end type interface subroutine s1(x) @@ -64,6 +65,17 @@ subroutine s9() end type type(t2) x end + subroutine s10() + !Forward shadowing derived type in IMPLICIT + !(supported by all other compilers) + implicit type(t1) (c) ! forward shadow + implicit type(t3) (d) ! 
host associated + type t1 + integer a + end type + c%a = 1 + d%t3c = 2 + end end module module m2 integer, parameter :: ck = kind('a') diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 3a31f4df1607a..e266055a4bf01 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -17,8 +17,6 @@ set(MODULES "ieee_features" "iso_c_binding" "iso_fortran_env" - "__fortran_builtins" - "__fortran_type_info" ) # Create module files directly from the top-level module source directory. @@ -27,22 +25,20 @@ set(MODULES # can't be used for generating module files. if (NOT CMAKE_CROSSCOMPILING) foreach(filename ${MODULES}) - set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) - if(${filename} STREQUAL "__fortran_builtins") - set(depends "") - elseif(${filename} STREQUAL "__ppc_types") - set(depends "") + set(depends "") + if(${filename} STREQUAL "__fortran_builtins" OR + ${filename} STREQUAL "__ppc_types") elseif(${filename} STREQUAL "__ppc_intrinsics" OR ${filename} STREQUAL "mma") set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) else() set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) if(NOT ${filename} STREQUAL "__fortran_type_info") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) + set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_type_info.mod) endif() if(${filename} STREQUAL "ieee_arithmetic" OR ${filename} STREQUAL "ieee_exceptions") - set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) + set(depends ${depends} ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_ieee_exceptions.mod) endif() endif() @@ -58,6 +54,7 @@ if (NOT CMAKE_CROSSCOMPILING) endif() endif() + set(base ${FLANG_INTRINSIC_MODULES_DIR}/${filename}) # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support add_custom_command(OUTPUT ${base}.mod COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index a0d79858a896a..175efd89d67e6 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -381,6 +381,7 @@ endforeach() add_subdirectory(include) add_subdirectory(config) +add_subdirectory(hdr) add_subdirectory(src) add_subdirectory(utils) diff --git a/libc/cmake/modules/LLVMLibCCheckMPFR.cmake b/libc/cmake/modules/LLVMLibCCheckMPFR.cmake index bbaeb9f0dc053..45334a54431ef 100644 --- a/libc/cmake/modules/LLVMLibCCheckMPFR.cmake +++ b/libc/cmake/modules/LLVMLibCCheckMPFR.cmake @@ -2,7 +2,9 @@ set(LLVM_LIBC_MPFR_INSTALL_PATH "" CACHE PATH "Path to where MPFR is installed ( if(LLVM_LIBC_MPFR_INSTALL_PATH) set(LIBC_TESTS_CAN_USE_MPFR TRUE) -elseif(LIBC_TARGET_OS_IS_GPU) +elseif(LIBC_TARGET_OS_IS_GPU OR LLVM_LIBC_FULL_BUILD) + # In full build mode, the MPFR library should be built using our own facilities, + # which is currently not possible. set(LIBC_TESTS_CAN_USE_MPFR FALSE) else() try_compile( diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake index 40a1cfda060e6..5b3a10d55fed3 100644 --- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake +++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake @@ -43,6 +43,7 @@ function(_get_common_compile_options output_var flags) list(APPEND compile_options "-fpie") if(LLVM_LIBC_FULL_BUILD) + list(APPEND compile_options "-DLIBC_FULL_BUILD") # Only add -ffreestanding flag in full build mode. 
list(APPEND compile_options "-ffreestanding") endif() @@ -126,6 +127,7 @@ function(_get_common_test_compile_options output_var c_test flags) list(APPEND compile_options "-fpie") if(LLVM_LIBC_FULL_BUILD) + list(APPEND compile_options "-DLIBC_FULL_BUILD") # Only add -ffreestanding flag in full build mode. list(APPEND compile_options "-ffreestanding") list(APPEND compile_options "-fno-exceptions") @@ -178,5 +180,10 @@ function(_get_hermetic_test_compile_options output_var flags) -Wno-multi-gpu --cuda-path=${LIBC_CUDA_ROOT} -nogpulib -march=${LIBC_GPU_TARGET_ARCHITECTURE} -fno-use-cxa-atexit) endif() + + if(LLVM_LIBC_FULL_BUILD) + list(APPEND compile_options "-DLIBC_FULL_BUILD") + endif() + set(${output_var} ${compile_options} PARENT_SCOPE) endfunction() diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt index 9e21f5c20d920..f33f9430c7920 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -196,6 +196,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.fenv.fegetround libc.src.fenv.feholdexcept libc.src.fenv.fesetenv + libc.src.fenv.fesetexcept libc.src.fenv.fesetexceptflag libc.src.fenv.fesetround libc.src.fenv.feraiseexcept diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index 7664937da0f6e..dad187fa0496d 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -196,6 +196,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.fenv.fegetround libc.src.fenv.feholdexcept libc.src.fenv.fesetenv + libc.src.fenv.fesetexcept libc.src.fenv.fesetexceptflag libc.src.fenv.fesetround libc.src.fenv.feraiseexcept diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt index 6b89ce55d72b6..aea2f6d5771e8 100644 --- a/libc/config/darwin/arm/entrypoints.txt +++ b/libc/config/darwin/arm/entrypoints.txt @@ -107,6 +107,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.fenv.fegetround libc.src.fenv.feholdexcept libc.src.fenv.fesetenv + libc.src.fenv.fesetexcept libc.src.fenv.fesetexceptflag libc.src.fenv.fesetround libc.src.fenv.feraiseexcept diff --git a/libc/config/darwin/x86_64/entrypoints.txt b/libc/config/darwin/x86_64/entrypoints.txt index 5a1a6a15ef30c..09fe3d7b47687 100644 --- a/libc/config/darwin/x86_64/entrypoints.txt +++ b/libc/config/darwin/x86_64/entrypoints.txt @@ -101,6 +101,7 @@ set(TARGET_LIBM_ENTRYPOINTS # libc.src.fenv.fegetround # libc.src.fenv.feholdexcept # libc.src.fenv.fesetenv + # libc.src.fenv.fesetexcept # libc.src.fenv.fesetexceptflag # libc.src.fenv.fesetround # libc.src.fenv.feraiseexcept diff --git a/libc/config/gpu/api.td b/libc/config/gpu/api.td index adaf5bfd747ac..523ad49ffa3fd 100644 --- a/libc/config/gpu/api.td +++ b/libc/config/gpu/api.td @@ -64,7 +64,11 @@ def StdIOAPI : PublicAPI<"stdio.h"> { SimpleMacroDef<"_IOLBF", "1">, SimpleMacroDef<"_IONBF", "2">, ]; - let Types = ["size_t", "FILE"]; + let Types = [ + "FILE", + "off_t", + "size_t", + ]; } def IntTypesAPI : PublicAPI<"inttypes.h"> { diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 8bf99459f7898..f5f5c437685a2 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -319,6 +319,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.fenv.fegetround libc.src.fenv.feholdexcept libc.src.fenv.fesetenv + libc.src.fenv.fesetexcept libc.src.fenv.fesetexceptflag libc.src.fenv.fesetround libc.src.fenv.feraiseexcept diff --git 
a/libc/config/linux/api.td b/libc/config/linux/api.td index eb5ed8089850e..9964971f191b7 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -49,7 +49,10 @@ def CTypeAPI : PublicAPI<"ctype.h"> { } def FCntlAPI : PublicAPI<"fcntl.h"> { - let Types = ["mode_t"]; + let Types = [ + "mode_t", + "off_t", + ]; } def IntTypesAPI : PublicAPI<"inttypes.h"> { @@ -77,7 +80,12 @@ def StdIOAPI : PublicAPI<"stdio.h"> { SimpleMacroDef<"_IOLBF", "1">, SimpleMacroDef<"_IONBF", "2">, ]; - let Types = ["size_t", "FILE", "cookie_io_functions_t"]; + let Types = [ + "FILE", + "cookie_io_functions_t", + "off_t", + "size_t", + ]; } def StdlibAPI : PublicAPI<"stdlib.h"> { diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index 04baa4c1cf93a..fca50735d320b 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -187,6 +187,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.fenv.fegetround libc.src.fenv.feholdexcept libc.src.fenv.fesetenv + libc.src.fenv.fesetexcept libc.src.fenv.fesetexceptflag libc.src.fenv.fesetround libc.src.fenv.feraiseexcept diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 4dc5e9d33f0f8..71289789158f4 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -327,6 +327,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.fenv.fegetround libc.src.fenv.feholdexcept libc.src.fenv.fesetenv + libc.src.fenv.fesetexcept libc.src.fenv.fesetexceptflag libc.src.fenv.fesetround libc.src.fenv.feraiseexcept diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 2742c33ae4784..6bb53cb76220f 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -337,6 +337,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.fenv.fegetround libc.src.fenv.feholdexcept libc.src.fenv.fesetenv + libc.src.fenv.fesetexcept libc.src.fenv.fesetexceptflag libc.src.fenv.fesetround libc.src.fenv.feraiseexcept @@ -494,6 +495,9 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.round libc.src.math.roundf libc.src.math.roundl + libc.src.math.roundeven + libc.src.math.roundevenf + libc.src.math.roundevenl libc.src.math.scalbn libc.src.math.scalbnf libc.src.math.scalbnl @@ -555,6 +559,7 @@ if(LIBC_TYPES_HAS_FLOAT128) libc.src.math.nextdownf128 libc.src.math.nextupf128 libc.src.math.rintf128 + libc.src.math.roundevenf128 libc.src.math.roundf128 libc.src.math.sqrtf128 libc.src.math.truncf128 diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt index c38125a646227..c46c947bf3135 100644 --- a/libc/config/windows/entrypoints.txt +++ b/libc/config/windows/entrypoints.txt @@ -105,6 +105,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.fenv.fegetround libc.src.fenv.feholdexcept libc.src.fenv.fesetenv + libc.src.fenv.fesetexcept libc.src.fenv.fesetexceptflag libc.src.fenv.fesetround libc.src.fenv.feraiseexcept diff --git a/libc/docs/c23.rst b/libc/docs/c23.rst index 3f64722bc8e64..4138c9d7104f3 100644 --- a/libc/docs/c23.rst +++ b/libc/docs/c23.rst @@ -20,7 +20,7 @@ Additions: * fenv.h - * fesetexcept + * fesetexcept |check| * fetestexceptflag * fegetmode * fesetmode @@ -54,7 +54,7 @@ Additions: * pown* * powr* * rootn* - * roundeven* + * roundeven* |check| * fromfp* * ufromfp* * fromfpx* diff --git a/libc/docs/ctype.rst b/libc/docs/ctype.rst new file mode 100644 index 0000000000000..7d77dadccc9ba --- /dev/null +++ b/libc/docs/ctype.rst @@ -0,0 +1,55 @@ +.. 
include:: check.rst + +ctype.h Functions +================= + +.. list-table:: + :widths: auto + :align: center + :header-rows: 1 + + * - Function + - Implemented + - Standard + * - isalnum + - |check| + - 7.4.1.1 + * - isalpha + - |check| + - 7.4.1.2 + * - isblank + - |check| + - 7.4.1.3 + * - iscntrl + - |check| + - 7.4.1.4 + * - isdigit + - |check| + - 7.4.1.5 + * - isgraph + - |check| + - 7.4.1.6 + * - islower + - |check| + - 7.4.1.7 + * - isprint + - |check| + - 7.4.1.8 + * - ispunct + - |check| + - 7.4.1.9 + * - isspace + - |check| + - 7.4.1.10 + * - isupper + - |check| + - 7.4.1.11 + * - isxdigit + - |check| + - 7.4.1.12 + * - tolower + - |check| + - 7.4.2.1 + * - toupper + - |check| + - 7.4.2.2 diff --git a/libc/docs/fenv.rst b/libc/docs/fenv.rst new file mode 100644 index 0000000000000..6574fb7246ddd --- /dev/null +++ b/libc/docs/fenv.rst @@ -0,0 +1,64 @@ +.. include:: check.rst + +fenv.h Functions +================ + +.. list-table:: + :widths: auto + :align: center + :header-rows: 1 + + * - Function + - Implemented + - Standard + * - fe_dec_getround + - + - 7.6.5.3 + * - fe_dec_setround + - + - 7.6.5.6 + * - feclearexcept + - |check| + - 7.6.4.1 + * - fegetenv + - |check| + - 7.6.6.1 + * - fegetexceptflag + - |check| + - 7.6.4.2 + * - fegetmode + - + - 7.6.5.1 + * - fegetround + - |check| + - 7.6.5.2 + * - feholdexcept + - |check| + - 7.6.6.2 + * - feraiseexcept + - |check| + - 7.6.4.3 + * - fesetenv + - |check| + - 7.6.6.3 + * - fesetexcept + - + - 7.6.4.4 + * - fesetexceptflag + - |check| + - 7.6.4.5 + * - fesetmode + - + - 7.6.5.4 + * - fesetround + - |check| + - 7.6.5.5 + * - fetestexcept + - |check| + - 7.6.4.7 + * - fetestexceptflag + - + - 7.6.4.6 + * - feupdateenv + - |check| + - 7.6.6.4 diff --git a/libc/docs/index.rst b/libc/docs/index.rst index 370fcd843974e..8470c8d9287c2 100644 --- a/libc/docs/index.rst +++ b/libc/docs/index.rst @@ -66,8 +66,10 @@ stages there is no ABI stability in any form. strings stdio stdbit + fenv libc_search c23 + ctype .. 
toctree:: :hidden: diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 970a43ca87c9e..7a7b6c9c8db5d 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -206,7 +206,7 @@ Basic Operations +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | round | |check| | |check| | |check| | | |check| | 7.12.9.6 | F.10.6.6 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| roundeven | | | | | | 7.12.9.8 | F.10.6.8 | +| roundeven | |check| | |check| | |check| | | |check| | 7.12.9.8 | F.10.6.8 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | scalbn | |check| | |check| | |check| | | | 7.12.6.19 | F.10.3.19 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/fuzzing/__support/CMakeLists.txt b/libc/fuzzing/__support/CMakeLists.txt index 278e914e3fbe9..d4f6db71fdd84 100644 --- a/libc/fuzzing/__support/CMakeLists.txt +++ b/libc/fuzzing/__support/CMakeLists.txt @@ -3,5 +3,5 @@ add_libc_fuzzer( SRCS uint_fuzz.cpp DEPENDS - libc.src.__support.uint + libc.src.__support.big_int ) diff --git a/libc/fuzzing/__support/uint_fuzz.cpp b/libc/fuzzing/__support/uint_fuzz.cpp index f48f00d3b4ba1..07149f511b838 100644 --- a/libc/fuzzing/__support/uint_fuzz.cpp +++ b/libc/fuzzing/__support/uint_fuzz.cpp @@ -1,5 +1,5 @@ #include "src/__support/CPP/bit.h" -#include "src/__support/UInt.h" +#include "src/__support/big_int.h" #include "src/string/memory_utils/inline_memcpy.h" using namespace LIBC_NAMESPACE; diff --git a/libc/fuzzing/math/CMakeLists.txt b/libc/fuzzing/math/CMakeLists.txt index 86c864083d206..6990a04922a5c 100644 --- a/libc/fuzzing/math/CMakeLists.txt +++ b/libc/fuzzing/math/CMakeLists.txt @@ -8,6 +8,7 @@ add_libc_fuzzer( SingleInputSingleOutputDiff.h TwoInputSingleOutputDiff.h DEPENDS + libc.hdr.math_macros libc.src.math.ceil libc.src.math.ceilf libc.src.math.ceill diff --git a/libc/fuzzing/math/RemQuoDiff.h b/libc/fuzzing/math/RemQuoDiff.h index 95a9866f29dbb..84a6a24ce5271 100644 --- a/libc/fuzzing/math/RemQuoDiff.h +++ b/libc/fuzzing/math/RemQuoDiff.h @@ -11,7 +11,7 @@ #include "src/__support/FPUtil/FPBits.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include #include diff --git a/libc/fuzzing/stdlib/CMakeLists.txt b/libc/fuzzing/stdlib/CMakeLists.txt index 711b0fd9820f9..204bc619318da 100644 --- a/libc/fuzzing/stdlib/CMakeLists.txt +++ b/libc/fuzzing/stdlib/CMakeLists.txt @@ -22,6 +22,7 @@ add_libc_fuzzer( SRCS strtofloat_fuzz.cpp DEPENDS + libc.hdr.math_macros libc.src.stdlib.atof libc.src.stdlib.strtof libc.src.stdlib.strtod diff --git a/libc/fuzzing/stdlib/strtofloat_fuzz.cpp b/libc/fuzzing/stdlib/strtofloat_fuzz.cpp index b000321854d16..c158162ba6238 100644 --- a/libc/fuzzing/stdlib/strtofloat_fuzz.cpp +++ b/libc/fuzzing/stdlib/strtofloat_fuzz.cpp @@ -16,7 +16,7 @@ #include "src/__support/FPUtil/FPBits.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include #include diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt new file mode 
100644 index 0000000000000..4ca7db5e98d60 --- /dev/null +++ b/libc/hdr/CMakeLists.txt @@ -0,0 +1,42 @@ +function(add_proxy_header_library target_name) + cmake_parse_arguments( + "ADD_PROXY_HEADER" + "" # Optional arguments + "" # Single value arguments + "DEPENDS;FULL_BUILD_DEPENDS" # Multi-value arguments + ${ARGN} + ) + + set(deps "") + if(ADD_PROXY_HEADER_DEPENDS) + list(APPEND deps ${ADD_PROXY_HEADER_DEPENDS}) + endif() + + if(LLVM_LIBC_FULL_BUILD AND ADD_PROXY_HEADER_FULL_BUILD_DEPENDS) + list(APPEND deps ${ADD_PROXY_HEADER_FULL_BUILD_DEPENDS}) + endif() + + add_header_library( + ${target_name} + ${ADD_PROXY_HEADER_UNPARSED_ARGUMENTS} + DEPENDS ${deps} + ) +endfunction() + +add_proxy_header_library( + math_macros + HDRS + math_macros.h + FULL_BUILD_DEPENDS + libc.include.llvm-libc-macros.math_macros + libc.include.math +) + +add_proxy_header_library( + fenv_macros + HDRS + fenv_macros.h + FULL_BUILD_DEPENDS + libc.include.llvm-libc-macros.fenv_macros + libc.include.fenv +) diff --git a/libc/hdr/fenv_macros.h b/libc/hdr/fenv_macros.h new file mode 100644 index 0000000000000..1ad28cc278a97 --- /dev/null +++ b/libc/hdr/fenv_macros.h @@ -0,0 +1,22 @@ +//===-- Definition of macros from fenv.h ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_FENV_MACROS_H +#define LLVM_LIBC_HDR_FENV_MACROS_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-macros/fenv-macros.h" + +#else // Overlay mode + +#include <fenv.h> + +#endif // LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_FENV_MACROS_H
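Both branches of the proxy-header pattern are visible above: in full build mode the header re-exports llvm-libc's own macro header, and in overlay mode it defers to the system header. A compile-only sketch of what a consumer effectively sees (our own example; LIBC_FULL_BUILD and the include path are the names this patch defines, the static_assert is ours):

```cpp
// Sketch of the dual-mode include pattern from libc/hdr/fenv_macros.h.
// Building with -DLIBC_FULL_BUILD (and -I at the llvm-project/libc root)
// takes the first branch; a plain host build takes the second.
#ifdef LIBC_FULL_BUILD
#include "include/llvm-libc-macros/fenv-macros.h"
#else
#include <fenv.h>
#endif

// Either way, the FE_* rounding-mode macros are in scope under one spelling.
static_assert(FE_TONEAREST != FE_UPWARD, "rounding-mode macros are distinct");
```

This is also why the CMake helper above only attaches the llvm-libc header targets as dependencies when LLVM_LIBC_FULL_BUILD is set: in overlay mode there is nothing to build, only a system header to include. The math proxy header follows next.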
diff --git a/libc/hdr/math_macros.h b/libc/hdr/math_macros.h new file mode 100644 index 0000000000000..d13c5ff7647ad --- /dev/null +++ b/libc/hdr/math_macros.h @@ -0,0 +1,43 @@ +//===-- Definition of macros from math.h ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_MATH_MACROS_H +#define LLVM_LIBC_HDR_MATH_MACROS_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-macros/math-macros.h" + +#else // Overlay mode + +#include <math.h> + +// Some older math.h headers do not have the FP_INT_* constants yet. +#ifndef FP_INT_UPWARD +#define FP_INT_UPWARD 0 +#endif // FP_INT_UPWARD + +#ifndef FP_INT_DOWNWARD +#define FP_INT_DOWNWARD 1 +#endif // FP_INT_DOWNWARD + +#ifndef FP_INT_TOWARDZERO +#define FP_INT_TOWARDZERO 2 +#endif // FP_INT_TOWARDZERO + +#ifndef FP_INT_TONEARESTFROMZERO +#define FP_INT_TONEARESTFROMZERO 3 +#endif // FP_INT_TONEARESTFROMZERO + +#ifndef FP_INT_TONEAREST +#define FP_INT_TONEAREST 4 +#endif // FP_INT_TONEAREST + +#endif // LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_MATH_MACROS_H diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 4203f0bc901b2..02c7dc8fbc0b3 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -41,9 +41,10 @@ add_gen_header( DEF_FILE fcntl.h.def GEN_HDR fcntl.h DEPENDS - .llvm_libc_common_h .llvm-libc-macros.fcntl_macros .llvm-libc-types.mode_t + .llvm-libc-types.off_t + .llvm_libc_common_h ) add_gen_header( @@ -264,13 +265,14 @@ add_gen_header( DEF_FILE stdio.h.def GEN_HDR stdio.h DEPENDS - .llvm_libc_common_h .llvm-libc-macros.file_seek_macros .llvm-libc-macros.stdio_macros - .llvm-libc-types.size_t - .llvm-libc-types.ssize_t .llvm-libc-types.FILE .llvm-libc-types.cookie_io_functions_t + .llvm-libc-types.off_t + .llvm-libc-types.size_t + .llvm-libc-types.ssize_t + .llvm_libc_common_h ) add_gen_header( diff --git a/libc/include/llvm-libc-macros/math-macros.h b/libc/include/llvm-libc-macros/math-macros.h index 1497e32044e97..47838969d59ae 100644 --- a/libc/include/llvm-libc-macros/math-macros.h +++ b/libc/include/llvm-libc-macros/math-macros.h @@ -51,32 +51,9 @@ #define math_errhandling (MATH_ERRNO | MATH_ERREXCEPT) #endif -// These must be type-generic functions. The C standard specifies them as -// being macros rather than functions, in fact. However, in C++ it's important -// that there be function declarations that don't interfere with other uses of -// the identifier, even in places with parentheses where a function-like macro -// will be expanded (such as a function declaration in a C++ namespace). - -#ifdef __cplusplus - -template <typename T> inline constexpr bool isfinite(T x) { - return __builtin_isfinite(x); -} - -template <typename T> inline constexpr bool isinf(T x) { - return __builtin_isinf(x); -} - -template <typename T> inline constexpr bool isnan(T x) { - return __builtin_isnan(x); -} - -#else - +// TODO: Move generic functional math macros to a separate header file. 
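The macro forms kept just below can stay type-generic because the compiler builtins they wrap are themselves type-generic, which is what made the deleted C++ template overloads redundant here. A minimal standalone illustration (our own, not part of the patch; my_isnan is a stand-in name so as not to shadow the real macro):

```cpp
// One builtin-backed macro covers float, double, and long double alike.
#include <cstdio>

#define my_isnan(x) __builtin_isnan(x) // same shape as the retained macros

int main() {
  float f = __builtin_nanf("");
  double d = 1.0;
  long double ld = __builtin_nanl("");
  // With GCC/Clang this prints: 1 0 1
  std::printf("%d %d %d\n", my_isnan(f), my_isnan(d), my_isnan(ld));
  return 0;
}
```

The removed comment explained that the template overloads existed so the names would not collide with other uses of the identifiers in C++; the TODO above tracks re-homing the macro forms instead.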
#define isfinite(x) __builtin_isfinite(x) #define isinf(x) __builtin_isinf(x) #define isnan(x) __builtin_isnan(x) -#endif - #endif // LLVM_LIBC_MACROS_MATH_MACROS_H diff --git a/libc/spec/posix.td b/libc/spec/posix.td index cfa8d3afedde3..45f7ecfe84e98 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -210,7 +210,10 @@ def POSIX : StandardSpec<"POSIX"> { HeaderSpec FCntl = HeaderSpec< "fcntl.h", [], // Macros - [ModeTType], + [ + ModeTType, + OffTType, + ], [], // Enumerations [ FunctionSpec< @@ -1180,7 +1183,7 @@ def POSIX : StandardSpec<"POSIX"> { HeaderSpec StdIO = HeaderSpec< "stdio.h", [], // Macros - [], // Types + [OffTType], // Types [], // Enumerations [ FunctionSpec< diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index bd62870b07c89..63d0449867114 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -179,6 +179,11 @@ def StdC : StandardSpec<"stdc"> { RetValSpec, [ArgSpec, ArgSpec] >, + FunctionSpec< + "fesetexcept", + RetValSpec, + [ArgSpec] + >, FunctionSpec< "fesetexceptflag", RetValSpec, diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 7b1820d9bf353..dcae55e050bf1 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -104,7 +104,7 @@ add_header_library( HDRS integer_to_string.h DEPENDS - .uint + .big_int libc.src.__support.common libc.src.__support.CPP.algorithm libc.src.__support.CPP.limits @@ -204,9 +204,9 @@ add_header_library( ) add_header_library( - uint + big_int HDRS - UInt.h + big_int.h DEPENDS .math_extras .number_pair @@ -220,9 +220,9 @@ add_header_library( add_header_library( uint128 HDRS - UInt128.h + uint128.h DEPENDS - .uint + .big_int libc.src.__support.macros.properties.types ) diff --git a/libc/src/__support/CPP/bit.h b/libc/src/__support/CPP/bit.h index 80f50fd221efa..8a8951a18bfa1 100644 --- a/libc/src/__support/CPP/bit.h +++ b/libc/src/__support/CPP/bit.h @@ -72,6 +72,14 @@ has_single_bit(T value) { /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of 0. +// clang-19+, gcc-14+ +#if __has_builtin(__builtin_ctzg) +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countr_zero(T value) { + return __builtin_ctzg(value, cpp::numeric_limits::digits); +} +#else template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countr_zero(T value) { @@ -99,6 +107,7 @@ ADD_SPECIALIZATION(countr_zero, unsigned short, __builtin_ctzs) ADD_SPECIALIZATION(countr_zero, unsigned int, __builtin_ctz) ADD_SPECIALIZATION(countr_zero, unsigned long, __builtin_ctzl) ADD_SPECIALIZATION(countr_zero, unsigned long long, __builtin_ctzll) +#endif // __has_builtin(__builtin_ctzg) /// Count number of 0's from the most significant bit to the least /// stopping at the first 1. @@ -106,6 +115,14 @@ ADD_SPECIALIZATION(countr_zero, unsigned long long, __builtin_ctzll) /// Only unsigned integral types are allowed. /// /// Returns cpp::numeric_limits::digits on an input of 0. 
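The guarded definitions that follow prefer the new type-generic builtins and keep the per-width specializations as a fallback. Here is the same detection pattern in isolation (our own compile-only sketch; ctz is a stand-in name, and the builtin signatures are those documented for clang 19+/gcc 14+):

```cpp
#include <limits>

template <typename T> constexpr int ctz(T value) {
#if __has_builtin(__builtin_ctzg)
  // Type-generic builtin; the second argument is returned for a zero input.
  return __builtin_ctzg(value, std::numeric_limits<T>::digits);
#else
  // Width-specific fallback; the zero case must be handled by hand.
  return value == 0 ? std::numeric_limits<T>::digits
                    : __builtin_ctzll(static_cast<unsigned long long>(value));
#endif
}

static_assert(ctz(8u) == 3, "bit 3 is the lowest set bit of 8");
static_assert(ctz(0u) == std::numeric_limits<unsigned>::digits,
              "zero input yields the digit count");
```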
+// clang-19+, gcc-14+ +#if __has_builtin(__builtin_clzg) +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +countl_zero(T value) { + return __builtin_clzg(value, cpp::numeric_limits::digits); +} +#else template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countl_zero(T value) { @@ -129,6 +146,7 @@ ADD_SPECIALIZATION(countl_zero, unsigned short, __builtin_clzs) ADD_SPECIALIZATION(countl_zero, unsigned int, __builtin_clz) ADD_SPECIALIZATION(countl_zero, unsigned long, __builtin_clzl) ADD_SPECIALIZATION(countl_zero, unsigned long long, __builtin_clzll) +#endif // __has_builtin(__builtin_clzg) #undef ADD_SPECIALIZATION diff --git a/libc/src/__support/FPUtil/BasicOperations.h b/libc/src/__support/FPUtil/BasicOperations.h index 6e4156497618e..e5ac101fedc0e 100644 --- a/libc/src/__support/FPUtil/BasicOperations.h +++ b/libc/src/__support/FPUtil/BasicOperations.h @@ -14,9 +14,9 @@ #include "FEnvImpl.h" #include "src/__support/CPP/type_traits.h" -#include "src/__support/UInt128.h" #include "src/__support/common.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/uint128.h" namespace LIBC_NAMESPACE { namespace fputil { diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt index ff155a19758d2..0b5ea83689430 100644 --- a/libc/src/__support/FPUtil/CMakeLists.txt +++ b/libc/src/__support/FPUtil/CMakeLists.txt @@ -4,7 +4,8 @@ add_header_library( FEnvImpl.h DEPENDS libc.include.fenv - libc.include.math + libc.hdr.fenv_macros + libc.hdr.math_macros libc.src.__support.macros.attributes libc.src.errno.errno ) @@ -14,8 +15,7 @@ add_header_library( HDRS rounding_mode.h DEPENDS - libc.include.fenv - libc.include.math + libc.hdr.fenv_macros libc.src.__support.macros.attributes libc.src.__support.macros.properties.architectures libc.src.__support.macros.sanitizer @@ -59,9 +59,9 @@ add_header_library( .fp_bits .fenv_impl .rounding_mode + libc.hdr.math_macros libc.src.__support.CPP.type_traits libc.src.__support.common - libc.include.math libc.src.errno.errno ) @@ -201,8 +201,8 @@ add_header_library( DEPENDS .fp_bits .multiply_add + libc.src.__support.big_int libc.src.__support.common - libc.src.__support.uint libc.src.__support.macros.optimization ) @@ -216,12 +216,12 @@ add_header_library( .dyadic_float .nearest_integer_operations .normal_float + libc.hdr.math_macros libc.src.__support.CPP.bit libc.src.__support.CPP.limits libc.src.__support.CPP.type_traits libc.src.__support.common libc.src.__support.macros.optimization - libc.include.math libc.src.errno.errno ) diff --git a/libc/src/__support/FPUtil/FEnvImpl.h b/libc/src/__support/FPUtil/FEnvImpl.h index 6086d5d3de2dc..4be1a57f0f4b3 100644 --- a/libc/src/__support/FPUtil/FEnvImpl.h +++ b/libc/src/__support/FPUtil/FEnvImpl.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_FENVIMPL_H #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FENVIMPL_H -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/properties/architectures.h" #include "src/errno/libc_errno.h" diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h index 155bff2f55810..ab050360c353b 100644 --- a/libc/src/__support/FPUtil/FPBits.h +++ b/libc/src/__support/FPUtil/FPBits.h @@ -11,13 +11,13 @@ #include "src/__support/CPP/bit.h" #include "src/__support/CPP/type_traits.h" -#include "src/__support/UInt128.h" #include "src/__support/common.h" #include 
"src/__support/libc_assert.h" // LIBC_ASSERT #include "src/__support/macros/attributes.h" // LIBC_INLINE, LIBC_INLINE_VAR #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_FLOAT128 #include "src/__support/math_extras.h" // mask_trailing_ones #include "src/__support/sign.h" // Sign +#include "src/__support/uint128.h" #include diff --git a/libc/src/__support/FPUtil/Hypot.h b/libc/src/__support/FPUtil/Hypot.h index 2e69965734644..76b1f07976213 100644 --- a/libc/src/__support/FPUtil/Hypot.h +++ b/libc/src/__support/FPUtil/Hypot.h @@ -15,8 +15,8 @@ #include "rounding_mode.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/type_traits.h" -#include "src/__support/UInt128.h" #include "src/__support/common.h" +#include "src/__support/uint128.h" namespace LIBC_NAMESPACE { namespace fputil { diff --git a/libc/src/__support/FPUtil/ManipulationFunctions.h b/libc/src/__support/FPUtil/ManipulationFunctions.h index 2c90b4888c2e5..a289c2ef70467 100644 --- a/libc/src/__support/FPUtil/ManipulationFunctions.h +++ b/libc/src/__support/FPUtil/ManipulationFunctions.h @@ -15,7 +15,7 @@ #include "dyadic_float.h" #include "rounding_mode.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/limits.h" // INT_MAX, INT_MIN #include "src/__support/CPP/type_traits.h" diff --git a/libc/src/__support/FPUtil/NearestIntegerOperations.h b/libc/src/__support/FPUtil/NearestIntegerOperations.h index 6b28e7ffb387b..4645ab0b5350b 100644 --- a/libc/src/__support/FPUtil/NearestIntegerOperations.h +++ b/libc/src/__support/FPUtil/NearestIntegerOperations.h @@ -13,7 +13,7 @@ #include "FPBits.h" #include "rounding_mode.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/common.h" diff --git a/libc/src/__support/FPUtil/aarch64/FEnvImpl.h b/libc/src/__support/FPUtil/aarch64/FEnvImpl.h index e0eec17e038c6..4b593cdd8cc4e 100644 --- a/libc/src/__support/FPUtil/aarch64/FEnvImpl.h +++ b/libc/src/__support/FPUtil/aarch64/FEnvImpl.h @@ -20,6 +20,7 @@ #include #include +#include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FPBits.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/__support/FPUtil/aarch64/fenv_darwin_impl.h b/libc/src/__support/FPUtil/aarch64/fenv_darwin_impl.h index fd915373020ec..773d6bfe9f892 100644 --- a/libc/src/__support/FPUtil/aarch64/fenv_darwin_impl.h +++ b/libc/src/__support/FPUtil/aarch64/fenv_darwin_impl.h @@ -20,6 +20,7 @@ #include #include +#include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FPBits.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/__support/FPUtil/arm/FEnvImpl.h b/libc/src/__support/FPUtil/arm/FEnvImpl.h index ac4673cf20f63..ddb0edcf82780 100644 --- a/libc/src/__support/FPUtil/arm/FEnvImpl.h +++ b/libc/src/__support/FPUtil/arm/FEnvImpl.h @@ -9,9 +9,9 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_ARM_FENVIMPL_H #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_ARM_FENVIMPL_H +#include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/macros/attributes.h" // For LIBC_INLINE - #include #include diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h index e0c205f52383b..12a69228d36c7 100644 --- a/libc/src/__support/FPUtil/dyadic_float.h +++ b/libc/src/__support/FPUtil/dyadic_float.h @@ -12,7 +12,7 @@ #include "FPBits.h" #include "multiply_add.h" #include "src/__support/CPP/type_traits.h" -#include 
"src/__support/UInt.h" +#include "src/__support/big_int.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include @@ -122,7 +122,8 @@ template struct DyadicFloat { int exp_lo = exp_hi - static_cast(PRECISION) - 1; - MantissaType m_hi(mantissa >> shift); + MantissaType m_hi = + shift >= MantissaType::BITS ? MantissaType(0) : mantissa >> shift; T d_hi = FPBits::create_value( sign, exp_hi, @@ -130,7 +131,8 @@ template struct DyadicFloat { IMPLICIT_MASK) .get_val(); - MantissaType round_mask = MantissaType(1) << (shift - 1); + MantissaType round_mask = + shift > MantissaType::BITS ? 0 : MantissaType(1) << (shift - 1); MantissaType sticky_mask = round_mask - MantissaType(1); bool round_bit = !(mantissa & round_mask).is_zero(); diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt index 0ae62f40dc616..09eede1570962 100644 --- a/libc/src/__support/FPUtil/generic/CMakeLists.txt +++ b/libc/src/__support/FPUtil/generic/CMakeLists.txt @@ -4,7 +4,6 @@ add_header_library( sqrt.h sqrt_80_bit_long_double.h DEPENDS - libc.include.fenv libc.src.__support.common libc.src.__support.CPP.bit libc.src.__support.CPP.type_traits @@ -41,5 +40,4 @@ add_header_library( libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization - libc.src.math.generic.math_utils ) diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h index f03af9246337f..f403aa7333b39 100644 --- a/libc/src/__support/FPUtil/generic/FMA.h +++ b/libc/src/__support/FPUtil/generic/FMA.h @@ -14,9 +14,9 @@ #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/UInt128.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/uint128.h" namespace LIBC_NAMESPACE { namespace fputil { diff --git a/libc/src/__support/FPUtil/generic/FMod.h b/libc/src/__support/FPUtil/generic/FMod.h index 24fb264b779b7..211ab926d28b0 100644 --- a/libc/src/__support/FPUtil/generic/FMod.h +++ b/libc/src/__support/FPUtil/generic/FMod.h @@ -15,7 +15,6 @@ #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/math/generic/math_utils.h" namespace LIBC_NAMESPACE { namespace fputil { diff --git a/libc/src/__support/FPUtil/generic/sqrt.h b/libc/src/__support/FPUtil/generic/sqrt.h index b6b4aaecb2ccc..7e7600ba6502a 100644 --- a/libc/src/__support/FPUtil/generic/sqrt.h +++ b/libc/src/__support/FPUtil/generic/sqrt.h @@ -15,8 +15,8 @@ #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/UInt128.h" #include "src/__support/common.h" +#include "src/__support/uint128.h" namespace LIBC_NAMESPACE { namespace fputil { diff --git a/libc/src/__support/FPUtil/generic/sqrt_80_bit_long_double.h b/libc/src/__support/FPUtil/generic/sqrt_80_bit_long_double.h index 656ade4f77353..6308ffe95493e 100644 --- a/libc/src/__support/FPUtil/generic/sqrt_80_bit_long_double.h +++ b/libc/src/__support/FPUtil/generic/sqrt_80_bit_long_double.h @@ -13,8 +13,8 @@ #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/UInt128.h" #include 
"src/__support/common.h" +#include "src/__support/uint128.h" namespace LIBC_NAMESPACE { namespace fputil { diff --git a/libc/src/__support/FPUtil/riscv/FEnvImpl.h b/libc/src/__support/FPUtil/riscv/FEnvImpl.h index b73c4798b0539..a5224330f339a 100644 --- a/libc/src/__support/FPUtil/riscv/FEnvImpl.h +++ b/libc/src/__support/FPUtil/riscv/FEnvImpl.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_RISCV_FENVIMPL_H #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_RISCV_FENVIMPL_H +#include "hdr/fenv_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/macros/attributes.h" // For LIBC_INLINE_ASM #include "src/__support/macros/config.h" // For LIBC_INLINE diff --git a/libc/src/__support/FPUtil/rounding_mode.h b/libc/src/__support/FPUtil/rounding_mode.h index 91a5b9c50e7c2..aa5e00fa560bc 100644 --- a/libc/src/__support/FPUtil/rounding_mode.h +++ b/libc/src/__support/FPUtil/rounding_mode.h @@ -9,10 +9,9 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_ROUNDING_MODE_H #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_ROUNDING_MODE_H +#include "hdr/fenv_macros.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE -#include - namespace LIBC_NAMESPACE::fputil { // Quick free-standing test whether fegetround() == FE_UPWARD. diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h index 5dcae518bb6f8..05506c04fc079 100644 --- a/libc/src/__support/RPC/rpc.h +++ b/libc/src/__support/RPC/rpc.h @@ -198,12 +198,9 @@ template struct Process { /// convergent, otherwise the compiler will sink the store and deadlock. [[clang::convergent]] LIBC_INLINE void unlock(uint64_t lane_mask, uint32_t index) { - // Do not move any writes past the unlock + // Do not move any writes past the unlock. atomic_thread_fence(cpp::MemoryOrder::RELEASE); - // Wait for other threads in the warp to finish using the lock - gpu::sync_lane(lane_mask); - // Use exactly one thread to clear the nth bit in the lock array Must // restrict to a single thread to avoid one thread dropping the lock, then // an unrelated warp claiming the lock, then a second thread in this warp @@ -331,6 +328,9 @@ template struct Port { LIBC_INLINE uint16_t get_index() const { return index; } LIBC_INLINE void close() { + // Wait for all lanes to finish using the port. + gpu::sync_lane(lane_mask); + // The server is passive, if it own the buffer when it closes we need to // give ownership back to the client. if (owns_buffer && T) diff --git a/libc/src/__support/UInt.h b/libc/src/__support/big_int.h similarity index 99% rename from libc/src/__support/UInt.h rename to libc/src/__support/big_int.h index c1e55ceef2111..e2061c4300702 100644 --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/big_int.h @@ -249,19 +249,15 @@ LIBC_INLINE constexpr bool is_negative(cpp::array &array) { enum Direction { LEFT, RIGHT }; // A bitwise shift on an array of elements. -// TODO: Make the result UB when 'offset' is greater or equal to the number of -// bits in 'array'. This will allow for better code performance. +// 'offset' must be less than TOTAL_BITS (i.e., sizeof(word) * CHAR_BIT * N) +// otherwise the behavior is undefined. 
template <Direction direction, bool is_signed, typename word, size_t N> LIBC_INLINE constexpr cpp::array<word, N> shift(cpp::array<word, N> array, size_t offset) { static_assert(direction == LEFT || direction == RIGHT); constexpr size_t WORD_BITS = cpp::numeric_limits<word>::digits; - constexpr size_t TOTAL_BITS = N * WORD_BITS; - if (LIBC_UNLIKELY(offset == 0)) - return array; - if (LIBC_UNLIKELY(offset >= TOTAL_BITS)) - return {}; #ifdef LIBC_TYPES_HAS_INT128 + constexpr size_t TOTAL_BITS = N * WORD_BITS; if constexpr (TOTAL_BITS == 128) { using type = cpp::conditional_t<is_signed, __int128_t, __uint128_t>; auto tmp = cpp::bit_cast<type>(array);
@@ -272,6 +268,8 @@ LIBC_INLINE constexpr cpp::array<word, N> shift(cpp::array<word, N> array, return cpp::bit_cast<cpp::array<word, N>>(tmp); } #endif + if (LIBC_UNLIKELY(offset == 0)) + return array; const bool is_neg = is_signed && is_negative(array); constexpr auto at = [](size_t index) -> int { // reverse iteration when direction == LEFT.
diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h index 4c59cfd99c2e6..09b13324f25bb 100644 --- a/libc/src/__support/float_to_string.h +++ b/libc/src/__support/float_to_string.h
@@ -15,7 +15,7 @@ #include "src/__support/CPP/type_traits.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/UInt.h" +#include "src/__support/big_int.h" #include "src/__support/common.h" #include "src/__support/libc_assert.h" #include "src/__support/macros/attributes.h"
diff --git a/libc/src/__support/hash.h b/libc/src/__support/hash.h index 6b362ba831891..d1218fdc25927 100644 --- a/libc/src/__support/hash.h +++ b/libc/src/__support/hash.h
@@ -11,8 +11,8 @@ #include "src/__support/CPP/bit.h" // rotl #include "src/__support/CPP/limits.h" // numeric_limits -#include "src/__support/UInt128.h" // UInt128 #include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/uint128.h" // UInt128 #include <stdint.h> // For uint64_t namespace LIBC_NAMESPACE {
diff --git a/libc/src/__support/integer_literals.h b/libc/src/__support/integer_literals.h index e99799c3512e2..5fb67464090ce 100644 --- a/libc/src/__support/integer_literals.h +++ b/libc/src/__support/integer_literals.h
@@ -14,8 +14,8 @@ #define LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H #include "src/__support/CPP/limits.h" // CHAR_BIT -#include "src/__support/UInt128.h" // UInt128 #include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/uint128.h" // UInt128 #include <stddef.h> // size_t #include <stdint.h> // uintxx_t
diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h index f72d00d1a7456..375f0e82960e3 100644 --- a/libc/src/__support/integer_to_string.h +++ b/libc/src/__support/integer_to_string.h
@@ -67,7 +67,7 @@ #include "src/__support/CPP/span.h" #include "src/__support/CPP/string_view.h" #include "src/__support/CPP/type_traits.h" -#include "src/__support/UInt.h" // make_integral_or_big_int_unsigned_t +#include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t #include "src/__support/common.h" namespace LIBC_NAMESPACE {
diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h index f622b7edaa8a7..cd0c07629f876 100644 --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h
@@ -17,13 +17,13 @@ #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/dyadic_float.h" #include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/UInt128.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/detailed_powers_of_ten.h" #include
"src/__support/high_precision_decimal.h" #include "src/__support/str_to_integer.h" #include "src/__support/str_to_num_result.h" +#include "src/__support/uint128.h" #include "src/errno/libc_errno.h" // For ERANGE namespace LIBC_NAMESPACE { diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h index 02c71d40a1c0a..6db851ab0e65a 100644 --- a/libc/src/__support/str_to_integer.h +++ b/libc/src/__support/str_to_integer.h @@ -11,10 +11,10 @@ #include "src/__support/CPP/limits.h" #include "src/__support/CPP/type_traits.h" -#include "src/__support/UInt128.h" #include "src/__support/common.h" #include "src/__support/ctype_utils.h" #include "src/__support/str_to_num_result.h" +#include "src/__support/uint128.h" #include "src/errno/libc_errno.h" // For ERANGE namespace LIBC_NAMESPACE { diff --git a/libc/src/__support/UInt128.h b/libc/src/__support/uint128.h similarity index 97% rename from libc/src/__support/UInt128.h rename to libc/src/__support/uint128.h index b6ef9ca18eb01..722e79d0802e2 100644 --- a/libc/src/__support/UInt128.h +++ b/libc/src/__support/uint128.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_UINT128_H #define LLVM_LIBC_SRC___SUPPORT_UINT128_H -#include "UInt.h" +#include "big_int.h" #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128 #ifdef LIBC_TYPES_HAS_INT128 diff --git a/libc/src/fenv/CMakeLists.txt b/libc/src/fenv/CMakeLists.txt index 0da539d187bfa..5dcf21de04f1a 100644 --- a/libc/src/fenv/CMakeLists.txt +++ b/libc/src/fenv/CMakeLists.txt @@ -18,7 +18,7 @@ add_entrypoint_object( HDRS fesetround.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -31,7 +31,7 @@ add_entrypoint_object( HDRS feclearexcept.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -44,7 +44,7 @@ add_entrypoint_object( HDRS feraiseexcept.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -57,7 +57,7 @@ add_entrypoint_object( HDRS fetestexcept.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -70,7 +70,7 @@ add_entrypoint_object( HDRS fegetenv.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -83,7 +83,7 @@ add_entrypoint_object( HDRS fesetenv.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -96,7 +96,20 @@ add_entrypoint_object( HDRS fegetexceptflag.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros + libc.src.__support.FPUtil.fenv_impl + COMPILE_OPTIONS + -O2 +) + +add_entrypoint_object( + fesetexcept + SRCS + fesetexcept.cpp + HDRS + fesetexcept.h + DEPENDS + libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -109,7 +122,7 @@ add_entrypoint_object( HDRS fesetexceptflag.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -122,7 +135,7 @@ add_entrypoint_object( HDRS feholdexcept.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -135,7 +148,7 @@ add_entrypoint_object( HDRS feupdateenv.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -148,7 +161,7 @@ add_entrypoint_object( HDRS feenableexcept.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros 
libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -161,7 +174,7 @@ add_entrypoint_object( HDRS fedisableexcept.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 @@ -174,7 +187,7 @@ add_entrypoint_object( HDRS fegetexcept.h DEPENDS - libc.include.fenv + libc.hdr.fenv_macros libc.src.__support.FPUtil.fenv_impl COMPILE_OPTIONS -O2 diff --git a/libc/src/fenv/fegetexceptflag.cpp b/libc/src/fenv/fegetexceptflag.cpp index 71b87ce7315d1..c6160da7afbde 100644 --- a/libc/src/fenv/fegetexceptflag.cpp +++ b/libc/src/fenv/fegetexceptflag.cpp @@ -15,7 +15,8 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(int, fegetexceptflag, (fexcept_t * flagp, int excepts)) { - // TODO: Add a compile time check to see if the excepts actually fit in flagp. + static_assert(sizeof(int) >= sizeof(fexcept_t), + "fexcept_t value cannot fit in an int value."); *flagp = static_cast(fputil::test_except(FE_ALL_EXCEPT) & excepts); return 0; } diff --git a/libc/src/fenv/feholdexcept.cpp b/libc/src/fenv/feholdexcept.cpp index 3c73b1f421779..f264c5ae251d3 100644 --- a/libc/src/fenv/feholdexcept.cpp +++ b/libc/src/fenv/feholdexcept.cpp @@ -9,7 +9,6 @@ #include "src/fenv/feholdexcept.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/common.h" - #include namespace LIBC_NAMESPACE { diff --git a/libc/src/fenv/fesetexcept.cpp b/libc/src/fenv/fesetexcept.cpp new file mode 100644 index 0000000000000..9afa7b73b4fb5 --- /dev/null +++ b/libc/src/fenv/fesetexcept.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of fesetexcept function ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/fenv/fesetexcept.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, fesetexcept, (int excepts)) { + return fputil::set_except(excepts); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/math/RandUtils.h b/libc/src/fenv/fesetexcept.h similarity index 56% rename from libc/test/src/math/RandUtils.h rename to libc/src/fenv/fesetexcept.h index fecbd8eaabf2c..40a7303efcb0a 100644 --- a/libc/test/src/math/RandUtils.h +++ b/libc/src/fenv/fesetexcept.h @@ -1,4 +1,4 @@ -//===-- RandUtils.h ---------------------------------------------*- C++ -*-===// +//===-- Implementation header for fesetexcept -------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,16 +6,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_TEST_SRC_MATH_RANDUTILS_H -#define LLVM_LIBC_TEST_SRC_MATH_RANDUTILS_H +#ifndef LLVM_LIBC_SRC_FENV_FESETEXCEPT_H +#define LLVM_LIBC_SRC_FENV_FESETEXCEPT_H namespace LIBC_NAMESPACE { -namespace testutils { -// Wrapper for std::rand. 
-int rand(); +int fesetexcept(int excepts); -} // namespace testutils } // namespace LIBC_NAMESPACE -#endif // LLVM_LIBC_TEST_SRC_MATH_RANDUTILS_H +#endif // LLVM_LIBC_SRC_FENV_FESETEXCEPT_H diff --git a/libc/src/fenv/fesetexceptflag.cpp b/libc/src/fenv/fesetexceptflag.cpp index 2fe7cb571a8dd..3ff8e270dc0a7 100644 --- a/libc/src/fenv/fesetexceptflag.cpp +++ b/libc/src/fenv/fesetexceptflag.cpp @@ -9,7 +9,6 @@ #include "src/fenv/fesetexceptflag.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/common.h" - #include namespace LIBC_NAMESPACE { diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index c89792b8ac7be..e8f699fabe365 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -293,6 +293,11 @@ add_math_entrypoint_object(roundf) add_math_entrypoint_object(roundl) add_math_entrypoint_object(roundf128) +add_math_entrypoint_object(roundeven) +add_math_entrypoint_object(roundevenf) +add_math_entrypoint_object(roundevenl) +add_math_entrypoint_object(roundevenf128) + add_math_entrypoint_object(scalbn) add_math_entrypoint_object(scalbnf) add_math_entrypoint_object(scalbnl) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index dc77f8b5ddba2..574e000b82a8f 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -95,18 +95,6 @@ add_entrypoint_object( libc.src.__support.FPUtil.nearest_integer_operations ) -add_object_library( - math_utils - SRCS - math_utils.cpp - HDRS - math_utils.h - DEPENDS - libc.include.errno - libc.include.math - libc.src.errno.errno -) - add_header_library( range_reduction HDRS @@ -139,7 +127,6 @@ add_entrypoint_object( ../cosf.h DEPENDS .sincosf_utils - libc.include.math libc.src.errno.errno libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fenv_impl @@ -162,7 +149,6 @@ add_entrypoint_object( DEPENDS .range_reduction .sincosf_utils - libc.include.math libc.src.errno.errno libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fenv_impl @@ -185,7 +171,6 @@ add_entrypoint_object( DEPENDS .range_reduction .sincosf_utils - libc.include.math libc.src.errno.errno libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -207,7 +192,6 @@ add_entrypoint_object( DEPENDS .range_reduction .sincosf_utils - libc.include.math libc.src.errno.errno libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fenv_impl @@ -417,6 +401,55 @@ add_entrypoint_object( libc.src.__support.FPUtil.nearest_integer_operations ) +add_entrypoint_object( + roundeven + SRCS + roundeven.cpp + HDRS + ../roundeven.h + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.__support.FPUtil.nearest_integer_operations +) + +add_entrypoint_object( + roundevenf + SRCS + roundevenf.cpp + HDRS + ../roundevenf.h + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.__support.FPUtil.nearest_integer_operations +) + +add_entrypoint_object( + roundevenl + SRCS + roundevenl.cpp + HDRS + ../roundevenl.h + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.__support.FPUtil.nearest_integer_operations +) + +add_entrypoint_object( + roundevenf128 + SRCS + roundevenf128.cpp + HDRS + ../roundevenf128.h + COMPILE_OPTIONS + -O3 + DEPENDS + libc.src.__support.macros.properties.types + libc.src.__support.FPUtil.nearest_integer_operations +) + add_entrypoint_object( lround SRCS @@ -704,8 +737,6 @@ add_object_library( exp_utils.h SRCS exp_utils.cpp - DEPENDS - .math_utils ) add_entrypoint_object( @@ -721,7 +752,6 @@ add_entrypoint_object( 
libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.polyeval libc.src.__support.macros.optimization - libc.include.math COMPILE_OPTIONS -O3 )
@@ -736,7 +766,6 @@ add_entrypoint_object( .common_constants .explogxf libc.include.errno - libc.include.math libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float
@@ -772,7 +801,6 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.include.errno libc.src.errno.errno - libc.include.math COMPILE_OPTIONS -O3 )
@@ -787,7 +815,6 @@ add_entrypoint_object( .common_constants .explogxf libc.include.errno - libc.include.math libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float
@@ -822,7 +849,6 @@ add_header_library( libc.src.__support.common libc.include.errno libc.src.errno.errno - libc.include.math ) add_entrypoint_object(
@@ -868,7 +894,6 @@ add_entrypoint_object( .common_constants .explogxf libc.include.errno - libc.include.math libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float
@@ -902,7 +927,6 @@ add_header_library( libc.src.__support.common libc.include.errno libc.src.errno.errno - libc.include.math COMPILE_OPTIONS -O3 )
@@ -929,7 +953,6 @@ add_entrypoint_object( .common_constants .explogxf libc.include.errno - libc.include.math libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.dyadic_float
@@ -965,7 +988,6 @@ add_entrypoint_object( libc.src.__support.macros.optimization libc.include.errno libc.src.errno.errno - libc.include.math COMPILE_OPTIONS -O3 )
@@ -982,7 +1004,6 @@ add_entrypoint_object( .exp2f_impl .explogxf libc.include.errno - libc.include.math libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.FPUtil.fenv_impl
@@ -2706,7 +2727,6 @@ add_object_library( libc.src.__support.common libc.include.errno libc.src.errno.errno - libc.include.math COMPILE_OPTIONS -O3 )
diff --git a/libc/src/math/generic/exp_utils.cpp b/libc/src/math/generic/exp_utils.cpp index afdaea347478d..ad13919578ec1 100644 --- a/libc/src/math/generic/exp_utils.cpp +++ b/libc/src/math/generic/exp_utils.cpp
@@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "exp_utils.h" -#include "math_utils.h" namespace LIBC_NAMESPACE {
diff --git a/libc/src/math/generic/explogxf.h b/libc/src/math/generic/explogxf.h index 8817ba1011a8c..f7d04f517ce57 100644 --- a/libc/src/math/generic/explogxf.h +++ b/libc/src/math/generic/explogxf.h
@@ -10,7 +10,6 @@ #define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H #include "common_constants.h" -#include "math_utils.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/optional.h" #include "src/__support/FPUtil/FEnvImpl.h"
diff --git a/libc/src/math/generic/log1p.cpp b/libc/src/math/generic/log1p.cpp index 83bd753cde5da..2b187080a057b 100644 --- a/libc/src/math/generic/log1p.cpp +++ b/libc/src/math/generic/log1p.cpp
@@ -28,8 +28,9 @@ using LIBC_NAMESPACE::operator""_u128; namespace { -// Extra errors from P is from using x^2 to reduce evaluation latency. -constexpr double P_ERR = 0x1.0p-50; +// Extra errors from P are from using x^2 to reduce evaluation latency and +// directional rounding.
+constexpr double P_ERR = 0x1.0p-49; // log(2) with 128-bit precision generated by SageMath with: // def format_hex(value): diff --git a/libc/src/math/generic/log_range_reduction.h b/libc/src/math/generic/log_range_reduction.h index 64c0fc3aa4f53..d12da47a2cfae 100644 --- a/libc/src/math/generic/log_range_reduction.h +++ b/libc/src/math/generic/log_range_reduction.h @@ -11,7 +11,7 @@ #include "common_constants.h" #include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/UInt128.h" +#include "src/__support/uint128.h" namespace LIBC_NAMESPACE { diff --git a/libc/src/math/generic/math_utils.cpp b/libc/src/math/generic/math_utils.cpp deleted file mode 100644 index 14bbb2babc60a..0000000000000 --- a/libc/src/math/generic/math_utils.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- Implementation of math utils --------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "math_utils.h" - -namespace LIBC_NAMESPACE { - -constexpr float XFlowValues::OVERFLOW_VALUE = 0x1p97f; -constexpr float XFlowValues::UNDERFLOW_VALUE = 0x1p-95f; -constexpr float XFlowValues::MAY_UNDERFLOW_VALUE = 0x1.4p-75f; - -constexpr double XFlowValues::OVERFLOW_VALUE = 0x1p769; -constexpr double XFlowValues::UNDERFLOW_VALUE = 0x1p-767; -constexpr double XFlowValues::MAY_UNDERFLOW_VALUE = 0x1.8p-538; - -} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/math_utils.h b/libc/src/math/generic/math_utils.h deleted file mode 100644 index cced761fc8c82..0000000000000 --- a/libc/src/math/generic/math_utils.h +++ /dev/null @@ -1,95 +0,0 @@ -//===-- Collection of utils for implementing math functions -----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_MATH_GENERIC_MATH_UTILS_H -#define LLVM_LIBC_SRC_MATH_GENERIC_MATH_UTILS_H - -#include "include/llvm-libc-macros/math-macros.h" -#include "src/__support/CPP/bit.h" -#include "src/__support/CPP/type_traits.h" -#include "src/__support/common.h" -#include "src/errno/libc_errno.h" - -#include - -// TODO: evaluate which functions from this file are actually used. - -namespace LIBC_NAMESPACE { - -// TODO: Remove this, or move it to exp_utils.cpp which is its only user. -LIBC_INLINE double as_double(uint64_t x) { return cpp::bit_cast(x); } - -// Values to trigger underflow and overflow. 
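The math_utils helpers deleted in this hunk (the deletion continues just below) forced genuine runtime overflow and underflow through a volatile barrier before setting errno. For reference, a self-contained sketch of that opt_barrier technique; the names mirror the removed helpers, but the snippet is illustrative rather than part of the patch:

#include <cerrno>
#include <cstdio>

// Volatile round-trip: blocks constant folding so the multiply really runs
// at run time, overflowing to +inf and raising FE_OVERFLOW as a side effect.
template <typename T> static T opt_barrier(T x) {
  volatile T y = x;
  return y;
}

int main() {
  float big = opt_barrier(0x1p97f); // the deleted float OVERFLOW_VALUE
  float inf = big * big;            // 2^194 > FLT_MAX, rounds to +inf
  errno = ERANGE;                   // what with_errno() did on this path
  std::printf("%g\n", inf);         // prints "inf"
}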
-template struct XFlowValues; - -template <> struct XFlowValues { - static const float OVERFLOW_VALUE; - static const float UNDERFLOW_VALUE; - static const float MAY_UNDERFLOW_VALUE; -}; - -template <> struct XFlowValues { - static const double OVERFLOW_VALUE; - static const double UNDERFLOW_VALUE; - static const double MAY_UNDERFLOW_VALUE; -}; - -template LIBC_INLINE T with_errno(T x, int err) { - if (math_errhandling & MATH_ERRNO) - libc_errno = err; - return x; -} - -template LIBC_INLINE void force_eval(T x) { - volatile T y LIBC_UNUSED = x; -} - -template LIBC_INLINE T opt_barrier(T x) { - volatile T y = x; - return y; -} - -template struct IsFloatOrDouble { - static constexpr bool - Value = // NOLINT so that this Value can match the ones for IsSame - cpp::is_same_v || cpp::is_same_v; -}; - -template -using EnableIfFloatOrDouble = cpp::enable_if_t::Value, int>; - -template = 0> -T xflow(uint32_t sign, T y) { - // Underflow happens when two extremely small values are multiplied. - // Likewise, overflow happens when two large values are multiplied. - y = opt_barrier(sign ? -y : y) * y; - return with_errno(y, ERANGE); -} - -template = 0> T overflow(uint32_t sign) { - return xflow(sign, XFlowValues::OVERFLOW_VALUE); -} - -template = 0> T underflow(uint32_t sign) { - return xflow(sign, XFlowValues::UNDERFLOW_VALUE); -} - -template = 0> -T may_underflow(uint32_t sign) { - return xflow(sign, XFlowValues::MAY_UNDERFLOW_VALUE); -} - -template = 0> -LIBC_INLINE constexpr float invalid(T x) { - T y = (x - x) / (x - x); - return isnan(x) ? y : with_errno(y, EDOM); -} - -} // namespace LIBC_NAMESPACE - -#endif // LLVM_LIBC_SRC_MATH_GENERIC_MATH_UTILS_H diff --git a/libc/src/math/generic/roundeven.cpp b/libc/src/math/generic/roundeven.cpp new file mode 100644 index 0000000000000..5f2adf9b5fce6 --- /dev/null +++ b/libc/src/math/generic/roundeven.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of roundeven function ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/roundeven.h" +#include "src/__support/FPUtil/NearestIntegerOperations.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(double, roundeven, (double x)) { + return fputil::round_using_specific_rounding_mode(x, FP_INT_TONEAREST); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/roundevenf.cpp b/libc/src/math/generic/roundevenf.cpp new file mode 100644 index 0000000000000..353bec74ecf02 --- /dev/null +++ b/libc/src/math/generic/roundevenf.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of roundevenf function -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/roundevenf.h" +#include "src/__support/FPUtil/NearestIntegerOperations.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(float, roundevenf, (float x)) { + return fputil::round_using_specific_rounding_mode(x, FP_INT_TONEAREST); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/roundevenf128.cpp b/libc/src/math/generic/roundevenf128.cpp new file mode 100644 index 0000000000000..259388c86fd33 --- /dev/null +++ b/libc/src/math/generic/roundevenf128.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of roundevenf128 function --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/roundevenf128.h" +#include "src/__support/FPUtil/NearestIntegerOperations.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(float128, roundevenf128, (float128 x)) { + return fputil::round_using_specific_rounding_mode(x, FP_INT_TONEAREST); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/roundevenl.cpp b/libc/src/math/generic/roundevenl.cpp new file mode 100644 index 0000000000000..f8f429faeec8a --- /dev/null +++ b/libc/src/math/generic/roundevenl.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of roundevenl function -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/roundevenl.h" +#include "src/__support/FPUtil/NearestIntegerOperations.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(long double, roundevenl, (long double x)) { + return fputil::round_using_specific_rounding_mode(x, FP_INT_TONEAREST); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/math/RandUtils.cpp b/libc/src/math/roundeven.h similarity index 60% rename from libc/test/src/math/RandUtils.cpp rename to libc/src/math/roundeven.h index 0d09764f6056d..9c76b1fe334ab 100644 --- a/libc/test/src/math/RandUtils.cpp +++ b/libc/src/math/roundeven.h @@ -1,4 +1,4 @@ -//===-- RandUtils.cpp -----------------------------------------------------===// +//===-- Implementation header for roundeven ---------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
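All four roundeven* entrypoints added above reduce to round_using_specific_rounding_mode(x, FP_INT_TONEAREST): halfway cases go to the nearest even integer, independent of the caller's current rounding mode. A quick hosted-libm model of the same semantics (in the default FE_TONEAREST mode, nearbyint() also resolves ties to even), useful for sanity-checking expectations:

#include <cassert>
#include <cmath>

int main() {
  // Ties go to the nearest even integer...
  assert(std::nearbyint(2.5) == 2.0);
  assert(std::nearbyint(3.5) == 4.0);
  assert(std::nearbyint(-2.5) == -2.0);
  // ...whereas round() sends halfway cases away from zero.
  assert(std::round(2.5) == 3.0);
  return 0;
}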
@@ -6,14 +6,13 @@ // //===----------------------------------------------------------------------===// -#include "RandUtils.h" - -#include +#ifndef LLVM_LIBC_SRC_MATH_ROUNDEVEN_H +#define LLVM_LIBC_SRC_MATH_ROUNDEVEN_H namespace LIBC_NAMESPACE { -namespace testutils { -int rand() { return std::rand(); } +double roundeven(double x); -} // namespace testutils } // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_ROUNDEVEN_H diff --git a/libc/src/math/roundevenf.h b/libc/src/math/roundevenf.h new file mode 100644 index 0000000000000..447e7fd940c18 --- /dev/null +++ b/libc/src/math/roundevenf.h @@ -0,0 +1,18 @@ +//===-- Implementation header for roundevenf --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_ROUNDEVENF_H +#define LLVM_LIBC_SRC_MATH_ROUNDEVENF_H + +namespace LIBC_NAMESPACE { + +float roundevenf(float x); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_ROUNDEVENF_H diff --git a/libc/src/math/roundevenf128.h b/libc/src/math/roundevenf128.h new file mode 100644 index 0000000000000..589839d090756 --- /dev/null +++ b/libc/src/math/roundevenf128.h @@ -0,0 +1,20 @@ +//===-- Implementation header for roundevenf128 -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_ROUNDEVENF128_H +#define LLVM_LIBC_SRC_MATH_ROUNDEVENF128_H + +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE { + +float128 roundevenf128(float128 x); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_ROUNDEVENF128_H diff --git a/libc/src/math/roundevenl.h b/libc/src/math/roundevenl.h new file mode 100644 index 0000000000000..a2f3397e4479a --- /dev/null +++ b/libc/src/math/roundevenl.h @@ -0,0 +1,18 @@ +//===-- Implementation header for roundevenl --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_ROUNDEVENL_H +#define LLVM_LIBC_SRC_MATH_ROUNDEVENL_H + +namespace LIBC_NAMESPACE { + +long double roundevenl(long double x); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_ROUNDEVENL_H diff --git a/libc/src/stdio/fseeko.h b/libc/src/stdio/fseeko.h index 3202ed2f97d0e..77fb41215c318 100644 --- a/libc/src/stdio/fseeko.h +++ b/libc/src/stdio/fseeko.h @@ -10,7 +10,6 @@ #define LLVM_LIBC_SRC_STDIO_FSEEKO_H #include -#include namespace LIBC_NAMESPACE { diff --git a/libc/src/stdio/ftello.h b/libc/src/stdio/ftello.h index 0fdf13ab6bdbc..5ab17f9244a5a 100644 --- a/libc/src/stdio/ftello.h +++ b/libc/src/stdio/ftello.h @@ -10,7 +10,6 @@ #define LLVM_LIBC_SRC_STDIO_FTELLO_H #include -#include namespace LIBC_NAMESPACE { diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt index 7db79c54beb0a..21ff0d43ab728 100644 --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -82,21 +82,21 @@ add_object_library( float_dec_converter.h fixed_converter.h #TODO: Check if this should be disabled when fixed unavail DEPENDS - .writer .core_structs .printf_config + .writer + libc.src.__support.big_int + libc.src.__support.common libc.src.__support.CPP.limits libc.src.__support.CPP.span libc.src.__support.CPP.string_view - libc.src.__support.FPUtil.fp_bits + libc.src.__support.float_to_string libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode - libc.src.__support.common + libc.src.__support.integer_to_string libc.src.__support.libc_assert - libc.src.__support.uint libc.src.__support.uint128 - libc.src.__support.integer_to_string - libc.src.__support.float_to_string ) diff --git a/libc/src/stdio/printf_core/float_dec_converter.h b/libc/src/stdio/printf_core/float_dec_converter.h index c4e8aaa2f0e2e..666e4c9fa75e1 100644 --- a/libc/src/stdio/printf_core/float_dec_converter.h +++ b/libc/src/stdio/printf_core/float_dec_converter.h @@ -12,7 +12,7 @@ #include "src/__support/CPP/string_view.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/UInt.h" // is_big_int_v +#include "src/__support/big_int.h" // is_big_int_v #include "src/__support/float_to_string.h" #include "src/__support/integer_to_string.h" #include "src/__support/libc_assert.h" diff --git a/libc/test/CMakeLists.txt b/libc/test/CMakeLists.txt index 745a9a04b4af8..5e26a1000633b 100644 --- a/libc/test/CMakeLists.txt +++ b/libc/test/CMakeLists.txt @@ -18,10 +18,6 @@ add_subdirectory(include) add_subdirectory(src) add_subdirectory(utils) -if(LLVM_LIBC_FULL_BUILD AND NOT LIBC_TARGET_OS_IS_BAREMETAL) - add_subdirectory(IntegrationTest) -endif() - if(NOT LLVM_LIBC_FULL_BUILD) return() endif() @@ -31,4 +27,6 @@ if(NOT ${LIBC_TARGET_OS} STREQUAL "linux" AND # Integration tests are currently only available for linux and the GPU. 
return() endif() + +add_subdirectory(IntegrationTest) add_subdirectory(integration) diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index d830d22bb540e..4411170502ed6 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -68,6 +68,7 @@ add_unittest_framework_library( Test.h TestLogger.h DEPENDS + libc.src.__support.big_int libc.src.__support.c_string libc.src.__support.CPP.string libc.src.__support.CPP.string_view @@ -75,7 +76,6 @@ add_unittest_framework_library( libc.src.__support.fixed_point.fx_rep libc.src.__support.macros.properties.types libc.src.__support.OSUtil.osutil - libc.src.__support.uint libc.src.__support.uint128 ) @@ -103,9 +103,9 @@ add_header_library( HDRS StringUtils.h DEPENDS + libc.src.__support.big_int libc.src.__support.CPP.string libc.src.__support.CPP.type_traits - libc.src.__support.uint ) add_unittest_framework_library( diff --git a/libc/test/UnitTest/FPMatcher.h b/libc/test/UnitTest/FPMatcher.h index f4553eac5c8a5..a76e0b8ef6f6f 100644 --- a/libc/test/UnitTest/FPMatcher.h +++ b/libc/test/UnitTest/FPMatcher.h @@ -18,7 +18,7 @@ #include "test/UnitTest/StringUtils.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace LIBC_NAMESPACE { namespace testing { diff --git a/libc/test/UnitTest/LibcDeathTestExecutors.cpp b/libc/test/UnitTest/LibcDeathTestExecutors.cpp index e891c4e3c0b58..fa6d16410bb7a 100644 --- a/libc/test/UnitTest/LibcDeathTestExecutors.cpp +++ b/libc/test/UnitTest/LibcDeathTestExecutors.cpp @@ -19,7 +19,7 @@ namespace testing { bool Test::testProcessKilled(testutils::FunctionCaller *Func, int Signal, const char *LHSStr, const char *RHSStr, internal::Location Loc) { - testutils::ProcessStatus Result = testutils::invoke_in_subprocess(Func, 500); + testutils::ProcessStatus Result = testutils::invoke_in_subprocess(Func, 1000); if (const char *error = Result.get_error()) { Ctx->markFail(); @@ -31,7 +31,7 @@ bool Test::testProcessKilled(testutils::FunctionCaller *Func, int Signal, if (Result.timed_out()) { Ctx->markFail(); tlog << Loc; - tlog << "Process timed out after " << 500 << " milliseconds.\n"; + tlog << "Process timed out after " << 1000 << " milliseconds.\n"; return false; } @@ -62,7 +62,7 @@ bool Test::testProcessKilled(testutils::FunctionCaller *Func, int Signal, bool Test::testProcessExits(testutils::FunctionCaller *Func, int ExitCode, const char *LHSStr, const char *RHSStr, internal::Location Loc) { - testutils::ProcessStatus Result = testutils::invoke_in_subprocess(Func, 500); + testutils::ProcessStatus Result = testutils::invoke_in_subprocess(Func, 1000); if (const char *error = Result.get_error()) { Ctx->markFail(); @@ -74,7 +74,7 @@ bool Test::testProcessExits(testutils::FunctionCaller *Func, int ExitCode, if (Result.timed_out()) { Ctx->markFail(); tlog << Loc; - tlog << "Process timed out after " << 500 << " milliseconds.\n"; + tlog << "Process timed out after " << 1000 << " milliseconds.\n"; return false; } diff --git a/libc/test/UnitTest/LibcTest.cpp b/libc/test/UnitTest/LibcTest.cpp index 03cd25191ecd5..846ad331e5237 100644 --- a/libc/test/UnitTest/LibcTest.cpp +++ b/libc/test/UnitTest/LibcTest.cpp @@ -11,9 +11,9 @@ #include "include/llvm-libc-macros/stdfix-macros.h" #include "src/__support/CPP/string.h" #include "src/__support/CPP/string_view.h" -#include "src/__support/UInt128.h" #include "src/__support/fixed_point/fx_rep.h" #include "src/__support/macros/properties/types.h" // 
LIBC_TYPES_HAS_INT128 +#include "src/__support/uint128.h" #include "test/UnitTest/TestLogger.h" #if __STDC_HOSTED__ diff --git a/libc/test/UnitTest/RoundingModeUtils.cpp b/libc/test/UnitTest/RoundingModeUtils.cpp index c8f32f81e7134..cb34c5eab421e 100644 --- a/libc/test/UnitTest/RoundingModeUtils.cpp +++ b/libc/test/UnitTest/RoundingModeUtils.cpp @@ -10,7 +10,7 @@ #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/rounding_mode.h" -#include +#include "hdr/fenv_macros.h" namespace LIBC_NAMESPACE { namespace fputil { diff --git a/libc/test/UnitTest/StringUtils.h b/libc/test/UnitTest/StringUtils.h index cab0b58f96905..61d74b49d4c98 100644 --- a/libc/test/UnitTest/StringUtils.h +++ b/libc/test/UnitTest/StringUtils.h @@ -11,7 +11,7 @@ #include "src/__support/CPP/string.h" #include "src/__support/CPP/type_traits.h" -#include "src/__support/UInt.h" +#include "src/__support/big_int.h" namespace LIBC_NAMESPACE { diff --git a/libc/test/UnitTest/TestLogger.cpp b/libc/test/UnitTest/TestLogger.cpp index 4756188b46cb0..feba4b5ddd39b 100644 --- a/libc/test/UnitTest/TestLogger.cpp +++ b/libc/test/UnitTest/TestLogger.cpp @@ -1,10 +1,10 @@ #include "test/UnitTest/TestLogger.h" #include "src/__support/CPP/string.h" #include "src/__support/CPP/string_view.h" -#include "src/__support/OSUtil/io.h" // write_to_stderr -#include "src/__support/UInt.h" // is_big_int -#include "src/__support/UInt128.h" +#include "src/__support/OSUtil/io.h" // write_to_stderr +#include "src/__support/big_int.h" // is_big_int #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128 +#include "src/__support/uint128.h" #include diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index f70ffda3f700e..a5e7a2a4dee72 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -3,7 +3,7 @@ function(add_fp_unittest name) "MATH_UNITTEST" "NEED_MPFR;UNIT_TEST_ONLY;HERMETIC_TEST_ONLY" # Optional arguments "" # Single value arguments - "LINK_LIBRARIES" # Multi-value arguments + "LINK_LIBRARIES;DEPENDS" # Multi-value arguments ${ARGN} ) @@ -24,15 +24,21 @@ function(add_fp_unittest name) message(FATAL_ERROR "Hermetic math test cannot require MPFR.") endif() set(test_type UNIT_TEST_ONLY) - list(APPEND MATH_UNITTEST_LINK_LIBRARIES libcMPFRWrapper libc_math_test_utils -lmpfr -lgmp) + list(APPEND MATH_UNITTEST_LINK_LIBRARIES libcMPFRWrapper -lmpfr -lgmp) endif() list(APPEND MATH_UNITTEST_LINK_LIBRARIES LibcFPTestHelpers) + set(deps libc.hdr.math_macros) + if(MATH_UNITTEST_DEPENDS) + list(APPEND deps ${MATH_UNITTEST_DEPENDS}) + endif() + add_libc_test( ${name} ${test_type} LINK_LIBRARIES "${MATH_UNITTEST_LINK_LIBRARIES}" "${MATH_UNITTEST_UNPARSED_ARGUMENTS}" + DEPENDS "${deps}" ) endfunction(add_fp_unittest) diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index 51b897f8b595a..5d1230f5f3a70 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -78,11 +78,11 @@ add_libc_test( SRCS integer_to_string_test.cpp DEPENDS + libc.src.__support.big_int libc.src.__support.CPP.limits libc.src.__support.CPP.string_view libc.src.__support.integer_literals libc.src.__support.integer_to_string - libc.src.__support.uint libc.src.__support.uint128 ) @@ -101,15 +101,15 @@ endif() if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX) add_libc_test( - uint_test + big_int_test SUITE libc-support-tests SRCS - uint_test.cpp + big_int_test.cpp DEPENDS + libc.src.__support.big_int libc.src.__support.CPP.optional 
libc.src.__support.macros.properties.types - libc.src.__support.uint ) endif() diff --git a/libc/test/src/__support/CPP/CMakeLists.txt b/libc/test/src/__support/CPP/CMakeLists.txt index 74aa0c705ec46..708548f812c66 100644 --- a/libc/test/src/__support/CPP/CMakeLists.txt +++ b/libc/test/src/__support/CPP/CMakeLists.txt @@ -17,9 +17,9 @@ add_libc_test( SRCS bit_test.cpp DEPENDS + libc.src.__support.big_int libc.src.__support.CPP.bit libc.src.__support.macros.properties.types - libc.src.__support.uint ) add_libc_test( @@ -59,9 +59,9 @@ add_libc_test( SRCS limits_test.cpp DEPENDS + libc.src.__support.big_int libc.src.__support.CPP.limits libc.src.__support.macros.properties.types - libc.src.__support.uint ) add_libc_test( diff --git a/libc/test/src/__support/CPP/bit_test.cpp b/libc/test/src/__support/CPP/bit_test.cpp index 875b47e6a1980..299623d2ca240 100644 --- a/libc/test/src/__support/CPP/bit_test.cpp +++ b/libc/test/src/__support/CPP/bit_test.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/__support/CPP/bit.h" -#include "src/__support/UInt.h" +#include "src/__support/big_int.h" #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128 #include "test/UnitTest/Test.h" diff --git a/libc/test/src/__support/CPP/limits_test.cpp b/libc/test/src/__support/CPP/limits_test.cpp index efcd6839d0733..bcf7d5ed6a6e7 100644 --- a/libc/test/src/__support/CPP/limits_test.cpp +++ b/libc/test/src/__support/CPP/limits_test.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/__support/CPP/limits.h" -#include "src/__support/UInt.h" +#include "src/__support/big_int.h" #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128 #include "test/UnitTest/Test.h" diff --git a/libc/test/src/__support/FPUtil/dyadic_float_test.cpp b/libc/test/src/__support/FPUtil/dyadic_float_test.cpp index 5ee9aaad56382..809381ed47b59 100644 --- a/libc/test/src/__support/FPUtil/dyadic_float_test.cpp +++ b/libc/test/src/__support/FPUtil/dyadic_float_test.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/UInt.h" +#include "src/__support/big_int.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" diff --git a/libc/test/src/__support/FPUtil/rounding_mode_test.cpp b/libc/test/src/__support/FPUtil/rounding_mode_test.cpp index 8077a5aab7afd..5d62bc8c9ae93 100644 --- a/libc/test/src/__support/FPUtil/rounding_mode_test.cpp +++ b/libc/test/src/__support/FPUtil/rounding_mode_test.cpp @@ -10,7 +10,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include +#include "hdr/fenv_macros.h" using LIBC_NAMESPACE::testing::mpfr::ForceRoundingMode; using LIBC_NAMESPACE::testing::mpfr::RoundingMode; diff --git a/libc/test/src/__support/uint_test.cpp b/libc/test/src/__support/big_int_test.cpp similarity index 98% rename from libc/test/src/__support/uint_test.cpp rename to libc/test/src/__support/big_int_test.cpp index 5696e54c73f36..1c4f0ac29171f 100644 --- a/libc/test/src/__support/uint_test.cpp +++ b/libc/test/src/__support/big_int_test.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "src/__support/CPP/optional.h" -#include "src/__support/UInt.h" +#include "src/__support/big_int.h" #include "src/__support/integer_literals.h" 
// parse_unsigned_bigint #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128 -#include "include/llvm-libc-macros/math-macros.h" // HUGE_VALF, HUGE_VALF +#include "hdr/math_macros.h" // HUGE_VALF, HUGE_VALF #include "test/UnitTest/Test.h" namespace LIBC_NAMESPACE { @@ -192,7 +192,7 @@ TYPED_TEST(LlvmLibcUIntClassTest, Masks, Types) { TYPED_TEST(LlvmLibcUIntClassTest, CountBits, Types) { if constexpr (!T::SIGNED) { - for (size_t i = 0; i <= T::BITS; ++i) { + for (size_t i = 0; i < T::BITS; ++i) { const auto l_one = T::all_ones() << i; // 0b111...000 const auto r_one = T::all_ones() >> i; // 0b000...111 const int zeros = i; @@ -559,10 +559,6 @@ TEST(LlvmLibcUIntClassTest, ShiftLeftTests) { LL_UInt128 result5({0, 0x2468ace000000000}); EXPECT_EQ((val2 << 100), result5); - LL_UInt128 result6({0, 0}); - EXPECT_EQ((val2 << 128), result6); - EXPECT_EQ((val2 << 256), result6); - LL_UInt192 val3({1, 0, 0}); LL_UInt192 result7({0, 1, 0}); EXPECT_EQ((val3 << 64), result7); @@ -589,10 +585,6 @@ TEST(LlvmLibcUIntClassTest, ShiftRightTests) { LL_UInt128 result5({0x0000000001234567, 0}); EXPECT_EQ((val2 >> 100), result5); - LL_UInt128 result6({0, 0}); - EXPECT_EQ((val2 >> 128), result6); - EXPECT_EQ((val2 >> 256), result6); - LL_UInt128 v1({0x1111222233334444, 0xaaaabbbbccccdddd}); LL_UInt128 r1({0xaaaabbbbccccdddd, 0}); EXPECT_EQ((v1 >> 64), r1); diff --git a/libc/test/src/__support/high_precision_decimal_test.cpp b/libc/test/src/__support/high_precision_decimal_test.cpp index 2bb28bcdab021..7a3c323b06d51 100644 --- a/libc/test/src/__support/high_precision_decimal_test.cpp +++ b/libc/test/src/__support/high_precision_decimal_test.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/UInt128.h" #include "src/__support/high_precision_decimal.h" +#include "src/__support/uint128.h" #include "test/UnitTest/Test.h" diff --git a/libc/test/src/__support/integer_to_string_test.cpp b/libc/test/src/__support/integer_to_string_test.cpp index 270fddd828b68..e644751b56c93 100644 --- a/libc/test/src/__support/integer_to_string_test.cpp +++ b/libc/test/src/__support/integer_to_string_test.cpp @@ -9,10 +9,10 @@ #include "src/__support/CPP/limits.h" #include "src/__support/CPP/span.h" #include "src/__support/CPP/string_view.h" -#include "src/__support/UInt.h" -#include "src/__support/UInt128.h" +#include "src/__support/big_int.h" #include "src/__support/integer_literals.h" #include "src/__support/integer_to_string.h" +#include "src/__support/uint128.h" #include "test/UnitTest/Test.h" diff --git a/libc/test/src/__support/math_extras_test.cpp b/libc/test/src/__support/math_extras_test.cpp index 401e631ea4bac..0047888965177 100644 --- a/libc/test/src/__support/math_extras_test.cpp +++ b/libc/test/src/__support/math_extras_test.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/UInt128.h" // UInt<128> #include "src/__support/integer_literals.h" #include "src/__support/math_extras.h" +#include "src/__support/uint128.h" // UInt<128> #include "test/UnitTest/Test.h" namespace LIBC_NAMESPACE { diff --git a/libc/test/src/__support/str_to_fp_test.h b/libc/test/src/__support/str_to_fp_test.h index bddff035fdd16..8d6181cda884b 100644 --- a/libc/test/src/__support/str_to_fp_test.h +++ b/libc/test/src/__support/str_to_fp_test.h @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "src/__support/FPUtil/FPBits.h" 
-#include "src/__support/UInt128.h" #include "src/__support/str_to_float.h" +#include "src/__support/uint128.h" #include "src/errno/libc_errno.h" #include "test/UnitTest/Test.h" diff --git a/libc/test/src/fenv/CMakeLists.txt b/libc/test/src/fenv/CMakeLists.txt index ba338bb6c7318..577735599dc01 100644 --- a/libc/test/src/fenv/CMakeLists.txt +++ b/libc/test/src/fenv/CMakeLists.txt @@ -20,6 +20,7 @@ add_libc_unittest( DEPENDS libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept + libc.src.fenv.fesetexcept libc.src.fenv.fetestexcept libc.src.__support.FPUtil.fenv_impl ) @@ -117,7 +118,7 @@ if (NOT (LLVM_USE_SANITIZER OR (${LIBC_TARGET_OS} STREQUAL "windows") SRCS feholdexcept_test.cpp DEPENDS - libc.include.fenv + libc.hdr.fenv_macros libc.src.fenv.feholdexcept libc.src.__support.FPUtil.fenv_impl LINK_LIBRARIES diff --git a/libc/test/src/fenv/enabled_exceptions_test.cpp b/libc/test/src/fenv/enabled_exceptions_test.cpp index 8bc2454faf9ea..53440b704ca76 100644 --- a/libc/test/src/fenv/enabled_exceptions_test.cpp +++ b/libc/test/src/fenv/enabled_exceptions_test.cpp @@ -15,7 +15,7 @@ #include "test/UnitTest/FPExceptMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fenv_macros.h" #include // This test enables an exception and verifies that raising that exception diff --git a/libc/test/src/fenv/exception_status_test.cpp b/libc/test/src/fenv/exception_status_test.cpp index e4e2240fc374c..a7000020b1a3c 100644 --- a/libc/test/src/fenv/exception_status_test.cpp +++ b/libc/test/src/fenv/exception_status_test.cpp @@ -1,4 +1,5 @@ -//===-- Unittests for feclearexcept, feraiseexcept and fetestexpect -------===// +//===-- Unittests for feclearexcept, feraiseexcept, fetestexpect ----------===// +//===-- and fesetexcept ---------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -8,12 +9,13 @@ #include "src/fenv/feclearexcept.h" #include "src/fenv/feraiseexcept.h" +#include "src/fenv/fesetexcept.h" #include "src/fenv/fetestexcept.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fenv_macros.h" TEST(LlvmLibcExceptionStatusTest, RaiseAndTest) { // This test raises a set of exceptions and checks that the exception @@ -38,6 +40,11 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndTest) { ASSERT_EQ(r, 0); s = LIBC_NAMESPACE::fetestexcept(e); ASSERT_EQ(s, 0); + + r = LIBC_NAMESPACE::fesetexcept(e); + ASSERT_EQ(r, 0); + s = LIBC_NAMESPACE::fetestexcept(e); + ASSERT_EQ(s, e); } for (int e1 : excepts) { @@ -52,6 +59,11 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndTest) { ASSERT_EQ(r, 0); s = LIBC_NAMESPACE::fetestexcept(e); ASSERT_EQ(s, 0); + + r = LIBC_NAMESPACE::fesetexcept(e); + ASSERT_EQ(r, 0); + s = LIBC_NAMESPACE::fetestexcept(e); + ASSERT_EQ(s, e); } } @@ -68,6 +80,11 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndTest) { ASSERT_EQ(r, 0); s = LIBC_NAMESPACE::fetestexcept(e); ASSERT_EQ(s, 0); + + r = LIBC_NAMESPACE::fesetexcept(e); + ASSERT_EQ(r, 0); + s = LIBC_NAMESPACE::fetestexcept(e); + ASSERT_EQ(s, e); } } } @@ -86,6 +103,11 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndTest) { ASSERT_EQ(r, 0); s = LIBC_NAMESPACE::fetestexcept(e); ASSERT_EQ(s, 0); + + r = LIBC_NAMESPACE::fesetexcept(e); + ASSERT_EQ(r, 0); + s = LIBC_NAMESPACE::fetestexcept(e); + ASSERT_EQ(s, e); } } } @@ -106,6 +128,11 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndTest) { ASSERT_EQ(r, 0); s = LIBC_NAMESPACE::fetestexcept(e); ASSERT_EQ(s, 0); + + r = LIBC_NAMESPACE::fesetexcept(e); + ASSERT_EQ(r, 0); + s = LIBC_NAMESPACE::fetestexcept(e); + ASSERT_EQ(s, e); } } } @@ -116,4 +143,9 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndTest) { ASSERT_EQ(r, 0); int s = LIBC_NAMESPACE::fetestexcept(ALL_EXCEPTS); ASSERT_EQ(s, ALL_EXCEPTS); + + r = LIBC_NAMESPACE::fesetexcept(ALL_EXCEPTS); + ASSERT_EQ(r, 0); + s = LIBC_NAMESPACE::fetestexcept(ALL_EXCEPTS); + ASSERT_EQ(s, ALL_EXCEPTS); } diff --git a/libc/test/src/fenv/feclearexcept_test.cpp b/libc/test/src/fenv/feclearexcept_test.cpp index fa3e856d1ba20..bb42d9070358e 100644 --- a/libc/test/src/fenv/feclearexcept_test.cpp +++ b/libc/test/src/fenv/feclearexcept_test.cpp @@ -11,7 +11,7 @@ #include "src/__support/FPUtil/FEnvImpl.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fenv_macros.h" #include TEST(LlvmLibcFEnvTest, ClearTest) { diff --git a/libc/test/src/fenv/feenableexcept_test.cpp b/libc/test/src/fenv/feenableexcept_test.cpp index 41c1945368ed5..aeb4f955fd69b 100644 --- a/libc/test/src/fenv/feenableexcept_test.cpp +++ b/libc/test/src/fenv/feenableexcept_test.cpp @@ -13,7 +13,7 @@ #include "test/UnitTest/Test.h" -#include +#include "hdr/fenv_macros.h" TEST(LlvmLibcFEnvTest, EnableTest) { #if defined(LIBC_TARGET_ARCH_IS_ANY_ARM) || \ diff --git a/libc/test/src/fenv/rounding_mode_test.cpp b/libc/test/src/fenv/rounding_mode_test.cpp index 4560160e8e2e9..ec2e27ecc818b 100644 --- a/libc/test/src/fenv/rounding_mode_test.cpp +++ b/libc/test/src/fenv/rounding_mode_test.cpp @@ -11,7 +11,7 @@ #include "test/UnitTest/Test.h" -#include +#include "hdr/fenv_macros.h" TEST(LlvmLibcRoundingModeTest, SetAndGet) { struct ResetDefaultRoundingMode { diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index bbf8f071e1e0c..55119868bdaa1 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1,15 +1,5 @@ add_custom_target(libc-math-unittests) -# FIXME: We 
shouldn't have regular libraries created because we could be -# cross-compiling the tests and running through an emulator. -if(NOT LIBC_TARGET_OS_IS_GPU) - add_library( - libc_math_test_utils - RandUtils.cpp - RandUtils.h - ) -endif() - add_fp_unittest( cosf_test NEED_MPFR @@ -108,7 +98,6 @@ add_fp_unittest( HDRS FAbsTest.h DEPENDS - libc.include.math libc.src.math.fabs libc.src.__support.FPUtil.fp_bits ) @@ -123,7 +112,6 @@ add_fp_unittest( HDRS FAbsTest.h DEPENDS - libc.include.math libc.src.math.fabsf libc.src.__support.FPUtil.fp_bits ) @@ -138,7 +126,6 @@ add_fp_unittest( HDRS FAbsTest.h DEPENDS - libc.include.math libc.src.math.fabsl libc.src.__support.FPUtil.fp_bits ) @@ -153,7 +140,6 @@ add_fp_unittest( HDRS TruncTest.h DEPENDS - libc.include.math libc.src.math.trunc libc.src.__support.FPUtil.fp_bits ) @@ -168,7 +154,6 @@ add_fp_unittest( HDRS TruncTest.h DEPENDS - libc.include.math libc.src.math.truncf libc.src.__support.FPUtil.fp_bits ) @@ -183,7 +168,6 @@ add_fp_unittest( HDRS TruncTest.h DEPENDS - libc.include.math libc.src.math.truncl libc.src.__support.FPUtil.fp_bits ) @@ -198,7 +182,6 @@ add_fp_unittest( HDRS CeilTest.h DEPENDS - libc.include.math libc.src.math.ceil libc.src.__support.FPUtil.fp_bits ) @@ -213,7 +196,6 @@ add_fp_unittest( HDRS CeilTest.h DEPENDS - libc.include.math libc.src.math.ceilf libc.src.__support.FPUtil.fp_bits ) @@ -228,7 +210,6 @@ add_fp_unittest( HDRS CeilTest.h DEPENDS - libc.include.math libc.src.math.ceill libc.src.__support.FPUtil.fp_bits ) @@ -243,7 +224,6 @@ add_fp_unittest( HDRS FloorTest.h DEPENDS - libc.include.math libc.src.math.floor libc.src.__support.FPUtil.fp_bits ) @@ -258,7 +238,6 @@ add_fp_unittest( HDRS FloorTest.h DEPENDS - libc.include.math libc.src.math.floorf libc.src.__support.FPUtil.fp_bits ) @@ -273,7 +252,6 @@ add_fp_unittest( HDRS FloorTest.h DEPENDS - libc.include.math libc.src.math.floorl libc.src.__support.FPUtil.fp_bits ) @@ -288,7 +266,6 @@ add_fp_unittest( HDRS RoundTest.h DEPENDS - libc.include.math libc.src.math.round libc.src.__support.FPUtil.fp_bits ) @@ -303,7 +280,6 @@ add_fp_unittest( HDRS RoundTest.h DEPENDS - libc.include.math libc.src.math.roundf libc.src.__support.FPUtil.fp_bits ) @@ -318,11 +294,52 @@ add_fp_unittest( HDRS RoundTest.h DEPENDS - libc.include.math libc.src.math.roundl libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + roundeven_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + roundeven_test.cpp + HDRS + RoundEvenTest.h + DEPENDS + libc.src.math.roundeven + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + roundevenf_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + roundevenf_test.cpp + HDRS + RoundEvenTest.h + DEPENDS + libc.src.math.roundevenf + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + roundevenl_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + roundevenl_test.cpp + HDRS + RoundEvenTest.h + DEPENDS + libc.src.math.roundevenl + libc.src.__support.FPUtil.fp_bits +) + add_fp_unittest( lround_test NEED_MPFR @@ -333,7 +350,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -352,7 +368,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -371,7 +386,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -390,7 +404,6 @@ add_fp_unittest( HDRS 
RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -409,7 +422,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -428,7 +440,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -447,7 +458,6 @@ add_fp_unittest( HDRS RIntTest.h DEPENDS - libc.include.math libc.src.math.rint libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -463,7 +473,6 @@ add_fp_unittest( HDRS RIntTest.h DEPENDS - libc.include.math libc.src.math.rintf libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -479,7 +488,6 @@ add_fp_unittest( HDRS RIntTest.h DEPENDS - libc.include.math libc.src.math.rintl libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -495,7 +503,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.lrint libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -511,7 +518,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.lrintf libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -527,7 +533,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.lrintl libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -543,7 +548,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.llrint libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -559,7 +563,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.llrintf libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -575,7 +578,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.llrintl libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -590,7 +592,6 @@ add_fp_unittest( expf_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.expf libc.src.__support.FPUtil.fp_bits ) @@ -604,7 +605,6 @@ add_fp_unittest( exp_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.exp libc.src.__support.FPUtil.fp_bits ) @@ -618,7 +618,6 @@ add_fp_unittest( exp2f_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.exp2f libc.src.__support.FPUtil.fp_bits ) @@ -632,7 +631,6 @@ add_fp_unittest( exp2_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.exp2 libc.src.__support.FPUtil.fp_bits ) @@ -645,7 +643,6 @@ add_fp_unittest( SRCS exp2m1f_test.cpp DEPENDS - libc.include.llvm-libc-macros.math_macros libc.src.errno.errno libc.src.math.exp2m1f libc.src.__support.CPP.array @@ -661,7 +658,6 @@ add_fp_unittest( exp10f_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.exp10f libc.src.__support.FPUtil.fp_bits ) @@ -675,7 +671,6 @@ add_fp_unittest( exp10_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.exp10 libc.src.__support.FPUtil.fp_bits ) @@ -689,7 +684,6 @@ add_fp_unittest( HDRS CopySignTest.h DEPENDS - libc.include.math libc.src.math.copysign libc.src.__support.FPUtil.fp_bits # FIXME: Currently fails on the GPU build. 
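Editorial aside, not part of the patch: the fesetexcept checks added to the fenv tests earlier in this section rely on the C23 semantics of fesetexcept, which sets the requested exception flags without raising them (no enabled trap fires) and returns 0 on success, so the tests can assert the flags round-trip through fetestexcept. A minimal sketch of that round-trip, reusing only includes the patch itself touches:

#include "hdr/fenv_macros.h"
#include "src/fenv/feclearexcept.h"
#include "src/fenv/fesetexcept.h"
#include "src/fenv/fetestexcept.h"

// Returns true when a flag set via fesetexcept is visible to fetestexcept.
static bool set_and_check(int except) {
  LIBC_NAMESPACE::feclearexcept(FE_ALL_EXCEPT); // start from a clean state
  int r = LIBC_NAMESPACE::fesetexcept(except);  // sets the flags, raises nothing
  int s = LIBC_NAMESPACE::fetestexcept(except); // reads the flags back
  return r == 0 && s == except;
}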
@@ -705,7 +699,6 @@ add_fp_unittest( HDRS CopySignTest.h DEPENDS - libc.include.math libc.src.math.copysignf libc.src.__support.FPUtil.fp_bits # FIXME: Currently fails on the GPU build. @@ -721,7 +714,6 @@ add_fp_unittest( HDRS CopySignTest.h DEPENDS - libc.include.math libc.src.math.copysignl libc.src.__support.FPUtil.fp_bits # FIXME: Currently fails on the GPU build. @@ -738,7 +730,6 @@ add_fp_unittest( HDRS FrexpTest.h DEPENDS - libc.include.math libc.src.math.frexp libc.src.__support.FPUtil.basic_operations ) @@ -753,7 +744,6 @@ add_fp_unittest( HDRS FrexpTest.h DEPENDS - libc.include.math libc.src.math.frexpf libc.src.__support.FPUtil.basic_operations ) @@ -768,7 +758,6 @@ add_fp_unittest( HDRS FrexpTest.h DEPENDS - libc.include.math libc.src.math.frexpl libc.src.__support.FPUtil.basic_operations ) @@ -782,7 +771,6 @@ add_fp_unittest( HDRS ILogbTest.h DEPENDS - libc.include.math libc.src.math.ilogb libc.src.__support.CPP.limits libc.src.__support.FPUtil.fp_bits @@ -798,7 +786,6 @@ add_fp_unittest( HDRS ILogbTest.h DEPENDS - libc.include.math libc.src.math.ilogbf libc.src.__support.CPP.limits libc.src.__support.FPUtil.fp_bits @@ -814,7 +801,6 @@ add_fp_unittest( HDRS ILogbTest.h DEPENDS - libc.include.math libc.src.math.ilogbl libc.src.__support.CPP.limits libc.src.__support.FPUtil.fp_bits @@ -830,7 +816,6 @@ add_fp_unittest( HDRS LdExpTest.h DEPENDS - libc.include.math libc.src.math.ldexp libc.src.__support.CPP.limits libc.src.__support.FPUtil.fp_bits @@ -846,7 +831,6 @@ add_fp_unittest( HDRS LdExpTest.h DEPENDS - libc.include.math libc.src.math.ldexpf libc.src.__support.CPP.limits libc.src.__support.FPUtil.fp_bits @@ -862,7 +846,6 @@ add_fp_unittest( HDRS LdExpTest.h DEPENDS - libc.include.math libc.src.math.ldexpl libc.src.__support.CPP.limits libc.src.__support.FPUtil.fp_bits @@ -876,7 +859,6 @@ add_fp_unittest( SRCS logb_test.cpp DEPENDS - libc.include.math libc.src.math.logb libc.src.__support.FPUtil.manipulation_functions ) @@ -888,7 +870,6 @@ add_fp_unittest( SRCS logbf_test.cpp DEPENDS - libc.include.math libc.src.math.logbf libc.src.__support.FPUtil.manipulation_functions ) @@ -902,7 +883,6 @@ add_fp_unittest( HDRS LogbTest.h DEPENDS - libc.include.math libc.src.math.logbl libc.src.__support.FPUtil.manipulation_functions ) @@ -916,7 +896,6 @@ add_fp_unittest( HDRS ModfTest.h DEPENDS - libc.include.math libc.src.math.modf libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.nearest_integer_operations @@ -933,7 +912,6 @@ add_fp_unittest( HDRS ModfTest.h DEPENDS - libc.include.math libc.src.math.modff libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.nearest_integer_operations @@ -950,7 +928,6 @@ add_fp_unittest( HDRS ModfTest.h DEPENDS - libc.include.math libc.src.math.modfl libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.nearest_integer_operations @@ -965,7 +942,6 @@ add_fp_unittest( HDRS FDimTest.h DEPENDS - libc.include.math libc.src.math.fdimf libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -980,7 +956,6 @@ add_fp_unittest( HDRS FDimTest.h DEPENDS - libc.include.math libc.src.math.fdim libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -995,7 +970,6 @@ add_fp_unittest( HDRS FDimTest.h DEPENDS - libc.include.math libc.src.math.fdiml libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -1010,7 +984,6 @@ add_fp_unittest( HDRS FMinTest.h DEPENDS - libc.include.math libc.src.math.fminf libc.src.__support.FPUtil.fp_bits ) @@ -1024,7 
+997,6 @@ add_fp_unittest( HDRS FMinTest.h DEPENDS - libc.include.math libc.src.math.fmin libc.src.__support.FPUtil.fp_bits ) @@ -1038,7 +1010,6 @@ add_fp_unittest( HDRS FMinTest.h DEPENDS - libc.include.math libc.src.math.fminl libc.src.__support.FPUtil.fp_bits ) @@ -1052,7 +1023,6 @@ add_fp_unittest( HDRS FMaxTest.h DEPENDS - libc.include.math libc.src.math.fmaxf libc.src.__support.FPUtil.fp_bits ) @@ -1066,7 +1036,6 @@ add_fp_unittest( HDRS FMaxTest.h DEPENDS - libc.include.math libc.src.math.fmax libc.src.__support.FPUtil.fp_bits ) @@ -1080,7 +1049,6 @@ add_fp_unittest( HDRS FMaxTest.h DEPENDS - libc.include.math libc.src.math.fmaxl libc.src.__support.FPUtil.fp_bits ) @@ -1093,7 +1061,6 @@ add_fp_unittest( SRCS sqrtf_test.cpp DEPENDS - libc.include.math libc.src.math.sqrtf libc.src.__support.FPUtil.fp_bits ) @@ -1106,7 +1073,6 @@ add_fp_unittest( SRCS sqrt_test.cpp DEPENDS - libc.include.math libc.src.math.sqrt libc.src.__support.FPUtil.fp_bits ) @@ -1119,7 +1085,6 @@ add_fp_unittest( SRCS sqrtl_test.cpp DEPENDS - libc.include.math libc.src.math.sqrtl libc.src.__support.FPUtil.fp_bits ) @@ -1179,7 +1144,6 @@ add_fp_unittest( HDRS RemQuoTest.h DEPENDS - libc.include.math libc.src.math.remquof libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -1195,7 +1159,6 @@ add_fp_unittest( HDRS RemQuoTest.h DEPENDS - libc.include.math libc.src.math.remquo libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -1211,7 +1174,6 @@ add_fp_unittest( HDRS RemQuoTest.h DEPENDS - libc.include.math libc.src.math.remquol libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -1225,7 +1187,6 @@ add_fp_unittest( SRCS hypotf_test.cpp DEPENDS - libc.include.math libc.src.math.hypotf libc.src.__support.FPUtil.fp_bits ) @@ -1238,7 +1199,6 @@ add_fp_unittest( SRCS hypot_test.cpp DEPENDS - libc.include.math libc.src.math.hypot libc.src.__support.FPUtil.fp_bits ) @@ -1252,7 +1212,6 @@ add_fp_unittest( HDRS NextAfterTest.h DEPENDS - libc.include.math libc.src.math.nextafter libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -1267,7 +1226,6 @@ add_fp_unittest( HDRS NextAfterTest.h DEPENDS - libc.include.math libc.src.math.nextafterf libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -1282,7 +1240,6 @@ add_fp_unittest( HDRS NextAfterTest.h DEPENDS - libc.include.math libc.src.math.nextafterl libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -1297,7 +1254,6 @@ add_fp_unittest( HDRS NextAfterTest.h DEPENDS - libc.include.math libc.src.math.nextafterf128 libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -1313,8 +1269,9 @@ add_fp_unittest( SRCS fmaf_test.cpp DEPENDS - libc.include.math libc.src.math.fmaf + libc.src.stdlib.rand + libc.src.stdlib.srand libc.src.__support.FPUtil.fp_bits FLAGS FMA_OPT__ONLY @@ -1328,8 +1285,9 @@ add_fp_unittest( SRCS fma_test.cpp DEPENDS - libc.include.math libc.src.math.fma + libc.src.stdlib.rand + libc.src.stdlib.srand libc.src.__support.FPUtil.fp_bits ) @@ -1354,7 +1312,6 @@ add_fp_unittest( expm1f_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.expm1f libc.src.__support.FPUtil.fp_bits ) @@ -1368,7 +1325,6 @@ add_fp_unittest( expm1_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.expm1 libc.src.__support.FPUtil.fp_bits ) @@ -1382,7 +1338,6 @@ add_fp_unittest( log_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log 
libc.src.__support.FPUtil.fp_bits ) @@ -1396,7 +1351,6 @@ add_fp_unittest( logf_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.logf libc.src.__support.FPUtil.fp_bits ) @@ -1410,7 +1364,6 @@ log2_test log2_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log2 libc.src.__support.FPUtil.fp_bits ) @@ -1424,7 +1377,6 @@ add_fp_unittest( log2f_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log2f libc.src.__support.FPUtil.fp_bits ) @@ -1438,7 +1390,6 @@ add_fp_unittest( log10_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log10 libc.src.__support.FPUtil.fp_bits ) @@ -1452,7 +1403,6 @@ add_fp_unittest( log10f_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log10f libc.src.__support.FPUtil.fp_bits ) @@ -1466,7 +1416,6 @@ log1p_test log1p_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log1p libc.src.__support.FPUtil.fp_bits ) @@ -1480,7 +1429,6 @@ add_fp_unittest( log1pf_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log1pf libc.src.__support.FPUtil.fp_bits ) @@ -1494,7 +1442,6 @@ add_fp_unittest( HDRS FModTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.math.fmodf libc.src.__support.FPUtil.basic_operations @@ -1512,7 +1459,6 @@ add_fp_unittest( HDRS FModTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.math.fmod libc.src.__support.FPUtil.basic_operations @@ -1531,7 +1477,6 @@ add_fp_unittest( SRCS explogxf_test.cpp DEPENDS - libc.include.math libc.src.math.generic.explogxf libc.src.math.fabs libc.src.math.fabsf @@ -1670,7 +1615,6 @@ add_fp_unittest( HDRS ScalbnTest.h DEPENDS - libc.include.math libc.src.math.scalbn libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.normal_float @@ -1686,7 +1630,6 @@ add_fp_unittest( HDRS ScalbnTest.h DEPENDS - libc.include.math libc.src.math.scalbnf libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.normal_float @@ -1702,7 +1645,6 @@ add_fp_unittest( HDRS ScalbnTest.h DEPENDS - libc.include.math libc.src.math.scalbnl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.normal_float @@ -1716,7 +1658,6 @@ add_fp_unittest( SRCS erff_test.cpp DEPENDS - libc.include.math libc.src.math.erff libc.src.__support.FPUtil.fp_bits ) @@ -1729,7 +1670,6 @@ add_fp_unittest( SRCS powf_test.cpp DEPENDS - libc.include.math libc.src.math.powf libc.src.__support.FPUtil.fp_bits ) @@ -1742,7 +1682,6 @@ add_fp_unittest( SRCS atan2f_test.cpp DEPENDS - libc.include.math libc.src.math.atan2f libc.src.__support.FPUtil.fp_bits ) diff --git a/libc/test/src/math/CeilTest.h b/libc/test/src/math/CeilTest.h index 74cc90614dfc2..da3f3c0e8f5ab 100644 --- a/libc/test/src/math/CeilTest.h +++ b/libc/test/src/math/CeilTest.h @@ -10,7 +10,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/CopySignTest.h b/libc/test/src/math/CopySignTest.h index 206626d66f580..052ff0333438e 100644 --- a/libc/test/src/math/CopySignTest.h +++ b/libc/test/src/math/CopySignTest.h @@ -10,7 +10,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/FAbsTest.h b/libc/test/src/math/FAbsTest.h index 942991f23be1c..23ad8a26c481c 100644 --- 
a/libc/test/src/math/FAbsTest.h +++ b/libc/test/src/math/FAbsTest.h @@ -13,7 +13,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/FDimTest.h b/libc/test/src/math/FDimTest.h index df8de91b42980..44aba9caf6463 100644 --- a/libc/test/src/math/FDimTest.h +++ b/libc/test/src/math/FDimTest.h @@ -6,7 +6,7 @@ // //===---------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FPBits.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/FMaxTest.h b/libc/test/src/math/FMaxTest.h index 2c7dc3dc13ec5..e9857f332e651 100644 --- a/libc/test/src/math/FMaxTest.h +++ b/libc/test/src/math/FMaxTest.h @@ -13,7 +13,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/FMinTest.h b/libc/test/src/math/FMinTest.h index a986d5240d0da..c6b9f4439b79b 100644 --- a/libc/test/src/math/FMinTest.h +++ b/libc/test/src/math/FMinTest.h @@ -13,7 +13,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/FModTest.h b/libc/test/src/math/FModTest.h index 96ad299258a17..bc909987a161b 100644 --- a/libc/test/src/math/FModTest.h +++ b/libc/test/src/math/FModTest.h @@ -14,7 +14,7 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #define TEST_SPECIAL(x, y, expected, dom_err, expected_exception) \ EXPECT_FP_EQ(expected, f(x, y)); \ diff --git a/libc/test/src/math/FloorTest.h b/libc/test/src/math/FloorTest.h index 21ae291e61bc7..679dc26e12480 100644 --- a/libc/test/src/math/FloorTest.h +++ b/libc/test/src/math/FloorTest.h @@ -13,7 +13,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/FmaTest.h b/libc/test/src/math/FmaTest.h index 0c93ec858a12c..76bd221fcb1f2 100644 --- a/libc/test/src/math/FmaTest.h +++ b/libc/test/src/math/FmaTest.h @@ -10,9 +10,10 @@ #define LLVM_LIBC_TEST_SRC_MATH_FMATEST_H #include "src/__support/FPUtil/FPBits.h" +#include "src/stdlib/rand.h" +#include "src/stdlib/srand.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "test/src/math/RandUtils.h" #include "utils/MPFRWrapper/MPFRUtils.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; @@ -43,8 +44,7 @@ class FmaTestTemplate : public LIBC_NAMESPACE::testing::Test { StorageType get_random_bit_pattern() { StorageType bits{0}; for (StorageType i = 0; i < sizeof(StorageType) / 2; ++i) { - bits = (bits << 2) + - static_cast<StorageType>(LIBC_NAMESPACE::testutils::rand()); + bits = (bits << 2) + static_cast<StorageType>(LIBC_NAMESPACE::rand()); } return bits; } @@ -77,6 +77,7 @@ class FmaTestTemplate : public LIBC_NAMESPACE::testing::Test { void test_subnormal_range(Func func) { constexpr StorageType COUNT = 100'001; constexpr StorageType
STEP = (MAX_SUBNORMAL - MIN_SUBNORMAL) / COUNT; + LIBC_NAMESPACE::srand(1); for (StorageType v = MIN_SUBNORMAL, w = MAX_SUBNORMAL; v <= MAX_SUBNORMAL && w >= MIN_SUBNORMAL; v += STEP, w -= STEP) { T x = FPBits(get_random_bit_pattern()).get_val(), y = FPBits(v).get_val(), @@ -90,6 +91,7 @@ class FmaTestTemplate : public LIBC_NAMESPACE::testing::Test { void test_normal_range(Func func) { constexpr StorageType COUNT = 100'001; constexpr StorageType STEP = (MAX_NORMAL - MIN_NORMAL) / COUNT; + LIBC_NAMESPACE::srand(1); for (StorageType v = MIN_NORMAL, w = MAX_NORMAL; v <= MAX_NORMAL && w >= MIN_NORMAL; v += STEP, w -= STEP) { T x = FPBits(v).get_val(), y = FPBits(w).get_val(), diff --git a/libc/test/src/math/FrexpTest.h b/libc/test/src/math/FrexpTest.h index f971b45628f09..5f993f604999d 100644 --- a/libc/test/src/math/FrexpTest.h +++ b/libc/test/src/math/FrexpTest.h @@ -11,7 +11,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/HypotTest.h b/libc/test/src/math/HypotTest.h index df69965d5dbce..0c15f02fe3719 100644 --- a/libc/test/src/math/HypotTest.h +++ b/libc/test/src/math/HypotTest.h @@ -14,7 +14,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/ILogbTest.h b/libc/test/src/math/ILogbTest.h index ad47b9bb3961f..3d1f047a48061 100644 --- a/libc/test/src/math/ILogbTest.h +++ b/libc/test/src/math/ILogbTest.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_ILOGBTEST_H #define LLVM_LIBC_TEST_SRC_MATH_ILOGBTEST_H -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/CPP/limits.h" // INT_MAX #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/ManipulationFunctions.h" diff --git a/libc/test/src/math/LdExpTest.h b/libc/test/src/math/LdExpTest.h index 8bfd022973b44..2a406feed52fc 100644 --- a/libc/test/src/math/LdExpTest.h +++ b/libc/test/src/math/LdExpTest.h @@ -15,7 +15,7 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include template diff --git a/libc/test/src/math/LogbTest.h b/libc/test/src/math/LogbTest.h index 3859b56582e5e..f066d5f9de02b 100644 --- a/libc/test/src/math/LogbTest.h +++ b/libc/test/src/math/LogbTest.h @@ -11,7 +11,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/ModfTest.h b/libc/test/src/math/ModfTest.h index 84e26db49695d..49b0328753b3b 100644 --- a/libc/test/src/math/ModfTest.h +++ b/libc/test/src/math/ModfTest.h @@ -12,7 +12,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/NextAfterTest.h b/libc/test/src/math/NextAfterTest.h index 05803fb45ee27..a7248dd7042d4 100644 --- a/libc/test/src/math/NextAfterTest.h +++ b/libc/test/src/math/NextAfterTest.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_NEXTAFTERTEST_H #define LLVM_LIBC_TEST_SRC_MATH_NEXTAFTERTEST_H 
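Editorial aside, not part of the patch: the FmaTest.h change above swaps the old test/src/math/RandUtils helpers for libc's own rand/srand and seeds with srand(1) at the top of each range sweep, so the "random" bit patterns are reproducible from run to run. A condensed sketch of the pattern, assuming LIBC_NAMESPACE::rand and srand follow the usual C semantics:

#include "src/stdlib/rand.h"
#include "src/stdlib/srand.h"

// Mix successive rand() outputs into a StorageType-wide bit pattern,
// mirroring get_random_bit_pattern() in FmaTest.h.
template <typename StorageType> StorageType random_bit_pattern() {
  StorageType bits{0};
  for (StorageType i = 0; i < sizeof(StorageType) / 2; ++i)
    bits = (bits << 2) + static_cast<StorageType>(LIBC_NAMESPACE::rand());
  return bits;
}

// Calling LIBC_NAMESPACE::srand(1) once per test fixes the sequence, so a
// failure seen in one run can be replayed exactly in the next.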
-#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/FPUtil/BasicOperations.h" diff --git a/libc/test/src/math/RIntTest.h b/libc/test/src/math/RIntTest.h index 301655c64ed36..c706ff18f186e 100644 --- a/libc/test/src/math/RIntTest.h +++ b/libc/test/src/math/RIntTest.h @@ -15,8 +15,8 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" -#include +#include "hdr/fenv_macros.h" +#include "hdr/math_macros.h" #include namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/RemQuoTest.h b/libc/test/src/math/RemQuoTest.h index 1cb8cdbe81a22..677772dd9fccf 100644 --- a/libc/test/src/math/RemQuoTest.h +++ b/libc/test/src/math/RemQuoTest.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_REMQUOTEST_H #define LLVM_LIBC_TEST_SRC_MATH_REMQUOTEST_H -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FPBits.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/RoundEvenTest.h b/libc/test/src/math/RoundEvenTest.h new file mode 100644 index 0000000000000..68b8b9ae1d964 --- /dev/null +++ b/libc/test/src/math/RoundEvenTest.h @@ -0,0 +1,92 @@ +//===-- Utility class to test roundeven[f|l] --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_SRC_MATH_ROUNDEVENTEST_H +#define LLVM_LIBC_TEST_SRC_MATH_ROUNDEVENTEST_H + +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +#include "hdr/math_macros.h" + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +template +class RoundEvenTest : public LIBC_NAMESPACE::testing::Test { + + DECLARE_SPECIAL_CONSTANTS(T) + +public: + typedef T (*RoundEvenFunc)(T); + + void testSpecialNumbers(RoundEvenFunc func) { + EXPECT_FP_EQ(zero, func(zero)); + EXPECT_FP_EQ(neg_zero, func(neg_zero)); + + EXPECT_FP_EQ(inf, func(inf)); + EXPECT_FP_EQ(neg_inf, func(neg_inf)); + + EXPECT_FP_EQ(aNaN, func(aNaN)); + } + + void testRoundedNumbers(RoundEvenFunc func) { + EXPECT_FP_EQ(T(1.0), func(T(1.0))); + EXPECT_FP_EQ(T(-1.0), func(T(-1.0))); + EXPECT_FP_EQ(T(10.0), func(T(10.0))); + EXPECT_FP_EQ(T(-10.0), func(T(-10.0))); + EXPECT_FP_EQ(T(1234.0), func(T(1234.0))); + EXPECT_FP_EQ(T(-1234.0), func(T(-1234.0))); + } + + void testFractions(RoundEvenFunc func) { + EXPECT_FP_EQ(T(0.0), func(T(0.5))); + EXPECT_FP_EQ(T(-0.0), func(T(-0.5))); + EXPECT_FP_EQ(T(0.0), func(T(0.115))); + EXPECT_FP_EQ(T(-0.0), func(T(-0.115))); + EXPECT_FP_EQ(T(1.0), func(T(0.715))); + EXPECT_FP_EQ(T(-1.0), func(T(-0.715))); + EXPECT_FP_EQ(T(1.0), func(T(1.3))); + EXPECT_FP_EQ(T(-1.0), func(T(-1.3))); + EXPECT_FP_EQ(T(2.0), func(T(1.5))); + EXPECT_FP_EQ(T(-2.0), func(T(-1.5))); + EXPECT_FP_EQ(T(2.0), func(T(1.75))); + EXPECT_FP_EQ(T(-2.0), func(T(-1.75))); + EXPECT_FP_EQ(T(11.0), func(T(10.65))); + EXPECT_FP_EQ(T(-11.0), func(T(-10.65))); + EXPECT_FP_EQ(T(1233.0), func(T(1233.25))); + EXPECT_FP_EQ(T(1234.0), func(T(1233.50))); + EXPECT_FP_EQ(T(1234.0), func(T(1233.75))); + EXPECT_FP_EQ(T(-1233.0), func(T(-1233.25))); + 
EXPECT_FP_EQ(T(-1234.0), func(T(-1233.50))); + EXPECT_FP_EQ(T(-1234.0), func(T(-1233.75))); + EXPECT_FP_EQ(T(1234.0), func(T(1234.50))); + EXPECT_FP_EQ(T(-1234.0), func(T(-1234.50))); + } + + void testRange(RoundEvenFunc func) { + constexpr StorageType COUNT = 100'000; + constexpr StorageType STEP = STORAGE_MAX / COUNT; + for (StorageType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { + T x = FPBits(v).get_val(); + if (isnan(x) || isinf(x)) + continue; + + ASSERT_MPFR_MATCH(mpfr::Operation::RoundEven, x, func(x), 0.0); + } + } +}; + +#define LIST_ROUNDEVEN_TESTS(T, func) \ + using LlvmLibcRoundEvenTest = RoundEvenTest<T>; \ + TEST_F(LlvmLibcRoundEvenTest, SpecialNumbers) { testSpecialNumbers(&func); } \ + TEST_F(LlvmLibcRoundEvenTest, RoundedNumbers) { testRoundedNumbers(&func); } \ + TEST_F(LlvmLibcRoundEvenTest, Fractions) { testFractions(&func); } \ + TEST_F(LlvmLibcRoundEvenTest, Range) { testRange(&func); } + +#endif // LLVM_LIBC_TEST_SRC_MATH_ROUNDEVENTEST_H diff --git a/libc/test/src/math/RoundTest.h b/libc/test/src/math/RoundTest.h index 17da00f869d3b..eecf95982729b 100644 --- a/libc/test/src/math/RoundTest.h +++ b/libc/test/src/math/RoundTest.h @@ -13,7 +13,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/RoundToIntegerTest.h b/libc/test/src/math/RoundToIntegerTest.h index d2fabd0b4c9c3..7c93451235f29 100644 --- a/libc/test/src/math/RoundToIntegerTest.h +++ b/libc/test/src/math/RoundToIntegerTest.h @@ -15,7 +15,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/SqrtTest.h b/libc/test/src/math/SqrtTest.h index 9811b2767ee33..799b7862a3726 100644 --- a/libc/test/src/math/SqrtTest.h +++ b/libc/test/src/math/SqrtTest.h @@ -11,7 +11,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/TruncTest.h b/libc/test/src/math/TruncTest.h index c3a89dbb837b5..57c953fad8742 100644 --- a/libc/test/src/math/TruncTest.h +++ b/libc/test/src/math/TruncTest.h @@ -13,7 +13,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/math/acosf_test.cpp b/libc/test/src/math/acosf_test.cpp index 6f8321bd7182a..0d25a808e0bf3 100644 --- a/libc/test/src/math/acosf_test.cpp +++ b/libc/test/src/math/acosf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/acosf.h" diff --git a/libc/test/src/math/acoshf_test.cpp b/libc/test/src/math/acoshf_test.cpp index 41d1166fb430d..32761e25b5ce5 100644 --- a/libc/test/src/math/acoshf_test.cpp +++ b/libc/test/src/math/acoshf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include
"src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/acoshf.h" diff --git a/libc/test/src/math/asinf_test.cpp b/libc/test/src/math/asinf_test.cpp index 4e36f03f48955..91e61085e91b8 100644 --- a/libc/test/src/math/asinf_test.cpp +++ b/libc/test/src/math/asinf_test.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/asinf.h" diff --git a/libc/test/src/math/asinhf_test.cpp b/libc/test/src/math/asinhf_test.cpp index 9a3bfbed1068d..b19e26efd07bf 100644 --- a/libc/test/src/math/asinhf_test.cpp +++ b/libc/test/src/math/asinhf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/asinhf.h" diff --git a/libc/test/src/math/atan2f_test.cpp b/libc/test/src/math/atan2f_test.cpp index 343e7601b0392..1242b7e66528f 100644 --- a/libc/test/src/math/atan2f_test.cpp +++ b/libc/test/src/math/atan2f_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/atan2f.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/atanf_test.cpp b/libc/test/src/math/atanf_test.cpp index 58b0eadd63f8d..4fa7badaf7368 100644 --- a/libc/test/src/math/atanf_test.cpp +++ b/libc/test/src/math/atanf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/atanf.h" diff --git a/libc/test/src/math/atanhf_test.cpp b/libc/test/src/math/atanhf_test.cpp index c659f17d13b0a..7fc8c70d13867 100644 --- a/libc/test/src/math/atanhf_test.cpp +++ b/libc/test/src/math/atanhf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/atanhf.h" diff --git a/libc/test/src/math/cos_test.cpp b/libc/test/src/math/cos_test.cpp index 6a1122997c51a..9a39616ed16f8 100644 --- a/libc/test/src/math/cos_test.cpp +++ b/libc/test/src/math/cos_test.cpp @@ -11,7 +11,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" using LlvmLibcCosTest = LIBC_NAMESPACE::testing::FPTest; diff --git a/libc/test/src/math/cosf_test.cpp b/libc/test/src/math/cosf_test.cpp index 8a5eb17fdcea5..dab35fa1a9fe7 100644 --- a/libc/test/src/math/cosf_test.cpp +++ b/libc/test/src/math/cosf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/cosf.h" diff --git a/libc/test/src/math/coshf_test.cpp b/libc/test/src/math/coshf_test.cpp 
index 8792f56b03461..7c5d6630e1093 100644 --- a/libc/test/src/math/coshf_test.cpp +++ b/libc/test/src/math/coshf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/CPP/array.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" diff --git a/libc/test/src/math/erff_test.cpp b/libc/test/src/math/erff_test.cpp index 1e43c206aef0d..5c848d7d5bf7d 100644 --- a/libc/test/src/math/erff_test.cpp +++ b/libc/test/src/math/erff_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/erff.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt index 6b2f3dddcadd2..938e519aff084 100644 --- a/libc/test/src/math/exhaustive/CMakeLists.txt +++ b/libc/test/src/math/exhaustive/CMakeLists.txt @@ -16,7 +16,6 @@ add_fp_unittest( sqrtf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.sqrtf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -33,7 +32,6 @@ add_fp_unittest( sinf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.sinf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -50,7 +48,6 @@ add_fp_unittest( cosf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.cosf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -67,7 +64,6 @@ add_fp_unittest( sincosf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.sincosf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -84,7 +80,6 @@ add_fp_unittest( tanf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.tanf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -101,7 +96,6 @@ add_fp_unittest( erff_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.erff libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -118,7 +112,6 @@ add_fp_unittest( expf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.expf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -135,7 +128,6 @@ add_fp_unittest( exp2f_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.exp2f libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -167,7 +159,6 @@ add_fp_unittest( exp10f_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.exp10f libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -184,7 +175,6 @@ add_fp_unittest( expm1f_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.expm1f libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -201,7 +191,6 @@ add_fp_unittest( logf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.logf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -218,7 +207,6 @@ add_fp_unittest( log10f_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.log10f libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -235,7 +223,6 @@ add_fp_unittest( log1pf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.log1pf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -252,7 +239,6 @@ add_fp_unittest( log2f_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.log2f libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -271,7 +257,6 @@ add_fp_unittest( -O3 DEPENDS .exhaustive_test - libc.include.math 
libc.src.math.hypotf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -302,7 +287,6 @@ add_fp_unittest( coshf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.coshf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -319,7 +303,6 @@ add_fp_unittest( sinhf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.sinhf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -336,7 +319,6 @@ add_fp_unittest( tanhf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.tanhf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -353,7 +335,6 @@ add_fp_unittest( acoshf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.acoshf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -370,7 +351,6 @@ add_fp_unittest( asinhf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.asinhf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -387,7 +367,6 @@ add_fp_unittest( atanhf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.atanhf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -404,7 +383,6 @@ add_fp_unittest( atanf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.atanf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -421,7 +399,6 @@ add_fp_unittest( asinf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.asinf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES @@ -438,7 +415,6 @@ add_fp_unittest( acosf_test.cpp DEPENDS .exhaustive_test - libc.include.math libc.src.math.acosf libc.src.__support.FPUtil.fp_bits LINK_LIBRARIES diff --git a/libc/test/src/math/exp10_test.cpp b/libc/test/src/math/exp10_test.cpp index 778189626a617..4cbdd169d8032 100644 --- a/libc/test/src/math/exp10_test.cpp +++ b/libc/test/src/math/exp10_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/exp10.h" diff --git a/libc/test/src/math/exp10f_test.cpp b/libc/test/src/math/exp10f_test.cpp index 9d44e8f65decc..e9b2786681042 100644 --- a/libc/test/src/math/exp10f_test.cpp +++ b/libc/test/src/math/exp10f_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/exp10f.h" diff --git a/libc/test/src/math/exp2_test.cpp b/libc/test/src/math/exp2_test.cpp index 845fda5451d4b..73232ed36077b 100644 --- a/libc/test/src/math/exp2_test.cpp +++ b/libc/test/src/math/exp2_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/exp2.h" diff --git a/libc/test/src/math/exp2f_test.cpp b/libc/test/src/math/exp2f_test.cpp index f63f091eab9a8..8ff0ce6a6e724 100644 --- a/libc/test/src/math/exp2f_test.cpp +++ b/libc/test/src/math/exp2f_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA 
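// Editorial note, not part of the patch: "hdr/math_macros.h" is one of
// libc's proxy headers. As I understand the scheme, it forwards to the
// llvm-libc-macros definitions in full-build mode and to the system
// <math.h> in overlay mode, which is why the explicit libc.include.math
// CMake dependencies are dropped throughout this patch.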
#include "src/errno/libc_errno.h" diff --git a/libc/test/src/math/exp2m1f_test.cpp b/libc/test/src/math/exp2m1f_test.cpp index a0f0da8681174..cb948289b6179 100644 --- a/libc/test/src/math/exp2m1f_test.cpp +++ b/libc/test/src/math/exp2m1f_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/CPP/array.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" diff --git a/libc/test/src/math/exp_test.cpp b/libc/test/src/math/exp_test.cpp index 42018e608ae45..64d8198e64f2d 100644 --- a/libc/test/src/math/exp_test.cpp +++ b/libc/test/src/math/exp_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/exp.h" diff --git a/libc/test/src/math/expf_test.cpp b/libc/test/src/math/expf_test.cpp index 634958bdc43e5..1dce381918eb6 100644 --- a/libc/test/src/math/expf_test.cpp +++ b/libc/test/src/math/expf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/expf.h" diff --git a/libc/test/src/math/explogxf_test.cpp b/libc/test/src/math/explogxf_test.cpp index a536a9f3ab8de..bcca87f590d75 100644 --- a/libc/test/src/math/explogxf_test.cpp +++ b/libc/test/src/math/explogxf_test.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// +#include "hdr/math_macros.h" #include "in_float_range_test_helper.h" -#include "include/llvm-libc-macros/math-macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/fabs.h" #include "src/math/fabsf.h" diff --git a/libc/test/src/math/expm1_test.cpp b/libc/test/src/math/expm1_test.cpp index 198e6d5cdd8ab..1bf07f19f3a7c 100644 --- a/libc/test/src/math/expm1_test.cpp +++ b/libc/test/src/math/expm1_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/expm1.h" diff --git a/libc/test/src/math/expm1f_test.cpp b/libc/test/src/math/expm1f_test.cpp index c72815887ba8b..515f988b62649 100644 --- a/libc/test/src/math/expm1f_test.cpp +++ b/libc/test/src/math/expm1f_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/expm1f.h" diff --git a/libc/test/src/math/fdim_test.cpp b/libc/test/src/math/fdim_test.cpp index 6c0c3e204c5f9..1e8adf036ddea 100644 --- a/libc/test/src/math/fdim_test.cpp +++ b/libc/test/src/math/fdim_test.cpp @@ -8,7 +8,7 @@ #include "FDimTest.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/fdim.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/fdimf_test.cpp b/libc/test/src/math/fdimf_test.cpp index 
a74011b5a2249..13e61d9082da4 100644 --- a/libc/test/src/math/fdimf_test.cpp +++ b/libc/test/src/math/fdimf_test.cpp @@ -8,7 +8,7 @@ #include "FDimTest.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/fdimf.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/fdiml_test.cpp b/libc/test/src/math/fdiml_test.cpp index d3f2e68a7c1d7..2d99d2134c1c0 100644 --- a/libc/test/src/math/fdiml_test.cpp +++ b/libc/test/src/math/fdiml_test.cpp @@ -8,7 +8,7 @@ #include "FDimTest.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/fdiml.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/generic/CMakeLists.txt b/libc/test/src/math/generic/CMakeLists.txt index bc2b5cea89d12..1fe7801941d5a 100644 --- a/libc/test/src/math/generic/CMakeLists.txt +++ b/libc/test/src/math/generic/CMakeLists.txt @@ -6,7 +6,6 @@ add_fp_unittest( SRCS ../ceil_test.cpp DEPENDS - libc.include.math libc.src.math.generic.ceil ) @@ -18,7 +17,6 @@ add_fp_unittest( SRCS ../ceilf_test.cpp DEPENDS - libc.include.math libc.src.math.generic.ceilf ) @@ -30,7 +28,6 @@ add_fp_unittest( SRCS ../ceill_test.cpp DEPENDS - libc.include.math libc.src.math.generic.ceill ) diff --git a/libc/test/src/math/ilogb_test.cpp b/libc/test/src/math/ilogb_test.cpp index 45756ffa3d9a7..c8daf2e0adafe 100644 --- a/libc/test/src/math/ilogb_test.cpp +++ b/libc/test/src/math/ilogb_test.cpp @@ -8,7 +8,7 @@ #include "ILogbTest.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/ManipulationFunctions.h" #include "src/math/ilogb.h" diff --git a/libc/test/src/math/ilogbf_test.cpp b/libc/test/src/math/ilogbf_test.cpp index ff19dd145a198..87a2789f6c110 100644 --- a/libc/test/src/math/ilogbf_test.cpp +++ b/libc/test/src/math/ilogbf_test.cpp @@ -8,7 +8,7 @@ #include "ILogbTest.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/ManipulationFunctions.h" #include "src/math/ilogbf.h" diff --git a/libc/test/src/math/ilogbl_test.cpp b/libc/test/src/math/ilogbl_test.cpp index b2c5246669946..042a803b024a7 100644 --- a/libc/test/src/math/ilogbl_test.cpp +++ b/libc/test/src/math/ilogbl_test.cpp @@ -8,7 +8,7 @@ #include "ILogbTest.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/ManipulationFunctions.h" #include "src/math/ilogbl.h" diff --git a/libc/test/src/math/log10_test.cpp b/libc/test/src/math/log10_test.cpp index dc4ac895546c4..fd9a615ca87f7 100644 --- a/libc/test/src/math/log10_test.cpp +++ b/libc/test/src/math/log10_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/log10.h" diff --git a/libc/test/src/math/log10f_test.cpp b/libc/test/src/math/log10f_test.cpp index f8a137e44c351..4ba118455df4d 100644 --- a/libc/test/src/math/log10f_test.cpp +++ b/libc/test/src/math/log10f_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include 
"include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/log10f.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/log1p_test.cpp b/libc/test/src/math/log1p_test.cpp index 975fb8e05c35e..47dfa406ec257 100644 --- a/libc/test/src/math/log1p_test.cpp +++ b/libc/test/src/math/log1p_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/log1p.h" @@ -34,7 +34,7 @@ TEST_F(LlvmLibcLog1pTest, SpecialNumbers) { } TEST_F(LlvmLibcLog1pTest, TrickyInputs) { - constexpr int N = 41; + constexpr int N = 42; constexpr uint64_t INPUTS[N] = { 0x3ff0000000000000, // x = 1.0 0x4024000000000000, // x = 10.0 @@ -65,6 +65,7 @@ TEST_F(LlvmLibcLog1pTest, TrickyInputs) { 0x3c90c40cef04efb5, 0x449d2ccad399848e, 0x4aa12ccdffd9d2ec, 0x5656f070b92d36ce, 0x6db06dcb74f76bcc, 0x7f1954e72ffd4596, 0x5671e2f1628093e4, 0x73dac56e2bf1a951, 0x8001bc6879ea14c5, + 0x45ca5f497ec291df, // x = 0x1.a5f497ec291dfp+93 }; for (int i = 0; i < N; ++i) { double x = FPBits(INPUTS[i]).get_val(); diff --git a/libc/test/src/math/log1pf_test.cpp b/libc/test/src/math/log1pf_test.cpp index a1108fee48196..db0772d3c8b87 100644 --- a/libc/test/src/math/log1pf_test.cpp +++ b/libc/test/src/math/log1pf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/log1pf.h" diff --git a/libc/test/src/math/log2_test.cpp b/libc/test/src/math/log2_test.cpp index 8765279005798..9992c1340e99d 100644 --- a/libc/test/src/math/log2_test.cpp +++ b/libc/test/src/math/log2_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/log2.h" diff --git a/libc/test/src/math/log2f_test.cpp b/libc/test/src/math/log2f_test.cpp index c05b6b93cff77..24b51adac94d1 100644 --- a/libc/test/src/math/log2f_test.cpp +++ b/libc/test/src/math/log2f_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/log2f.h" diff --git a/libc/test/src/math/log_test.cpp b/libc/test/src/math/log_test.cpp index 06a0dc574be51..de1e59579419e 100644 --- a/libc/test/src/math/log_test.cpp +++ b/libc/test/src/math/log_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/log.h" diff --git a/libc/test/src/math/logf_test.cpp b/libc/test/src/math/logf_test.cpp index 1ab480744ba59..28a171d540665 100644 --- a/libc/test/src/math/logf_test.cpp +++ b/libc/test/src/math/logf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include 
"include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/logf.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/powf_test.cpp b/libc/test/src/math/powf_test.cpp index cf674ecf8f99e..69135593cd32c 100644 --- a/libc/test/src/math/powf_test.cpp +++ b/libc/test/src/math/powf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/powf.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/roundeven_test.cpp b/libc/test/src/math/roundeven_test.cpp new file mode 100644 index 0000000000000..cd1a7bf2429fb --- /dev/null +++ b/libc/test/src/math/roundeven_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for roundeven -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundEvenTest.h" + +#include "src/math/roundeven.h" + +LIST_ROUNDEVEN_TESTS(double, LIBC_NAMESPACE::roundeven) diff --git a/libc/test/src/math/roundevenf_test.cpp b/libc/test/src/math/roundevenf_test.cpp new file mode 100644 index 0000000000000..68dff9b3eca9d --- /dev/null +++ b/libc/test/src/math/roundevenf_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for roundevenf ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundEvenTest.h" + +#include "src/math/roundevenf.h" + +LIST_ROUNDEVEN_TESTS(float, LIBC_NAMESPACE::roundevenf) diff --git a/libc/test/src/math/roundevenl_test.cpp b/libc/test/src/math/roundevenl_test.cpp new file mode 100644 index 0000000000000..f4031bd65ec21 --- /dev/null +++ b/libc/test/src/math/roundevenl_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for roundevenl ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundEvenTest.h" + +#include "src/math/roundevenl.h" + +LIST_ROUNDEVEN_TESTS(long double, LIBC_NAMESPACE::roundevenl) diff --git a/libc/test/src/math/sin_test.cpp b/libc/test/src/math/sin_test.cpp index fa1c5370c30fb..0171b79810d4e 100644 --- a/libc/test/src/math/sin_test.cpp +++ b/libc/test/src/math/sin_test.cpp @@ -12,7 +12,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" using LlvmLibcSinTest = LIBC_NAMESPACE::testing::FPTest; diff --git a/libc/test/src/math/sincosf_test.cpp b/libc/test/src/math/sincosf_test.cpp index a7372fd53b319..7c359b345f4c3 100644 --- a/libc/test/src/math/sincosf_test.cpp +++ b/libc/test/src/math/sincosf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/sincosf.h" diff --git a/libc/test/src/math/sinf_test.cpp b/libc/test/src/math/sinf_test.cpp index a3c5384e3e626..6a8f8f4ee4288 100644 --- a/libc/test/src/math/sinf_test.cpp +++ b/libc/test/src/math/sinf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/sinf.h" diff --git a/libc/test/src/math/sinhf_test.cpp b/libc/test/src/math/sinhf_test.cpp index bea976055dbdf..cc0552f728947 100644 --- a/libc/test/src/math/sinhf_test.cpp +++ b/libc/test/src/math/sinhf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/CPP/array.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 4ac1842cf5fae..22c59c97f6c7f 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -62,7 +62,6 @@ add_fp_unittest( HDRS FAbsTest.h DEPENDS - libc.include.math libc.src.math.fabs libc.src.__support.FPUtil.fp_bits ) @@ -76,7 +75,6 @@ add_fp_unittest( HDRS FAbsTest.h DEPENDS - libc.include.math libc.src.math.fabsf libc.src.__support.FPUtil.fp_bits ) @@ -90,7 +88,6 @@ add_fp_unittest( HDRS FAbsTest.h DEPENDS - libc.include.math libc.src.math.fabsl libc.src.__support.FPUtil.fp_bits ) @@ -104,7 +101,6 @@ add_fp_unittest( HDRS FAbsTest.h DEPENDS - libc.include.math libc.src.math.fabsf128 libc.src.__support.FPUtil.fp_bits ) @@ -118,7 +114,6 @@ add_fp_unittest( HDRS TruncTest.h DEPENDS - libc.include.math libc.src.math.trunc libc.src.__support.FPUtil.fp_bits ) @@ -132,7 +127,6 @@ add_fp_unittest( HDRS TruncTest.h DEPENDS - libc.include.math libc.src.math.truncf libc.src.__support.FPUtil.fp_bits ) @@ -146,7 +140,6 @@ add_fp_unittest( HDRS TruncTest.h DEPENDS - libc.include.math libc.src.math.truncl libc.src.__support.FPUtil.fp_bits ) @@ -160,7 +153,6 @@ add_fp_unittest( HDRS TruncTest.h DEPENDS - libc.include.math libc.src.math.truncf128 libc.src.__support.FPUtil.fp_bits ) @@ -174,7 +166,6 @@ add_fp_unittest( 
HDRS CanonicalizeTest.h DEPENDS - libc.include.math libc.src.math.canonicalize libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.fenv_impl @@ -190,7 +181,6 @@ add_fp_unittest( HDRS CanonicalizeTest.h DEPENDS - libc.include.math libc.src.math.canonicalizef libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.fenv_impl @@ -206,7 +196,6 @@ add_fp_unittest( HDRS CanonicalizeTest.h DEPENDS - libc.include.math libc.src.math.canonicalizef128 libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.fenv_impl @@ -222,7 +211,6 @@ add_fp_unittest( HDRS CanonicalizeTest.h DEPENDS - libc.include.math libc.src.math.canonicalizel libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.fenv_impl @@ -238,7 +226,6 @@ add_fp_unittest( HDRS CeilTest.h DEPENDS - libc.include.math libc.src.math.ceil libc.src.__support.FPUtil.fp_bits ) @@ -252,7 +239,6 @@ add_fp_unittest( HDRS CeilTest.h DEPENDS - libc.include.math libc.src.math.ceilf libc.src.__support.FPUtil.fp_bits ) @@ -266,7 +252,6 @@ add_fp_unittest( HDRS CeilTest.h DEPENDS - libc.include.math libc.src.math.ceill libc.src.__support.FPUtil.fp_bits ) @@ -280,7 +265,6 @@ add_fp_unittest( HDRS CeilTest.h DEPENDS - libc.include.math libc.src.math.ceilf128 libc.src.__support.FPUtil.fp_bits ) @@ -294,7 +278,6 @@ add_fp_unittest( HDRS FloorTest.h DEPENDS - libc.include.math libc.src.math.floor libc.src.__support.FPUtil.fp_bits ) @@ -308,7 +291,6 @@ add_fp_unittest( HDRS FloorTest.h DEPENDS - libc.include.math libc.src.math.floorf libc.src.__support.FPUtil.fp_bits ) @@ -322,7 +304,6 @@ add_fp_unittest( HDRS FloorTest.h DEPENDS - libc.include.math libc.src.math.floorl libc.src.__support.FPUtil.fp_bits ) @@ -336,7 +317,6 @@ add_fp_unittest( HDRS FloorTest.h DEPENDS - libc.include.math libc.src.math.floorf128 libc.src.__support.FPUtil.fp_bits ) @@ -350,7 +330,6 @@ add_fp_unittest( HDRS RoundTest.h DEPENDS - libc.include.math libc.src.math.round libc.src.__support.FPUtil.fp_bits ) @@ -364,7 +343,6 @@ add_fp_unittest( HDRS RoundTest.h DEPENDS - libc.include.math libc.src.math.roundf libc.src.__support.FPUtil.fp_bits ) @@ -378,7 +356,6 @@ add_fp_unittest( HDRS RoundTest.h DEPENDS - libc.include.math libc.src.math.roundl libc.src.__support.FPUtil.fp_bits ) @@ -392,11 +369,62 @@ add_fp_unittest( HDRS RoundTest.h DEPENDS - libc.include.math libc.src.math.roundf128 libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + roundeven_test + SUITE + libc-math-smoke-tests + SRCS + roundeven_test.cpp + HDRS + RoundEvenTest.h + DEPENDS + libc.src.math.roundeven + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + roundevenf_test + SUITE + libc-math-smoke-tests + SRCS + roundevenf_test.cpp + HDRS + RoundEvenTest.h + DEPENDS + libc.src.math.roundevenf + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + roundevenl_test + SUITE + libc-math-smoke-tests + SRCS + roundevenl_test.cpp + HDRS + RoundEvenTest.h + DEPENDS + libc.src.math.roundevenl + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( + roundevenf128_test + SUITE + libc-math-smoke-tests + SRCS + roundevenf128_test.cpp + HDRS + RoundEvenTest.h + DEPENDS + libc.src.math.roundevenf128 + libc.src.__support.FPUtil.fp_bits +) + add_fp_unittest( lround_test SUITE @@ -406,7 +434,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -424,7 +451,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept 
libc.src.fenv.feraiseexcept @@ -442,7 +468,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -460,7 +485,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -478,7 +502,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -496,7 +519,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -514,7 +536,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -532,7 +553,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.fenv.feclearexcept libc.src.fenv.feraiseexcept @@ -550,7 +570,6 @@ add_fp_unittest( HDRS RIntTest.h DEPENDS - libc.include.math libc.src.math.rint libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -565,7 +584,6 @@ add_fp_unittest( HDRS RIntTest.h DEPENDS - libc.include.math libc.src.math.rintf libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -580,7 +598,6 @@ add_fp_unittest( HDRS RIntTest.h DEPENDS - libc.include.math libc.src.math.rintl libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -595,7 +612,6 @@ add_fp_unittest( HDRS RIntTest.h DEPENDS - libc.include.math libc.src.math.rintf128 libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -610,7 +626,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.lrint libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -625,7 +640,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.lrintf libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -640,7 +654,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.lrintl libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -655,7 +668,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.lrintf128 libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -670,7 +682,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.llrint libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -685,7 +696,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.llrintf libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -700,7 +710,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.llrintl libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -715,7 +724,6 @@ add_fp_unittest( HDRS RoundToIntegerTest.h DEPENDS - libc.include.math libc.src.math.llrintf128 libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits @@ -729,7 +737,6 @@ add_fp_unittest( expf_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.expf libc.src.__support.FPUtil.fp_bits ) @@ -742,7 +749,6 @@ add_fp_unittest( exp_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.exp libc.src.__support.FPUtil.fp_bits ) @@ -755,7 +761,6 @@ add_fp_unittest( exp2f_test.cpp DEPENDS 
libc.src.errno.errno - libc.include.math libc.src.math.exp2f libc.src.__support.FPUtil.fp_bits ) @@ -768,7 +773,6 @@ add_fp_unittest( exp2_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.exp2 libc.src.__support.FPUtil.fp_bits ) @@ -792,7 +796,6 @@ add_fp_unittest( exp10f_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.exp10f libc.src.__support.FPUtil.fp_bits ) @@ -805,7 +808,6 @@ add_fp_unittest( exp10_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.exp10 libc.src.__support.FPUtil.fp_bits ) @@ -819,7 +821,6 @@ add_fp_unittest( HDRS CopySignTest.h DEPENDS - libc.include.math libc.src.math.copysign libc.src.__support.FPUtil.fp_bits ) @@ -833,7 +834,6 @@ add_fp_unittest( HDRS CopySignTest.h DEPENDS - libc.include.math libc.src.math.copysignf libc.src.__support.FPUtil.fp_bits ) @@ -847,7 +847,6 @@ add_fp_unittest( HDRS CopySignTest.h DEPENDS - libc.include.math libc.src.math.copysignl libc.src.__support.FPUtil.fp_bits ) @@ -861,7 +860,6 @@ add_fp_unittest( HDRS CopySignTest.h DEPENDS - libc.include.math libc.src.math.copysignf128 libc.src.__support.FPUtil.fp_bits ) @@ -1341,7 +1339,6 @@ add_fp_unittest( HDRS ModfTest.h DEPENDS - libc.include.math libc.src.math.modf libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.nearest_integer_operations @@ -1356,7 +1353,6 @@ add_fp_unittest( HDRS ModfTest.h DEPENDS - libc.include.math libc.src.math.modff libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.nearest_integer_operations @@ -1371,7 +1367,6 @@ add_fp_unittest( HDRS ModfTest.h DEPENDS - libc.include.math libc.src.math.modfl libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.nearest_integer_operations @@ -1386,7 +1381,6 @@ add_fp_unittest( HDRS ModfTest.h DEPENDS - libc.include.math libc.src.math.modff128 libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.nearest_integer_operations @@ -1977,7 +1971,6 @@ add_fp_unittest( SRCS sqrtf_test.cpp DEPENDS - libc.include.math libc.src.math.sqrtf libc.src.__support.FPUtil.fp_bits ) @@ -1989,7 +1982,6 @@ add_fp_unittest( SRCS sqrt_test.cpp DEPENDS - libc.include.math libc.src.math.sqrt libc.src.__support.FPUtil.fp_bits ) @@ -2001,7 +1993,6 @@ add_fp_unittest( SRCS sqrtl_test.cpp DEPENDS - libc.include.math libc.src.math.sqrtl libc.src.__support.FPUtil.fp_bits ) @@ -2013,7 +2004,6 @@ add_fp_unittest( SRCS sqrtf128_test.cpp DEPENDS - libc.include.math libc.src.math.sqrtf128 libc.src.__support.FPUtil.fp_bits ) @@ -2083,7 +2073,6 @@ add_fp_unittest( HDRS RemQuoTest.h DEPENDS - libc.include.math libc.src.math.remquof libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -2098,7 +2087,6 @@ add_fp_unittest( HDRS RemQuoTest.h DEPENDS - libc.include.math libc.src.math.remquo libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -2113,7 +2101,6 @@ add_fp_unittest( HDRS RemQuoTest.h DEPENDS - libc.include.math libc.src.math.remquol libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -2126,7 +2113,6 @@ add_fp_unittest( SRCS hypotf_test.cpp DEPENDS - libc.include.math libc.src.math.hypotf libc.src.__support.FPUtil.fp_bits ) @@ -2138,7 +2124,6 @@ add_fp_unittest( SRCS hypot_test.cpp DEPENDS - libc.include.math libc.src.math.hypot libc.src.__support.FPUtil.fp_bits ) @@ -2150,7 +2135,6 @@ add_fp_unittest( SRCS nanf_test.cpp DEPENDS - libc.include.math libc.include.signal libc.src.math.nanf libc.src.__support.FPUtil.fp_bits @@ -2166,7 +2150,6 @@ 
add_fp_unittest( SRCS nan_test.cpp DEPENDS - libc.include.math libc.include.signal libc.src.math.nan libc.src.__support.FPUtil.fp_bits @@ -2182,7 +2165,6 @@ add_fp_unittest( SRCS nanl_test.cpp DEPENDS - libc.include.math libc.include.signal libc.src.math.nanl libc.src.__support.FPUtil.fp_bits @@ -2198,7 +2180,6 @@ add_fp_unittest( SRCS nanf128_test.cpp DEPENDS - libc.include.math libc.include.signal libc.src.math.nanf128 libc.src.__support.FPUtil.fp_bits @@ -2216,7 +2197,6 @@ add_fp_unittest( HDRS NextAfterTest.h DEPENDS - libc.include.math libc.src.math.nextafter libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -2231,7 +2211,6 @@ add_fp_unittest( HDRS NextAfterTest.h DEPENDS - libc.include.math libc.src.math.nextafterf libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -2246,7 +2225,6 @@ add_fp_unittest( HDRS NextAfterTest.h DEPENDS - libc.include.math libc.src.math.nextafterl libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -2261,7 +2239,6 @@ add_fp_unittest( HDRS NextAfterTest.h DEPENDS - libc.include.math libc.src.math.nextafterf128 libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -2278,7 +2255,6 @@ if(NOT LIBC_TARGET_OS_IS_GPU) HDRS NextTowardTest.h DEPENDS - libc.include.math libc.src.math.nexttoward libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -2293,7 +2269,6 @@ if(NOT LIBC_TARGET_OS_IS_GPU) HDRS NextTowardTest.h DEPENDS - libc.include.math libc.src.math.nexttowardf libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -2309,7 +2284,6 @@ add_fp_unittest( HDRS NextTowardTest.h DEPENDS - libc.include.math libc.src.math.nexttowardl libc.src.__support.FPUtil.basic_operations libc.src.__support.FPUtil.fp_bits @@ -2324,7 +2298,6 @@ add_fp_unittest( HDRS NextDownTest.h DEPENDS - libc.include.math libc.src.math.nextdown libc.src.__support.FPUtil.manipulation_functions ) @@ -2338,7 +2311,6 @@ add_fp_unittest( HDRS NextDownTest.h DEPENDS - libc.include.math libc.src.math.nextdownf libc.src.__support.FPUtil.manipulation_functions ) @@ -2352,7 +2324,6 @@ add_fp_unittest( HDRS NextDownTest.h DEPENDS - libc.include.math libc.src.math.nextdownl libc.src.__support.FPUtil.manipulation_functions ) @@ -2366,7 +2337,6 @@ add_fp_unittest( HDRS NextDownTest.h DEPENDS - libc.include.math libc.src.math.nextdownf128 libc.src.__support.FPUtil.manipulation_functions ) @@ -2380,7 +2350,6 @@ add_fp_unittest( HDRS NextUpTest.h DEPENDS - libc.include.math libc.src.math.nextup libc.src.__support.FPUtil.manipulation_functions ) @@ -2394,7 +2363,6 @@ add_fp_unittest( HDRS NextUpTest.h DEPENDS - libc.include.math libc.src.math.nextupf libc.src.__support.FPUtil.manipulation_functions ) @@ -2408,7 +2376,6 @@ add_fp_unittest( HDRS NextUpTest.h DEPENDS - libc.include.math libc.src.math.nextupl libc.src.__support.FPUtil.manipulation_functions ) @@ -2422,7 +2389,6 @@ add_fp_unittest( HDRS NextUpTest.h DEPENDS - libc.include.math libc.src.math.nextupf128 libc.src.__support.FPUtil.manipulation_functions ) @@ -2436,7 +2402,6 @@ add_fp_unittest( SRCS fmaf_test.cpp DEPENDS - libc.include.math libc.src.math.fmaf libc.src.__support.FPUtil.fp_bits FLAGS @@ -2450,7 +2415,6 @@ add_fp_unittest( SRCS fma_test.cpp DEPENDS - libc.include.math libc.src.math.fma libc.src.__support.FPUtil.fp_bits ) @@ -2463,7 +2427,6 @@ add_fp_unittest( expm1f_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.expm1f libc.src.__support.FPUtil.fp_bits ) @@ 
-2476,7 +2439,6 @@ add_fp_unittest( expm1_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.expm1 libc.src.__support.FPUtil.fp_bits ) @@ -2489,7 +2451,6 @@ add_fp_unittest( log_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log libc.src.__support.FPUtil.fp_bits ) @@ -2502,7 +2463,6 @@ add_fp_unittest( logf_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.logf libc.src.__support.FPUtil.fp_bits ) @@ -2515,7 +2475,6 @@ add_fp_unittest( log2_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log2 libc.src.__support.FPUtil.fp_bits ) @@ -2528,7 +2487,6 @@ add_fp_unittest( log2f_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log2f libc.src.__support.FPUtil.fp_bits ) @@ -2541,7 +2499,6 @@ add_fp_unittest( log10_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log10 libc.src.__support.FPUtil.fp_bits ) @@ -2554,7 +2511,6 @@ add_fp_unittest( log10f_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log10f libc.src.__support.FPUtil.fp_bits ) @@ -2567,7 +2523,6 @@ add_fp_unittest( log1p_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log1p libc.src.__support.FPUtil.fp_bits ) @@ -2580,7 +2535,6 @@ add_fp_unittest( log1pf_test.cpp DEPENDS libc.src.errno.errno - libc.include.math libc.src.math.log1pf libc.src.__support.FPUtil.fp_bits ) @@ -2594,7 +2548,6 @@ add_fp_unittest( HDRS FModTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.math.fmodf libc.src.__support.FPUtil.basic_operations @@ -2612,7 +2565,6 @@ add_fp_unittest( HDRS FModTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.math.fmod libc.src.__support.FPUtil.basic_operations @@ -2630,7 +2582,6 @@ add_fp_unittest( HDRS FModTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.math.fmodl libc.src.__support.FPUtil.basic_operations @@ -2648,7 +2599,6 @@ add_fp_unittest( HDRS FModTest.h DEPENDS - libc.include.math libc.src.errno.errno libc.src.math.fmodf128 libc.src.__support.FPUtil.basic_operations @@ -2787,7 +2737,6 @@ add_fp_unittest( HDRS ScalbnTest.h DEPENDS - libc.include.math libc.src.math.scalbn libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.normal_float @@ -2802,7 +2751,6 @@ add_fp_unittest( HDRS ScalbnTest.h DEPENDS - libc.include.math libc.src.math.scalbnf libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.normal_float @@ -2817,7 +2765,6 @@ add_fp_unittest( HDRS ScalbnTest.h DEPENDS - libc.include.math libc.src.math.scalbnl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.normal_float @@ -2830,7 +2777,6 @@ add_fp_unittest( SRCS erff_test.cpp DEPENDS - libc.include.math libc.src.math.erff libc.src.__support.FPUtil.fp_bits ) @@ -2842,7 +2788,6 @@ add_fp_unittest( SRCS powf_test.cpp DEPENDS - libc.include.math libc.src.math.powf libc.src.__support.FPUtil.fp_bits ) diff --git a/libc/test/src/math/smoke/CanonicalizeTest.h b/libc/test/src/math/smoke/CanonicalizeTest.h index 4361f7d8ac7ab..ab45e0eb8e94d 100644 --- a/libc/test/src/math/smoke/CanonicalizeTest.h +++ b/libc/test/src/math/smoke/CanonicalizeTest.h @@ -14,7 +14,7 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #define TEST_SPECIAL(x, y, expected, expected_exception) \ EXPECT_EQ(expected, f(&x, &y)); \ diff --git a/libc/test/src/math/smoke/CeilTest.h b/libc/test/src/math/smoke/CeilTest.h index ec70258fddec1..70e441a849cb4 100644 --- 
a/libc/test/src/math/smoke/CeilTest.h +++ b/libc/test/src/math/smoke/CeilTest.h @@ -12,7 +12,7 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" template <typename T> class CeilTest : public LIBC_NAMESPACE::testing::Test { diff --git a/libc/test/src/math/smoke/CopySignTest.h b/libc/test/src/math/smoke/CopySignTest.h index 70a6a419e0a03..fa9da91920f8d 100644 --- a/libc/test/src/math/smoke/CopySignTest.h +++ b/libc/test/src/math/smoke/CopySignTest.h @@ -12,7 +12,7 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" template <typename T> class CopySignTest : public LIBC_NAMESPACE::testing::Test { diff --git a/libc/test/src/math/smoke/FAbsTest.h b/libc/test/src/math/smoke/FAbsTest.h index 9309c2ada4a11..0c8ca95ba0f7b 100644 --- a/libc/test/src/math/smoke/FAbsTest.h +++ b/libc/test/src/math/smoke/FAbsTest.h @@ -12,7 +12,7 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" template <typename T> class FAbsTest : public LIBC_NAMESPACE::testing::Test { diff --git a/libc/test/src/math/smoke/FModTest.h b/libc/test/src/math/smoke/FModTest.h index 96ad299258a17..bc909987a161b 100644 --- a/libc/test/src/math/smoke/FModTest.h +++ b/libc/test/src/math/smoke/FModTest.h @@ -14,7 +14,7 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #define TEST_SPECIAL(x, y, expected, dom_err, expected_exception) \ EXPECT_FP_EQ(expected, f(x, y)); \ diff --git a/libc/test/src/math/smoke/FloorTest.h b/libc/test/src/math/smoke/FloorTest.h index 8886e8e751836..12944aa775626 100644 --- a/libc/test/src/math/smoke/FloorTest.h +++ b/libc/test/src/math/smoke/FloorTest.h @@ -12,7 +12,7 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" template <typename T> class FloorTest : public LIBC_NAMESPACE::testing::Test { diff --git a/libc/test/src/math/smoke/HypotTest.h b/libc/test/src/math/smoke/HypotTest.h index 80816033f28fe..a1b8f8a7fafa5 100644 --- a/libc/test/src/math/smoke/HypotTest.h +++ b/libc/test/src/math/smoke/HypotTest.h @@ -13,7 +13,7 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" template <typename T> class HypotTestTemplate : public LIBC_NAMESPACE::testing::Test { diff --git a/libc/test/src/math/smoke/ModfTest.h b/libc/test/src/math/smoke/ModfTest.h index 107963665b835..65d61855c9f22 100644 --- a/libc/test/src/math/smoke/ModfTest.h +++ b/libc/test/src/math/smoke/ModfTest.h @@ -11,7 +11,7 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" template <typename T> class ModfTest : public LIBC_NAMESPACE::testing::Test { diff --git a/libc/test/src/math/smoke/NextAfterTest.h b/libc/test/src/math/smoke/NextAfterTest.h index 403ea6bd8df68..d9c50c8109d80 100644 --- a/libc/test/src/math/smoke/NextAfterTest.h +++ b/libc/test/src/math/smoke/NextAfterTest.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_NEXTAFTERTEST_H #define LLVM_LIBC_TEST_SRC_MATH_NEXTAFTERTEST_H -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/CPP/bit.h" #include 
"src/__support/CPP/type_traits.h" #include "src/__support/FPUtil/BasicOperations.h" diff --git a/libc/test/src/math/smoke/NextTowardTest.h b/libc/test/src/math/smoke/NextTowardTest.h index 0c2abf815c239..b6c1c8d1797da 100644 --- a/libc/test/src/math/smoke/NextTowardTest.h +++ b/libc/test/src/math/smoke/NextTowardTest.h @@ -9,14 +9,14 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_NEXTTOWARDTEST_H #define LLVM_LIBC_TEST_SRC_MATH_NEXTTOWARDTEST_H -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/fenv_macros.h" +#include "hdr/math_macros.h" #include "src/__support/CPP/bit.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FPBits.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include #define ASSERT_FP_EQ_WITH_EXCEPTION(result, expected, expected_exception) \ ASSERT_FP_EQ(result, expected); \ diff --git a/libc/test/src/math/smoke/RIntTest.h b/libc/test/src/math/smoke/RIntTest.h index 5a283a8bc0b53..cbed9a3b10baa 100644 --- a/libc/test/src/math/smoke/RIntTest.h +++ b/libc/test/src/math/smoke/RIntTest.h @@ -14,8 +14,8 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" -#include +#include "hdr/fenv_macros.h" +#include "hdr/math_macros.h" #include static constexpr int ROUNDING_MODES[4] = {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, diff --git a/libc/test/src/math/smoke/RemQuoTest.h b/libc/test/src/math/smoke/RemQuoTest.h index cf56b1d6460fe..7df537d8b2063 100644 --- a/libc/test/src/math/smoke/RemQuoTest.h +++ b/libc/test/src/math/smoke/RemQuoTest.h @@ -9,7 +9,7 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_REMQUOTEST_H #define LLVM_LIBC_TEST_SRC_MATH_REMQUOTEST_H -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FPBits.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/smoke/RoundEvenTest.h b/libc/test/src/math/smoke/RoundEvenTest.h new file mode 100644 index 0000000000000..e168d57bdbf3c --- /dev/null +++ b/libc/test/src/math/smoke/RoundEvenTest.h @@ -0,0 +1,72 @@ +//===-- Utility class to test roundeven[f|l] --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
diff --git a/libc/test/src/math/smoke/RoundEvenTest.h b/libc/test/src/math/smoke/RoundEvenTest.h new file mode 100644 index 0000000000000..e168d57bdbf3c --- /dev/null +++ b/libc/test/src/math/smoke/RoundEvenTest.h @@ -0,0 +1,72 @@ +//===-- Utility class to test roundeven[f|l] --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_SRC_MATH_SMOKE_ROUNDEVENTEST_H +#define LLVM_LIBC_TEST_SRC_MATH_SMOKE_ROUNDEVENTEST_H + +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +#include "hdr/math_macros.h" + +template <typename T> +class RoundEvenTest : public LIBC_NAMESPACE::testing::Test { + + DECLARE_SPECIAL_CONSTANTS(T) + +public: + typedef T (*RoundEvenFunc)(T); + + void testSpecialNumbers(RoundEvenFunc func) { + EXPECT_FP_EQ(zero, func(zero)); + EXPECT_FP_EQ(neg_zero, func(neg_zero)); + + EXPECT_FP_EQ(inf, func(inf)); + EXPECT_FP_EQ(neg_inf, func(neg_inf)); + + EXPECT_FP_EQ(aNaN, func(aNaN)); + } + + void testRoundedNumbers(RoundEvenFunc func) { + EXPECT_FP_EQ(T(1.0), func(T(1.0))); + EXPECT_FP_EQ(T(-1.0), func(T(-1.0))); + EXPECT_FP_EQ(T(10.0), func(T(10.0))); + EXPECT_FP_EQ(T(-10.0), func(T(-10.0))); + EXPECT_FP_EQ(T(1234.0), func(T(1234.0))); + EXPECT_FP_EQ(T(-1234.0), func(T(-1234.0))); + } + + void testFractions(RoundEvenFunc func) { + EXPECT_FP_EQ(T(0.0), func(T(0.5))); + EXPECT_FP_EQ(T(-0.0), func(T(-0.5))); + EXPECT_FP_EQ(T(0.0), func(T(0.115))); + EXPECT_FP_EQ(T(-0.0), func(T(-0.115))); + EXPECT_FP_EQ(T(1.0), func(T(0.715))); + EXPECT_FP_EQ(T(-1.0), func(T(-0.715))); + EXPECT_FP_EQ(T(2.0), func(T(1.5))); + EXPECT_FP_EQ(T(-2.0), func(T(-1.5))); + EXPECT_FP_EQ(T(2.0), func(T(1.75))); + EXPECT_FP_EQ(T(-2.0), func(T(-1.75))); + EXPECT_FP_EQ(T(10.0), func(T(10.50))); + EXPECT_FP_EQ(T(-10.0), func(T(-10.50))); + EXPECT_FP_EQ(T(11.0), func(T(10.65))); + EXPECT_FP_EQ(T(-11.0), func(T(-10.65))); + EXPECT_FP_EQ(T(1234.0), func(T(1234.50))); + EXPECT_FP_EQ(T(-1234.0), func(T(-1234.50))); + EXPECT_FP_EQ(T(1236.0), func(T(1235.50))); + EXPECT_FP_EQ(T(-1236.0), func(T(-1235.50))); + } +}; + +#define LIST_ROUNDEVEN_TESTS(T, func) \ + using LlvmLibcRoundEvenTest = RoundEvenTest<T>; \ + TEST_F(LlvmLibcRoundEvenTest, SpecialNumbers) { testSpecialNumbers(&func); } \ + TEST_F(LlvmLibcRoundEvenTest, RoundedNumbers) { testRoundedNumbers(&func); } \ + TEST_F(LlvmLibcRoundEvenTest, Fractions) { testFractions(&func); } + +#endif // LLVM_LIBC_TEST_SRC_MATH_SMOKE_ROUNDEVENTEST_H
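The Fractions table above encodes roundeven's defining property: halfway cases round to the nearest even integer rather than away from zero, so 0.5 rounds to 0.0, 1234.5 stays at 1234.0, but 1235.5 goes up to 1236.0. The same tie-breaking can be reproduced with std::nearbyint under the default rounding mode, which makes for a handy cross-check (a standalone sketch, not part of the patch):

    #include <cfenv>
    #include <cmath>
    #include <cstdio>

    int main() {
      std::fesetround(FE_TONEAREST); // round-to-nearest, ties-to-even
      std::printf("%g\n", std::nearbyint(0.5));    // 0, where round() would give 1
      std::printf("%g\n", std::nearbyint(1.5));    // 2
      std::printf("%g\n", std::nearbyint(2.5));    // 2, tie goes to the even neighbor
      std::printf("%g\n", std::nearbyint(1235.5)); // 1236, matches the table above
      return 0;
    }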
"include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" template class SqrtTest : public LIBC_NAMESPACE::testing::Test { diff --git a/libc/test/src/math/smoke/TruncTest.h b/libc/test/src/math/smoke/TruncTest.h index 5612d27fef21d..c0fc87f9313b2 100644 --- a/libc/test/src/math/smoke/TruncTest.h +++ b/libc/test/src/math/smoke/TruncTest.h @@ -12,7 +12,7 @@ #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" template class TruncTest : public LIBC_NAMESPACE::testing::Test { diff --git a/libc/test/src/math/smoke/acosf_test.cpp b/libc/test/src/math/smoke/acosf_test.cpp index 573a2c39492f0..732c29548c60d 100644 --- a/libc/test/src/math/smoke/acosf_test.cpp +++ b/libc/test/src/math/smoke/acosf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/acosf.h" diff --git a/libc/test/src/math/smoke/acoshf_test.cpp b/libc/test/src/math/smoke/acoshf_test.cpp index f561f23eb99ad..2e94216ede364 100644 --- a/libc/test/src/math/smoke/acoshf_test.cpp +++ b/libc/test/src/math/smoke/acoshf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/acoshf.h" diff --git a/libc/test/src/math/smoke/asinf_test.cpp b/libc/test/src/math/smoke/asinf_test.cpp index 39d25e72c143b..c67d07711cd13 100644 --- a/libc/test/src/math/smoke/asinf_test.cpp +++ b/libc/test/src/math/smoke/asinf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/asinf.h" diff --git a/libc/test/src/math/smoke/asinhf_test.cpp b/libc/test/src/math/smoke/asinhf_test.cpp index 9637bfa539488..f95184676303d 100644 --- a/libc/test/src/math/smoke/asinhf_test.cpp +++ b/libc/test/src/math/smoke/asinhf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/asinhf.h" diff --git a/libc/test/src/math/smoke/atan2f_test.cpp b/libc/test/src/math/smoke/atan2f_test.cpp index ecac36b3a8c01..f81d140fefc5e 100644 --- a/libc/test/src/math/smoke/atan2f_test.cpp +++ b/libc/test/src/math/smoke/atan2f_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/atan2f.h" diff --git a/libc/test/src/math/smoke/atanf_test.cpp b/libc/test/src/math/smoke/atanf_test.cpp index abd9835d38a05..3800c2334b922 100644 --- a/libc/test/src/math/smoke/atanf_test.cpp +++ b/libc/test/src/math/smoke/atanf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" 
+#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/atanf.h" diff --git a/libc/test/src/math/smoke/atanhf_test.cpp b/libc/test/src/math/smoke/atanhf_test.cpp index 590a7ab60f04f..fc3e2dd9bc54d 100644 --- a/libc/test/src/math/smoke/atanhf_test.cpp +++ b/libc/test/src/math/smoke/atanhf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/atanhf.h" diff --git a/libc/test/src/math/smoke/cosf_test.cpp b/libc/test/src/math/smoke/cosf_test.cpp index 62132990ed547..7000fe2f2b07d 100644 --- a/libc/test/src/math/smoke/cosf_test.cpp +++ b/libc/test/src/math/smoke/cosf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/cosf.h" diff --git a/libc/test/src/math/smoke/coshf_test.cpp b/libc/test/src/math/smoke/coshf_test.cpp index 9d7ef505ae749..4d915b12dee16 100644 --- a/libc/test/src/math/smoke/coshf_test.cpp +++ b/libc/test/src/math/smoke/coshf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/CPP/array.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" diff --git a/libc/test/src/math/smoke/erff_test.cpp b/libc/test/src/math/smoke/erff_test.cpp index 24778f8d653ad..102126ee4e23f 100644 --- a/libc/test/src/math/smoke/erff_test.cpp +++ b/libc/test/src/math/smoke/erff_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/erff.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/smoke/exp10_test.cpp b/libc/test/src/math/smoke/exp10_test.cpp index fffffeb4c78ab..7154cb176038c 100644 --- a/libc/test/src/math/smoke/exp10_test.cpp +++ b/libc/test/src/math/smoke/exp10_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/exp10.h" diff --git a/libc/test/src/math/smoke/exp10f_test.cpp b/libc/test/src/math/smoke/exp10f_test.cpp index c0dcc12503324..9fb15ae75348b 100644 --- a/libc/test/src/math/smoke/exp10f_test.cpp +++ b/libc/test/src/math/smoke/exp10f_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/exp10f.h" diff --git a/libc/test/src/math/smoke/exp2_test.cpp b/libc/test/src/math/smoke/exp2_test.cpp index d362d32f678b2..a8ef6cfa7f6a1 100644 --- a/libc/test/src/math/smoke/exp2_test.cpp +++ b/libc/test/src/math/smoke/exp2_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// 
-#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/exp2.h" diff --git a/libc/test/src/math/smoke/exp2f_test.cpp b/libc/test/src/math/smoke/exp2f_test.cpp index e2989a6ec4d8a..3ef1a4ece4cf6 100644 --- a/libc/test/src/math/smoke/exp2f_test.cpp +++ b/libc/test/src/math/smoke/exp2f_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA #include "src/errno/libc_errno.h" diff --git a/libc/test/src/math/smoke/exp_test.cpp b/libc/test/src/math/smoke/exp_test.cpp index a2becc74f526f..2abaa7230831f 100644 --- a/libc/test/src/math/smoke/exp_test.cpp +++ b/libc/test/src/math/smoke/exp_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/exp.h" diff --git a/libc/test/src/math/smoke/expf_test.cpp b/libc/test/src/math/smoke/expf_test.cpp index 42710c5fa404e..b954125afd7bb 100644 --- a/libc/test/src/math/smoke/expf_test.cpp +++ b/libc/test/src/math/smoke/expf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/expf.h" diff --git a/libc/test/src/math/smoke/expm1_test.cpp b/libc/test/src/math/smoke/expm1_test.cpp index 07963ec2d34c8..d5f166d53a50e 100644 --- a/libc/test/src/math/smoke/expm1_test.cpp +++ b/libc/test/src/math/smoke/expm1_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/expm1.h" diff --git a/libc/test/src/math/smoke/expm1f_test.cpp b/libc/test/src/math/smoke/expm1f_test.cpp index 82e0b15463504..03b6e47b7c3bc 100644 --- a/libc/test/src/math/smoke/expm1f_test.cpp +++ b/libc/test/src/math/smoke/expm1f_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/expm1f.h" diff --git a/libc/test/src/math/smoke/log10_test.cpp b/libc/test/src/math/smoke/log10_test.cpp index 36d7534197648..37baf89128f2e 100644 --- a/libc/test/src/math/smoke/log10_test.cpp +++ b/libc/test/src/math/smoke/log10_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/log10.h" diff --git a/libc/test/src/math/smoke/log10f_test.cpp b/libc/test/src/math/smoke/log10f_test.cpp index 53e699417fb7c..721045d355da8 100644 --- a/libc/test/src/math/smoke/log10f_test.cpp +++ b/libc/test/src/math/smoke/log10f_test.cpp @@ -6,7 +6,7 
@@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/log10f.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/smoke/log1p_test.cpp b/libc/test/src/math/smoke/log1p_test.cpp index 5fe9c60f90abf..993dbf8001df8 100644 --- a/libc/test/src/math/smoke/log1p_test.cpp +++ b/libc/test/src/math/smoke/log1p_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/log1p.h" diff --git a/libc/test/src/math/smoke/log1pf_test.cpp b/libc/test/src/math/smoke/log1pf_test.cpp index e2fb2f057d2eb..6127cc89a7421 100644 --- a/libc/test/src/math/smoke/log1pf_test.cpp +++ b/libc/test/src/math/smoke/log1pf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/log1pf.h" diff --git a/libc/test/src/math/smoke/log2_test.cpp b/libc/test/src/math/smoke/log2_test.cpp index fbeba9527bcb7..b59767e668eb6 100644 --- a/libc/test/src/math/smoke/log2_test.cpp +++ b/libc/test/src/math/smoke/log2_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/log2.h" diff --git a/libc/test/src/math/smoke/log2f_test.cpp b/libc/test/src/math/smoke/log2f_test.cpp index 46906e78dcaf7..00bfb7c4abad6 100644 --- a/libc/test/src/math/smoke/log2f_test.cpp +++ b/libc/test/src/math/smoke/log2f_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/log2f.h" diff --git a/libc/test/src/math/smoke/log_test.cpp b/libc/test/src/math/smoke/log_test.cpp index b1e3905994800..fd527dee50847 100644 --- a/libc/test/src/math/smoke/log_test.cpp +++ b/libc/test/src/math/smoke/log_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/log.h" diff --git a/libc/test/src/math/smoke/logf_test.cpp b/libc/test/src/math/smoke/logf_test.cpp index 97b6bdde307b3..a272060276145 100644 --- a/libc/test/src/math/smoke/logf_test.cpp +++ b/libc/test/src/math/smoke/logf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/logf.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/smoke/nanf128_test.cpp b/libc/test/src/math/smoke/nanf128_test.cpp index 2a9f57de5b43b..652e35ccb53d7 100644 --- a/libc/test/src/math/smoke/nanf128_test.cpp +++ 
b/libc/test/src/math/smoke/nanf128_test.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/__support/FPUtil/FPBits.h" -#include "src/__support/UInt128.h" +#include "src/__support/uint128.h" #include "src/math/nanf128.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" diff --git a/libc/test/src/math/smoke/powf_test.cpp b/libc/test/src/math/smoke/powf_test.cpp index e9de1554ec614..98a532f3468c7 100644 --- a/libc/test/src/math/smoke/powf_test.cpp +++ b/libc/test/src/math/smoke/powf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/math/powf.h" #include "test/UnitTest/FPMatcher.h" diff --git a/libc/test/src/math/smoke/roundeven_test.cpp b/libc/test/src/math/smoke/roundeven_test.cpp new file mode 100644 index 0000000000000..e2d625fb0d315 --- /dev/null +++ b/libc/test/src/math/smoke/roundeven_test.cpp @@ -0,0 +1,12 @@ +//===-- Unittests for roundeven -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundEvenTest.h" +#include "src/math/roundeven.h" + +LIST_ROUNDEVEN_TESTS(double, LIBC_NAMESPACE::roundeven) diff --git a/libc/test/src/math/smoke/roundevenf128_test.cpp b/libc/test/src/math/smoke/roundevenf128_test.cpp new file mode 100644 index 0000000000000..a1fdc40d577e7 --- /dev/null +++ b/libc/test/src/math/smoke/roundevenf128_test.cpp @@ -0,0 +1,12 @@ +//===-- Unittests for roundevenf128 ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundEvenTest.h" +#include "src/math/roundevenf128.h" + +LIST_ROUNDEVEN_TESTS(float128, LIBC_NAMESPACE::roundevenf128) diff --git a/libc/test/src/math/smoke/roundevenf_test.cpp b/libc/test/src/math/smoke/roundevenf_test.cpp new file mode 100644 index 0000000000000..f033e26988fab --- /dev/null +++ b/libc/test/src/math/smoke/roundevenf_test.cpp @@ -0,0 +1,12 @@ +//===-- Unittests for roundevenf ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundEvenTest.h" +#include "src/math/roundevenf.h" + +LIST_ROUNDEVEN_TESTS(float, LIBC_NAMESPACE::roundevenf) diff --git a/libc/test/src/math/smoke/roundevenl_test.cpp b/libc/test/src/math/smoke/roundevenl_test.cpp new file mode 100644 index 0000000000000..be09f1283aa70 --- /dev/null +++ b/libc/test/src/math/smoke/roundevenl_test.cpp @@ -0,0 +1,12 @@ +//===-- Unittests for roundevenl ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RoundEvenTest.h" +#include "src/math/roundevenl.h" + +LIST_ROUNDEVEN_TESTS(long double, LIBC_NAMESPACE::roundevenl) diff --git a/libc/test/src/math/smoke/sincosf_test.cpp b/libc/test/src/math/smoke/sincosf_test.cpp index 5952b20fc5bff..8c35953240d8d 100644 --- a/libc/test/src/math/smoke/sincosf_test.cpp +++ b/libc/test/src/math/smoke/sincosf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/sincosf.h" diff --git a/libc/test/src/math/smoke/sinf_test.cpp b/libc/test/src/math/smoke/sinf_test.cpp index 9450895041874..9fc208dd545b2 100644 --- a/libc/test/src/math/smoke/sinf_test.cpp +++ b/libc/test/src/math/smoke/sinf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/sinf.h" diff --git a/libc/test/src/math/smoke/sinhf_test.cpp b/libc/test/src/math/smoke/sinhf_test.cpp index 0f005f752e698..1e052988eb286 100644 --- a/libc/test/src/math/smoke/sinhf_test.cpp +++ b/libc/test/src/math/smoke/sinhf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/CPP/array.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" diff --git a/libc/test/src/math/smoke/tanf_test.cpp b/libc/test/src/math/smoke/tanf_test.cpp index 68bf493f7e822..ab3f7c1aeb7e4 100644 --- a/libc/test/src/math/smoke/tanf_test.cpp +++ b/libc/test/src/math/smoke/tanf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/tanf.h" diff --git a/libc/test/src/math/smoke/tanhf_test.cpp b/libc/test/src/math/smoke/tanhf_test.cpp index f1ce8b40d43ac..ddae021d2bc42 100644 --- a/libc/test/src/math/smoke/tanhf_test.cpp +++ b/libc/test/src/math/smoke/tanhf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/tanhf.h" diff --git a/libc/test/src/math/tan_test.cpp b/libc/test/src/math/tan_test.cpp index 85174db9364e3..d813dccc38369 100644 --- a/libc/test/src/math/tan_test.cpp +++ b/libc/test/src/math/tan_test.cpp @@ -11,7 +11,7 @@ #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" using LlvmLibcTanTest = LIBC_NAMESPACE::testing::FPTest<double>; diff --git a/libc/test/src/math/tanf_test.cpp b/libc/test/src/math/tanf_test.cpp index d40bc44d6442f..e624d30f1e00f 100644 --- a/libc/test/src/math/tanf_test.cpp +++ b/libc/test/src/math/tanf_test.cpp @@ -6,7 +6,7 @@
// //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/tanf.h" diff --git a/libc/test/src/math/tanhf_test.cpp b/libc/test/src/math/tanhf_test.cpp index ef272b17d68ca..c34efe8d733be 100644 --- a/libc/test/src/math/tanhf_test.cpp +++ b/libc/test/src/math/tanhf_test.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" #include "src/__support/FPUtil/FPBits.h" #include "src/errno/libc_errno.h" #include "src/math/tanhf.h" diff --git a/libc/test/src/stdlib/strtold_test.cpp b/libc/test/src/stdlib/strtold_test.cpp index 2066e9635aba1..2c9f542930bf8 100644 --- a/libc/test/src/stdlib/strtold_test.cpp +++ b/libc/test/src/stdlib/strtold_test.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/__support/FPUtil/FPBits.h" -#include "src/__support/UInt128.h" +#include "src/__support/uint128.h" #include "src/errno/libc_errno.h" #include "src/stdlib/strtold.h" diff --git a/libc/test/src/sys/random/linux/CMakeLists.txt b/libc/test/src/sys/random/linux/CMakeLists.txt index 47d363c2c1746..737326cb158ce 100644 --- a/libc/test/src/sys/random/linux/CMakeLists.txt +++ b/libc/test/src/sys/random/linux/CMakeLists.txt @@ -7,7 +7,6 @@ add_libc_unittest( SRCS getrandom_test.cpp DEPENDS - libc.include.math libc.include.sys_random libc.src.errno.errno libc.src.math.fabs diff --git a/libc/test/utils/FPUtil/CMakeLists.txt b/libc/test/utils/FPUtil/CMakeLists.txt index 06b7042e20db5..7b6c294506b19 100644 --- a/libc/test/utils/FPUtil/CMakeLists.txt +++ b/libc/test/utils/FPUtil/CMakeLists.txt @@ -4,7 +4,7 @@ if((${LIBC_TARGET_OS} STREQUAL "linux") AND (${LIBC_TARGET_ARCHITECTURE_IS_X86}) SRCS x86_long_double_test.cpp DEPENDS - libc.include.math + libc.hdr.math_macros libc.src.__support.FPUtil.fp_bits ) endif() diff --git a/libc/test/utils/FPUtil/x86_long_double_test.cpp b/libc/test/utils/FPUtil/x86_long_double_test.cpp index 3b140c6c02667..87796b5c9f5ba 100644 --- a/libc/test/utils/FPUtil/x86_long_double_test.cpp +++ b/libc/test/utils/FPUtil/x86_long_double_test.cpp @@ -9,7 +9,7 @@ #include "src/__support/FPUtil/FPBits.h" #include "test/UnitTest/Test.h" -#include "include/llvm-libc-macros/math-macros.h" +#include "hdr/math_macros.h" using FPBits = LIBC_NAMESPACE::fputil::FPBits<long double>; diff --git a/libc/utils/MPFRWrapper/CMakeLists.txt b/libc/utils/MPFRWrapper/CMakeLists.txt index 2f2b0ac09df9a..6af6fd7707041 100644 --- a/libc/utils/MPFRWrapper/CMakeLists.txt +++ b/libc/utils/MPFRWrapper/CMakeLists.txt @@ -5,12 +5,6 @@ if(LIBC_TESTS_CAN_USE_MPFR) mpfr_inc.h ) target_compile_options(libcMPFRWrapper PRIVATE -O3) - if (LLVM_LIBC_FULL_BUILD) - # It is not easy to make libcMPFRWrapper a standalone library because gmp.h may unconditionally - # pull in some STL headers. As a result, targets using this library will need to link against - # C++ and unwind libraries. Since we are using MPFR anyway, we directly specifies the GNU toolchain. 
- target_link_libraries(libcMPFRWrapper PUBLIC -lstdc++ -lgcc_s) - endif() add_dependencies( libcMPFRWrapper libc.src.__support.CPP.string_view @@ -24,6 +18,6 @@ if(LIBC_TESTS_CAN_USE_MPFR) target_link_directories(libcMPFRWrapper PUBLIC ${LLVM_LIBC_MPFR_INSTALL_PATH}/lib) endif() target_link_libraries(libcMPFRWrapper PUBLIC LibcFPTestHelpers.unit LibcTest.unit mpfr gmp) -elseif(NOT LIBC_TARGET_OS_IS_GPU) +elseif(NOT LIBC_TARGET_OS_IS_GPU AND NOT LLVM_LIBC_FULL_BUILD) message(WARNING "Math tests using MPFR will be skipped.") endif() diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index eaa47da6bda23..18a8ac044a9bb 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -14,8 +14,7 @@ #include "src/__support/FPUtil/fpbits_str.h" #include "test/UnitTest/FPMatcher.h" -#include "include/llvm-libc-macros/math-macros.h" -#include +#include "hdr/math_macros.h" #include #include @@ -351,6 +350,16 @@ class MPFRNumber { return result; } + MPFRNumber roundeven() const { + MPFRNumber result(*this); +#if MPFR_VERSION_MAJOR >= 4 + mpfr_roundeven(result.value, value); +#else + mpfr_rint(result.value, value, MPFR_RNDN); +#endif + return result; + } + bool round_to_long(long &result) const { // We first calculate the rounded value. This way, when converting // to long using mpfr_get_si, the rounding direction of MPFR_RNDN @@ -634,6 +643,8 @@ unary_operation(Operation op, InputType input, unsigned int precision, return mpfrInput.mod_pi_over_4(); case Operation::Round: return mpfrInput.round(); + case Operation::RoundEven: + return mpfrInput.roundeven(); case Operation::Sin: return mpfrInput.sin(); case Operation::Sinh: diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index 0a41ac639798b..d2f73e2628e16 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -49,6 +49,7 @@ enum class Operation : int { ModPIOver2, ModPIOver4, Round, + RoundEven, Sin, Sinh, Sqrt,
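The new MPFRNumber::roundeven falls back to mpfr_rint with MPFR_RNDN when built against MPFR releases older than 4.0, where mpfr_roundeven does not exist yet; MPFR_RNDN is round-to-nearest with ties-to-even, so both branches compute the same result. A standalone check of the fallback path (assumes an installed libmpfr; link with -lmpfr -lgmp):

    #include <cstdio>
    #include <mpfr.h>

    int main() {
      mpfr_t x;
      mpfr_init2(x, 53);             // double precision
      mpfr_set_d(x, 2.5, MPFR_RNDN);
      mpfr_rint(x, x, MPFR_RNDN);    // the pre-4.0 spelling of roundeven
      std::printf("%g\n", mpfr_get_d(x, MPFR_RNDN)); // prints 2: the tie went to even
      mpfr_clear(x);
      return 0;
    }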
diff --git a/libc/utils/docgen/ctype.json b/libc/utils/docgen/ctype.json new file mode 100644 index 0000000000000..25eeb683846cf --- /dev/null +++ b/libc/utils/docgen/ctype.json @@ -0,0 +1,47 @@ +{ + "functions": { + "isalnum": { + "defined": "7.4.1.1" + }, + "isalpha": { + "defined": "7.4.1.2" + }, + "isblank": { + "defined": "7.4.1.3" + }, + "iscntrl": { + "defined": "7.4.1.4" + }, + "isdigit": { + "defined": "7.4.1.5" + }, + "isgraph": { + "defined": "7.4.1.6" + }, + "islower": { + "defined": "7.4.1.7" + }, + "isprint": { + "defined": "7.4.1.8" + }, + "ispunct": { + "defined": "7.4.1.9" + }, + "isspace": { + "defined": "7.4.1.10" + }, + "isupper": { + "defined": "7.4.1.11" + }, + "isxdigit": { + "defined": "7.4.1.12" + }, + "tolower" : { + "defined": "7.4.2.1" + }, + "toupper": { + "defined": "7.4.2.2" + } + } +} + diff --git a/libc/utils/docgen/docgen.py b/libc/utils/docgen/docgen.py new file mode 100755 index 0000000000000..7411b4506f082 --- /dev/null +++ b/libc/utils/docgen/docgen.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +# +# ====- Generate documentation for libc functions ------------*- python -*--==# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ==-------------------------------------------------------------------------==# +from argparse import ArgumentParser, Namespace +from pathlib import Path +from typing import Dict +import sys +import json + + +def load_api(hname: str) -> Dict: + p = Path(__file__).parent / Path(hname).with_suffix(".json") + api = p.read_text(encoding="utf-8") + return json.loads(api) + + +# TODO: we may need to get more sophisticated for less generic implementations. +# Does libc/src/{hname minus .h suffix}/{fname}.cpp exist? +def is_implemented(hname: str, fname: str) -> bool: + return Path( + Path(__file__).parent.parent.parent, + "src", + hname.removesuffix(".h"), # removesuffix, not rstrip: rstrip strips characters, so "math.h" would become "mat" + fname + ".cpp", + ).exists() + + +def print_functions(header: str, functions: Dict): + for key in sorted(functions.keys()): + print(f" * - {key}") + + if is_implemented(header, key): + print(" - |check|") + else: + print(" -") + + # defined is optional. Having any content is optional. + if functions[key] is not None and "defined" in functions[key]: + print(f' - {functions[key]["defined"]}') + else: + print(" -") + + +def print_header(header: str, api: Dict): + fns = f"{header} Functions" + print(fns) + print("=" * (len(fns))) + print( + f""" +.. list-table:: + :widths: auto + :align: center + :header-rows: 1 + + * - Function + - Implemented + - Standard""" + ) + # TODO: how do we want to signal implementation of macros? + print_functions(header, api["functions"]) + + +def parse_args() -> Namespace: + parser = ArgumentParser() + choices = [p.with_suffix(".h").name for p in Path(__file__).parent.glob("*.json")] + parser.add_argument("header_name", choices=choices) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + api = load_api(args.header_name) + + print_header(args.header_name, api) diff --git a/libc/utils/docgen/fenv.json b/libc/utils/docgen/fenv.json new file mode 100644 index 0000000000000..0af38b16b2d98 --- /dev/null +++ b/libc/utils/docgen/fenv.json @@ -0,0 +1,58 @@ +{ + "macros": [ + "__STDC_VERSION_FENV_H__" + ], + "functions": { + "feclearexcept": { + "defined": "7.6.4.1" + }, + "fegetexceptflag": { + "defined": "7.6.4.2" + }, + "feraiseexcept": { + "defined": "7.6.4.3" + }, + "fesetexcept": { + "defined": "7.6.4.4" + }, + "fesetexceptflag": { + "defined": "7.6.4.5" + }, + "fetestexceptflag": { + "defined": "7.6.4.6" + }, + "fetestexcept": { + "defined": "7.6.4.7" + }, + "fegetmode": { + "defined": "7.6.5.1" + }, + "fegetround": { + "defined": "7.6.5.2" + }, + "fe_dec_getround": { + "defined": "7.6.5.3" + }, + "fesetmode": { + "defined": "7.6.5.4" + }, + "fesetround": { + "defined": "7.6.5.5" + }, + "fe_dec_setround": { + "defined": "7.6.5.6" + }, + "fegetenv": { + "defined": "7.6.6.1" + }, + "feholdexcept": { + "defined": "7.6.6.2" + }, + "fesetenv": { + "defined": "7.6.6.3" + }, + "feupdateenv": { + "defined": "7.6.6.4" + } + } +} diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index a2ee3655da28b..eb293ad5e0bd5 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -64,6 +64,11 @@ set( LIBCLC_TEST_TARGETS_ALL set( LIBCLC_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) set( LIBCLC_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} ) +# mesa3d environment is only available since LLVM 4.0 +if( LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 4.0.0 ) + list( APPEND LIBCLC_TARGETS_ALL amdgcn-mesa-mesa3d ) +endif() + set( LIBCLC_TARGETS_TO_BUILD "all" CACHE STRING "Semicolon-separated list of targets to build, or
'all'." ) @@ -203,7 +210,7 @@ set( CMAKE_LLAsm_COMPILER ${LLVM_AS} ) set( CMAKE_LLAsm_ARCHIVE ${LLVM_LINK} ) # LLVM 13 enables standard includes by default -if( ${LLVM_VERSION} VERSION_GREATER "12.99.99" ) +if( LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 13.0.0 ) set( CMAKE_LLAsm_FLAGS "${CMAKE_LLAsm_FLAGS} -cl-no-stdinc" ) set( CMAKE_CLC_FLAGS "${CMAKE_CLC_FLAGS} -cl-no-stdinc" ) endif() @@ -321,7 +328,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) list( APPEND dirs amdgpu ) endif() - #nvptx is special + # nvptx is special if( ${ARCH} STREQUAL nvptx OR ${ARCH} STREQUAL nvptx64 ) set( DARCH ptx ) else() @@ -352,8 +359,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) endforeach() endforeach() - # Add the generated convert.cl here to prevent adding - # the one listed in SOURCES + # Add the generated convert.cl here to prevent adding the one listed in + # SOURCES if( NOT ${ARCH} STREQUAL "spirv" AND NOT ${ARCH} STREQUAL "spirv64" ) set( rel_files convert.cl ) set( objects convert.cl ) diff --git a/libcxx/benchmarks/CMakeLists.txt b/libcxx/benchmarks/CMakeLists.txt index 387e013afeb6c..928238c1ac69b 100644 --- a/libcxx/benchmarks/CMakeLists.txt +++ b/libcxx/benchmarks/CMakeLists.txt @@ -182,6 +182,7 @@ set(BENCHMARK_TESTS algorithms/make_heap.bench.cpp algorithms/make_heap_then_sort_heap.bench.cpp algorithms/min.bench.cpp + algorithms/minmax.bench.cpp algorithms/min_max_element.bench.cpp algorithms/mismatch.bench.cpp algorithms/pop_heap.bench.cpp diff --git a/libcxx/benchmarks/algorithms/minmax.bench.cpp b/libcxx/benchmarks/algorithms/minmax.bench.cpp new file mode 100644 index 0000000000000..b0ff7f91c1993 --- /dev/null +++ b/libcxx/benchmarks/algorithms/minmax.bench.cpp @@ -0,0 +1,68 @@ +#include +#include + +#include + +void run_sizes(auto benchmark) { + benchmark->Arg(1) + ->Arg(2) + ->Arg(3) + ->Arg(4) + ->Arg(5) + ->Arg(6) + ->Arg(7) + ->Arg(8) + ->Arg(9) + ->Arg(10) + ->Arg(11) + ->Arg(12) + ->Arg(13) + ->Arg(14) + ->Arg(15) + ->Arg(16) + ->Arg(17) + ->Arg(18) + ->Arg(19) + ->Arg(20) + ->Arg(21) + ->Arg(22) + ->Arg(23) + ->Arg(24) + ->Arg(25) + ->Arg(26) + ->Arg(27) + ->Arg(28) + ->Arg(29) + ->Arg(30) + ->Arg(31) + ->Arg(32) + ->Arg(64) + ->Arg(512) + ->Arg(1024) + ->Arg(4000) + ->Arg(4096) + ->Arg(5500) + ->Arg(64000) + ->Arg(65536) + ->Arg(70000); +} + +template +static void BM_std_minmax(benchmark::State& state) { + std::vector vec(state.range(), 3); + + for (auto _ : state) { + benchmark::DoNotOptimize(vec); + benchmark::DoNotOptimize(std::ranges::minmax(vec)); + } +} +BENCHMARK(BM_std_minmax)->Apply(run_sizes); +BENCHMARK(BM_std_minmax)->Apply(run_sizes); +BENCHMARK(BM_std_minmax)->Apply(run_sizes); +BENCHMARK(BM_std_minmax)->Apply(run_sizes); +BENCHMARK(BM_std_minmax)->Apply(run_sizes); +BENCHMARK(BM_std_minmax)->Apply(run_sizes); +BENCHMARK(BM_std_minmax)->Apply(run_sizes); +BENCHMARK(BM_std_minmax)->Apply(run_sizes); + +BENCHMARK_MAIN(); diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 014ac1c31e630..3197d2cd1b271 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -308,7 +308,7 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_associative_heterogeneous_erasure`` *unimplemented* ---------------------------------------------------------- ----------------- - ``__cpp_lib_bind_back`` *unimplemented* + ``__cpp_lib_bind_back`` ``202202L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_byteswap`` ``202110L`` 
 ---------------------------------------------------------- -----------------
@@ -398,8 +398,6 @@ Status
  ---------------------------------------------------------- -----------------
  ``__cpp_lib_atomic_min_max``                                *unimplemented*
  ---------------------------------------------------------- -----------------
- ``__cpp_lib_bind_back``                                     *unimplemented*
- ---------------------------------------------------------- -----------------
  ``__cpp_lib_bind_front``                                    ``202306L``
  ---------------------------------------------------------- -----------------
  ``__cpp_lib_bitset``                                        ``202306L``
diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst
index 2da9df54a5319..81c05b9112bd2 100644
--- a/libcxx/docs/ReleaseNotes/19.rst
+++ b/libcxx/docs/ReleaseNotes/19.rst
@@ -43,6 +43,8 @@ Implemented Papers
 - P2819R2 - Add ``tuple`` protocol to ``complex``
 - P2495R3 - Interfacing ``stringstream``\s with ``string_view``
 - P2867R2 - Remove Deprecated ``strstream``\s From C++26
+- P2872R3 - Remove ``wstring_convert`` From C++26
+- P3142R0 - Printing Blank Lines with ``println`` (as DR against C++23)
 - P2302R4 - ``std::ranges::contains``
 - P1659R3 - ``std::ranges::starts_with`` and ``std::ranges::ends_with``
@@ -55,8 +57,16 @@ Improvements and New Features
 - The ``std::mismatch`` algorithm has been optimized for integral types, which can lead to performance
   improvements of up to 40x.
 
+- The ``std::ranges::minmax`` algorithm has been optimized for integral types, resulting in a performance increase of
+  up to 100x.
+
 - The ``_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM`` macro has been added to make the declarations in ``<strstream>``
   available.
 
+- The ``_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT`` macro has been added to make the declarations in ``<locale>``
+  available.
+
+- The formatting library is updated to Unicode 15.1.0.
+
 Deprecations and Removals
 -------------------------
@@ -88,6 +98,11 @@ Deprecations and Removals
   libatomic is not available. If you are one such user, please reach out to the libc++ developers so we can
   collaborate on a path for supporting atomics properly on freestanding platforms.
 
+- LWG3430 disallows implicit conversion of the source arguments to ``std::filesystem::path`` when
+  constructing ``std::basic_*fstream``. This effectively removes the possibility to directly construct
+  a ``std::basic_*fstream`` from a ``std::basic_string_view``, an input iterator, or a C-string; instead
+  you can construct a temporary ``std::basic_string``. This change has been applied to C++17 and later.
+
 Upcoming Deprecations and Removals
 ----------------------------------
@@ -105,7 +120,10 @@ TODO
 
 ABI Affecting Changes
 ---------------------
-TODO
+
+- The optional POSIX macro ``ENODATA`` has been deprecated in C++ and POSIX 2017. The
+  ``random_device`` could throw a ``system_error`` with this value. It now
+  throws ``ENOMSG``.
 
 Build System Changes
diff --git a/libcxx/docs/Status/Cxx23.rst b/libcxx/docs/Status/Cxx23.rst
index 23d30c8128d71..b19ff4fdc0f79 100644
--- a/libcxx/docs/Status/Cxx23.rst
+++ b/libcxx/docs/Status/Cxx23.rst
@@ -43,6 +43,7 @@ Paper Status
    .. [#note-P0533R9] P0533R9: ``isfinite``, ``isinf``, ``isnan`` and ``isnormal`` are implemented.
    .. [#note-P1413R3] P1413R3: ``std::aligned_storage_t`` and ``std::aligned_union_t`` are marked deprecated, but
       clang doesn't issue a diagnostic for deprecated using template declarations.
+   .. [#note-P2387R3] P2387R3: ``bind_back`` only
    .. [#note-P2520R0] P2520R0: Libc++ implemented this paper as a DR in C++20 as well.
    ..
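To make the LWG3430 change described in the release notes above concrete, here is a minimal sketch; the function and file name are illustrative, not from the patch:

    #include <fstream>
    #include <string>
    #include <string_view>

    void open_config(std::string_view name) {
      // std::ifstream in(name);           // ill-formed after LWG3430: no implicit
      //                                   // conversion to std::filesystem::path
      std::ifstream in{std::string{name}}; // construct a temporary string instead
    }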
[#note-P2711R1] P2711R1: ``join_with_view`` hasn't been done yet since this type isn't implemented yet. .. [#note-P2770R0] P2770R0: ``join_with_view`` hasn't been done yet since this type isn't implemented yet. diff --git a/libcxx/docs/Status/Cxx23Issues.csv b/libcxx/docs/Status/Cxx23Issues.csv index 02297715cc2e2..a212d56685c00 100644 --- a/libcxx/docs/Status/Cxx23Issues.csv +++ b/libcxx/docs/Status/Cxx23Issues.csv @@ -64,7 +64,7 @@ `2818 `__,"``::std::`` everywhere rule needs tweaking","June 2021","|Nothing To Do|","" `2997 `__,"LWG 491 and the specification of ``{forward_,}list::unique``","June 2021","","" `3410 `__,"``lexicographical_compare_three_way`` is overspecified","June 2021","|Complete|","17.0","|spaceship|" -`3430 `__,"``std::fstream`` & co. should be constructible from string_view","June 2021","","" +`3430 `__,"``std::fstream`` & co. should be constructible from string_view","June 2021","|Complete|","19.0","" `3462 `__,"§[formatter.requirements]: Formatter requirements forbid use of ``fc.arg()``","June 2021","|Nothing To Do|","","|format|" `3481 `__,"``viewable_range`` mishandles lvalue move-only views","June 2021","Superseded by `P2415R2 `__","","|ranges|" `3506 `__,"Missing allocator-extended constructors for ``priority_queue``","June 2021","|Complete|","14.0" diff --git a/libcxx/docs/Status/Cxx23Papers.csv b/libcxx/docs/Status/Cxx23Papers.csv index 80547c5c1f3f5..065db97a0b0b1 100644 --- a/libcxx/docs/Status/Cxx23Papers.csv +++ b/libcxx/docs/Status/Cxx23Papers.csv @@ -45,7 +45,7 @@ "`P1413R3 `__","LWG","Deprecate ``std::aligned_storage`` and ``std::aligned_union``","February 2022","|Complete| [#note-P1413R3]_","" "`P2255R2 `__","LWG","A type trait to detect reference binding to temporary","February 2022","","" "`P2273R3 `__","LWG","Making ``std::unique_ptr`` constexpr","February 2022","|Complete|","16.0" -"`P2387R3 `__","LWG","Pipe support for user-defined range adaptors","February 2022","","","|ranges|" +"`P2387R3 `__","LWG","Pipe support for user-defined range adaptors","February 2022","|Partial| [#note-P2387R3]_","","|ranges|" "`P2440R1 `__","LWG","``ranges::iota``, ``ranges::shift_left`` and ``ranges::shift_right``","February 2022","","","|ranges|" "`P2441R2 `__","LWG","``views::join_with``","February 2022","|In Progress|","","|ranges|" "`P2442R1 `__","LWG","Windowing range adaptors: ``views::chunk`` and ``views::slide``","February 2022","","","|ranges|" diff --git a/libcxx/docs/Status/Cxx2cIssues.csv b/libcxx/docs/Status/Cxx2cIssues.csv index 8a4bf2ef62162..008f7418ab9c0 100644 --- a/libcxx/docs/Status/Cxx2cIssues.csv +++ b/libcxx/docs/Status/Cxx2cIssues.csv @@ -52,7 +52,7 @@ "`4023 `__","Preconditions of ``std::basic_streambuf::setg/setp``","Tokyo March 2024","","","" "`4025 `__","Move assignment operator of ``std::expected`` should not be conditionally deleted","Tokyo March 2024","","","" "`4030 `__","Clarify whether arithmetic expressions in ``[numeric.sat.func]`` are mathematical or C++","Tokyo March 2024","|Nothing To Do|","","" -"`4031 `__","``bad_expected_access`` member functions should be ``noexcept``","Tokyo March 2024","","","" +"`4031 `__","``bad_expected_access`` member functions should be ``noexcept``","Tokyo March 2024","|Complete|","16.0","" "`4035 `__","``single_view`` should provide ``empty``","Tokyo March 2024","","","|ranges|" "`4036 `__","``__alignof_is_defined`` is only implicitly specified in C++ and not yet deprecated","Tokyo March 2024","","","" "`4037 `__","Static data members of ``ctype_base`` are not yet required to be usable in constant 
expressions","Tokyo March 2024","","","" @@ -63,4 +63,5 @@ "`4054 `__","Repeating a ``repeat_view`` should repeat the view","Tokyo March 2024","","","|ranges|" "","","","","","" "`3343 `__","Ordering of calls to ``unlock()`` and ``notify_all()`` in Effects element of ``notify_all_at_thread_exit()`` should be reversed","Not Yet Adopted","|Complete|","16.0","" +"XXXX","","The sys_info range should be affected by save","Not Yet Adopted","|Complete|","19.0" "","","","","","" diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index a34dad5816807..fa11da62bc080 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -49,9 +49,9 @@ "`P2875R4 `__","LWG","Undeprecate ``polymorphic_allocator::destroy`` for C++26","Tokyo March 2024","|Complete|","15.0","" "`P2867R2 `__","LWG","Remove Deprecated ``strstreams`` From C++26","Tokyo March 2024","|Complete|","19.0","" "`P2869R4 `__","LWG","Remove Deprecated ``shared_ptr`` Atomic Access APIs from C++26","Tokyo March 2024","","","" -"`P2872R3 `__","LWG","Remove ``wstring_convert`` From C++26","Tokyo March 2024","","","" +"`P2872R3 `__","LWG","Remove ``wstring_convert`` From C++26","Tokyo March 2024","|Complete|","19.0","" "`P3107R5 `__","LWG","Permit an efficient implementation of ``std::print``","Tokyo March 2024","","","|format| |DR|" -"`P3142R0 `__","LWG","Printing Blank Lines with ``println``","Tokyo March 2024","","","|format|" +"`P3142R0 `__","LWG","Printing Blank Lines with ``println``","Tokyo March 2024","|Complete|","19.0","|format|" "`P2845R8 `__","LWG","Formatting of ``std::filesystem::path``","Tokyo March 2024","","","|format|" "`P0493R5 `__","LWG","Atomic minimum/maximum","Tokyo March 2024","","","" "`P2542R8 `__","LWG","``views::concat``","Tokyo March 2024","","","|ranges|" diff --git a/libcxx/docs/Status/FormatIssues.csv b/libcxx/docs/Status/FormatIssues.csv index b7acc76b8c11f..7da77def92daa 100644 --- a/libcxx/docs/Status/FormatIssues.csv +++ b/libcxx/docs/Status/FormatIssues.csv @@ -20,6 +20,10 @@ Number,Name,Standard,Assignee,Status,First released version "`P2905R2 `__","Runtime format strings","C++26 DR","Mark de Wever","|Complete|",18.0 "`P2918R2 `__","Runtime format strings II","C++26","Mark de Wever","|Complete|",18.0 "`P2909R4 `__","Fix formatting of code units as integers (Dude, where’s my ``char``?)","C++26 DR","Mark de Wever","|Complete|",18.0 +"`P3107R5 `__","Permit an efficient implementation of ``std::print``","C++26 DR","Mark de Wever","|In Progress|","" +"`P3142R0 `__","Printing Blank Lines with ``println``","C++26 DR","Hristo Hristov","|Complete|",19.0 +"`P2845R8 `__","Formatting of ``std::filesystem::path``","C++26","Mark de Wever","","" + `P1361 `_,"Integration of chrono with text formatting","C++20",Mark de Wever,|In Progress|, `P2372 `__,"Fixing locale handling in chrono formatters","C++20",Mark de Wever,|In Progress|, "`P2419R2 `__","Clarify handling of encodings in localized formatting of chrono types","C++23", diff --git a/libcxx/docs/Status/FormatPaper.csv b/libcxx/docs/Status/FormatPaper.csv index 82da54284c738..e9d407e79e253 100644 --- a/libcxx/docs/Status/FormatPaper.csv +++ b/libcxx/docs/Status/FormatPaper.csv @@ -2,12 +2,12 @@ Section,Description,Dependencies,Assignee,Status,First released version `P1361 `__ `P2372 `__,"Formatting chrono" `[time.syn] `_,"Formatter ``chrono::duration``",,Mark de Wever,|Complete|,16.0 `[time.syn] `_,"Formatter ``chrono::sys_time``",,Mark de Wever,|Complete|,17.0 -`[time.syn] `_,"Formatter ``chrono::utc_time``",A 
```` implementation,Not assigned,,, -`[time.syn] `_,"Formatter ``chrono::tai_time``",A ```` implementation,Not assigned,,, -`[time.syn] `_,"Formatter ``chrono::gps_time``",A ```` implementation,Not assigned,,, +`[time.syn] `_,"Formatter ``chrono::utc_time``",A ```` implementation,Mark de Wever,,, +`[time.syn] `_,"Formatter ``chrono::tai_time``",A ```` implementation,Mark de Wever,,, +`[time.syn] `_,"Formatter ``chrono::gps_time``",A ```` implementation,Mark de Wever,,, `[time.syn] `_,"Formatter ``chrono::file_time``",,Mark de Wever,|Complete|,17.0 `[time.syn] `_,"Formatter ``chrono::local_time``",,Mark de Wever,|Complete|,17.0 -`[time.syn] `_,"Formatter ``chrono::local-time-format-t``",A ```` implementation,Not assigned,,, +`[time.syn] `_,"Formatter ``chrono::local-time-format-t``",A ```` implementation,Mark de Wever,,, `[time.syn] `_,"Formatter ``chrono::day``",,Mark de Wever,|Complete|,16.0 `[time.syn] `_,"Formatter ``chrono::month``",,Mark de Wever,|Complete|,16.0 `[time.syn] `_,"Formatter ``chrono::year``",,Mark de Wever,|Complete|,16.0 @@ -28,25 +28,6 @@ Section,Description,Dependencies,Assignee,Status,First released version `[time.syn] `_,"Formatter ``chrono::local_info``",A ```` implementation,Mark de Wever,, `[time.syn] `_,"Formatter ``chrono::zoned_time``",A ```` implementation,Mark de Wever,, -`P2286R8 `__,"Formatting ranges" -`[format.syn] `_,"Concept ``formattable``",,Mark de Wever,|Complete|,16.0 -`[format.string.std] `_,"std-format-spec ``type`` debug",,Mark de Wever,|Complete|,16.0 -`[format.range] `_,"Formatting for ranges: sequences",,Mark de Wever,|Complete|,16.0 -`[format.range.fmtmap] `_,"Formatting for ranges: map",,Mark de Wever,|Complete|,16.0 -`[format.range.fmtset] `_,"Formatting for ranges: set",,Mark de Wever,|Complete|,16.0 -`[format.range] `_,"Formatting for ranges: container adaptors",,Mark de Wever,|Complete|,16.0 -`[format.range] `_,"Formatting for ranges: ``pair`` and ``tuple``",,Mark de Wever,|Complete|,16.0 -`[format.range] `_,"Formatting for ranges: ``vector``",,Mark de Wever,|Complete|,16.0 - -"`P2585R0 `__","Improving default container formatting" -`[format.range.fmtstr] `_,"Formatting for ranges: strings",,Mark de Wever,|Complete|,17.0 -`[format.range.fmtstr] `_,"Formatting for ranges: debug_strings",,Mark de Wever,|Complete|,17.0 - "`P2693R1 `__","Formatting ``thread::id`` and ``stacktrace``" `[thread.thread.id] `_,"Formatting ``thread::id``",,Mark de Wever,|Complete|,17.0 `[stacktrace.format] `_,"Formatting ``stacktrace``",A ```` implementation,Mark de Wever,, - -"`P2093R14 `__","Formatted output" -`[print.fun] `__,"Output to ``stdout``",,Mark de Wever,|Complete|, 17.0 -`[print.fun] `__,"Output to ``FILE*``",,Mark de Wever,|Complete|, 17.0 -`[ostream.formatted.print] `__,"Output to ``ostream``",,Mark de Wever,|Complete|, 18.0 diff --git a/libcxx/docs/UsingLibcxx.rst b/libcxx/docs/UsingLibcxx.rst index 8a1c747a25414..bc7817d14d04d 100644 --- a/libcxx/docs/UsingLibcxx.rst +++ b/libcxx/docs/UsingLibcxx.rst @@ -234,7 +234,7 @@ C++17 Specific Configuration Macros C++20 Specific Configuration Macros ----------------------------------- -**_LIBCPP_ENABLE_CXX20_REMOVED_SHARED_PTR_UNIQUE** +**_LIBCPP_ENABLE_CXX20_REMOVED_SHARED_PTR_UNIQUE**: This macro is used to re-enable the function ``std::shared_ptr<...>::unique()``. @@ -267,7 +267,7 @@ C++26 Specific Configuration Macros **_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT**: This macro is used to re-enable all named declarations in ````. 
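A hedged usage sketch for this family of opt-in macros, using the ``<strstream>`` variant as an example; the macro must be defined before any standard header is included, and the exact set of re-enabled names is whatever the documentation in this section states:

    // Must be defined before including any standard header.
    #define _LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM
    #include <strstream>

    int main() {
      std::strstream s; // usable again under the macro, despite C++26 removal
      s << 42;
      return 0;
    }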
-**_LIBCPP_ENABLE_CXX26_REMOVED_STRING_RESERVE** +**_LIBCPP_ENABLE_CXX26_REMOVED_STRING_RESERVE**: This macro is used to re-enable the function ``std::basic_string<...>::reserve()``. @@ -277,6 +277,10 @@ C++26 Specific Configuration Macros **_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM**: This macro is used to re-enable all named declarations in ````. +**_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT**: + This macro is used to re-enable the ``wstring_convert`` and ``wbuffer_convert`` + in ````. + Libc++ Extensions ================= diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst index db55c6f02a3dc..743f99297d17d 100644 --- a/libcxx/docs/index.rst +++ b/libcxx/docs/index.rst @@ -134,7 +134,7 @@ velocity, libc++ drops support for older compilers as newer ones are released. ============ =============== ========================== ===================== Compiler Versions Restrictions Support policy ============ =============== ========================== ===================== -Clang 16, 17, 18-git latest two stable releases per `LLVM's release page `_ and the development version +Clang 17, 18, 19-git latest two stable releases per `LLVM's release page `_ and the development version AppleClang 15 latest stable release per `Xcode's release page `_ Open XL 17.1 (AIX) latest stable release per `Open XL's documentation page `_ GCC 13 In C++11 or later only latest stable release per `GCC's release page `_ diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index db3980342f50b..a4a58a787ee9a 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -291,6 +291,7 @@ set(files __chrono/parser_std_format_spec.h __chrono/statically_widen.h __chrono/steady_clock.h + __chrono/sys_info.h __chrono/system_clock.h __chrono/time_point.h __chrono/time_zone.h @@ -395,6 +396,7 @@ set(files __format/formatter_pointer.h __format/formatter_string.h __format/formatter_tuple.h + __format/indic_conjunct_break_table.h __format/parser_std_format_spec.h __format/range_default_formatter.h __format/range_formatter.h @@ -738,6 +740,7 @@ set(files __type_traits/datasizeof.h __type_traits/decay.h __type_traits/dependent_type.h + __type_traits/desugars_to.h __type_traits/disjunction.h __type_traits/enable_if.h __type_traits/extent.h @@ -822,7 +825,6 @@ set(files __type_traits/nat.h __type_traits/negation.h __type_traits/noexcept_move_assign_container.h - __type_traits/operation_traits.h __type_traits/promote.h __type_traits/rank.h __type_traits/remove_all_extents.h diff --git a/libcxx/include/__algorithm/comp.h b/libcxx/include/__algorithm/comp.h index 3902f7560304a..a0fa88d6d2acd 100644 --- a/libcxx/include/__algorithm/comp.h +++ b/libcxx/include/__algorithm/comp.h @@ -10,8 +10,7 @@ #define _LIBCPP___ALGORITHM_COMP_H #include <__config> -#include <__type_traits/integral_constant.h> -#include <__type_traits/operation_traits.h> +#include <__type_traits/desugars_to.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -27,7 +26,7 @@ struct __equal_to { }; template -struct __desugars_to<__equal_tag, __equal_to, _Tp, _Up> : true_type {}; +inline const bool __desugars_to_v<__equal_tag, __equal_to, _Tp, _Up> = true; // The definition is required because __less is part of the ABI, but it's empty // because all comparisons should be transparent. 
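The variable template above replaces the old class-template trait. A self-contained sketch of the underlying idea, with simplified names rather than libc++'s actual internals:

    // Tag identifying an operation category.
    struct equal_tag {};

    // By default a predicate is not known to desugar to anything.
    template <class Tag, class Pred, class T, class U>
    inline constexpr bool desugars_to_v = false;

    struct my_equal {
      template <class T, class U>
      constexpr bool operator()(const T& t, const U& u) const { return t == u; }
    };

    // Opt-in: my_equal(t, u) is exactly t == u, so an algorithm may lower the
    // comparison to memcmp for trivially equality-comparable types.
    template <class T, class U>
    inline constexpr bool desugars_to_v<equal_tag, my_equal, T, U> = true;

    struct other_pred {};
    static_assert(desugars_to_v<equal_tag, my_equal, int, int>);
    static_assert(!desugars_to_v<equal_tag, other_pred, int, int>);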
@@ -42,6 +41,9 @@ struct __less { } }; +template +inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ALGORITHM_COMP_H diff --git a/libcxx/include/__algorithm/equal.h b/libcxx/include/__algorithm/equal.h index c76a16b47f5da..1341d9e4159ba 100644 --- a/libcxx/include/__algorithm/equal.h +++ b/libcxx/include/__algorithm/equal.h @@ -18,12 +18,11 @@ #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__string/constexpr_c_functions.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/enable_if.h> -#include <__type_traits/integral_constant.h> #include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_equality_comparable.h> #include <__type_traits/is_volatile.h> -#include <__type_traits/operation_traits.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -47,7 +46,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 boo template ::value && !is_volatile<_Tp>::value && + __enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, _Tp, _Up> && !is_volatile<_Tp>::value && !is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value, int> = 0> _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool @@ -87,7 +86,7 @@ template ::value && __is_identity<_Proj1>::value && + __enable_if_t<__desugars_to_v<__equal_tag, _Pred, _Tp, _Up> && __is_identity<_Proj1>::value && __is_identity<_Proj2>::value && !is_volatile<_Tp>::value && !is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value, int> = 0> diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h index 8abb273ac1782..4ada29eabc470 100644 --- a/libcxx/include/__algorithm/mismatch.h +++ b/libcxx/include/__algorithm/mismatch.h @@ -16,11 +16,11 @@ #include <__algorithm/unwrap_iter.h> #include <__config> #include <__functional/identity.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/invoke.h> #include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_equality_comparable.h> #include <__type_traits/is_integral.h> -#include <__type_traits/operation_traits.h> #include <__utility/move.h> #include <__utility/pair.h> #include <__utility/unreachable.h> @@ -59,7 +59,7 @@ template ::value && __desugars_to<__equal_tag, _Pred, _Tp, _Tp>::value && + __enable_if_t::value && __desugars_to_v<__equal_tag, _Pred, _Tp, _Tp> && __is_identity<_Proj1>::value && __is_identity<_Proj2>::value, int> = 0> _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h index 14a0d76741d4c..376abd39fa36e 100644 --- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h @@ -14,9 +14,9 @@ #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__numeric/transform_reduce.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_execution_policy.h> -#include <__type_traits/operation_traits.h> #include <__utility/move.h> #include #include @@ -37,7 +37,7 @@ template , - __enable_if_t<__desugars_to<__plus_tag, _BinaryOperation, _Tp, _UnaryResult>::value && is_arithmetic_v<_Tp> && + 
__enable_if_t<__desugars_to_v<__plus_tag, _BinaryOperation, _Tp, _UnaryResult> && is_arithmetic_v<_Tp> &&
                          is_arithmetic_v<_UnaryResult>,
                        int> = 0>
 _LIBCPP_HIDE_FROM_ABI _Tp
@@ -53,8 +53,8 @@ template <typename _Size,
           typename _Tp,
           typename _BinaryOperation,
           typename _UnaryOperation,
           typename _UnaryResult = invoke_result_t<_UnaryOperation, _Tp>,
-          __enable_if_t<!(__desugars_to<__plus_tag, _BinaryOperation, _Tp, _UnaryResult>::value &&
-                          is_arithmetic_v<_Tp> && is_arithmetic_v<_UnaryResult>),
+          __enable_if_t<!(__desugars_to_v<__plus_tag, _BinaryOperation, _Tp, _UnaryResult> && is_arithmetic_v<_Tp> &&
+                          is_arithmetic_v<_UnaryResult>),
                         int> = 0>
 _LIBCPP_HIDE_FROM_ABI _Tp
 __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept {
diff --git a/libcxx/include/__algorithm/ranges_minmax.h b/libcxx/include/__algorithm/ranges_minmax.h
index 22a62b620c936..ca5722523336f 100644
--- a/libcxx/include/__algorithm/ranges_minmax.h
+++ b/libcxx/include/__algorithm/ranges_minmax.h
@@ -23,7 +23,9 @@
 #include <__iterator/projected.h>
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
+#include <__type_traits/desugars_to.h>
 #include <__type_traits/is_reference.h>
+#include <__type_traits/is_trivially_copyable.h>
 #include <__type_traits/remove_cvref.h>
 #include <__utility/forward.h>
 #include <__utility/move.h>
@@ -83,7 +85,20 @@ struct __fn {
     _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(__first != __last, "range has to contain at least one element");
 
-    if constexpr (forward_range<_Range>) {
+    // This optimization is not in minmax_element because clang doesn't see through the pointers and as a result
+    // doesn't vectorize the code.
+    if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> &&
+                  __is_cheap_to_copy<_ValueT> && __is_identity<_Proj>::value &&
+                  __desugars_to_v<__less_tag, _Comp, _ValueT, _ValueT>) {
+      minmax_result<_ValueT> __result = {__r[0], __r[0]};
+      for (auto __e : __r) {
+        if (__e < __result.min)
+          __result.min = __e;
+        if (__result.max < __e)
+          __result.max = __e;
+      }
+      return __result;
+    } else if constexpr (forward_range<_Range>) {
       // Special-case the one element case. Avoid repeatedly initializing objects from the result of an iterator
       // dereference when doing so might not be idempotent. The `if constexpr` avoids the extra branch in cases where
      // it's not needed.
diff --git a/libcxx/include/__chrono/sys_info.h b/libcxx/include/__chrono/sys_info.h
new file mode 100644
index 0000000000000..794d22f2ccc1e
--- /dev/null
+++ b/libcxx/include/__chrono/sys_info.h
@@ -0,0 +1,51 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html
+
+#ifndef _LIBCPP___CHRONO_SYS_INFO_H
+#define _LIBCPP___CHRONO_SYS_INFO_H
+
+#include <version>
+// Enable the contents of the header only when libc++ was built with experimental features enabled.
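Stepping back to the ``ranges::minmax`` fast path above: the point of copying elements by value is that the compiler can keep the running min/max in registers and vectorize the loop. A standalone sketch of the same scan pattern, illustrative rather than libc++'s exact code, with the precondition that the input is non-empty:

    #include <utility>
    #include <vector>

    // Same shape as the fast path above: copy each element by value and keep
    // running min/max; with contiguous integral data this vectorizes well.
    template <class T>
    std::pair<T, T> minmax_scan(const std::vector<T>& v) {
      std::pair<T, T> result{v.front(), v.front()}; // requires !v.empty()
      for (T e : v) { // copies are cheap for integral T
        if (e < result.first)
          result.first = e;
        if (result.second < e)
          result.second = e;
      }
      return result;
    }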
+#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_TZDB) + +# include <__chrono/duration.h> +# include <__chrono/system_clock.h> +# include <__chrono/time_point.h> +# include <__config> +# include + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +# if _LIBCPP_STD_VER >= 20 + +namespace chrono { + +struct sys_info { + sys_seconds begin; + sys_seconds end; + seconds offset; + minutes save; + string abbrev; +}; + +} // namespace chrono + +# endif //_LIBCPP_STD_VER >= 20 + +_LIBCPP_END_NAMESPACE_STD + +#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_TZDB) + +#endif // _LIBCPP___CHRONO_SYS_INFO_H diff --git a/libcxx/include/__chrono/time_zone.h b/libcxx/include/__chrono/time_zone.h index 7d97327a6c8e9..8e30034b799ad 100644 --- a/libcxx/include/__chrono/time_zone.h +++ b/libcxx/include/__chrono/time_zone.h @@ -16,6 +16,9 @@ // Enable the contents of the header only when libc++ was built with experimental features enabled. #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_TZDB) +# include <__chrono/duration.h> +# include <__chrono/sys_info.h> +# include <__chrono/system_clock.h> # include <__compare/strong_order.h> # include <__config> # include <__memory/unique_ptr.h> @@ -55,10 +58,18 @@ class _LIBCPP_AVAILABILITY_TZDB time_zone { _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI string_view name() const noexcept { return __name(); } + template + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI sys_info get_info(const sys_time<_Duration>& __time) const { + return __get_info(chrono::time_point_cast(__time)); + } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const __impl& __implementation() const noexcept { return *__impl_; } private: [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI string_view __name() const noexcept; + + [[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI sys_info __get_info(sys_seconds __time) const; + unique_ptr<__impl> __impl_; }; diff --git a/libcxx/include/__chrono/tzdb.h b/libcxx/include/__chrono/tzdb.h index 45c20f279f9c9..e0bfedf0d7823 100644 --- a/libcxx/include/__chrono/tzdb.h +++ b/libcxx/include/__chrono/tzdb.h @@ -16,6 +16,7 @@ // Enable the contents of the header only when libc++ was built with experimental features enabled. 
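A hedged usage sketch for the ``time_zone::get_info`` API added above; the zone name is arbitrary, ``locate_zone`` comes from the tzdb changes shown below, and this requires a libc++ built with the experimental tzdb support enabled:

    #include <chrono>

    int main() {
      using namespace std::chrono;
      const time_zone* tz = locate_zone("Europe/Berlin");
      sys_info info = tz->get_info(system_clock::now());
      // info.offset is the UTC offset in effect; info.save is nonzero during DST.
      return info.save != minutes{0};
    }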
#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_TZDB) +# include <__algorithm/ranges_lower_bound.h> # include <__chrono/leap_second.h> # include <__chrono/time_zone.h> # include <__chrono/time_zone_link.h> @@ -43,6 +44,40 @@ struct tzdb { vector links; vector leap_seconds; + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const time_zone* __locate_zone(string_view __name) const { + if (const time_zone* __result = __find_in_zone(__name)) + return __result; + + if (auto __it = ranges::lower_bound(links, __name, {}, &time_zone_link::name); + __it != links.end() && __it->name() == __name) + if (const time_zone* __result = __find_in_zone(__it->target())) + return __result; + + return nullptr; + } + + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI const time_zone* locate_zone(string_view __name) const { + if (const time_zone* __result = __locate_zone(__name)) + return __result; + + std::__throw_runtime_error("tzdb: requested time zone not found"); + } + + _LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI const time_zone* current_zone() const { + return __current_zone(); + } + +private: + _LIBCPP_HIDE_FROM_ABI const time_zone* __find_in_zone(string_view __name) const noexcept { + if (auto __it = ranges::lower_bound(zones, __name, {}, &time_zone::name); + __it != zones.end() && __it->name() == __name) + return std::addressof(*__it); + + return nullptr; + } + + [[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const time_zone* __current_zone() const; }; } // namespace chrono diff --git a/libcxx/include/__chrono/tzdb_list.h b/libcxx/include/__chrono/tzdb_list.h index e8aaf31e36316..693899d372112 100644 --- a/libcxx/include/__chrono/tzdb_list.h +++ b/libcxx/include/__chrono/tzdb_list.h @@ -17,6 +17,7 @@ #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_TZDB) # include <__availability> +# include <__chrono/time_zone.h> # include <__chrono/tzdb.h> # include <__config> # include <__fwd/string.h> @@ -84,6 +85,15 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI inline con return get_tzdb_list().front(); } +_LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI inline const time_zone* +locate_zone(string_view __name) { + return get_tzdb().locate_zone(__name); +} + +_LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI inline const time_zone* current_zone() { + return get_tzdb().current_zone(); +} + _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const tzdb& reload_tzdb(); _LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI string remote_version(); diff --git a/libcxx/include/__expected/bad_expected_access.h b/libcxx/include/__expected/bad_expected_access.h index 585b4ec9a053b..9d490307b6808 100644 --- a/libcxx/include/__expected/bad_expected_access.h +++ b/libcxx/include/__expected/bad_expected_access.h @@ -32,12 +32,12 @@ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wweak-vtables") template <> class bad_expected_access : public exception { protected: - _LIBCPP_HIDE_FROM_ABI bad_expected_access() noexcept = default; - _LIBCPP_HIDE_FROM_ABI bad_expected_access(const bad_expected_access&) = default; - _LIBCPP_HIDE_FROM_ABI bad_expected_access(bad_expected_access&&) = default; - _LIBCPP_HIDE_FROM_ABI bad_expected_access& operator=(const bad_expected_access&) = default; - _LIBCPP_HIDE_FROM_ABI bad_expected_access& operator=(bad_expected_access&&) = default; - _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~bad_expected_access() override = default; + _LIBCPP_HIDE_FROM_ABI bad_expected_access() noexcept = default; + _LIBCPP_HIDE_FROM_ABI 
bad_expected_access(const bad_expected_access&) noexcept = default; + _LIBCPP_HIDE_FROM_ABI bad_expected_access(bad_expected_access&&) noexcept = default; + _LIBCPP_HIDE_FROM_ABI bad_expected_access& operator=(const bad_expected_access&) noexcept = default; + _LIBCPP_HIDE_FROM_ABI bad_expected_access& operator=(bad_expected_access&&) noexcept = default; + _LIBCPP_HIDE_FROM_ABI_VIRTUAL ~bad_expected_access() override = default; public: // The way this has been designed (by using a class template below) means that we'll already diff --git a/libcxx/include/__format/escaped_output_table.h b/libcxx/include/__format/escaped_output_table.h index e9f4a6e4f63f5..b194f9431c3be 100644 --- a/libcxx/include/__format/escaped_output_table.h +++ b/libcxx/include/__format/escaped_output_table.h @@ -110,7 +110,7 @@ namespace __escaped_output_table { /// - bits [0, 10] The size of the range, allowing 2048 elements. /// - bits [11, 31] The lower bound code point of the range. The upper bound of /// the range is lower bound + size. -_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[893] = { +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[894] = { 0x00000020, 0x0003f821, 0x00056800, @@ -464,14 +464,14 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[893] = { 0x0174d000, 0x0177a00b, 0x017eb019, - 0x017fe004, + 0x01800000, 0x01815005, 0x01820000, 0x0184b803, 0x01880004, 0x01898000, 0x018c7800, - 0x018f200b, + 0x018f200a, 0x0190f800, 0x05246802, 0x05263808, @@ -1000,8 +1000,9 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[893] = { 0x15b9d005, 0x15c0f001, 0x1675100d, - 0x175f0fff, - 0x179f0c1e, + 0x175f080e, + 0x1772f7ff, + 0x17b2f1a1, 0x17d0f5e1, 0x189a5804}; diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h index 1d94cc349c0dd..fa42ba203b0b5 100644 --- a/libcxx/include/__format/formatter_floating_point.h +++ b/libcxx/include/__format/formatter_floating_point.h @@ -690,7 +690,7 @@ __format_floating_point(_Tp __value, _FormatContext& __ctx, __format_spec::__par // Let P equal the precision if nonzero, 6 if the precision is not // specified, or 1 if the precision is 0. Then, if a conversion with // style E would have an exponent of X: - int __p = std::max(1, (__specs.__has_precision() ? __specs.__precision_ : 6)); + int __p = std::max(1, (__specs.__has_precision() ? __specs.__precision_ : 6)); if (__result.__exponent == __result.__last) // if P > X >= -4, the conversion is with style f or F and precision P - 1 - X. // By including the radix point it calculates P - (1 + X) diff --git a/libcxx/include/__format/indic_conjunct_break_table.h b/libcxx/include/__format/indic_conjunct_break_table.h new file mode 100644 index 0000000000000..44521d27498c3 --- /dev/null +++ b/libcxx/include/__format/indic_conjunct_break_table.h @@ -0,0 +1,350 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// WARNING, this entire header is generated by +// utils/generate_indic_conjunct_break_table.py +// DO NOT MODIFY! + +// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE +// +// See Terms of Use +// for definitions of Unicode Inc.'s Data Files and Software. 
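// Aside: returning to the bad_expected_access change above (LWG4031), the
// copy and move members are now noexcept. A hedged illustration of the class
// in action (C++23; the function name is invented for this sketch):
#include <expected>

int check_lwg4031() {
  std::expected<int, char> e{std::unexpect, 'x'};
  try {
    return e.value(); // throws std::bad_expected_access<char>
  } catch (const std::bad_expected_access<char>& ex) {
    return ex.error() == 'x' ? 0 : 1; // ex.error() returns the stored error
  }
}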
+// +// NOTICE TO USER: Carefully read the following legal agreement. +// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +// TERMS AND CONDITIONS OF THIS AGREEMENT. +// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +// THE DATA FILES OR SOFTWARE. +// +// COPYRIGHT AND PERMISSION NOTICE +// +// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved. +// Distributed under the Terms of Use in https://www.unicode.org/copyright.html. +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of the Unicode data files and any associated documentation +// (the "Data Files") or Unicode software and any associated documentation +// (the "Software") to deal in the Data Files or Software +// without restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, and/or sell copies of +// the Data Files or Software, and to permit persons to whom the Data Files +// or Software are furnished to do so, provided that either +// (a) this copyright and permission notice appear with all copies +// of the Data Files or Software, or +// (b) this copyright and permission notice appear in associated +// Documentation. +// +// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT OF THIRD PARTY RIGHTS. +// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +// PERFORMANCE OF THE DATA FILES OR SOFTWARE. +// +// Except as contained in this notice, the name of a copyright holder +// shall not be used in advertising or otherwise to promote the sale, +// use or other dealings in these Data Files or Software without prior +// written authorization of the copyright holder. + +#ifndef _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H +#define _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H + +#include <__algorithm/ranges_upper_bound.h> +#include <__config> +#include <__iterator/access.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 20 + +namespace __indic_conjunct_break { + +enum class __property : uint8_t { + // Values generated from the data files. + __Consonant, + __Extend, + __Linker, + + // The code unit has none of above properties. + __none +}; + +/// The entries of the indic conjunct break property table. +/// +/// The data is generated from +/// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt +/// +/// The data has 3 values +/// - bits [0, 1] The property. One of the values generated from the datafiles +/// of \ref __property +/// - bits [2, 10] The size of the range. +/// - bits [11, 31] The lower bound code point of the range. The upper bound of +/// the range is lower bound + size. +/// +/// The 9 bits for the size allow a maximum range of 512 elements. Some ranges +/// in the Unicode tables are larger. 
They are stored in multiple consecutive +/// ranges in the data table. An alternative would be to store the sizes in a +/// separate 16-bit value. The original MSVC STL code had such an approach, but +/// this approach uses less space for the data and is about 4% faster in the +/// following benchmark. +/// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp +// clang-format off +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = { + 0x00180139, + 0x001a807d, + 0x00241811, + 0x002c88b1, + 0x002df801, + 0x002e0805, + 0x002e2005, + 0x002e3801, + 0x00308029, + 0x00325851, + 0x00338001, + 0x0036b019, + 0x0036f815, + 0x00373805, + 0x0037500d, + 0x00388801, + 0x00398069, + 0x003f5821, + 0x003fe801, + 0x0040b00d, + 0x0040d821, + 0x00412809, + 0x00414811, + 0x0042c809, + 0x0044c01d, + 0x0046505d, + 0x00471871, + 0x0048a890, + 0x0049e001, + 0x004a6802, + 0x004a880d, + 0x004ac01c, + 0x004bc01c, + 0x004ca84c, + 0x004d5018, + 0x004d9000, + 0x004db00c, + 0x004de001, + 0x004e6802, + 0x004ee004, + 0x004ef800, + 0x004f8004, + 0x004ff001, + 0x0051e001, + 0x0054a84c, + 0x00555018, + 0x00559004, + 0x0055a810, + 0x0055e001, + 0x00566802, + 0x0057c800, + 0x0058a84c, + 0x00595018, + 0x00599004, + 0x0059a810, + 0x0059e001, + 0x005a6802, + 0x005ae004, + 0x005af800, + 0x005b8800, + 0x0060a84c, + 0x0061503c, + 0x0061e001, + 0x00626802, + 0x0062a805, + 0x0062c008, + 0x0065e001, + 0x0068a894, + 0x0069d805, + 0x006a6802, + 0x0071c009, + 0x0072400d, + 0x0075c009, + 0x0076400d, + 0x0078c005, + 0x0079a801, + 0x0079b801, + 0x0079c801, + 0x007b8805, + 0x007ba001, + 0x007bd00d, + 0x007c0001, + 0x007c1009, + 0x007c3005, + 0x007e3001, + 0x0081b801, + 0x0081c805, + 0x00846801, + 0x009ae809, + 0x00b8a001, + 0x00be9001, + 0x00bee801, + 0x00c54801, + 0x00c9c809, + 0x00d0b805, + 0x00d30001, + 0x00d3a81d, + 0x00d3f801, + 0x00d58035, + 0x00d5f83d, + 0x00d9a001, + 0x00db5821, + 0x00dd5801, + 0x00df3001, + 0x00e1b801, + 0x00e68009, + 0x00e6a031, + 0x00e71019, + 0x00e76801, + 0x00e7a001, + 0x00e7c005, + 0x00ee00fd, + 0x01006801, + 0x01068031, + 0x01070801, + 0x0107282d, + 0x01677809, + 0x016bf801, + 0x016f007d, + 0x01815015, + 0x0184c805, + 0x05337801, + 0x0533a025, + 0x0534f005, + 0x05378005, + 0x05416001, + 0x05470045, + 0x05495809, + 0x054d9801, + 0x05558001, + 0x05559009, + 0x0555b805, + 0x0555f005, + 0x05560801, + 0x0557b001, + 0x055f6801, + 0x07d8f001, + 0x07f1003d, + 0x080fe801, + 0x08170001, + 0x081bb011, + 0x08506801, + 0x08507801, + 0x0851c009, + 0x0851f801, + 0x08572805, + 0x0869200d, + 0x08755805, + 0x0877e809, + 0x087a3029, + 0x087c100d, + 0x08838001, + 0x0883f801, + 0x0885d001, + 0x08880009, + 0x08899805, + 0x088b9801, + 0x088e5001, + 0x0891b001, + 0x08974805, + 0x0899d805, + 0x089b3019, + 0x089b8011, + 0x08a23001, + 0x08a2f001, + 0x08a61801, + 0x08ae0001, + 0x08b5b801, + 0x08b95801, + 0x08c1d001, + 0x08c9f001, + 0x08ca1801, + 0x08d1a001, + 0x08d23801, + 0x08d4c801, + 0x08ea1001, + 0x08ea2005, + 0x08ecb801, + 0x08fa1001, + 0x0b578011, + 0x0b598019, + 0x0de4f001, + 0x0e8b2801, + 0x0e8b3809, + 0x0e8b7011, + 0x0e8bd81d, + 0x0e8c2819, + 0x0e8d500d, + 0x0e921009, + 0x0f000019, + 0x0f004041, + 0x0f00d819, + 0x0f011805, + 0x0f013011, + 0x0f047801, + 0x0f098019, + 0x0f157001, + 0x0f17600d, + 0x0f27600d, + 0x0f468019, + 0x0f4a2019}; +// clang-format on + +/// Returns the indic conjuct break property of a code point. 
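// Side note: the packed layout documented above can be decoded like this
// (a sketch; the field widths are taken from the comment block above, and
// the struct/function names are invented for illustration):
#include <cstdint>

struct decoded {
  char32_t lower;        // bits [11, 31]: first code point of the range
  std::uint32_t size;    // bits [2, 10]: size of the range
  std::uint8_t property; // bits [0, 1]: the __property value
};

constexpr decoded decode(std::uint32_t entry) {
  return {static_cast<char32_t>(entry >> 11),
          (entry >> 2) & 0b1'1111'1111,
          static_cast<std::uint8_t>(entry & 0b11)};
}

// First entry of the table above: U+0300..U+034E, property 1 (__Extend).
static_assert(decode(0x00180139).lower == U'\u0300');
static_assert(decode(0x00180139).size == 78);
static_assert(decode(0x00180139).property == 1);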
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept { + // The algorithm searches for the upper bound of the range and, when found, + // steps back one entry. This algorithm is used since the code point can be + // anywhere in the range. After a lower bound is found the next step is to + // compare whether the code unit is indeed in the range. + // + // Since the entry contains a code unit, size, and property the code point + // being sought needs to be adjusted. Just shifting the code point to the + // proper position doesn't work; suppose an entry has property 0, size 1, + // and lower bound 3. This results in the entry 0x1810. + // When searching for code point 3 it will search for 0x1800, find 0x1810 + // and moves to the previous entry. Thus the lower bound value will never + // be found. + // The simple solution is to set the bits belonging to the property and + // size. Then the upper bound for code point 3 will return the entry after + // 0x1810. After moving to the previous entry the algorithm arrives at the + // correct entry. + ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries; + if (__i == 0) + return __property::__none; + + --__i; + uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 2) & 0b1'1111'1111); + if (__code_point <= __upper_bound) + return static_cast<__property>(__entries[__i] & 0b11); + + return __property::__none; +} + +} // namespace __indic_conjunct_break + +#endif //_LIBCPP_STD_VER >= 20 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H diff --git a/libcxx/include/__format/unicode.h b/libcxx/include/__format/unicode.h index 40067ca3448bb..de7d0fea1df56 100644 --- a/libcxx/include/__format/unicode.h +++ b/libcxx/include/__format/unicode.h @@ -15,8 +15,10 @@ #include <__concepts/same_as.h> #include <__config> #include <__format/extended_grapheme_cluster_table.h> +#include <__format/indic_conjunct_break_table.h> #include <__iterator/concepts.h> #include <__iterator/readable_traits.h> // iter_value_t +#include <__utility/unreachable.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -292,84 +294,231 @@ class __code_point_view { }; # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS -_LIBCPP_HIDE_FROM_ABI constexpr bool __at_extended_grapheme_cluster_break( - bool& __ri_break_allowed, - bool __has_extened_pictographic, - __extended_grapheme_custer_property_boundary::__property __prev, - __extended_grapheme_custer_property_boundary::__property __next) { - using __extended_grapheme_custer_property_boundary::__property; +// State machine to implement the Extended Grapheme Cluster Boundary +// +// The exact rules may change between Unicode versions. +// This implements the extended rules see +// https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries +class __extended_grapheme_cluster_break { + using __EGC_property = __extended_grapheme_custer_property_boundary::__property; + using __inCB_property = __indic_conjunct_break::__property; - __has_extened_pictographic |= __prev == __property::__Extended_Pictographic; +public: + _LIBCPP_HIDE_FROM_ABI constexpr explicit __extended_grapheme_cluster_break(char32_t __first_code_point) + : __prev_code_point_(__first_code_point), + __prev_property_(__extended_grapheme_custer_property_boundary::__get_property(__first_code_point)) { + // Initializes the active rule. 
+ if (__prev_property_ == __EGC_property::__Extended_Pictographic) + __active_rule_ = __rule::__GB11_emoji; + else if (__prev_property_ == __EGC_property::__Regional_Indicator) + __active_rule_ = __rule::__GB12_GB13_regional_indicator; + else if (__indic_conjunct_break::__get_property(__first_code_point) == __inCB_property::__Consonant) + __active_rule_ = __rule::__GB9c_indic_conjunct_break; + } - // https://www.unicode.org/reports/tr29/tr29-39.html#Grapheme_Cluster_Boundary_Rules + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(char32_t __next_code_point) { + __EGC_property __next_property = __extended_grapheme_custer_property_boundary::__get_property(__next_code_point); + bool __result = __evaluate(__next_code_point, __next_property); + __prev_code_point_ = __next_code_point; + __prev_property_ = __next_property; + return __result; + } - // *** Break at the start and end of text, unless the text is empty. *** + // The code point whose break propery are considered during the next + // evaluation cyle. + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __current_code_point() const { return __prev_code_point_; } - _LIBCPP_ASSERT_INTERNAL(__prev != __property::__sot, "should be handled in the constructor"); // GB1 - _LIBCPP_ASSERT_INTERNAL(__prev != __property::__eot, "should be handled by our caller"); // GB2 +private: + // The naming of the identifiers matches the Unicode standard. + // NOLINTBEGIN(readability-identifier-naming) + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool + __evaluate(char32_t __next_code_point, __EGC_property __next_property) { + switch (__active_rule_) { + case __rule::__none: + return __evaluate_none(__next_code_point, __next_property); + case __rule::__GB9c_indic_conjunct_break: + return __evaluate_GB9c_indic_conjunct_break(__next_code_point, __next_property); + case __rule::__GB11_emoji: + return __evaluate_GB11_emoji(__next_code_point, __next_property); + case __rule::__GB12_GB13_regional_indicator: + return __evaluate_GB12_GB13_regional_indicator(__next_code_point, __next_property); + } + __libcpp_unreachable(); + } - // *** Do not break between a CR and LF. Otherwise, break before and after controls. *** - if (__prev == __property::__CR && __next == __property::__LF) // GB3 - return false; + _LIBCPP_HIDE_FROM_ABI constexpr bool __evaluate_none(char32_t __next_code_point, __EGC_property __next_property) { + // *** Break at the start and end of text, unless the text is empty. *** - if (__prev == __property::__Control || __prev == __property::__CR || __prev == __property::__LF) // GB4 - return true; + _LIBCPP_ASSERT_INTERNAL(__prev_property_ != __EGC_property::__sot, "should be handled in the constructor"); // GB1 + _LIBCPP_ASSERT_INTERNAL(__prev_property_ != __EGC_property::__eot, "should be handled by our caller"); // GB2 - if (__next == __property::__Control || __next == __property::__CR || __next == __property::__LF) // GB5 - return true; + // *** Do not break between a CR and LF. Otherwise, break before and after controls. *** + if (__prev_property_ == __EGC_property::__CR && __next_property == __EGC_property::__LF) // GB3 + return false; - // *** Do not break Hangul syllable sequences. 
*** - if (__prev == __property::__L && (__next == __property::__L || __next == __property::__V || - __next == __property::__LV || __next == __property::__LVT)) // GB6 - return false; + if (__prev_property_ == __EGC_property::__Control || __prev_property_ == __EGC_property::__CR || + __prev_property_ == __EGC_property::__LF) // GB4 + return true; - if ((__prev == __property::__LV || __prev == __property::__V) && - (__next == __property::__V || __next == __property::__T)) // GB7 - return false; + if (__next_property == __EGC_property::__Control || __next_property == __EGC_property::__CR || + __next_property == __EGC_property::__LF) // GB5 + return true; - if ((__prev == __property::__LVT || __prev == __property::__T) && __next == __property::__T) // GB8 - return false; + // *** Do not break Hangul syllable sequences. *** + if (__prev_property_ == __EGC_property::__L && + (__next_property == __EGC_property::__L || __next_property == __EGC_property::__V || + __next_property == __EGC_property::__LV || __next_property == __EGC_property::__LVT)) // GB6 + return false; - // *** Do not break before extending characters or ZWJ. *** - if (__next == __property::__Extend || __next == __property::__ZWJ) - return false; // GB9 + if ((__prev_property_ == __EGC_property::__LV || __prev_property_ == __EGC_property::__V) && + (__next_property == __EGC_property::__V || __next_property == __EGC_property::__T)) // GB7 + return false; - // *** Do not break before SpacingMarks, or after Prepend characters. *** - if (__next == __property::__SpacingMark) // GB9a - return false; + if ((__prev_property_ == __EGC_property::__LVT || __prev_property_ == __EGC_property::__T) && + __next_property == __EGC_property::__T) // GB8 + return false; - if (__prev == __property::__Prepend) // GB9b - return false; + // *** Do not break before extending characters or ZWJ. *** + if (__next_property == __EGC_property::__Extend || __next_property == __EGC_property::__ZWJ) + return false; // GB9 - // *** Do not break within emoji modifier sequences or emoji zwj sequences. *** + // *** Do not break before SpacingMarks, or after Prepend characters. *** + if (__next_property == __EGC_property::__SpacingMark) // GB9a + return false; - // GB11 \p{Extended_Pictographic} Extend* ZWJ x \p{Extended_Pictographic} - // - // Note that several parts of this rule are matched by GB9: Any x (Extend | ZWJ) - // - \p{Extended_Pictographic} x Extend - // - Extend x Extend - // - \p{Extended_Pictographic} x ZWJ - // - Extend x ZWJ - // - // So the only case left to test is - // - \p{Extended_Pictographic}' x ZWJ x \p{Extended_Pictographic} - // where \p{Extended_Pictographic}' is stored in __has_extened_pictographic - if (__has_extened_pictographic && __prev == __property::__ZWJ && __next == __property::__Extended_Pictographic) - return false; + if (__prev_property_ == __EGC_property::__Prepend) // GB9b + return false; - // *** Do not break within emoji flag sequences *** + // *** Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker. *** + if (__indic_conjunct_break::__get_property(__next_code_point) == __inCB_property::__Consonant) { + __active_rule_ = __rule::__GB9c_indic_conjunct_break; + __GB9c_indic_conjunct_break_state_ = __GB9c_indic_conjunct_break_state::__Consonant; + return true; + } + + // *** Do not break within emoji modifier sequences or emoji zwj sequences. 
*** + if (__next_property == __EGC_property::__Extended_Pictographic) { + __active_rule_ = __rule::__GB11_emoji; + __GB11_emoji_state_ = __GB11_emoji_state::__Extended_Pictographic; + return true; + } + + // *** Do not break within emoji flag sequences *** - // That is, do not break between regional indicator (RI) symbols if there - // is an odd number of RI characters before the break point. + // That is, do not break between regional indicator (RI) symbols if there + // is an odd number of RI characters before the break point. + if (__next_property == __EGC_property::__Regional_Indicator) { // GB12 + GB13 + __active_rule_ = __rule::__GB12_GB13_regional_indicator; + return true; + } - if (__prev == __property::__Regional_Indicator && __next == __property::__Regional_Indicator) { // GB12 + GB13 - __ri_break_allowed = !__ri_break_allowed; - return __ri_break_allowed; + // *** Otherwise, break everywhere. *** + return true; // GB999 } - // *** Otherwise, break everywhere. *** - return true; // GB999 -} + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool + __evaluate_GB9c_indic_conjunct_break(char32_t __next_code_point, __EGC_property __next_property) { + __inCB_property __break = __indic_conjunct_break::__get_property(__next_code_point); + if (__break == __inCB_property::__none) { + __active_rule_ = __rule::__none; + return __evaluate_none(__next_code_point, __next_property); + } + + switch (__GB9c_indic_conjunct_break_state_) { + case __GB9c_indic_conjunct_break_state::__Consonant: + if (__break == __inCB_property::__Extend) { + return false; + } + if (__break == __inCB_property::__Linker) { + __GB9c_indic_conjunct_break_state_ = __GB9c_indic_conjunct_break_state::__Linker; + return false; + } + __active_rule_ = __rule::__none; + return __evaluate_none(__next_code_point, __next_property); + + case __GB9c_indic_conjunct_break_state::__Linker: + if (__break == __inCB_property::__Extend) { + return false; + } + if (__break == __inCB_property::__Linker) { + return false; + } + if (__break == __inCB_property::__Consonant) { + __GB9c_indic_conjunct_break_state_ = __GB9c_indic_conjunct_break_state::__Consonant; + return false; + } + __active_rule_ = __rule::__none; + return __evaluate_none(__next_code_point, __next_property); + } + __libcpp_unreachable(); + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool + __evaluate_GB11_emoji(char32_t __next_code_point, __EGC_property __next_property) { + switch (__GB11_emoji_state_) { + case __GB11_emoji_state::__Extended_Pictographic: + if (__next_property == __EGC_property::__Extend) { + __GB11_emoji_state_ = __GB11_emoji_state::__Extend; + return false; + } + [[fallthrough]]; + case __GB11_emoji_state::__Extend: + if (__next_property == __EGC_property::__ZWJ) { + __GB11_emoji_state_ = __GB11_emoji_state::__ZWJ; + return false; + } + if (__next_property == __EGC_property::__Extend) + return false; + __active_rule_ = __rule::__none; + return __evaluate_none(__next_code_point, __next_property); + + case __GB11_emoji_state::__ZWJ: + if (__next_property == __EGC_property::__Extended_Pictographic) { + __GB11_emoji_state_ = __GB11_emoji_state::__Extended_Pictographic; + return false; + } + __active_rule_ = __rule::__none; + return __evaluate_none(__next_code_point, __next_property); + } + __libcpp_unreachable(); + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool + __evaluate_GB12_GB13_regional_indicator(char32_t __next_code_point, __EGC_property __next_property) { + __active_rule_ = __rule::__none; + if (__next_property == 
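// Side note: the GB12/GB13 handling around here replaces the old
// __ri_break_allowed toggle with a one-shot rule state. A reduced standalone
// model of just that rule (names invented; assume construction already
// consumed the first regional indicator):
class ri_break_model {
  bool __in_pair_ = false; // true after the first RI of a flag pair

public:
  // Returns true when a grapheme break is allowed before the next code
  // point; `next_is_ri` says whether that code point is an RI.
  constexpr bool operator()(bool next_is_ri) {
    if (!next_is_ri) {
      __in_pair_ = false;
      return true; // defer to the other rules; modeled as a break here
    }
    __in_pair_ = !__in_pair_;
    return !__in_pair_; // no break after an odd RI, break after a pair
  }
};

// RI1 RI2 | RI3 RI4: breaks fall only between complete pairs.
static_assert([] {
  ri_break_model m;
  return !m(true) && m(true) && !m(true);
}());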
__EGC_property::__Regional_Indicator)
+      return false;
+    return __evaluate_none(__next_code_point, __next_property);
+  }
+
+  char32_t __prev_code_point_;
+  __EGC_property __prev_property_;
+
+  enum class __rule {
+    __none,
+    __GB9c_indic_conjunct_break,
+    __GB11_emoji,
+    __GB12_GB13_regional_indicator,
+  };
+  __rule __active_rule_ = __rule::__none;
+
+  enum class __GB11_emoji_state {
+    __Extended_Pictographic,
+    __Extend,
+    __ZWJ,
+  };
+  __GB11_emoji_state __GB11_emoji_state_ = __GB11_emoji_state::__Extended_Pictographic;
+
+  enum class __GB9c_indic_conjunct_break_state {
+    __Consonant,
+    __Linker,
+  };
+
+  __GB9c_indic_conjunct_break_state __GB9c_indic_conjunct_break_state_ = __GB9c_indic_conjunct_break_state::__Consonant;
+
+  // NOLINTEND(readability-identifier-naming)
+};

 /// Helper class to extract an extended grapheme cluster from a Unicode character range.
 ///
@@ -382,9 +531,7 @@ class __extended_grapheme_cluster_view {
 public:
   _LIBCPP_HIDE_FROM_ABI constexpr explicit __extended_grapheme_cluster_view(_Iterator __first, _Iterator __last)
-      : __code_point_view_(__first, __last),
-        __next_code_point_(__code_point_view_.__consume().__code_point),
-        __next_prop_(__extended_grapheme_custer_property_boundary::__get_property(__next_code_point_)) {}
+      : __code_point_view_(__first, __last), __at_break_(__code_point_view_.__consume().__code_point) {}

   struct __cluster {
     /// The first code point of the extended grapheme cluster.
@@ -400,44 +547,20 @@ class __extended_grapheme_cluster_view {
     _Iterator __last_;
   };

-  _LIBCPP_HIDE_FROM_ABI constexpr __cluster __consume() {
-    _LIBCPP_ASSERT_INTERNAL(__next_prop_ != __extended_grapheme_custer_property_boundary::__property::__eot,
-                            "can't move beyond the end of input");
-
-    char32_t __code_point = __next_code_point_;
-    if (!__code_point_view_.__at_end())
-      return {__code_point, __get_break()};
-
-    __next_prop_ = __extended_grapheme_custer_property_boundary::__property::__eot;
-    return {__code_point, __code_point_view_.__position()};
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __cluster __consume() {
+    char32_t __code_point = __at_break_.__current_code_point();
+    _Iterator __position  = __code_point_view_.__position();
+    while (!__code_point_view_.__at_end()) {
+      if (__at_break_(__code_point_view_.__consume().__code_point))
+        break;
+      __position = __code_point_view_.__position();
+    }
+    return {__code_point, __position};
   }

 private:
   __code_point_view<_CharT> __code_point_view_;
-
-  char32_t __next_code_point_;
-  __extended_grapheme_custer_property_boundary::__property __next_prop_;
-
-  _LIBCPP_HIDE_FROM_ABI constexpr _Iterator __get_break() {
-    bool __ri_break_allowed         = true;
-    bool __has_extened_pictographic = false;
-    while (true) {
-      _Iterator __result = __code_point_view_.__position();
-      __extended_grapheme_custer_property_boundary::__property __prev = __next_prop_;
-      if (__code_point_view_.__at_end()) {
-        __next_prop_ = __extended_grapheme_custer_property_boundary::__property::__eot;
-        return __result;
-      }
-      __next_code_point_ = __code_point_view_.__consume().__code_point;
-      __next_prop_       = __extended_grapheme_custer_property_boundary::__get_property(__next_code_point_);
-
-      __has_extened_pictographic |=
-          __prev == __extended_grapheme_custer_property_boundary::__property::__Extended_Pictographic;
-
-      if (__at_extended_grapheme_cluster_break(__ri_break_allowed, __has_extened_pictographic, __prev, __next_prop_))
-        return __result;
-    }
-  }
+  __extended_grapheme_cluster_break __at_break_;
 };

 template
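The rewrite above trades per-pair property checks for a small state machine: rules that span more than one code point (GB9c, GB11, GB12/GB13) latch __active_rule_, and subsequent code points are evaluated against that rule until it no longer applies. The following minimal, self-contained sketch models that pattern for the regional-indicator rule only; the names (prop, break_machine) are invented for the illustration and are not libc++ internals. It shows how the latch yields "break between two regional indicators only after an even run of them".

    #include <cassert>

    // Properties reduced to the two needed for this rule.
    enum class prop { other, regional_indicator };

    class break_machine {
    public:
      // Returns true when a grapheme cluster break is allowed before the code
      // point whose property is 'next'.
      bool operator()(prop next) {
        if (active_ == rule::gb12_13) {
          active_ = rule::none;
          if (next == prop::regional_indicator)
            return false; // GB12/GB13: the second RI completes one flag; no break inside the pair
        }
        if (next == prop::regional_indicator)
          active_ = rule::gb12_13; // latch: the following RI would pair with this one
        return true; // GB999: otherwise, break everywhere
      }

    private:
      enum class rule { none, gb12_13 };
      rule active_ = rule::none;
    };

    int main() {
      break_machine m;
      assert(m(prop::regional_indicator));  // break allowed before the first RI
      assert(!m(prop::regional_indicator)); // no break inside the RI pair (one flag)
      assert(m(prop::regional_indicator));  // odd run: the third RI starts a new flag
    }

diff --git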
a/libcxx/include/__format/width_estimation_table.h b/libcxx/include/__format/width_estimation_table.h index 6309483367f13..c9a9f6719c610 100644 --- a/libcxx/include/__format/width_estimation_table.h +++ b/libcxx/include/__format/width_estimation_table.h @@ -119,7 +119,7 @@ namespace __width_estimation_table { /// - bits [0, 13] The size of the range, allowing 16384 elements. /// - bits [14, 31] The lower bound code point of the range. The upper bound of /// the range is lower bound + size. -_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[108] = { +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[107] = { 0x0440005f /* 00001100 - 0000115f [ 96] */, // 0x08c68001 /* 0000231a - 0000231b [ 2] */, // 0x08ca4001 /* 00002329 - 0000232a [ 2] */, // @@ -158,14 +158,13 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[108] = { 0x0ba00019 /* 00002e80 - 00002e99 [ 26] */, // 0x0ba6c058 /* 00002e9b - 00002ef3 [ 89] */, // 0x0bc000d5 /* 00002f00 - 00002fd5 [ 214] */, // - 0x0bfc000b /* 00002ff0 - 00002ffb [ 12] */, // - 0x0c00003e /* 00003000 - 0000303e [ 63] */, // + 0x0bfc004e /* 00002ff0 - 0000303e [ 79] */, // 0x0c104055 /* 00003041 - 00003096 [ 86] */, // 0x0c264066 /* 00003099 - 000030ff [ 103] */, // 0x0c41402a /* 00003105 - 0000312f [ 43] */, // 0x0c4c405d /* 00003131 - 0000318e [ 94] */, // 0x0c640053 /* 00003190 - 000031e3 [ 84] */, // - 0x0c7c002e /* 000031f0 - 0000321e [ 47] */, // + 0x0c7bc02f /* 000031ef - 0000321e [ 48] */, // 0x0c880027 /* 00003220 - 00003247 [ 40] */, // 0x0c943fff /* 00003250 - 0000724f [16384] */, // 0x1c94323c /* 00007250 - 0000a48c [12861] */, // diff --git a/libcxx/include/__functional/bind_back.h b/libcxx/include/__functional/bind_back.h index ce26d3b70630f..3c42d4769e8a9 100644 --- a/libcxx/include/__functional/bind_back.h +++ b/libcxx/include/__functional/bind_back.h @@ -62,6 +62,20 @@ _LIBCPP_HIDE_FROM_ABI constexpr auto __bind_back(_Fn&& __f, _Args&&... __args) n std::forward<_Fn>(__f), std::forward_as_tuple(std::forward<_Args>(__args)...)); } +# if _LIBCPP_STD_VER >= 23 +template +_LIBCPP_HIDE_FROM_ABI constexpr auto bind_back(_Fn&& __f, _Args&&... 
__args) {
+  static_assert(is_constructible_v<decay_t<_Fn>, _Fn>, "bind_back requires decay_t<F> to be constructible from F");
+  static_assert(is_move_constructible_v<decay_t<_Fn>>, "bind_back requires decay_t<F> to be move constructible");
+  static_assert((is_constructible_v<decay_t<_Args>, _Args> && ...),
+                "bind_back requires all decay_t<Args> to be constructible from respective Args");
+  static_assert((is_move_constructible_v<decay_t<_Args>> && ...),
+                "bind_back requires all decay_t<Args> to be move constructible");
+  return __bind_back_t<decay_t<_Fn>, tuple<decay_t<_Args>...>>(
+      std::forward<_Fn>(__f), std::forward_as_tuple(std::forward<_Args>(__args)...));
+}
+# endif // _LIBCPP_STD_VER >= 23
+
 #endif // _LIBCPP_STD_VER >= 20

 _LIBCPP_END_NAMESPACE_STD
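A brief usage sketch of the new API (assuming a C++23 standard library that ships std::bind_back; the callable and values are invented for the example): bind_back fixes the trailing arguments of a callable, mirroring what bind_front does for the leading ones.

    #include <cassert>
    #include <functional>

    int main() {
      auto minus = [](int lhs, int rhs) { return lhs - rhs; };

      auto minus_five = std::bind_back(minus, 5);  // fixes the trailing argument: rhs == 5
      auto five_minus = std::bind_front(minus, 5); // fixes the leading argument: lhs == 5

      assert(minus_five(12) == 7);  // minus(12, 5)
      assert(five_minus(12) == -7); // minus(5, 12)
    }

diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h
index 7ddc00650f162..240f127e54255 100644
--- a/libcxx/include/__functional/operations.h
+++ b/libcxx/include/__functional/operations.h
@@ -13,8 +13,7 @@
 #include <__config>
 #include <__functional/binary_function.h>
 #include <__functional/unary_function.h>
-#include <__type_traits/integral_constant.h>
-#include <__type_traits/operation_traits.h>
+#include <__type_traits/desugars_to.h>
 #include <__utility/forward.h>

 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -41,10 +40,10 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(plus);
 // The non-transparent std::plus specialization is only equivalent to a raw plus
 // operator when we don't perform an implicit conversion when calling it.
 template <class _Tp>
-struct __desugars_to<__plus_tag, plus<_Tp>, _Tp, _Tp> : true_type {};
+inline const bool __desugars_to_v<__plus_tag, plus<_Tp>, _Tp, _Tp> = true;

 template <class _Tp, class _Up>
-struct __desugars_to<__plus_tag, plus<>, _Tp, _Up> : true_type {};
+inline const bool __desugars_to_v<__plus_tag, plus<>, _Tp, _Up> = true;

 #if _LIBCPP_STD_VER >= 14
 template <>
@@ -315,11 +314,11 @@ struct _LIBCPP_TEMPLATE_VIS equal_to {
 // The non-transparent std::equal_to specialization is only equivalent to a raw equality
 // comparison when we don't perform an implicit conversion when calling it.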
template -struct __desugars_to<__equal_tag, equal_to<_Tp>, _Tp, _Tp> : true_type {}; +inline const bool __desugars_to_v<__equal_tag, equal_to<_Tp>, _Tp, _Tp> = true; // In the transparent case, we do not enforce that template -struct __desugars_to<__equal_tag, equal_to, _Tp, _Up> : true_type {}; +inline const bool __desugars_to_v<__equal_tag, equal_to, _Tp, _Up> = true; #if _LIBCPP_STD_VER >= 14 template @@ -360,6 +359,9 @@ struct _LIBCPP_TEMPLATE_VIS less : __binary_function<_Tp, _Tp, bool> { }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less); +template +inline const bool __desugars_to_v<__less_tag, less<_Tp>, _Tp, _Tp> = true; + #if _LIBCPP_STD_VER >= 14 template <> struct _LIBCPP_TEMPLATE_VIS less { @@ -371,6 +373,9 @@ struct _LIBCPP_TEMPLATE_VIS less { } typedef void is_transparent; }; + +template +inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Tp> = true; #endif #if _LIBCPP_STD_VER >= 14 diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h index 38b28018049eb..27f06eadd0eb1 100644 --- a/libcxx/include/__functional/ranges_operations.h +++ b/libcxx/include/__functional/ranges_operations.h @@ -13,8 +13,7 @@ #include <__concepts/equality_comparable.h> #include <__concepts/totally_ordered.h> #include <__config> -#include <__type_traits/integral_constant.h> -#include <__type_traits/operation_traits.h> +#include <__type_traits/desugars_to.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -98,7 +97,10 @@ struct greater_equal { // For ranges we do not require that the types on each side of the equality // operator are of the same type template -struct __desugars_to<__equal_tag, ranges::equal_to, _Tp, _Up> : true_type {}; +inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = true; + +template +inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true; #endif // _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__memory/construct_at.h b/libcxx/include/__memory/construct_at.h index 91d17134db014..eb02132480064 100644 --- a/libcxx/include/__memory/construct_at.h +++ b/libcxx/include/__memory/construct_at.h @@ -44,7 +44,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Tp* construct_at(_Tp* __location, _Args&&... __ #endif template ()) _Tp(std::declval<_Args>()...))> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp* __construct_at(_Tp* __location, _Args&&... __args) { +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp* __construct_at(_Tp* __location, _Args&&... __args) { #if _LIBCPP_STD_VER >= 20 return std::construct_at(__location, std::forward<_Args>(__args)...); #else diff --git a/libcxx/include/__numeric/pstl_transform_reduce.h b/libcxx/include/__numeric/pstl_transform_reduce.h index 2f412d41f7f27..07ecf0d9956bb 100644 --- a/libcxx/include/__numeric/pstl_transform_reduce.h +++ b/libcxx/include/__numeric/pstl_transform_reduce.h @@ -87,7 +87,7 @@ _LIBCPP_HIDE_FROM_ABI _Tp transform_reduce( } // This overload doesn't get a customization point because it's trivial to detect (through e.g. 
-// __desugars_to) when specializing the more general variant, which should always be preferred +// __desugars_to_v) when specializing the more general variant, which should always be preferred template -#include <__type_traits/integral_constant.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -21,6 +20,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // Tags to represent the canonical operations struct __equal_tag {}; struct __plus_tag {}; +struct __less_tag {}; // This class template is used to determine whether an operation "desugars" // (or boils down) to a given canonical operation. @@ -33,8 +33,8 @@ struct __plus_tag {}; // predicate being passed is actually going to call a builtin operator, or has // some specific semantics. template -struct __desugars_to : false_type {}; +inline const bool __desugars_to_v = false; _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___TYPE_TRAITS_OPERATION_TRAITS_H +#endif // _LIBCPP___TYPE_TRAITS_DESUGARS_TO_H diff --git a/libcxx/include/chrono b/libcxx/include/chrono index 4dd43137b7182..5eddd050196de 100644 --- a/libcxx/include/chrono +++ b/libcxx/include/chrono @@ -689,6 +689,9 @@ struct tzdb { vector zones; vector links; vector leap_seconds; + + const time_zone* locate_zone(string_view tz_name) const; + const time_zone* current_zone() const; }; class tzdb_list { // C++20 @@ -714,11 +717,22 @@ public: // [time.zone.db.access], time zone database access const tzdb& get_tzdb(); // C++20 tzdb_list& get_tzdb_list(); // C++20 +const time_zone* locate_zone(string_view tz_name); // C++20 +const time_zone* current_zone() // C++20 // [time.zone.db.remote], remote time zone database support const tzdb& reload_tzdb(); // C++20 string remote_version(); // C++20 +// [time.zone.info], information classes +struct sys_info { // C++20 + sys_seconds begin; + sys_seconds end; + seconds offset; + minutes save; + string abbrev; +}; + // 25.10.5, class time_zone // C++20 enum class choose {earliest, latest}; class time_zone { @@ -728,6 +742,9 @@ class time_zone { // unspecified additional constructors string_view name() const noexcept; + + template + sys_info get_info(const sys_time& st) const; }; bool operator==(const time_zone& x, const time_zone& y) noexcept; // C++20 strong_ordering operator<=>(const time_zone& x, const time_zone& y) noexcept; // C++20 @@ -876,6 +893,7 @@ constexpr chrono::year operator ""y(unsigned lo #include <__chrono/month_weekday.h> #include <__chrono/monthday.h> #include <__chrono/steady_clock.h> +#include <__chrono/sys_info.h> #include <__chrono/system_clock.h> #include <__chrono/time_point.h> #include <__chrono/weekday.h> diff --git a/libcxx/include/fstream b/libcxx/include/fstream index 7a084d114b185..7128f72e16119 100644 --- a/libcxx/include/fstream +++ b/libcxx/include/fstream @@ -78,8 +78,8 @@ public: basic_ifstream(); explicit basic_ifstream(const char* s, ios_base::openmode mode = ios_base::in); explicit basic_ifstream(const string& s, ios_base::openmode mode = ios_base::in); - explicit basic_ifstream(const filesystem::path& p, - ios_base::openmode mode = ios_base::in); // C++17 + template + explicit basic_ifstream(const T& s, ios_base::openmode mode = ios_base::in); // Since C++17 basic_ifstream(basic_ifstream&& rhs); basic_ifstream& operator=(basic_ifstream&& rhs); @@ -117,8 +117,8 @@ public: basic_ofstream(); explicit basic_ofstream(const char* s, ios_base::openmode mode = ios_base::out); explicit basic_ofstream(const string& s, ios_base::openmode mode = ios_base::out); - explicit basic_ofstream(const filesystem::path& 
p, - ios_base::openmode mode = ios_base::out); // C++17 + template + explicit basic_ofstream(const T& s, ios_base::openmode mode = ios_base::out); // Since C++17 basic_ofstream(basic_ofstream&& rhs); basic_ofstream& operator=(basic_ofstream&& rhs); @@ -158,8 +158,8 @@ public: basic_fstream(); explicit basic_fstream(const char* s, ios_base::openmode mode = ios_base::in|ios_base::out); explicit basic_fstream(const string& s, ios_base::openmode mode = ios_base::in|ios_base::out); - explicit basic_fstream(const filesystem::path& p, - ios_base::openmode mode = ios_base::in|ios_base::out); C++17 + template + explicit basic_fstream(const T& s, ios_base::openmode mode = ios_base::in | ios_base::out); // Since C++17 basic_fstream(basic_fstream&& rhs); basic_fstream& operator=(basic_fstream&& rhs); @@ -192,6 +192,8 @@ typedef basic_fstream wfstream; #include <__config> #include <__fwd/fstream.h> #include <__locale> +#include <__type_traits/enable_if.h> +#include <__type_traits/is_same.h> #include <__utility/move.h> #include <__utility/swap.h> #include <__utility/unreachable.h> @@ -1101,8 +1103,9 @@ public: # endif _LIBCPP_HIDE_FROM_ABI explicit basic_ifstream(const string& __s, ios_base::openmode __mode = ios_base::in); # if _LIBCPP_STD_VER >= 17 + template >> _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_HIDE_FROM_ABI explicit basic_ifstream( - const filesystem::path& __p, ios_base::openmode __mode = ios_base::in) + const _Tp& __p, ios_base::openmode __mode = ios_base::in) : basic_ifstream(__p.c_str(), __mode) {} # endif // _LIBCPP_STD_VER >= 17 _LIBCPP_HIDE_FROM_ABI basic_ifstream(basic_ifstream&& __rhs); @@ -1255,8 +1258,9 @@ public: _LIBCPP_HIDE_FROM_ABI explicit basic_ofstream(const string& __s, ios_base::openmode __mode = ios_base::out); # if _LIBCPP_STD_VER >= 17 + template >> _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_HIDE_FROM_ABI explicit basic_ofstream( - const filesystem::path& __p, ios_base::openmode __mode = ios_base::out) + const _Tp& __p, ios_base::openmode __mode = ios_base::out) : basic_ofstream(__p.c_str(), __mode) {} # endif // _LIBCPP_STD_VER >= 17 @@ -1414,8 +1418,9 @@ public: ios_base::openmode __mode = ios_base::in | ios_base::out); # if _LIBCPP_STD_VER >= 17 + template >> _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_HIDE_FROM_ABI explicit basic_fstream( - const filesystem::path& __p, ios_base::openmode __mode = ios_base::in | ios_base::out) + const _Tp& __p, ios_base::openmode __mode = ios_base::in | ios_base::out) : basic_fstream(__p.c_str(), __mode) {} # endif // _LIBCPP_STD_VER >= 17 diff --git a/libcxx/include/functional b/libcxx/include/functional index a2774a48bda0e..a2476c93ad1b4 100644 --- a/libcxx/include/functional +++ b/libcxx/include/functional @@ -207,6 +207,12 @@ binary_negate not2(const Predicate& pred); template constexpr unspecified not_fn(F&& f); // C++17, constexpr in C++20 +// [func.bind.partial], function templates bind_front and bind_back +template + constexpr unspecified bind_front(F&&, Args&&...); // C++20 +template + constexpr unspecified bind_back(F&&, Args&&...); // C++23 + template struct is_bind_expression; template struct is_placeholder; diff --git a/libcxx/include/libcxx.imp b/libcxx/include/libcxx.imp index 2cb1fa5e1e2aa..6c77ba8343c60 100644 --- a/libcxx/include/libcxx.imp +++ b/libcxx/include/libcxx.imp @@ -288,6 +288,7 @@ { include: [ "<__chrono/parser_std_format_spec.h>", "private", "", "public" ] }, { include: [ "<__chrono/statically_widen.h>", "private", "", "public" ] }, { include: [ "<__chrono/steady_clock.h>", "private", "", 
"public" ] }, + { include: [ "<__chrono/sys_info.h>", "private", "", "public" ] }, { include: [ "<__chrono/system_clock.h>", "private", "", "public" ] }, { include: [ "<__chrono/time_point.h>", "private", "", "public" ] }, { include: [ "<__chrono/time_zone.h>", "private", "", "public" ] }, @@ -389,6 +390,7 @@ { include: [ "<__format/formatter_pointer.h>", "private", "", "public" ] }, { include: [ "<__format/formatter_string.h>", "private", "", "public" ] }, { include: [ "<__format/formatter_tuple.h>", "private", "", "public" ] }, + { include: [ "<__format/indic_conjunct_break_table.h>", "private", "", "public" ] }, { include: [ "<__format/parser_std_format_spec.h>", "private", "", "public" ] }, { include: [ "<__format/range_default_formatter.h>", "private", "", "public" ] }, { include: [ "<__format/range_formatter.h>", "private", "", "public" ] }, @@ -734,6 +736,7 @@ { include: [ "<__type_traits/datasizeof.h>", "private", "", "public" ] }, { include: [ "<__type_traits/decay.h>", "private", "", "public" ] }, { include: [ "<__type_traits/dependent_type.h>", "private", "", "public" ] }, + { include: [ "<__type_traits/desugars_to.h>", "private", "", "public" ] }, { include: [ "<__type_traits/disjunction.h>", "private", "", "public" ] }, { include: [ "<__type_traits/enable_if.h>", "private", "", "public" ] }, { include: [ "<__type_traits/extent.h>", "private", "", "public" ] }, @@ -818,7 +821,6 @@ { include: [ "<__type_traits/nat.h>", "private", "", "public" ] }, { include: [ "<__type_traits/negation.h>", "private", "", "public" ] }, { include: [ "<__type_traits/noexcept_move_assign_container.h>", "private", "", "public" ] }, - { include: [ "<__type_traits/operation_traits.h>", "private", "", "public" ] }, { include: [ "<__type_traits/promote.h>", "private", "", "public" ] }, { include: [ "<__type_traits/rank.h>", "private", "", "public" ] }, { include: [ "<__type_traits/remove_all_extents.h>", "private", "", "public" ] }, diff --git a/libcxx/include/locale b/libcxx/include/locale index e3c63e3abe130..748b276a85255 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -84,7 +84,7 @@ template charT tolower(charT c, const locale& loc); template, class Byte_alloc = allocator> -class wstring_convert +class wstring_convert // Removed in C++26 { public: typedef basic_string, Byte_alloc> byte_string; @@ -119,7 +119,7 @@ public: }; template > -class wbuffer_convert +class wbuffer_convert // Removed in C++26 : public basic_streambuf { public: @@ -3107,6 +3107,8 @@ extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages_byname; extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS messages_byname; #endif +#if _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT) + template , @@ -3712,6 +3714,8 @@ wbuffer_convert<_Codecvt, _Elem, _Tr>* wbuffer_convert<_Codecvt, _Elem, _Tr>::__ _LIBCPP_SUPPRESS_DEPRECATED_POP +#endif // _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT) + _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 6d4dcc2511f3e..011a4818ab9d2 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1167,6 +1167,9 @@ module std_private_chrono_time_zone [system] { module std_private_chrono_time_zone_link [system] { header "__chrono/time_zone_link.h" } +module std_private_chrono_sys_info [system] { + header "__chrono/sys_info.h" +} module std_private_chrono_system_clock [system] { header "__chrono/system_clock.h" export 
std_private_chrono_time_point @@ -1339,12 +1342,14 @@ module std_private_format_formatter_output [system] { header "__f module std_private_format_formatter_pointer [system] { header "__format/formatter_pointer.h" } module std_private_format_formatter_string [system] { header "__format/formatter_string.h" } module std_private_format_formatter_tuple [system] { header "__format/formatter_tuple.h" } +module std_private_format_indic_conjunct_break_table [system] { header "__format/indic_conjunct_break_table.h" } module std_private_format_parser_std_format_spec [system] { header "__format/parser_std_format_spec.h" } module std_private_format_range_default_formatter [system] { header "__format/range_default_formatter.h" } module std_private_format_range_formatter [system] { header "__format/range_formatter.h" } module std_private_format_unicode [system] { header "__format/unicode.h" export std_private_format_extended_grapheme_cluster_table + export std_private_format_indic_conjunct_break_table } module std_private_format_width_estimation_table [system] { header "__format/width_estimation_table.h" } module std_private_format_write_escaped [system] { header "__format/write_escaped.h" } @@ -1867,6 +1872,7 @@ module std_private_type_traits_decay [system export std_private_type_traits_add_pointer } module std_private_type_traits_dependent_type [system] { header "__type_traits/dependent_type.h" } +module std_private_type_traits_desugars_to [system] { header "__type_traits/desugars_to.h" } module std_private_type_traits_disjunction [system] { header "__type_traits/disjunction.h" } module std_private_type_traits_enable_if [system] { header "__type_traits/enable_if.h" } module std_private_type_traits_extent [system] { header "__type_traits/extent.h" } @@ -2017,7 +2023,6 @@ module std_private_type_traits_maybe_const [system module std_private_type_traits_nat [system] { header "__type_traits/nat.h" } module std_private_type_traits_negation [system] { header "__type_traits/negation.h" } module std_private_type_traits_noexcept_move_assign_container [system] { header "__type_traits/noexcept_move_assign_container.h" } -module std_private_type_traits_operation_traits [system] { header "__type_traits/operation_traits.h" } module std_private_type_traits_promote [system] { header "__type_traits/promote.h" } module std_private_type_traits_rank [system] { header "__type_traits/rank.h" } module std_private_type_traits_remove_all_extents [system] { header "__type_traits/remove_all_extents.h" } diff --git a/libcxx/include/ostream b/libcxx/include/ostream index 42819ceb252c6..d4fc1c58b8a94 100644 --- a/libcxx/include/ostream +++ b/libcxx/include/ostream @@ -164,6 +164,7 @@ template void print(ostream& os, format_string fmt, Args&&... args); template // since C++23 void println(ostream& os, format_string fmt, Args&&... args); +void println(ostream& os); // since C++26 void vprint_unicode(ostream& os, string_view fmt, format_args args); // since C++23 void vprint_nonunicode(ostream& os, string_view fmt, format_args args); // since C++23 @@ -1163,6 +1164,9 @@ _LIBCPP_HIDE_FROM_ABI void println(ostream& __os, format_string<_Args...> __fmt, # endif // _LIBCPP_HAS_NO_UNICODE } +template // TODO PRINT template or availability markup fires too eagerly (http://llvm.org/PR61563). 
+_LIBCPP_HIDE_FROM_ABI inline void println(ostream& __os) { std::print(__os, "\n"); } + #endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/print b/libcxx/include/print index a9f10433a7dc6..e0bcf214ea239 100644 --- a/libcxx/include/print +++ b/libcxx/include/print @@ -15,8 +15,10 @@ namespace std { // [print.fun], print functions template void print(format_string fmt, Args&&... args); + void println(); // Since C++26 template void print(FILE* stream, format_string fmt, Args&&... args); + void println(FILE* stream); // Since C++26 template void println(format_string fmt, Args&&... args); @@ -356,6 +358,12 @@ _LIBCPP_HIDE_FROM_ABI void println(FILE* __stream, format_string<_Args...> __fmt # endif // _LIBCPP_HAS_NO_UNICODE } +template // TODO PRINT template or availability markup fires too eagerly (http://llvm.org/PR61563). +_LIBCPP_HIDE_FROM_ABI inline void println(FILE* __stream) { std::print(__stream, "\n"); } + +template // TODO PRINT template or availability markup fires too eagerly (http://llvm.org/PR61563). +_LIBCPP_HIDE_FROM_ABI inline void println() { println(stdout); } + template _LIBCPP_HIDE_FROM_ABI void println(format_string<_Args...> __fmt, _Args&&... __args) { std::println(stdout, __fmt, std::forward<_Args>(__args)...); diff --git a/libcxx/include/version b/libcxx/include/version index 90dc1b279c6c2..0ed77345baa71 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -41,8 +41,7 @@ __cpp_lib_atomic_shared_ptr 201711L __cpp_lib_atomic_value_initialization 201911L __cpp_lib_atomic_wait 201907L __cpp_lib_barrier 201907L -__cpp_lib_bind_back 202306L - 202202L // C++23 +__cpp_lib_bind_back 202202L __cpp_lib_bind_front 202306L 201907L // C++20 __cpp_lib_bit_cast 201806L @@ -449,7 +448,7 @@ __cpp_lib_within_lifetime 202306L # define __cpp_lib_adaptor_iterator_pair_constructor 202106L # define __cpp_lib_allocate_at_least 202302L // # define __cpp_lib_associative_heterogeneous_erasure 202110L -// # define __cpp_lib_bind_back 202202L +# define __cpp_lib_bind_back 202202L # define __cpp_lib_byteswap 202110L # define __cpp_lib_constexpr_bitset 202207L # define __cpp_lib_constexpr_charconv 202207L @@ -498,8 +497,6 @@ __cpp_lib_within_lifetime 202306L #if _LIBCPP_STD_VER >= 26 // # define __cpp_lib_associative_heterogeneous_insertion 202306L // # define __cpp_lib_atomic_min_max 202403L -# undef __cpp_lib_bind_back -// # define __cpp_lib_bind_back 202306L # undef __cpp_lib_bind_front # define __cpp_lib_bind_front 202306L # define __cpp_lib_bitset 202306L diff --git a/libcxx/modules/std/chrono.inc b/libcxx/modules/std/chrono.inc index 2c0bd3f98a67d..575e6347aecce 100644 --- a/libcxx/modules/std/chrono.inc +++ b/libcxx/modules/std/chrono.inc @@ -199,10 +199,10 @@ export namespace std { using std::chrono::tzdb_list; // [time.zone.db.access], time zone database access - // using std::chrono::current_zone; + using std::chrono::current_zone; using std::chrono::get_tzdb; using std::chrono::get_tzdb_list; - // using std::chrono::locate_zone; + using std::chrono::locate_zone; // [time.zone.db.remote], remote time zone database support using std::chrono::reload_tzdb; @@ -212,10 +212,12 @@ export namespace std { // [time.zone.exception], exception classes using std::chrono::ambiguous_local_time; using std::chrono::nonexistent_local_time; +# endif // if 0 // [time.zone.info], information classes using std::chrono::sys_info; +# if 0 // [time.zone.timezone], class time_zone using std::chrono::choose; # endif // if 0 diff --git 
a/libcxx/modules/std/functional.inc b/libcxx/modules/std/functional.inc
index 1148944a9d2fe..ddc7d023ee6dc 100644
--- a/libcxx/modules/std/functional.inc
+++ b/libcxx/modules/std/functional.inc
@@ -56,8 +56,10 @@ export namespace std {
   using std::not_fn;

   // [func.bind.partial], function templates bind_front and bind_back
-  // using std::bind_back;
   using std::bind_front;
+#if _LIBCPP_STD_VER >= 23
+  using std::bind_back;
+#endif

   // [func.bind], bind
   using std::is_bind_expression;
diff --git a/libcxx/modules/std/locale.inc b/libcxx/modules/std/locale.inc
index c34f56530e98c..8975453864226 100644
--- a/libcxx/modules/std/locale.inc
+++ b/libcxx/modules/std/locale.inc
@@ -67,10 +67,15 @@ export namespace std {
   using std::messages_base;
   using std::messages_byname;

+#  if _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT)
+
   // [depr.conversions.buffer]
   using std::wbuffer_convert;

   // [depr.conversions.string]
   using std::wstring_convert;
+
+#  endif // _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT)
+
 #endif // _LIBCPP_HAS_NO_LOCALIZATION
 } // namespace std
diff --git a/libcxx/src/include/tzdb/time_zone_private.h b/libcxx/src/include/tzdb/time_zone_private.h
index 039a3b0ffeb7c..2c47e9fdb13df 100644
--- a/libcxx/src/include/tzdb/time_zone_private.h
+++ b/libcxx/src/include/tzdb/time_zone_private.h
@@ -24,7 +24,8 @@ namespace chrono {

 class time_zone::__impl {
 public:
-  explicit _LIBCPP_HIDE_FROM_ABI __impl(string&& __name) : __name_(std::move(__name)) {}
+  explicit _LIBCPP_HIDE_FROM_ABI __impl(string&& __name, const __tz::__rules_storage_type& __rules_db)
+      : __name_(std::move(__name)), __rules_db_(__rules_db) {}

   [[nodiscard]] _LIBCPP_HIDE_FROM_ABI string_view __name() const noexcept { return __name_; }

@@ -33,12 +34,20 @@ class time_zone::__impl {
     return __continuations_;
   }

+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const __tz::__rules_storage_type& __rules_db() const { return __rules_db_; }
+
 private:
   string __name_;
   // Note the first line has a name + __continuation, the other lines
   // are just __continuations. So there is always at least one item in
   // the vector.
   vector<__tz::__continuation> __continuations_;
+
+  // Continuations often depend on a set of rules. The rules are stored in
+  // parallel data structures in tzdb_list. From the time_zone it's not
+  // possible to find its associated tzdb entry and thus not possible to find
+  // its associated rules. Therefore a link to the rules is stored in this
+  // class.
+  const __tz::__rules_storage_type& __rules_db_;
 };

 } // namespace chrono
diff --git a/libcxx/src/include/tzdb/types_private.h b/libcxx/src/include/tzdb/types_private.h
index 4604b9fc88114..c86982948b61f 100644
--- a/libcxx/src/include/tzdb/types_private.h
+++ b/libcxx/src/include/tzdb/types_private.h
@@ -33,7 +33,17 @@ namespace chrono::__tz {
 // Sun>=8  first Sunday on or after the eighth
 // Sun<=25 last Sunday on or before the 25th
 struct __constrained_weekday {
-  /* year_month_day operator()(year __year, month __month);*/ // needed but not implemented
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI year_month_day operator()(year __year, month __month) const {
+    auto __result = static_cast<sys_days>(year_month_day{__year, __month, __day});
+    weekday __wd{static_cast<sys_days>(__result)};
+
+    if (__comparison == __le)
+      __result -= __wd - __weekday;
+    else
+      __result += __weekday - __wd;
+
+    return __result;
+  }

   weekday __weekday;
   enum __comparison_t { __le, __ge } __comparison;
@@ -85,7 +95,8 @@ struct __continuation {
   // used.
// If this field contains - then standard time always
   // applies. This is indicated by the monostate.
-  using __rules_t = variant;
+  // TODO TZDB Investigate implementing the size_t based caching.
+  using __rules_t = variant;

   __rules_t __rules;
diff --git a/libcxx/src/random.cpp b/libcxx/src/random.cpp
index 93590af310e51..14c6f4473d70b 100644
--- a/libcxx/src/random.cpp
+++ b/libcxx/src/random.cpp
@@ -79,10 +79,8 @@ unsigned random_device::operator()() {
   char* p = reinterpret_cast<char*>(&r);
   while (n > 0) {
     ssize_t s = read(__f_, p, n);
-    _LIBCPP_SUPPRESS_DEPRECATED_PUSH
     if (s == 0)
-      __throw_system_error(ENODATA, "random_device got EOF"); // TODO ENODATA -> ENOMSG
-    _LIBCPP_SUPPRESS_DEPRECATED_POP
+      __throw_system_error(ENOMSG, "random_device got EOF");
     if (s == -1) {
       if (errno != EINTR)
         __throw_system_error(errno, "random_device got an unexpected error");
diff --git a/libcxx/src/time_zone.cpp b/libcxx/src/time_zone.cpp
index b6bf06a116f68..aef6ac674a11e 100644
--- a/libcxx/src/time_zone.cpp
+++ b/libcxx/src/time_zone.cpp
@@ -8,14 +8,712 @@

 // For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html

+// TODO TZDB look at optimizations
+//
+// The current algorithm is correct but not efficient. For example, in a named
+// rule based continuation finding the next rule does quite a bit of work,
+// returns the next rule and "forgets" its state. This could be better.
+//
+// It would be possible to cache lookups. If a time for a zone is calculated,
+// its sys_info could be kept and the next lookup could test whether the time
+// is in a "known" sys_info. The wording in the Standard hints at this slowness
+// by "suggesting" this could be implemented on the user's side.
+
+// TODO TZDB look at removing quirks
+//
+// The code has some special rules to adjust the timing at the continuation
+// switches. This works correctly, but some of the places feel odd. It would be
+// good to investigate this further and see whether all quirks are needed or
+// whether there are better fixes.
+//
+// These quirks often use a 12h interval; this is the scan interval of zdump,
+// which implies there are no sys_info objects with a duration of less than 12h.
+
+#include
+#include
 #include
+#include
+#include
+#include

 #include "include/tzdb/time_zone_private.h"
+#include "include/tzdb/tzdb_list_private.h"
+
+// TODO TZDB remove debug printing
+#ifdef PRINT
+#  include <print>
+#endif

 _LIBCPP_BEGIN_NAMESPACE_STD

+#ifdef PRINT
+template <>
+struct formatter<chrono::sys_info> {
+  template <class ParseContext>
+  constexpr typename ParseContext::iterator parse(ParseContext& ctx) {
+    return ctx.begin();
+  }
+
+  template <class FormatContext>
+  typename FormatContext::iterator format(const chrono::sys_info& info, FormatContext& ctx) const {
+    return std::format_to(
+        ctx.out(), "[{}, {}) {:%Q%q} {:%Q%q} {}", info.begin, info.end, info.offset, info.save, info.abbrev);
+  }
+};
+#endif
+
 namespace chrono {

+//===----------------------------------------------------------------------===//
+// Details
+//===----------------------------------------------------------------------===//
+
+struct __sys_info {
+  sys_info __info;
+  bool __can_merge; // Can the returned sys_info object be merged with the next result?
+};
+
+// Return type for helper function to get a sys_info.
+// - The expected result returns the "best" sys_info object. This object can be
+//   before the requested time. Sometimes sys_info objects from different
+//   continuations share their offset, save, and abbrev and these objects are
+//   merged into one sys_info object.
The __can_merge flag determines whether the +// current result can be merged with the next result. +// - The unexpected result means no sys_info object was found and the time is +// the time to be used for the next search iteration. +using __sys_info_result = expected<__sys_info, sys_seconds>; + +template , _Proj>> _Comp = ranges::less> +[[nodiscard]] static ranges::borrowed_iterator_t<_Range> +__binary_find(_Range&& __r, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) { + auto __end = ranges::end(__r); + auto __ret = ranges::lower_bound(ranges::begin(__r), __end, __value, __comp, __proj); + if (__ret == __end) + return __end; + + // When the value does not match the predicate it's equal and a valid result + // was found. + return !std::invoke(__comp, __value, std::invoke(__proj, *__ret)) ? __ret : __end; +} + +// Format based on https://data.iana.org/time-zones/tz-how-to.html +// +// 1 a time zone abbreviation that is a string of three or more characters that +// are either ASCII alphanumerics, "+", or "-" +// 2 the string "%z", in which case the "%z" will be replaced by a numeric time +// zone abbreviation +// 3 a pair of time zone abbreviations separated by a slash ('/'), in which +// case the first string is the abbreviation for the standard time name and +// the second string is the abbreviation for the daylight saving time name +// 4 a string containing "%s", in which case the "%s" will be replaced by the +// text in the appropriate Rule's LETTER column, and the resulting string +// should be a time zone abbreviation +// +// Rule 1 is not strictly validated since America/Barbados uses a two letter +// abbreviation AT. +[[nodiscard]] static string +__format(const __tz::__continuation& __continuation, const string& __letters, seconds __save) { + bool __shift = false; + string __result; + for (char __c : __continuation.__format) { + if (__shift) { + switch (__c) { + case 's': + std::ranges::copy(__letters, std::back_inserter(__result)); + break; + + case 'z': { + if (__continuation.__format.size() != 2) + std::__throw_runtime_error( + std::format("corrupt tzdb FORMAT field: %z should be the entire contents, instead contains '{}'", + __continuation.__format) + .c_str()); + chrono::hh_mm_ss __offset{__continuation.__stdoff + __save}; + if (__offset.is_negative()) { + __result += '-'; + __offset = chrono::hh_mm_ss{-(__continuation.__stdoff + __save)}; + } else + __result += '+'; + + if (__offset.minutes() != 0min) + std::format_to(std::back_inserter(__result), "{:%H%M}", __offset); + else + std::format_to(std::back_inserter(__result), "{:%H}", __offset); + } break; + + default: + std::__throw_runtime_error( + std::format("corrupt tzdb FORMAT field: invalid sequence '%{}' found, expected %s or %z", __c).c_str()); + } + __shift = false; + + } else if (__c == '/') { + if (__save != 0s) + __result.clear(); + else + break; + + } else if (__c == '%') { + __shift = true; + } else if (__c == '+' || __c == '-' || std::isalnum(__c)) { + __result.push_back(__c); + } else { + std::__throw_runtime_error( + std::format( + "corrupt tzdb FORMAT field: invalid character '{}' found, expected +, -, or an alphanumeric value", __c) + .c_str()); + } + } + + if (__shift) + std::__throw_runtime_error("corrupt tzdb FORMAT field: input ended with the start of the escape sequence '%'"); + + if (__result.empty()) + std::__throw_runtime_error("corrupt tzdb FORMAT field: result is empty"); + + return __result; +} + +[[nodiscard]] static sys_seconds __to_sys_seconds(year_month_day __ymd, seconds __seconds) { 
+  seconds __result = static_cast<sys_days>(__ymd).time_since_epoch() + __seconds;
+  return sys_seconds{__result};
+}
+
+[[nodiscard]] static seconds __at_to_sys_seconds(const __tz::__continuation& __continuation) {
+  switch (__continuation.__at.__clock) {
+  case __tz::__clock::__local:
+    return __continuation.__at.__time - __continuation.__stdoff -
+           std::visit(
+               [](const auto& __value) {
+                 using _Tp = decay_t<decltype(__value)>;
+                 if constexpr (same_as<_Tp, monostate>)
+                   return chrono::seconds{0};
+                 else if constexpr (same_as<_Tp, __tz::__save>)
+                   return chrono::duration_cast<seconds>(__value.__time);
+                 else if constexpr (same_as<_Tp, std::string>)
+                   // For a named rule based continuation the SAVE depends on the RULE
+                   // active at the end. This should be determined separately.
+                   return chrono::seconds{0};
+                 else
+                   static_assert(sizeof(_Tp) == 0); // TODO TZDB static_assert(false); after dropping clang-16 support
+
+                 std::__libcpp_unreachable();
+               },
+               __continuation.__rules);

+  case __tz::__clock::__universal:
+    return __continuation.__at.__time;
+
+  case __tz::__clock::__standard:
+    return __continuation.__at.__time - __continuation.__stdoff;
+  }
+  std::__libcpp_unreachable();
+}
+
+[[nodiscard]] static year_month_day __to_year_month_day(year __year, month __month, __tz::__on __on) {
+  return std::visit(
+      [&](const auto& __value) {
+        using _Tp = decay_t<decltype(__value)>;
+        if constexpr (same_as<_Tp, chrono::day>)
+          return year_month_day{__year, __month, __value};
+        else if constexpr (same_as<_Tp, weekday_last>)
+          return year_month_day{static_cast<sys_days>(year_month_weekday_last{__year, __month, __value})};
+        else if constexpr (same_as<_Tp, __tz::__constrained_weekday>)
+          return __value(__year, __month);
+        else
+          static_assert(sizeof(_Tp) == 0); // TODO TZDB static_assert(false); after dropping clang-16 support
+
+        std::__libcpp_unreachable();
+      },
+      __on);
+}
+
+[[nodiscard]] static sys_seconds __until_to_sys_seconds(const __tz::__continuation& __continuation) {
+  // Does UNTIL contain the magic value for the last continuation?
+  if (__continuation.__year == chrono::year::min())
+    return sys_seconds::max();
+
+  year_month_day __ymd = chrono::__to_year_month_day(__continuation.__year, __continuation.__in, __continuation.__on);
+  return chrono::__to_sys_seconds(__ymd, chrono::__at_to_sys_seconds(__continuation));
+}
+
+// Holds the UNTIL time for a continuation with a named rule.
+//
+// Unlike continuations with a fixed SAVE, named rules have a variable SAVE.
+// This means that when the UNTIL uses the local wall time, the actual UNTIL
+// value can only be determined once the SAVE is known. This class holds that
+// abstraction.
+class __named_rule_until {
+public:
+  explicit __named_rule_until(const __tz::__continuation& __continuation)
+      : __until_{chrono::__until_to_sys_seconds(__continuation)},
+        __needs_adjustment_{
+            // The last continuation of a ZONE has no UNTIL, which basically
+            // means it lasts until the end of _local_ time. This value is
+            // expressed by sys_seconds::max(). Subtracting the SAVE leaves a
+            // large value. However SAVE can be negative, which would add a
+            // value to the maximum, leading to undefined behaviour. In
+            // practice this often results in an overflow to a very small
+            // value.
+            __until_ != sys_seconds::max() && __continuation.__at.__clock == __tz::__clock::__local} {}
+
+  // Gives the unadjusted until value; this is useful when the SAVE is not
+  // known at all.
+  sys_seconds __until() const noexcept { return __until_; }
+
+  bool __needs_adjustment() const noexcept { return __needs_adjustment_; }
+
+  // Returns the UNTIL adjusted for SAVE.
+  sys_seconds operator()(seconds __save) const noexcept { return __until_ - __needs_adjustment_ * __save; }
+
+private:
+  sys_seconds __until_;
+  bool __needs_adjustment_;
+};
+
+[[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, const __tz::__rule& __rule) {
+  switch (__rule.__at.__clock) {
+  case __tz::__clock::__local:
+    // Local time and standard time behave the same. This is not
+    // correct. Local time needs to adjust for the current saved time.
+    // To know the saved time the rules need to be known and sorted.
+    // That needs a time, so to avoid a chicken-and-egg problem the
+    // saving of the local time is adjusted later.
+    return __rule.__at.__time - __stdoff;
+
+  case __tz::__clock::__universal:
+    return __rule.__at.__time;
+
+  case __tz::__clock::__standard:
+    return __rule.__at.__time - __stdoff;
+  }
+  std::__libcpp_unreachable();
+}
+
+[[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule, year __year) {
+  year_month_day __ymd = chrono::__to_year_month_day(__year, __rule.__in, __rule.__on);
+
+  seconds __at = chrono::__at_to_seconds(__stdoff, __rule);
+  return chrono::__to_sys_seconds(__ymd, __at);
+}
+
+[[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule) {
+  return chrono::__from_to_sys_seconds(__stdoff, __rule, __rule.__from);
+}
+
+[[nodiscard]] static const vector<__tz::__rule>&
+__get_rules(const __tz::__rules_storage_type& __rules_db, const string& __rule_name) {
+  auto __result = chrono::__binary_find(__rules_db, __rule_name, {}, [](const auto& __p) { return __p.first; });
+  if (__result == std::end(__rules_db))
+    std::__throw_runtime_error(("corrupt tzdb: rule '" + __rule_name + "' does not exist").c_str());
+
+  return __result->second;
+}
+
+// Returns the letters field for a time before the first rule.
+//
+// Per https://data.iana.org/time-zones/tz-how-to.html
+// One wrinkle, not fully explained in zic.8.txt, is what happens when switching
+// to a named rule. To what values should the SAVE and LETTER data be
+// initialized?
+//
+// 1 If at least one transition has happened, use the SAVE and LETTER data from
+//   the most recent.
+// 2 If switching to a named rule before any transition has happened, assume
+//   standard time (SAVE zero), and use the LETTER data from the earliest
+//   transition with a SAVE of zero.
+//
+// This function implements case 2.
+[[nodiscard]] static string __letters_before_first_rule(const vector<__tz::__rule>& __rules) {
+  auto __letters =
+      __rules                                                                                //
+      | views::filter([](const __tz::__rule& __rule) { return __rule.__save.__time == 0s; }) //
+      | views::transform([](const __tz::__rule& __rule) { return __rule.__letters; })        //
+      | views::take(1);
+
+  if (__letters.empty())
+    std::__throw_runtime_error("corrupt tzdb: rule has zero entries");
+
+  return __letters.front();
+}
+
+// Determines the information based on the continuation and the rules.
+//
+// There are several special cases to take into account:
+//
+// === Entries before the first rule becomes active ===
+// Asia/Hong_Kong
+//   9 - JST 1945 N 18 2        // (1)
+//   8 HK HK%sT                 // (2)
+//   R HK 1946 o - Ap 21 0 1 S  // (3)
+// Here (1) is active until November 18th 1945 at 02:00; after this time
+// (2) becomes active. The first rule entry for HK (3) becomes active
+// from April 21st 1946 at 01:00. In the period in between, (2) is active.
+// This entry has an offset.
+// This entry has no save, letters, or dst flag.
+// So in the period after (1) and until (3) no rule entry is associated with
+// the time.
+
+[[nodiscard]] static sys_info __get_sys_info_before_first_rule(
+    sys_seconds __begin,
+    sys_seconds __end,
+    const __tz::__continuation& __continuation,
+    const vector<__tz::__rule>& __rules) {
+  return sys_info{
+      __begin,
+      __end,
+      __continuation.__stdoff,
+      chrono::minutes(0),
+      chrono::__format(__continuation, __letters_before_first_rule(__rules), 0s)};
+}
+
+// Returns the sys_info object for a time before the first rule.
+// When this first rule has a SAVE of 0s the sys_info for the time before the
+// first rule and for the first rule are identical and will be merged.
+[[nodiscard]] static sys_info __get_sys_info_before_first_rule(
+    sys_seconds __begin,
+    sys_seconds __rule_end, // The end used when SAVE != 0s
+    sys_seconds __next_end, // The end used when SAVE == 0s and the times are merged
+    const __tz::__continuation& __continuation,
+    const vector<__tz::__rule>& __rules,
+    vector<__tz::__rule>::const_iterator __rule) {
+  if (__rule->__save.__time != 0s)
+    return __get_sys_info_before_first_rule(__begin, __rule_end, __continuation, __rules);
+
+  return sys_info{
+      __begin, __next_end, __continuation.__stdoff, 0min, chrono::__format(__continuation, __rule->__letters, 0s)};
+}
+
+[[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule) {
+  switch (__rule.__at.__clock) {
+  case __tz::__clock::__local:
+    return __rule.__at.__time - __stdoff - __save;
+
+  case __tz::__clock::__universal:
+    return __rule.__at.__time;
+
+  case __tz::__clock::__standard:
+    return __rule.__at.__time - __stdoff;
+  }
+  std::__libcpp_unreachable();
+}
+
+[[nodiscard]] static sys_seconds
+__rule_to_sys_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule, year __year) {
+  year_month_day __ymd = chrono::__to_year_month_day(__year, __rule.__in, __rule.__on);
+
+  seconds __at = chrono::__at_to_seconds(__stdoff, __save, __rule);
+  return chrono::__to_sys_seconds(__ymd, __at);
+}
+
+// Returns the first rule after __time.
+// Note that a rule can be "active" in multiple years; this may result in an
+// infinite loop where the same rule is returned every time. Use __current to
+// guard against that.
+//
+// When no next rule exists the returned time will be sys_seconds::max(). This
+// can happen in practice. For example,
+//
+//   R So 1945 o - May 24 2 2 M
+//   R So 1945 o - S 24 3 1 S
+//   R So 1945 o - N 18 2s 0 -
+//
+// has 3 rules that are all only active in 1945.
+[[nodiscard]] static pair<sys_seconds, vector<__tz::__rule>::const_iterator>
+__next_rule(sys_seconds __time,
+            seconds __stdoff,
+            seconds __save,
+            const vector<__tz::__rule>& __rules,
+            vector<__tz::__rule>::const_iterator __current) {
+  year __year = year_month_day{chrono::floor<days>(__time)}.year();
+
+  // Note it would probably be better to store the pairs in a vector and then
+  // use min() to get the smallest element.
+  map<sys_seconds, vector<__tz::__rule>::const_iterator> __candidates;
+  // Note this evaluates all rules, which is a waste of effort; when the
+  // entries are beyond the current year's "next year" (where "next year" is
+  // not always year + 1) the algorithm should end.
+  for (auto __it = __rules.begin(); __it != __rules.end(); ++__it) {
+    for (year __y = __it->__from; __y <= __it->__to; ++__y) {
+      // Adding the current entry for the current year may lead to infinite
+      // loops due to the SAVE adjustment. Skip these entries.
+      if (__y == __year && __it == __current)
+        continue;
+
+      sys_seconds __t = chrono::__rule_to_sys_seconds(__stdoff, __save, *__it, __y);
+      if (__t <= __time)
+        continue;
+
+      _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(!__candidates.contains(__t), "duplicated rule");
+      __candidates[__t] = __it;
+      break;
+    }
+  }
+
+  if (!__candidates.empty()) [[likely]] {
+    auto __it = __candidates.begin();
+
+    // When no rule is selected the time before the first rule and the first
+    // rule should not be merged.
+    if (__time == sys_seconds::min())
+      return *__it;
+
+    // There can be two consecutive rules that are the same. For example,
+    // Hong Kong
+    //
+    //   R HK 1973 o - D 30 3:30 1 S         (R1)
+    //   R HK 1965 1976 - Ap Su>=16 3:30 1 S (R2)
+    //
+    // 1973-12-29 19:30:00 R1 becomes active.
+    // 1974-04-20 18:30:00 R2 becomes active.
+    // Both rules have a SAVE of 1 hour and the LETTERS are S for both of them.
+    while (__it != __candidates.end()) {
+      if (__current->__save.__time != __it->second->__save.__time || __current->__letters != __it->second->__letters)
+        return *__it;
+
+      ++__it;
+    }
+  }
+
+  return {sys_seconds::max(), __rules.end()};
+}
+
+// Returns the first rule of a set of rules.
+// This is not always the first of the listed rules. For example
+//   R Sa 2008 2009 - Mar Su>=8 0 0 -
+//   R Sa 2007 2008 - O Su>=8 0 1 -
+// The transition in October 2007 happens before the transition in March 2008.
+[[nodiscard]] static vector<__tz::__rule>::const_iterator
+__first_rule(seconds __stdoff, const vector<__tz::__rule>& __rules) {
+  return chrono::__next_rule(sys_seconds::min(), __stdoff, 0s, __rules, __rules.end()).second;
+}
+
+[[nodiscard]] static __sys_info_result __get_sys_info_rule(
+    sys_seconds __time,
+    sys_seconds __continuation_begin,
+    const __tz::__continuation& __continuation,
+    const vector<__tz::__rule>& __rules) {
+  auto __rule = chrono::__first_rule(__continuation.__stdoff, __rules);
+  _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__rule != __rules.end(), "the set of rules has no first rule");
+
+  // Avoid selecting a time before the start of the continuation
+  __time = std::max(__time, __continuation_begin);
+
+  sys_seconds __rule_begin = chrono::__from_to_sys_seconds(__continuation.__stdoff, *__rule);
+
+  // The time sought is very likely inside the current rule.
+  // When the continuation's UNTIL uses the local clock there are edge cases
+  // where this is not true.
+  //
+  // Start to walk the rules to find the proper one.
+  //
+  // For now we just walk all the rules. TODO TZDB investigate whether a
+  // smarter algorithm would work.
+  auto __next = chrono::__next_rule(__rule_begin, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
+
+  // Ignore small steps; this happens with America/Punta_Arenas for the
+  // transition
+  //   -4:42:46 - SMT 1927 S
+  //   -5 x -05/-04 1932 S
+  //   ...
+  //
+  //   R x 1927 1931 - S 1 0 1 -
+  //   R x 1928 1932 - Ap 1 0 0 -
+  //
+  // America/Punta_Arenas  Thu Sep  1 04:42:45 1927 UT = Thu Sep  1 00:42:45 1927 -04 isdst=1 gmtoff=-14400
+  // America/Punta_Arenas  Sun Apr  1 03:59:59 1928 UT = Sat Mar 31 23:59:59 1928 -04 isdst=1 gmtoff=-14400
+  // America/Punta_Arenas  Sun Apr  1 04:00:00 1928 UT = Sat Mar 31 23:00:00 1928 -05 isdst=0 gmtoff=-18000
+  //
+  // Without this there will be a transition
+  //   [1927-09-01 04:42:45, 1927-09-01 05:00:00) -05:00:00 0min -05
+
+  if (sys_seconds __begin = __rule->__save.__time != 0s ?
__rule_begin : __next.first; __time < __begin) { + if (__continuation_begin == sys_seconds::min() || __begin - __continuation_begin > 12h) + return __sys_info{__get_sys_info_before_first_rule( + __continuation_begin, __rule_begin, __next.first, __continuation, __rules, __rule), + false}; + + // Europe/Berlin + // 1 c CE%sT 1945 May 24 2 (C1) + // 1 So CE%sT 1946 (C2) + // + // R c 1944 1945 - Ap M>=1 2s 1 S (R1) + // + // R So 1945 o - May 24 2 2 M (R2) + // + // When C2 becomes active the time would be before the first rule R2, + // giving a 1 hour sys_info. + seconds __save = __rule->__save.__time; + __named_rule_until __continuation_end{__continuation}; + sys_seconds __sys_info_end = std::min(__continuation_end(__save), __next.first); + + return __sys_info{ + sys_info{__continuation_begin, + __sys_info_end, + __continuation.__stdoff + __save, + chrono::duration_cast(__save), + chrono::__format(__continuation, __rule->__letters, __save)}, + __sys_info_end == __continuation_end(__save)}; + } + + // See above for America/Asuncion + if (__rule->__save.__time == 0s && __time < __next.first) { + return __sys_info{ + sys_info{__continuation_begin, + __next.first, + __continuation.__stdoff, + 0min, + chrono::__format(__continuation, __rule->__letters, 0s)}, + false}; + } + + __named_rule_until __continuation_end{__continuation}; + if (__time >= __continuation_end.__until() && !__continuation_end.__needs_adjustment()) + // note std::unexpected(__end); is ambiguous with std::unexpected() in , + return __sys_info_result{std::unexpect, __continuation_end.__until()}; + + while (__next.second != __rules.end()) { +#ifdef PRINT + std::print( + stderr, + "Rule for {}: [{}, {}) off={} save={} duration={}\n", + __time, + __rule_begin, + __next.first, + __continuation.__stdoff, + __rule->__save.__time, + __next.first - __rule_begin); +#endif + + sys_seconds __end = __continuation_end(__rule->__save.__time); + + sys_seconds __sys_info_begin = std::max(__continuation_begin, __rule_begin); + sys_seconds __sys_info_end = std::min(__end, __next.first); + seconds __diff = chrono::abs(__sys_info_end - __sys_info_begin); + + if (__diff < 12h) { + // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31 + // -4:16:48 - CMT 1920 May + // -4 - -04 1930 D + // -4 A -04/-03 1969 O 5 + // -3 A -03/-02 1999 O 3 + // -4 A -04/-03 2000 Mar 3 + // ... + // + // ... + // R A 1989 1992 - O Su>=15 0 1 - + // R A 1999 o - O Su>=1 0 1 - + // R A 2000 o - Mar 3 0 0 - + // R A 2007 o - D 30 0 1 - + // ... + + // The 1999 switch uses the same rule, but with a different stdoff. + // R A 1999 o - O Su>=1 0 1 - + // stdoff -3 -> 1999-10-03 03:00:00 + // stdoff -4 -> 1999-10-03 04:00:00 + // This generates an invalid entry and this is evaluated as a transition. + // Looking at the zdump like output in libc++ this generates jumps in + // the UTC time. 
+
+      __rule = __next.second;
+      __next = __next_rule(__next.first, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
+      __end  = __continuation_end(__rule->__save.__time);
+      __sys_info_end = std::min(__end, __next.first);
+    }
+
+    if ((__time >= __rule_begin && __time < __next.first) || __next.first >= __end) {
+      __sys_info_begin = std::max(__continuation_begin, __rule_begin);
+      __sys_info_end   = std::min(__end, __next.first);
+
+      return __sys_info{
+          sys_info{__sys_info_begin,
+                   __sys_info_end,
+                   __continuation.__stdoff + __rule->__save.__time,
+                   chrono::duration_cast<minutes>(__rule->__save.__time),
+                   chrono::__format(__continuation, __rule->__letters, __rule->__save.__time)},
+          __sys_info_end == __end};
+    }
+
+    __rule_begin = __next.first;
+    __rule       = __next.second;
+    __next       = __next_rule(__rule_begin, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
+  }
+
+  return __sys_info{
+      sys_info{std::max(__continuation_begin, __rule_begin),
+               __continuation_end(__rule->__save.__time),
+               __continuation.__stdoff + __rule->__save.__time,
+               chrono::duration_cast<minutes>(__rule->__save.__time),
+               chrono::__format(__continuation, __rule->__letters, __rule->__save.__time)},
+      true};
+}
+
+[[nodiscard]] static __sys_info_result __get_sys_info_basic(
+    sys_seconds __time, sys_seconds __continuation_begin, const __tz::__continuation& __continuation, seconds __save) {
+  sys_seconds __continuation_end = chrono::__until_to_sys_seconds(__continuation);
+  return __sys_info{
+      sys_info{__continuation_begin,
+               __continuation_end,
+               __continuation.__stdoff + __save,
+               chrono::duration_cast<minutes>(__save),
+               __continuation.__format},
+      true};
+}
+
+[[nodiscard]] static __sys_info_result
+__get_sys_info(sys_seconds __time,
+               sys_seconds __continuation_begin,
+               const __tz::__continuation& __continuation,
+               const __tz::__rules_storage_type& __rules_db) {
+  return std::visit(
+      [&](const auto& __value) {
+        using _Tp = decay_t<decltype(__value)>;
+        if constexpr (same_as<_Tp, std::string>)
+          return chrono::__get_sys_info_rule(
+              __time, __continuation_begin, __continuation, __get_rules(__rules_db, __value));
+        else if constexpr (same_as<_Tp, monostate>)
+          return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, chrono::seconds(0));
+        else if constexpr (same_as<_Tp, __tz::__save>)
+          return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, __value.__time);
+        else
+          static_assert(sizeof(_Tp) == 0); // TODO TZDB static_assert(false); after dropping clang-16 support
+
+        std::__libcpp_unreachable();
+      },
+      __continuation.__rules);
+}
+
+// The transition from one continuation to the next continuation may result in
+// two consecutive continuations with the same "offset" information.
+// [time.zone.info.sys]/3
+//   The begin and end data members indicate that, for the associated time_zone
+//   and time_point, the offset and abbrev are in effect in the range
+//   [begin, end). This information can be used to efficiently iterate the
+//   transitions of a time_zone.
+//
+// Note that this considers a change in the SAVE field not to be a different
+// sys_info; zdump does consider this different.
+// LWG XXXX "The sys_info range should be affected by save"
+// matches the behaviour of the Standard and zdump.
+//
+// Iff the "offsets" are the same, '__current.__end' is replaced with
+// '__next.__end', which effectively merges the two objects into one object.
+// The function returns true if a merge occurred.
+[[nodiscard]] bool __merge_continuation(sys_info& __current, const sys_info& __next) { + if (__current.end != __next.begin) + return false; + + if (__current.offset != __next.offset || __current.abbrev != __next.abbrev || __current.save != __next.save) + return false; + + __current.end = __next.end; + return true; +} + +//===----------------------------------------------------------------------===// +// Public API +//===----------------------------------------------------------------------===// + [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI time_zone time_zone::__create(unique_ptr<time_zone::__impl>&& __p) { _LIBCPP_ASSERT_NON_NULL(__p != nullptr, "initialized time_zone without a valid pimpl object"); time_zone result; @@ -27,6 +725,173 @@ _LIBCPP_EXPORTED_FROM_ABI time_zone::~time_zone() = default; [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI string_view time_zone::__name() const noexcept { return __impl_->__name(); } +[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI sys_info +time_zone::__get_info(sys_seconds __time) const { + optional<sys_info> __result; + bool __valid_result = false; // true iff __result.has_value() is true and + // __result.begin <= __time < __result.end is true. + bool __can_merge = false; + sys_seconds __continuation_begin = sys_seconds::min(); + // Iterates over the Zone entry and its continuations. Internally the Zone + // entry is split into the Zone information and the first continuation. The + // last continuation has no UNTIL field. This means the loop should always + // find a continuation. + // + // For more background on zone information please consult + // [zic manual](https://www.man7.org/linux/man-pages/man8/zic.8.html) + // [tz source info](https://data.iana.org/time-zones/tz-how-to.html) + // On POSIX systems the zdump tool can be useful: + // zdump -v Asia/Hong_Kong + // Gives all transitions in the Hong Kong time zone. + // + // During iteration the result for the current continuation is returned. If + // no continuation is applicable it will return the end time as "error". When + // two continuations are contiguous and contain the "same" information these + // ranges are merged into one range. + // The merging requires keeping any result that occurs before __time; + // likewise, when a valid result is found, the algorithm needs to test the + // next continuation to see whether it can be merged. For example, Africa/Ceuta + // Continuations + // 0 s WE%sT 1929 (C1) + // 0 - WET 1967 (C2) + // 0 Sp WE%sT 1984 Mar 16 (C3) + // + // Rules + // R s 1926 1929 - O Sa>=1 24s 0 - (R1) + // + // R Sp 1967 o - Jun 3 12 1 S (R2) + // + // The rule R1 is the last rule used in C1. The rule R2 is the first rule in + // C3. Since R2 is the first rule, a continuation using this rule takes, + // prior to R2, the SAVE and LETTERS of the first entry with a SAVE of 0, in + // this case WET. + // This gives the following changes in the information. + // 1928-10-07 00:00:00 C1 R1 becomes active: offset 0 save 0 abbrev WET + // 1929-01-01 00:00:00 C2 becomes active: offset 0 save 0 abbrev WET + // 1967-01-01 00:00:00 C3 becomes active: offset 0 save 0 abbrev WET + // 1967-06-03 12:00:00 C3 R2 becomes active: offset 0 save 1 abbrev WEST + // + // The first 3 entries are contiguous and contain the same information; this + // means the period [1928-10-07 00:00:00, 1967-06-03 12:00:00) should be + // returned in one sys_info object.
+ + const auto& __continuations = __impl_->__continuations(); + const __tz::__rules_storage_type& __rules_db = __impl_->__rules_db(); + for (auto __it = __continuations.begin(); __it != __continuations.end(); ++__it) { + const auto& __continuation = *__it; + __sys_info_result __sys_info = chrono::__get_sys_info(__time, __continuation_begin, __continuation, __rules_db); + + if (__sys_info) { + _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( + __sys_info->__info.begin < __sys_info->__info.end, "invalid sys_info range"); + + // Filters out dummy entries + // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31 + // ... + // -4 A -04/-03 2000 Mar 3 (C1) + // -3 A -03/-02 (C2) + // + // ... + // R A 2000 o - Mar 3 0 0 - + // R A 2007 o - D 30 0 1 - + // ... + // + // This results in an entry + // [2000-03-03 03:00:00, 2000-03-03 04:00:00) -10800s 60min -03 + // for [C1 & R1, C1 & R2), which, due to the end of the continuation, is a + // one-hour "sys_info". Instead the entry should be ignored and replaced + // by [C2 & R1, C2 & R2), which is the proper range + // "[2000-03-03 03:00:00, 2007-12-30 03:00:00) -02:00:00 60min -02" + + if (std::holds_alternative<std::string>(__continuation.__rules) && __sys_info->__can_merge && + __sys_info->__info.begin + 12h > __sys_info->__info.end) { + __continuation_begin = __sys_info->__info.begin; + continue; + } + + if (!__result) { + // First entry found, always keep it. + __result = __sys_info->__info; + + __valid_result = __time >= __result->begin && __time < __result->end; + __can_merge = __sys_info->__can_merge; + } else if (__can_merge && chrono::__merge_continuation(*__result, __sys_info->__info)) { + // The results are merged, update the result state. This may + // "overwrite" a valid sys_info object with another valid sys_info + // object. + __valid_result = __time >= __result->begin && __time < __result->end; + __can_merge = __sys_info->__can_merge; + } else { + // Here things get interesting: + // For example, America/Argentina/San_Luis + // + // -3 A -03/-02 2008 Ja 21 (C1) + // -4 Sa -04/-03 2009 O 11 (C2) + // + // R A 2007 o - D 30 0 1 - (R1) + // + // R Sa 2007 2008 - O Su>=8 0 1 - (R2) + // + // Based on C1 & R1 the end time of C1 is 2008-01-21 03:00:00 + // Based on C2 & R2 the end time of C1 is 2008-01-21 02:00:00 + // In this case the earlier time is the real time of the transition. + // However, the algorithm used gives 2008-01-21 03:00:00. + // + // So we need to calculate the previous UNTIL in the current context and + // see whether it's earlier. + + // The results could not be merged. + // - When we have a valid result, that result is the final result. + // - Otherwise the result we had is before __time and the result we got + // is at a later time (possibly valid). This result is always better + // than the previous result. + if (__valid_result) { + return *__result; + } else { + _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( + __it != __continuations.begin(), "the first rule should always seed the result"); + const auto& __last = *(__it - 1); + if (std::holds_alternative<std::string>(__last.__rules)) { + // Europe/Berlin + // 1 c CE%sT 1945 May 24 2 (C1) + // 1 So CE%sT 1946 (C2) + // + // R c 1944 1945 - Ap M>=1 2s 1 S (R1) + // + // R So 1945 o - May 24 2 2 M (R2) + // + // When C2 becomes active the time would be before the first rule R2, + // giving a 1-hour sys_info. This is not valid and the results need + // merging.
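+ // The test below removes the offset from both boundaries; when the + // results are equal, the UTC gap between the two infos is caused + // purely by the change of offset, so the new info is adjusted to + // start where the previous one ended.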
+ + if (__result->end != __sys_info->__info.begin) { + // When the UTC gap between the rules is due to the change of + // offsets, adjust the new time to remove the gap. + sys_seconds __end = __result->end - __result->offset; + sys_seconds __begin = __sys_info->__info.begin - __sys_info->__info.offset; + if (__end == __begin) { + __sys_info->__info.begin = __result->end; + } + } + } + + __result = __sys_info->__info; + __valid_result = __time >= __result->begin && __time < __result->end; + __can_merge = __sys_info->__can_merge; + } + } + __continuation_begin = __result->end; + } else { + __continuation_begin = __sys_info.error(); + } + } + if (__valid_result) + return *__result; + + std::__throw_runtime_error("tzdb: corrupt db"); +} + } // namespace chrono _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/src/tzdb.cpp b/libcxx/src/tzdb.cpp index 7ba5ceb7ada3d..8909ecd026add 100644 --- a/libcxx/src/tzdb.cpp +++ b/libcxx/src/tzdb.cpp @@ -561,7 +561,7 @@ static void __parse_rule(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istr static void __parse_zone(tzdb& __tzdb, __tz::__rules_storage_type& __rules, istream& __input) { chrono::__skip_mandatory_whitespace(__input); - auto __p = std::make_unique<time_zone::__impl>(chrono::__parse_string(__input)); + auto __p = std::make_unique<time_zone::__impl>(chrono::__parse_string(__input), __rules); vector<__tz::__continuation>& __continuations = __p->__continuations(); chrono::__skip_mandatory_whitespace(__input); @@ -675,6 +675,61 @@ void __init_tzdb(tzdb& __tzdb, __tz::__rules_storage_type& __rules) { std::ranges::sort(__tzdb.leap_seconds); } +#ifdef _WIN32 +[[nodiscard]] static const time_zone* __current_zone_windows(const tzdb& tzdb) { + // TODO TZDB Implement this on Windows. + std::__throw_runtime_error("unknown time zone"); +} +#else // ifdef _WIN32 +[[nodiscard]] static const time_zone* __current_zone_posix(const tzdb& tzdb) { + // On POSIX systems there are several ways to configure the time zone. + // In order of priority they are: + // - TZ environment variable + // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08 + // The documentation is unclear whether or not it's allowed to + // change time zone information. For example, the TZ string + // MST7MDT + // is an entry in tzdata.zi. The value + // MST + // is also an entry. Is it allowed to use the following? + // MST-3 + // Even when this is valid, there is no time_zone record in the + // database. Since the library would need to return a valid pointer, + // this means the library needs to allocate and leak a pointer. + // + // - The time zone name is the target of the symlink /etc/localtime + // relative to /usr/share/zoneinfo/ + + // The algorithm is like this: + // - If the environment variable TZ is set and points to a valid + // record, use this value. + // - Else use the name based on the `/etc/localtime` symlink. + + if (const char* __tz = getenv("TZ")) + if (const time_zone* __result = tzdb.__locate_zone(__tz)) + return __result; + + filesystem::path __path = "/etc/localtime"; + if (!std::filesystem::exists(__path)) + std::__throw_runtime_error("tzdb: the symlink '/etc/localtime' does not exist"); + + if (!std::filesystem::is_symlink(__path)) + std::__throw_runtime_error("tzdb: the path '/etc/localtime' is not a symlink"); + + filesystem::path __tz = filesystem::read_symlink(__path); + // The path may be relative; in that case convert it to an absolute path + // based on the proper initial directory.
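+ // For example (hypothetical values, for illustration only): + // /etc/localtime -> ../usr/share/zoneinfo/Europe/Berlin + // is resolved against /etc to /usr/share/zoneinfo/Europe/Berlin, after + // which the name "Europe/Berlin" is looked up in the database.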
+ if (__tz.is_relative()) + __tz = filesystem::canonical("/etc" / __tz); + + string __name = filesystem::relative(__tz, "/usr/share/zoneinfo/"); + if (const time_zone* __result = tzdb.__locate_zone(__name)) + return __result; + + std::__throw_runtime_error(("tzdb: the time zone '" + __name + "' is not found in the database").c_str()); +} +#endif // ifdef _WIN32 + //===----------------------------------------------------------------------===// // Public API //===----------------------------------------------------------------------===// @@ -684,6 +739,14 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI tzdb_l return __result; } +[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const time_zone* tzdb::__current_zone() const { +#ifdef _WIN32 + return chrono::__current_zone_windows(*this); +#else + return chrono::__current_zone_posix(*this); +#endif +} + _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const tzdb& reload_tzdb() { if (chrono::remote_version() == chrono::get_tzdb().version) return chrono::get_tzdb(); diff --git a/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp b/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp index defd43cf267a9..2790916edaf69 100644 --- a/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp +++ b/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp @@ -9,7 +9,7 @@ // This test fails with Clang <18 because diagnose_if doesn't emit all of the // diagnostics when -fdelayed-template-parsing is enabled, like it is in MSVC // mode. -// XFAIL: msvc && (clang-16 || clang-17) +// XFAIL: msvc && clang-17 // REQUIRES: diagnose-if-support diff --git a/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.compile.pass.cpp b/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.compile.pass.cpp index c868832ea74ad..cbdb2ab1758e3 100644 --- a/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.compile.pass.cpp +++ b/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.compile.pass.cpp @@ -38,10 +38,19 @@ void test() { std::chrono::get_tzdb_list(); std::chrono::get_tzdb(); + std::chrono::locate_zone("name"); + std::chrono::current_zone(); std::chrono::remote_version(); + { + const std::chrono::tzdb& t = list.front(); + t.locate_zone("name"); + t.current_zone(); + } + { tz.name(); + tz.get_info(std::chrono::sys_seconds{}); operator==(tz, tz); operator<=>(tz, tz); } diff --git a/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.verify.cpp b/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.verify.cpp index 4d26b46a89c91..e88c176af4a8b 100644 --- a/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.verify.cpp @@ -33,13 +33,23 @@ void test() { list.cbegin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} list.cend(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + { + const std::chrono::tzdb& t = list.front(); + t.locate_zone("name"); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + t.current_zone(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + } + namespace crno = std::chrono; crno::get_tzdb_list(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} crno::get_tzdb(); // expected-warning {{ignoring return value 
of function declared with 'nodiscard' attribute}} + crno::locate_zone("n"); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + crno::current_zone(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} crno::remote_version(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} { + std::chrono::sys_seconds s{}; tz.name(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + tz.get_info(s); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} operator==(tz, tz); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} operator<=>(tz, tz); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} } diff --git a/libcxx/test/libcxx/fuzzing/format_no_args.pass.cpp b/libcxx/test/libcxx/fuzzing/format_no_args.pass.cpp new file mode 100644 index 0000000000000..2faf27eda98c5 --- /dev/null +++ b/libcxx/test/libcxx/fuzzing/format_no_args.pass.cpp @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-exceptions + +// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME + +// XFAIL: availability-fp_to_chars-missing + +#include +#include +#include + +#include "fuzz.h" + +extern "C" int LLVMFuzzerTestOneInput(const std::uint8_t* data, std::size_t size) { + try { + [[maybe_unused]] auto result = std::vformat(std::string_view{(const char*)(data), size}, std::make_format_args()); + } catch (std::format_error const&) { + // If the fuzzing input isn't a valid thing we can format and we detect it, it's okay. We are looking for crashes. 
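+ // std::vformat parses and formats the input with an empty argument store; + // any replacement field that refers to an argument throws format_error, + // which is the expected, non-crashing failure mode.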
+ return 0; + } + return 0; +} diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.cons/wchar_pointer.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.cons/wchar_pointer.pass.cpp index c2b23bba421bf..652783fc65134 100644 --- a/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.cons/wchar_pointer.pass.cpp +++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.cons/wchar_pointer.pass.cpp @@ -18,7 +18,7 @@ // UNSUPPORTED: no-wide-characters // TODO: This should not be necessary -// ADDITIONAL_COMPILE_FLAGS:-D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS:-D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT #include #include diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.members/open_wchar_pointer.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.members/open_wchar_pointer.pass.cpp index 9a5564fa9e11c..b592492f84830 100644 --- a/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.members/open_wchar_pointer.pass.cpp +++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/fstream.members/open_wchar_pointer.pass.cpp @@ -18,7 +18,7 @@ // UNSUPPORTED: no-wide-characters // TODO: This should not be necessary -// ADDITIONAL_COMPILE_FLAGS:-D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS:-D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT #include #include diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.cons/wchar_pointer.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.cons/wchar_pointer.pass.cpp index 185ee9e5f96a3..3730e73648d30 100644 --- a/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.cons/wchar_pointer.pass.cpp +++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.cons/wchar_pointer.pass.cpp @@ -18,7 +18,7 @@ // UNSUPPORTED: no-wide-characters // TODO: This should not be necessary -// ADDITIONAL_COMPILE_FLAGS:-D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS:-D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT #include #include diff --git a/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.members/open_wchar_pointer.pass.cpp b/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.members/open_wchar_pointer.pass.cpp index 9403643ad6ab6..bfbbd5322161f 100644 --- a/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.members/open_wchar_pointer.pass.cpp +++ b/libcxx/test/libcxx/input.output/file.streams/fstreams/ofstream.members/open_wchar_pointer.pass.cpp @@ -18,7 +18,7 @@ // UNSUPPORTED: no-wide-characters // TODO: This should not be necessary -// ADDITIONAL_COMPILE_FLAGS:-D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS:-D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT #include #include diff --git a/libcxx/test/libcxx/localization/locales/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp b/libcxx/test/libcxx/localization/locales/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp index 006bece21105b..a536e6e9b04c6 100644 --- a/libcxx/test/libcxx/localization/locales/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp +++ b/libcxx/test/libcxx/localization/locales/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp @@ -10,7 +10,7 @@ // UNSUPPORTED: c++03 -// 
ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // diff --git a/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/no_unique_address.compile.pass.cpp b/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/no_unique_address.compile.pass.cpp index b411ce198e253..a0bfb7c4a246b 100644 --- a/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/no_unique_address.compile.pass.cpp +++ b/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/no_unique_address.compile.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: msvc && (clang-16 || clang-17) +// XFAIL: msvc && clang-17 // class lazy_split_view { // _LIBCPP_NO_UNIQUE_ADDRESS _View __base_ = _View(); diff --git a/libcxx/test/libcxx/ranges/range.adaptors/range.split/no_unique_address.compile.pass.cpp b/libcxx/test/libcxx/ranges/range.adaptors/range.split/no_unique_address.compile.pass.cpp index 0d8bfbc0316da..694cf1fd0d0e4 100644 --- a/libcxx/test/libcxx/ranges/range.adaptors/range.split/no_unique_address.compile.pass.cpp +++ b/libcxx/test/libcxx/ranges/range.adaptors/range.split/no_unique_address.compile.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: msvc && (clang-16 || clang-17) +// XFAIL: msvc && clang-17 // class split_view { // _LIBCPP_NO_UNIQUE_ADDRESS _View __base_ = _View(); diff --git a/libcxx/test/libcxx/ranges/range.factories/range.istream.view/no_unique_address.compile.pass.cpp b/libcxx/test/libcxx/ranges/range.factories/range.istream.view/no_unique_address.compile.pass.cpp index 8359d267245fe..a77c4e4d1bcdb 100644 --- a/libcxx/test/libcxx/ranges/range.factories/range.istream.view/no_unique_address.compile.pass.cpp +++ b/libcxx/test/libcxx/ranges/range.factories/range.istream.view/no_unique_address.compile.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: msvc && (clang-16 || clang-17) +// XFAIL: msvc && clang-17 // Test the libc++ extension that the value stored in `std::ranges::istream_view` has been marked // as _LIBCPP_NO_UNIQUE_ADDRESS diff --git a/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp b/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp new file mode 100644 index 0000000000000..971f7f04c49a8 --- /dev/null +++ b/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp @@ -0,0 +1,84 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// struct tzdb + +// const time_zone* locate_zone(string_view tz_name) const; + +#include +#include +#include +#include + +#include "test_macros.h" +#include "assert_macros.h" +#include "concat_macros.h" +#include "filesystem_test_helper.h" +#include "test_tzdb.h" + +scoped_test_env env; +[[maybe_unused]] const std::filesystem::path dir = env.create_dir("zoneinfo"); +const std::filesystem::path file = env.create_file("zoneinfo/tzdata.zi"); + +std::string_view std::chrono::__libcpp_tzdb_directory() { + static std::string result = dir.string(); + return result; +} + +void write(std::string_view input) { + static int version = 0; + + std::ofstream f{file}; + f << "# version " << version++ << '\n'; + f.write(input.data(), input.size()); +} + +static const std::chrono::tzdb& parse(std::string_view input) { + write(input); + return std::chrono::reload_tzdb(); +} + +int main(int, const char**) { + const std::chrono::tzdb& tzdb = parse( + R"( +Z zone 0 r f +L zone link +L link link_to_link +)"); + + { + const std::chrono::time_zone* tz = tzdb.locate_zone("zone"); + assert(tz); + assert(tz->name() == "zone"); + } + { + const std::chrono::time_zone* tz = tzdb.locate_zone("link"); + assert(tz); + assert(tz->name() == "zone"); + } + + TEST_VALIDATE_EXCEPTION( + std::runtime_error, + [&]([[maybe_unused]] const std::runtime_error& e) { + std::string_view what{"tzdb: requested time zone not found"}; + TEST_LIBCPP_REQUIRE( + e.what() == what, + TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); + }, + TEST_IGNORE_NODISCARD tzdb.locate_zone("link_to_link")); + + return 0; +} diff --git a/libcxx/test/libcxx/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp b/libcxx/test/libcxx/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp new file mode 100644 index 0000000000000..194f58215b925 --- /dev/null +++ b/libcxx/test/libcxx/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp @@ -0,0 +1,199 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// <chrono> + +// class time_zone; + +// template <class _Duration> +// sys_info get_info(const sys_time<_Duration>& time) const; + +// Tests the parts not validated in the public test: +// - Validates that a zone with an UNTIL in its last continuation is considered corrupt +// - The constraints on the FORMAT field's formatting +// - Formatting of "%z", which is valid but not present in the actual database + +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "assert_macros.h" +#include "concat_macros.h" +#include "filesystem_test_helper.h" +#include "test_tzdb.h" + +/***** ***** HELPERS ***** *****/ + +scoped_test_env env; +[[maybe_unused]] const std::filesystem::path dir = env.create_dir("zoneinfo"); +const std::filesystem::path file = env.create_file("zoneinfo/tzdata.zi"); + +std::string_view std::chrono::__libcpp_tzdb_directory() { + static std::string result = dir.string(); + return result; +} + +static void write(std::string_view input) { + static int version = 0; + + std::ofstream f{file}; + f << "# version " << version++ << '\n'; + f.write(input.data(), input.size()); +} + +static const std::chrono::tzdb& parse(std::string_view input) { + write(input); + return std::chrono::reload_tzdb(); +} + +[[nodiscard]] static std::chrono::sys_seconds to_sys_seconds(int year) { + std::chrono::year_month_day result{std::chrono::year{year}, std::chrono::January, std::chrono::day{1}}; + + return std::chrono::time_point_cast<std::chrono::seconds>(static_cast<std::chrono::sys_days>(result)); +} + +static void test_exception([[maybe_unused]] std::string_view input, [[maybe_unused]] std::string_view what) { +#ifndef TEST_HAS_NO_EXCEPTIONS + const std::chrono::tzdb& tzdb = parse(input); + const std::chrono::time_zone* tz = tzdb.locate_zone("Format"); + TEST_VALIDATE_EXCEPTION( + std::runtime_error, + [&]([[maybe_unused]] const std::runtime_error& e) { + TEST_LIBCPP_REQUIRE( + e.what() == what, + TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); + }, + TEST_IGNORE_NODISCARD tz->get_info(to_sys_seconds(2000))); +#endif // TEST_HAS_NO_EXCEPTIONS +} + +static void zone_without_until_entry() { +#ifndef TEST_HAS_NO_EXCEPTIONS + const std::chrono::tzdb& tzdb = parse( + R"( +Z America/Paramaribo -3:40:40 - LMT 1911 +-3:40:52 - PMT 1935 +-3:40:36 - PMT 1945 O +-3:30 - -0330 1984 O +# -3 - -03 Commented out so the last entry has an UNTIL field.
+)"); + const std::chrono::time_zone* tz = tzdb.locate_zone("America/Paramaribo"); + + TEST_IGNORE_NODISCARD tz->get_info(to_sys_seconds(1984)); + TEST_VALIDATE_EXCEPTION( + std::runtime_error, + [&]([[maybe_unused]] const std::runtime_error& e) { + std::string what = "tzdb: corrupt db"; + TEST_LIBCPP_REQUIRE( + e.what() == what, + TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); + }, + TEST_IGNORE_NODISCARD tz->get_info(to_sys_seconds(1985))); +#endif // TEST_HAS_NO_EXCEPTIONS +} + +static void invalid_format() { + test_exception( + R"( +R F 2000 max - Jan 5 0 0 foo +Z Format 0 F %zandfoo)", + "corrupt tzdb FORMAT field: %z should be the entire contents, instead contains '%zandfoo'"); + + test_exception( + R"( +R F 2000 max - Jan 5 0 0 foo +Z Format 0 F %q)", + "corrupt tzdb FORMAT field: invalid sequence '%q' found, expected %s or %z"); + + test_exception( + R"( +R F 2000 max - Jan 5 0 0 foo +Z Format 0 F !)", + "corrupt tzdb FORMAT field: invalid character '!' found, expected +, -, or an alphanumeric value"); + + test_exception( + R"( +R F 2000 max - Jan 5 0 0 foo +Z Format 0 F @)", + "corrupt tzdb FORMAT field: invalid character '@' found, expected +, -, or an alphanumeric value"); + + test_exception( + R"( +R F 2000 max - Jan 5 0 0 foo +Z Format 0 F $)", + "corrupt tzdb FORMAT field: invalid character '$' found, expected +, -, or an alphanumeric value"); + + test_exception( + R"( +R F 1970 max - Jan 5 0 0 foo +Z Format 0 F %)", + "corrupt tzdb FORMAT field: input ended with the start of the escape sequence '%'"); + + test_exception( + R"( +R F 2000 max - Jan 5 0 0 - +Z Format 0 F %s)", + "corrupt tzdb FORMAT field: result is empty"); +} + +static void test_abbrev(std::string_view input, std::string_view expected) { + const std::chrono::tzdb& tzdb = parse(input); + const std::chrono::time_zone* tz = tzdb.locate_zone("Format"); + std::string result = tz->get_info(to_sys_seconds(2000)).abbrev; + TEST_LIBCPP_REQUIRE(result == expected, TEST_WRITE_CONCATENATED("\nExpected ", expected, "\nActual ", result, '\n')); +} + +// This format is valid, however is not used in the tzdata.zi. +static void percentage_z_format() { + test_abbrev( + R"( +R F 1999 max - Jan 5 0 0 foo +Z Format 0 F %z)", + "+00"); + + test_abbrev( + R"( +R F 1999 max - Jan 5 0 1 foo +Z Format 0 F %z)", + "+01"); + + test_abbrev( + R"( +R F 1999 max - Jan 5 0 -1 foo +Z Format 0 F %z)", + "-01"); + + test_abbrev( + R"( +R F 1999 max - Jan 5 0 0 foo +Z Format 0:45 F %z)", + "+0045"); + + test_abbrev( + R"( +R F 1999 max - Jan 5 0 -1 foo +Z Format 0:45 F %z)", + "-0015"); +} + +int main(int, const char**) { + zone_without_until_entry(); + invalid_format(); + percentage_z_format(); + + return 0; +} diff --git a/libcxx/test/libcxx/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.rule_selection.pass.cpp b/libcxx/test/libcxx/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.rule_selection.pass.cpp new file mode 100644 index 0000000000000..accd5bcdc89e2 --- /dev/null +++ b/libcxx/test/libcxx/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.rule_selection.pass.cpp @@ -0,0 +1,185 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// <chrono> + +// class time_zone; + +// template <class _Duration> +// sys_info get_info(const sys_time<_Duration>& time) const; + +// The time zone database consists of the following entries: +// - Zones, +// - Rules, +// - Links, and +// - Leapseconds. +// +// The public tzdb struct stores all entries except the Rules. How +// implementations keep track of the Rules is not specified. When the sys_info +// for a time_zone is requested, it needs to use the correct Rules. This lookup +// cannot rely on `get_tzdb()` since that returns the most recently loaded +// database. +// +// A reload could change the rules of a time zone, or the time zone could no +// longer be present in the current database. These two conditions are tested. +// +// It is possible the tzdb entry has been removed by the user from the tzdb_list +// after a reload. This is UB and not tested. + +#include +#include +#include + +#include "test_macros.h" +#include "assert_macros.h" +#include "concat_macros.h" +#include "filesystem_test_helper.h" +#include "test_tzdb.h" + +/***** ***** HELPERS ***** *****/ + +scoped_test_env env; +[[maybe_unused]] const std::filesystem::path dir = env.create_dir("zoneinfo"); +const std::filesystem::path file = env.create_file("zoneinfo/tzdata.zi"); + +std::string_view std::chrono::__libcpp_tzdb_directory() { + static std::string result = dir.string(); + return result; +} + +static void write(std::string_view input) { + static int version = 0; + + std::ofstream f{file}; + f << "# version " << version++ << '\n'; + f.write(input.data(), input.size()); +} + +static const std::chrono::tzdb& parse(std::string_view input) { + write(input); + return std::chrono::reload_tzdb(); +} + +[[nodiscard]] static std::chrono::sys_seconds to_sys_seconds( + std::chrono::year year, + std::chrono::month month, + std::chrono::day day, + std::chrono::hours h = std::chrono::hours(0), + std::chrono::minutes m = std::chrono::minutes{0}, + std::chrono::seconds s = std::chrono::seconds{0}) { + std::chrono::year_month_day result{year, month, day}; + + return std::chrono::time_point_cast<std::chrono::seconds>(static_cast<std::chrono::sys_days>(result)) + h + m + s; +} + +static void assert_equal(const std::chrono::sys_info& lhs, const std::chrono::sys_info& rhs) { + TEST_REQUIRE(lhs.begin == rhs.begin, + TEST_WRITE_CONCATENATED("\nBegin:\nExpected output ", lhs.begin, "\nActual output ", rhs.begin, '\n')); + TEST_REQUIRE(lhs.end == rhs.end, + TEST_WRITE_CONCATENATED("\nEnd:\nExpected output ", lhs.end, "\nActual output ", rhs.end, '\n')); + TEST_REQUIRE( + lhs.offset == rhs.offset, + TEST_WRITE_CONCATENATED("\nOffset:\nExpected output ", lhs.offset, "\nActual output ", rhs.offset, '\n')); + TEST_REQUIRE(lhs.save == rhs.save, + TEST_WRITE_CONCATENATED("\nSave:\nExpected output ", lhs.save, "\nActual output ", rhs.save, '\n')); + TEST_REQUIRE( + lhs.abbrev == rhs.abbrev, + TEST_WRITE_CONCATENATED("\nAbbrev:\nExpected output ", lhs.abbrev, "\nActual output ", rhs.abbrev, '\n')); +} + +/***** ***** TESTS ***** *****/ + +int main(int, const char**) { + using namespace std::literals::chrono_literals; + + // DST starts on the first of March.
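+ // The zic syntax used in the fragments below is, per the zic(8) manual: + // Z NAME STDOFF RULES FORMAT [UNTIL] for a zone line, and + // R NAME FROM TO TYPE IN ON AT SAVE LETTER/S for a rule line + // (the TYPE column is always "-").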
+ const std::chrono::tzdb& tzdb_1 = parse( + R"( +Z Test 0 - LMT 1900 +0 Rule %s + +R Rule 1900 max - Mar 1 2u 1 Summer +R Rule 1900 max - Oct 1 2u 0 Winter +)"); + + const std::chrono::time_zone* tz_1 = tzdb_1.locate_zone("Test"); + assert_equal( + std::chrono::sys_info( + to_sys_seconds(1901y, std::chrono::March, 1d, 2h), + to_sys_seconds(1901y, std::chrono::October, 1d, 2h), + 1h, + 60min, + "Summer"), + tz_1->get_info(to_sys_seconds(1901y, std::chrono::March, 1d, 2h))); + + // The DST start changes from the first of March to the first of April. + const std::chrono::tzdb& tzdb_2 = parse( + R"( +Z Test 0 - LMT 1900 +0 Rule %s + +R Rule 1900 max - Apr 1 2u 1 Summer +R Rule 1900 max - Oct 1 2u 0 Winter +)"); + + const std::chrono::time_zone* tz_2 = tzdb_2.locate_zone("Test"); + assert_equal( + std::chrono::sys_info( + to_sys_seconds(1900y, std::chrono::October, 1d, 2h), + to_sys_seconds(1901y, std::chrono::April, 1d, 2h), + 0s, + 0min, + "Winter"), + tz_2->get_info(to_sys_seconds(1901y, std::chrono::March, 1d, 2h))); + + // Validate that, when using tz_1, DST still starts on the first of March. + assert_equal( + std::chrono::sys_info( + to_sys_seconds(1901y, std::chrono::March, 1d, 2h), + to_sys_seconds(1901y, std::chrono::October, 1d, 2h), + 1h, + 60min, + "Summer"), + tz_1->get_info(to_sys_seconds(1901y, std::chrono::March, 1d, 2h))); + + // The zone Test is no longer present. + [[maybe_unused]] const std::chrono::tzdb& tzdb_3 = parse("Z Etc/UTC 0 - UTC"); +#ifndef TEST_HAS_NO_EXCEPTIONS + TEST_VALIDATE_EXCEPTION( + std::runtime_error, + [&]([[maybe_unused]] const std::runtime_error& e) { + std::string what = "tzdb: requested time zone not found"; + TEST_LIBCPP_REQUIRE( + e.what() == what, + TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); + }, + TEST_IGNORE_NODISCARD tzdb_3.locate_zone("Test")); +#endif // TEST_HAS_NO_EXCEPTIONS + + // Search the zone Test in the original version 1 of the TZDB. + // This database should be unaffected by the removal in version 3. + tz_1 = tzdb_1.locate_zone("Test"); + + // Validate that the rules still use version 1's DST switch in March.
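+ // (tzdb_1 keeps its own copy of the rules, so the reloads that created + // tzdb_2 and tzdb_3 must not be observable through tz_1.)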
+ assert_equal( + std::chrono::sys_info( + to_sys_seconds(1901y, std::chrono::March, 1d, 2h), + to_sys_seconds(1901y, std::chrono::October, 1d, 2h), + 1h, + 60min, + "Summer"), + tz_1->get_info(to_sys_seconds(1901y, std::chrono::March, 1d, 2h))); + + return 0; +} diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv index 69429b5bce825..9ae422a31f074 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx23.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv @@ -80,7 +80,6 @@ chrono cstring chrono ctime chrono cwchar chrono forward_list -chrono initializer_list chrono limits chrono new chrono optional diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv index 69429b5bce825..9ae422a31f074 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx26.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv @@ -80,7 +80,6 @@ chrono cstring chrono ctime chrono cwchar chrono forward_list -chrono initializer_list chrono limits chrono new chrono optional diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h index 204dcacb1152c..eb7500a828ccf 100644 --- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h @@ -82,7 +82,7 @@ struct data { }; /// The data for UTF-8. -std::array, 602> data_utf8 = {{ +std::array, 1187> data_utf8 = {{ {"\U00000020\U00000020", {32, 32}, {1, 2}}, {"\U00000020\U00000308\U00000020", {32, 32}, {3, 4}}, {"\U00000020\U0000000d", {32, 13}, {1, 2}}, @@ -97,8 +97,8 @@ std::array, 602> data_utf8 = {{ {"\U00000020\U00000308\U0001f1e6", {32, 127462}, {3, 7}}, {"\U00000020\U00000600", {32, 1536}, {1, 3}}, {"\U00000020\U00000308\U00000600", {32, 1536}, {3, 5}}, - {"\U00000020\U00000903", {32}, {4}}, - {"\U00000020\U00000308\U00000903", {32}, {6}}, + {"\U00000020\U00000a03", {32}, {4}}, + {"\U00000020\U00000308\U00000a03", {32}, {6}}, {"\U00000020\U00001100", {32, 4352}, {1, 4}}, {"\U00000020\U00000308\U00001100", {32, 4352}, {3, 6}}, {"\U00000020\U00001160", {32, 4448}, {1, 4}}, @@ -109,10 +109,24 @@ std::array, 602> data_utf8 = {{ {"\U00000020\U00000308\U0000ac00", {32, 44032}, {3, 6}}, {"\U00000020\U0000ac01", {32, 44033}, {1, 4}}, {"\U00000020\U00000308\U0000ac01", {32, 44033}, {3, 6}}, + {"\U00000020\U00000900", {32}, {4}}, + {"\U00000020\U00000308\U00000900", {32}, {6}}, + {"\U00000020\U00000903", {32}, {4}}, + {"\U00000020\U00000308\U00000903", {32}, {6}}, + {"\U00000020\U00000904", {32, 2308}, {1, 4}}, + {"\U00000020\U00000308\U00000904", {32, 2308}, {3, 6}}, + {"\U00000020\U00000d4e", {32, 3406}, {1, 4}}, + {"\U00000020\U00000308\U00000d4e", {32, 3406}, {3, 6}}, + {"\U00000020\U00000915", {32, 2325}, {1, 4}}, + {"\U00000020\U00000308\U00000915", {32, 2325}, {3, 6}}, {"\U00000020\U0000231a", {32, 8986}, {1, 4}}, {"\U00000020\U00000308\U0000231a", {32, 8986}, {3, 6}}, {"\U00000020\U00000300", {32}, {3}}, {"\U00000020\U00000308\U00000300", {32}, {5}}, + {"\U00000020\U0000093c", {32}, {4}}, + {"\U00000020\U00000308\U0000093c", {32}, {6}}, + {"\U00000020\U0000094d", {32}, {4}}, + {"\U00000020\U00000308\U0000094d", {32}, {6}}, {"\U00000020\U0000200d", {32}, {4}}, {"\U00000020\U00000308\U0000200d", {32}, {6}}, {"\U00000020\U00000378", {32, 888}, {1, 3}}, @@ -131,8 
+145,8 @@ std::array, 602> data_utf8 = {{ {"\U0000000d\U00000308\U0001f1e6", {13, 776, 127462}, {1, 3, 7}}, {"\U0000000d\U00000600", {13, 1536}, {1, 3}}, {"\U0000000d\U00000308\U00000600", {13, 776, 1536}, {1, 3, 5}}, - {"\U0000000d\U00000903", {13, 2307}, {1, 4}}, - {"\U0000000d\U00000308\U00000903", {13, 776}, {1, 6}}, + {"\U0000000d\U00000a03", {13, 2563}, {1, 4}}, + {"\U0000000d\U00000308\U00000a03", {13, 776}, {1, 6}}, {"\U0000000d\U00001100", {13, 4352}, {1, 4}}, {"\U0000000d\U00000308\U00001100", {13, 776, 4352}, {1, 3, 6}}, {"\U0000000d\U00001160", {13, 4448}, {1, 4}}, @@ -143,10 +157,24 @@ std::array, 602> data_utf8 = {{ {"\U0000000d\U00000308\U0000ac00", {13, 776, 44032}, {1, 3, 6}}, {"\U0000000d\U0000ac01", {13, 44033}, {1, 4}}, {"\U0000000d\U00000308\U0000ac01", {13, 776, 44033}, {1, 3, 6}}, + {"\U0000000d\U00000900", {13, 2304}, {1, 4}}, + {"\U0000000d\U00000308\U00000900", {13, 776}, {1, 6}}, + {"\U0000000d\U00000903", {13, 2307}, {1, 4}}, + {"\U0000000d\U00000308\U00000903", {13, 776}, {1, 6}}, + {"\U0000000d\U00000904", {13, 2308}, {1, 4}}, + {"\U0000000d\U00000308\U00000904", {13, 776, 2308}, {1, 3, 6}}, + {"\U0000000d\U00000d4e", {13, 3406}, {1, 4}}, + {"\U0000000d\U00000308\U00000d4e", {13, 776, 3406}, {1, 3, 6}}, + {"\U0000000d\U00000915", {13, 2325}, {1, 4}}, + {"\U0000000d\U00000308\U00000915", {13, 776, 2325}, {1, 3, 6}}, {"\U0000000d\U0000231a", {13, 8986}, {1, 4}}, {"\U0000000d\U00000308\U0000231a", {13, 776, 8986}, {1, 3, 6}}, {"\U0000000d\U00000300", {13, 768}, {1, 3}}, {"\U0000000d\U00000308\U00000300", {13, 776}, {1, 5}}, + {"\U0000000d\U0000093c", {13, 2364}, {1, 4}}, + {"\U0000000d\U00000308\U0000093c", {13, 776}, {1, 6}}, + {"\U0000000d\U0000094d", {13, 2381}, {1, 4}}, + {"\U0000000d\U00000308\U0000094d", {13, 776}, {1, 6}}, {"\U0000000d\U0000200d", {13, 8205}, {1, 4}}, {"\U0000000d\U00000308\U0000200d", {13, 776}, {1, 6}}, {"\U0000000d\U00000378", {13, 888}, {1, 3}}, @@ -165,8 +193,8 @@ std::array, 602> data_utf8 = {{ {"\U0000000a\U00000308\U0001f1e6", {10, 776, 127462}, {1, 3, 7}}, {"\U0000000a\U00000600", {10, 1536}, {1, 3}}, {"\U0000000a\U00000308\U00000600", {10, 776, 1536}, {1, 3, 5}}, - {"\U0000000a\U00000903", {10, 2307}, {1, 4}}, - {"\U0000000a\U00000308\U00000903", {10, 776}, {1, 6}}, + {"\U0000000a\U00000a03", {10, 2563}, {1, 4}}, + {"\U0000000a\U00000308\U00000a03", {10, 776}, {1, 6}}, {"\U0000000a\U00001100", {10, 4352}, {1, 4}}, {"\U0000000a\U00000308\U00001100", {10, 776, 4352}, {1, 3, 6}}, {"\U0000000a\U00001160", {10, 4448}, {1, 4}}, @@ -177,10 +205,24 @@ std::array, 602> data_utf8 = {{ {"\U0000000a\U00000308\U0000ac00", {10, 776, 44032}, {1, 3, 6}}, {"\U0000000a\U0000ac01", {10, 44033}, {1, 4}}, {"\U0000000a\U00000308\U0000ac01", {10, 776, 44033}, {1, 3, 6}}, + {"\U0000000a\U00000900", {10, 2304}, {1, 4}}, + {"\U0000000a\U00000308\U00000900", {10, 776}, {1, 6}}, + {"\U0000000a\U00000903", {10, 2307}, {1, 4}}, + {"\U0000000a\U00000308\U00000903", {10, 776}, {1, 6}}, + {"\U0000000a\U00000904", {10, 2308}, {1, 4}}, + {"\U0000000a\U00000308\U00000904", {10, 776, 2308}, {1, 3, 6}}, + {"\U0000000a\U00000d4e", {10, 3406}, {1, 4}}, + {"\U0000000a\U00000308\U00000d4e", {10, 776, 3406}, {1, 3, 6}}, + {"\U0000000a\U00000915", {10, 2325}, {1, 4}}, + {"\U0000000a\U00000308\U00000915", {10, 776, 2325}, {1, 3, 6}}, {"\U0000000a\U0000231a", {10, 8986}, {1, 4}}, {"\U0000000a\U00000308\U0000231a", {10, 776, 8986}, {1, 3, 6}}, {"\U0000000a\U00000300", {10, 768}, {1, 3}}, {"\U0000000a\U00000308\U00000300", {10, 776}, {1, 5}}, + {"\U0000000a\U0000093c", {10, 
2364}, {1, 4}}, + {"\U0000000a\U00000308\U0000093c", {10, 776}, {1, 6}}, + {"\U0000000a\U0000094d", {10, 2381}, {1, 4}}, + {"\U0000000a\U00000308\U0000094d", {10, 776}, {1, 6}}, {"\U0000000a\U0000200d", {10, 8205}, {1, 4}}, {"\U0000000a\U00000308\U0000200d", {10, 776}, {1, 6}}, {"\U0000000a\U00000378", {10, 888}, {1, 3}}, @@ -199,8 +241,8 @@ std::array, 602> data_utf8 = {{ {"\U00000001\U00000308\U0001f1e6", {1, 776, 127462}, {1, 3, 7}}, {"\U00000001\U00000600", {1, 1536}, {1, 3}}, {"\U00000001\U00000308\U00000600", {1, 776, 1536}, {1, 3, 5}}, - {"\U00000001\U00000903", {1, 2307}, {1, 4}}, - {"\U00000001\U00000308\U00000903", {1, 776}, {1, 6}}, + {"\U00000001\U00000a03", {1, 2563}, {1, 4}}, + {"\U00000001\U00000308\U00000a03", {1, 776}, {1, 6}}, {"\U00000001\U00001100", {1, 4352}, {1, 4}}, {"\U00000001\U00000308\U00001100", {1, 776, 4352}, {1, 3, 6}}, {"\U00000001\U00001160", {1, 4448}, {1, 4}}, @@ -211,10 +253,24 @@ std::array, 602> data_utf8 = {{ {"\U00000001\U00000308\U0000ac00", {1, 776, 44032}, {1, 3, 6}}, {"\U00000001\U0000ac01", {1, 44033}, {1, 4}}, {"\U00000001\U00000308\U0000ac01", {1, 776, 44033}, {1, 3, 6}}, + {"\U00000001\U00000900", {1, 2304}, {1, 4}}, + {"\U00000001\U00000308\U00000900", {1, 776}, {1, 6}}, + {"\U00000001\U00000903", {1, 2307}, {1, 4}}, + {"\U00000001\U00000308\U00000903", {1, 776}, {1, 6}}, + {"\U00000001\U00000904", {1, 2308}, {1, 4}}, + {"\U00000001\U00000308\U00000904", {1, 776, 2308}, {1, 3, 6}}, + {"\U00000001\U00000d4e", {1, 3406}, {1, 4}}, + {"\U00000001\U00000308\U00000d4e", {1, 776, 3406}, {1, 3, 6}}, + {"\U00000001\U00000915", {1, 2325}, {1, 4}}, + {"\U00000001\U00000308\U00000915", {1, 776, 2325}, {1, 3, 6}}, {"\U00000001\U0000231a", {1, 8986}, {1, 4}}, {"\U00000001\U00000308\U0000231a", {1, 776, 8986}, {1, 3, 6}}, {"\U00000001\U00000300", {1, 768}, {1, 3}}, {"\U00000001\U00000308\U00000300", {1, 776}, {1, 5}}, + {"\U00000001\U0000093c", {1, 2364}, {1, 4}}, + {"\U00000001\U00000308\U0000093c", {1, 776}, {1, 6}}, + {"\U00000001\U0000094d", {1, 2381}, {1, 4}}, + {"\U00000001\U00000308\U0000094d", {1, 776}, {1, 6}}, {"\U00000001\U0000200d", {1, 8205}, {1, 4}}, {"\U00000001\U00000308\U0000200d", {1, 776}, {1, 6}}, {"\U00000001\U00000378", {1, 888}, {1, 3}}, @@ -233,8 +289,8 @@ std::array, 602> data_utf8 = {{ {"\U0000034f\U00000308\U0001f1e6", {847, 127462}, {4, 8}}, {"\U0000034f\U00000600", {847, 1536}, {2, 4}}, {"\U0000034f\U00000308\U00000600", {847, 1536}, {4, 6}}, - {"\U0000034f\U00000903", {847}, {5}}, - {"\U0000034f\U00000308\U00000903", {847}, {7}}, + {"\U0000034f\U00000a03", {847}, {5}}, + {"\U0000034f\U00000308\U00000a03", {847}, {7}}, {"\U0000034f\U00001100", {847, 4352}, {2, 5}}, {"\U0000034f\U00000308\U00001100", {847, 4352}, {4, 7}}, {"\U0000034f\U00001160", {847, 4448}, {2, 5}}, @@ -245,10 +301,24 @@ std::array, 602> data_utf8 = {{ {"\U0000034f\U00000308\U0000ac00", {847, 44032}, {4, 7}}, {"\U0000034f\U0000ac01", {847, 44033}, {2, 5}}, {"\U0000034f\U00000308\U0000ac01", {847, 44033}, {4, 7}}, + {"\U0000034f\U00000900", {847}, {5}}, + {"\U0000034f\U00000308\U00000900", {847}, {7}}, + {"\U0000034f\U00000903", {847}, {5}}, + {"\U0000034f\U00000308\U00000903", {847}, {7}}, + {"\U0000034f\U00000904", {847, 2308}, {2, 5}}, + {"\U0000034f\U00000308\U00000904", {847, 2308}, {4, 7}}, + {"\U0000034f\U00000d4e", {847, 3406}, {2, 5}}, + {"\U0000034f\U00000308\U00000d4e", {847, 3406}, {4, 7}}, + {"\U0000034f\U00000915", {847, 2325}, {2, 5}}, + {"\U0000034f\U00000308\U00000915", {847, 2325}, {4, 7}}, {"\U0000034f\U0000231a", {847, 8986}, {2, 5}}, 
{"\U0000034f\U00000308\U0000231a", {847, 8986}, {4, 7}}, {"\U0000034f\U00000300", {847}, {4}}, {"\U0000034f\U00000308\U00000300", {847}, {6}}, + {"\U0000034f\U0000093c", {847}, {5}}, + {"\U0000034f\U00000308\U0000093c", {847}, {7}}, + {"\U0000034f\U0000094d", {847}, {5}}, + {"\U0000034f\U00000308\U0000094d", {847}, {7}}, {"\U0000034f\U0000200d", {847}, {5}}, {"\U0000034f\U00000308\U0000200d", {847}, {7}}, {"\U0000034f\U00000378", {847, 888}, {2, 4}}, @@ -267,8 +337,8 @@ std::array, 602> data_utf8 = {{ {"\U0001f1e6\U00000308\U0001f1e6", {127462, 127462}, {6, 10}}, {"\U0001f1e6\U00000600", {127462, 1536}, {4, 6}}, {"\U0001f1e6\U00000308\U00000600", {127462, 1536}, {6, 8}}, - {"\U0001f1e6\U00000903", {127462}, {7}}, - {"\U0001f1e6\U00000308\U00000903", {127462}, {9}}, + {"\U0001f1e6\U00000a03", {127462}, {7}}, + {"\U0001f1e6\U00000308\U00000a03", {127462}, {9}}, {"\U0001f1e6\U00001100", {127462, 4352}, {4, 7}}, {"\U0001f1e6\U00000308\U00001100", {127462, 4352}, {6, 9}}, {"\U0001f1e6\U00001160", {127462, 4448}, {4, 7}}, @@ -279,10 +349,24 @@ std::array, 602> data_utf8 = {{ {"\U0001f1e6\U00000308\U0000ac00", {127462, 44032}, {6, 9}}, {"\U0001f1e6\U0000ac01", {127462, 44033}, {4, 7}}, {"\U0001f1e6\U00000308\U0000ac01", {127462, 44033}, {6, 9}}, + {"\U0001f1e6\U00000900", {127462}, {7}}, + {"\U0001f1e6\U00000308\U00000900", {127462}, {9}}, + {"\U0001f1e6\U00000903", {127462}, {7}}, + {"\U0001f1e6\U00000308\U00000903", {127462}, {9}}, + {"\U0001f1e6\U00000904", {127462, 2308}, {4, 7}}, + {"\U0001f1e6\U00000308\U00000904", {127462, 2308}, {6, 9}}, + {"\U0001f1e6\U00000d4e", {127462, 3406}, {4, 7}}, + {"\U0001f1e6\U00000308\U00000d4e", {127462, 3406}, {6, 9}}, + {"\U0001f1e6\U00000915", {127462, 2325}, {4, 7}}, + {"\U0001f1e6\U00000308\U00000915", {127462, 2325}, {6, 9}}, {"\U0001f1e6\U0000231a", {127462, 8986}, {4, 7}}, {"\U0001f1e6\U00000308\U0000231a", {127462, 8986}, {6, 9}}, {"\U0001f1e6\U00000300", {127462}, {6}}, {"\U0001f1e6\U00000308\U00000300", {127462}, {8}}, + {"\U0001f1e6\U0000093c", {127462}, {7}}, + {"\U0001f1e6\U00000308\U0000093c", {127462}, {9}}, + {"\U0001f1e6\U0000094d", {127462}, {7}}, + {"\U0001f1e6\U00000308\U0000094d", {127462}, {9}}, {"\U0001f1e6\U0000200d", {127462}, {7}}, {"\U0001f1e6\U00000308\U0000200d", {127462}, {9}}, {"\U0001f1e6\U00000378", {127462, 888}, {4, 6}}, @@ -301,8 +385,8 @@ std::array, 602> data_utf8 = {{ {"\U00000600\U00000308\U0001f1e6", {1536, 127462}, {4, 8}}, {"\U00000600\U00000600", {1536}, {4}}, {"\U00000600\U00000308\U00000600", {1536, 1536}, {4, 6}}, - {"\U00000600\U00000903", {1536}, {5}}, - {"\U00000600\U00000308\U00000903", {1536}, {7}}, + {"\U00000600\U00000a03", {1536}, {5}}, + {"\U00000600\U00000308\U00000a03", {1536}, {7}}, {"\U00000600\U00001100", {1536}, {5}}, {"\U00000600\U00000308\U00001100", {1536, 4352}, {4, 7}}, {"\U00000600\U00001160", {1536}, {5}}, @@ -313,48 +397,76 @@ std::array, 602> data_utf8 = {{ {"\U00000600\U00000308\U0000ac00", {1536, 44032}, {4, 7}}, {"\U00000600\U0000ac01", {1536}, {5}}, {"\U00000600\U00000308\U0000ac01", {1536, 44033}, {4, 7}}, + {"\U00000600\U00000900", {1536}, {5}}, + {"\U00000600\U00000308\U00000900", {1536}, {7}}, + {"\U00000600\U00000903", {1536}, {5}}, + {"\U00000600\U00000308\U00000903", {1536}, {7}}, + {"\U00000600\U00000904", {1536}, {5}}, + {"\U00000600\U00000308\U00000904", {1536, 2308}, {4, 7}}, + {"\U00000600\U00000d4e", {1536}, {5}}, + {"\U00000600\U00000308\U00000d4e", {1536, 3406}, {4, 7}}, + {"\U00000600\U00000915", {1536}, {5}}, + {"\U00000600\U00000308\U00000915", {1536, 2325}, {4, 7}}, 
{"\U00000600\U0000231a", {1536}, {5}}, {"\U00000600\U00000308\U0000231a", {1536, 8986}, {4, 7}}, {"\U00000600\U00000300", {1536}, {4}}, {"\U00000600\U00000308\U00000300", {1536}, {6}}, + {"\U00000600\U0000093c", {1536}, {5}}, + {"\U00000600\U00000308\U0000093c", {1536}, {7}}, + {"\U00000600\U0000094d", {1536}, {5}}, + {"\U00000600\U00000308\U0000094d", {1536}, {7}}, {"\U00000600\U0000200d", {1536}, {5}}, {"\U00000600\U00000308\U0000200d", {1536}, {7}}, {"\U00000600\U00000378", {1536}, {4}}, {"\U00000600\U00000308\U00000378", {1536, 888}, {4, 6}}, - {"\U00000903\U00000020", {2307, 32}, {3, 4}}, - {"\U00000903\U00000308\U00000020", {2307, 32}, {5, 6}}, - {"\U00000903\U0000000d", {2307, 13}, {3, 4}}, - {"\U00000903\U00000308\U0000000d", {2307, 13}, {5, 6}}, - {"\U00000903\U0000000a", {2307, 10}, {3, 4}}, - {"\U00000903\U00000308\U0000000a", {2307, 10}, {5, 6}}, - {"\U00000903\U00000001", {2307, 1}, {3, 4}}, - {"\U00000903\U00000308\U00000001", {2307, 1}, {5, 6}}, - {"\U00000903\U0000034f", {2307}, {5}}, - {"\U00000903\U00000308\U0000034f", {2307}, {7}}, - {"\U00000903\U0001f1e6", {2307, 127462}, {3, 7}}, - {"\U00000903\U00000308\U0001f1e6", {2307, 127462}, {5, 9}}, - {"\U00000903\U00000600", {2307, 1536}, {3, 5}}, - {"\U00000903\U00000308\U00000600", {2307, 1536}, {5, 7}}, - {"\U00000903\U00000903", {2307}, {6}}, - {"\U00000903\U00000308\U00000903", {2307}, {8}}, - {"\U00000903\U00001100", {2307, 4352}, {3, 6}}, - {"\U00000903\U00000308\U00001100", {2307, 4352}, {5, 8}}, - {"\U00000903\U00001160", {2307, 4448}, {3, 6}}, - {"\U00000903\U00000308\U00001160", {2307, 4448}, {5, 8}}, - {"\U00000903\U000011a8", {2307, 4520}, {3, 6}}, - {"\U00000903\U00000308\U000011a8", {2307, 4520}, {5, 8}}, - {"\U00000903\U0000ac00", {2307, 44032}, {3, 6}}, - {"\U00000903\U00000308\U0000ac00", {2307, 44032}, {5, 8}}, - {"\U00000903\U0000ac01", {2307, 44033}, {3, 6}}, - {"\U00000903\U00000308\U0000ac01", {2307, 44033}, {5, 8}}, - {"\U00000903\U0000231a", {2307, 8986}, {3, 6}}, - {"\U00000903\U00000308\U0000231a", {2307, 8986}, {5, 8}}, - {"\U00000903\U00000300", {2307}, {5}}, - {"\U00000903\U00000308\U00000300", {2307}, {7}}, - {"\U00000903\U0000200d", {2307}, {6}}, - {"\U00000903\U00000308\U0000200d", {2307}, {8}}, - {"\U00000903\U00000378", {2307, 888}, {3, 5}}, - {"\U00000903\U00000308\U00000378", {2307, 888}, {5, 7}}, + {"\U00000a03\U00000020", {2563, 32}, {3, 4}}, + {"\U00000a03\U00000308\U00000020", {2563, 32}, {5, 6}}, + {"\U00000a03\U0000000d", {2563, 13}, {3, 4}}, + {"\U00000a03\U00000308\U0000000d", {2563, 13}, {5, 6}}, + {"\U00000a03\U0000000a", {2563, 10}, {3, 4}}, + {"\U00000a03\U00000308\U0000000a", {2563, 10}, {5, 6}}, + {"\U00000a03\U00000001", {2563, 1}, {3, 4}}, + {"\U00000a03\U00000308\U00000001", {2563, 1}, {5, 6}}, + {"\U00000a03\U0000034f", {2563}, {5}}, + {"\U00000a03\U00000308\U0000034f", {2563}, {7}}, + {"\U00000a03\U0001f1e6", {2563, 127462}, {3, 7}}, + {"\U00000a03\U00000308\U0001f1e6", {2563, 127462}, {5, 9}}, + {"\U00000a03\U00000600", {2563, 1536}, {3, 5}}, + {"\U00000a03\U00000308\U00000600", {2563, 1536}, {5, 7}}, + {"\U00000a03\U00000a03", {2563}, {6}}, + {"\U00000a03\U00000308\U00000a03", {2563}, {8}}, + {"\U00000a03\U00001100", {2563, 4352}, {3, 6}}, + {"\U00000a03\U00000308\U00001100", {2563, 4352}, {5, 8}}, + {"\U00000a03\U00001160", {2563, 4448}, {3, 6}}, + {"\U00000a03\U00000308\U00001160", {2563, 4448}, {5, 8}}, + {"\U00000a03\U000011a8", {2563, 4520}, {3, 6}}, + {"\U00000a03\U00000308\U000011a8", {2563, 4520}, {5, 8}}, + {"\U00000a03\U0000ac00", {2563, 44032}, {3, 6}}, + 
{"\U00000a03\U00000308\U0000ac00", {2563, 44032}, {5, 8}}, + {"\U00000a03\U0000ac01", {2563, 44033}, {3, 6}}, + {"\U00000a03\U00000308\U0000ac01", {2563, 44033}, {5, 8}}, + {"\U00000a03\U00000900", {2563}, {6}}, + {"\U00000a03\U00000308\U00000900", {2563}, {8}}, + {"\U00000a03\U00000903", {2563}, {6}}, + {"\U00000a03\U00000308\U00000903", {2563}, {8}}, + {"\U00000a03\U00000904", {2563, 2308}, {3, 6}}, + {"\U00000a03\U00000308\U00000904", {2563, 2308}, {5, 8}}, + {"\U00000a03\U00000d4e", {2563, 3406}, {3, 6}}, + {"\U00000a03\U00000308\U00000d4e", {2563, 3406}, {5, 8}}, + {"\U00000a03\U00000915", {2563, 2325}, {3, 6}}, + {"\U00000a03\U00000308\U00000915", {2563, 2325}, {5, 8}}, + {"\U00000a03\U0000231a", {2563, 8986}, {3, 6}}, + {"\U00000a03\U00000308\U0000231a", {2563, 8986}, {5, 8}}, + {"\U00000a03\U00000300", {2563}, {5}}, + {"\U00000a03\U00000308\U00000300", {2563}, {7}}, + {"\U00000a03\U0000093c", {2563}, {6}}, + {"\U00000a03\U00000308\U0000093c", {2563}, {8}}, + {"\U00000a03\U0000094d", {2563}, {6}}, + {"\U00000a03\U00000308\U0000094d", {2563}, {8}}, + {"\U00000a03\U0000200d", {2563}, {6}}, + {"\U00000a03\U00000308\U0000200d", {2563}, {8}}, + {"\U00000a03\U00000378", {2563, 888}, {3, 5}}, + {"\U00000a03\U00000308\U00000378", {2563, 888}, {5, 7}}, {"\U00001100\U00000020", {4352, 32}, {3, 4}}, {"\U00001100\U00000308\U00000020", {4352, 32}, {5, 6}}, {"\U00001100\U0000000d", {4352, 13}, {3, 4}}, @@ -369,8 +481,8 @@ std::array, 602> data_utf8 = {{ {"\U00001100\U00000308\U0001f1e6", {4352, 127462}, {5, 9}}, {"\U00001100\U00000600", {4352, 1536}, {3, 5}}, {"\U00001100\U00000308\U00000600", {4352, 1536}, {5, 7}}, - {"\U00001100\U00000903", {4352}, {6}}, - {"\U00001100\U00000308\U00000903", {4352}, {8}}, + {"\U00001100\U00000a03", {4352}, {6}}, + {"\U00001100\U00000308\U00000a03", {4352}, {8}}, {"\U00001100\U00001100", {4352}, {6}}, {"\U00001100\U00000308\U00001100", {4352, 4352}, {5, 8}}, {"\U00001100\U00001160", {4352}, {6}}, @@ -381,10 +493,24 @@ std::array, 602> data_utf8 = {{ {"\U00001100\U00000308\U0000ac00", {4352, 44032}, {5, 8}}, {"\U00001100\U0000ac01", {4352}, {6}}, {"\U00001100\U00000308\U0000ac01", {4352, 44033}, {5, 8}}, + {"\U00001100\U00000900", {4352}, {6}}, + {"\U00001100\U00000308\U00000900", {4352}, {8}}, + {"\U00001100\U00000903", {4352}, {6}}, + {"\U00001100\U00000308\U00000903", {4352}, {8}}, + {"\U00001100\U00000904", {4352, 2308}, {3, 6}}, + {"\U00001100\U00000308\U00000904", {4352, 2308}, {5, 8}}, + {"\U00001100\U00000d4e", {4352, 3406}, {3, 6}}, + {"\U00001100\U00000308\U00000d4e", {4352, 3406}, {5, 8}}, + {"\U00001100\U00000915", {4352, 2325}, {3, 6}}, + {"\U00001100\U00000308\U00000915", {4352, 2325}, {5, 8}}, {"\U00001100\U0000231a", {4352, 8986}, {3, 6}}, {"\U00001100\U00000308\U0000231a", {4352, 8986}, {5, 8}}, {"\U00001100\U00000300", {4352}, {5}}, {"\U00001100\U00000308\U00000300", {4352}, {7}}, + {"\U00001100\U0000093c", {4352}, {6}}, + {"\U00001100\U00000308\U0000093c", {4352}, {8}}, + {"\U00001100\U0000094d", {4352}, {6}}, + {"\U00001100\U00000308\U0000094d", {4352}, {8}}, {"\U00001100\U0000200d", {4352}, {6}}, {"\U00001100\U00000308\U0000200d", {4352}, {8}}, {"\U00001100\U00000378", {4352, 888}, {3, 5}}, @@ -403,8 +529,8 @@ std::array, 602> data_utf8 = {{ {"\U00001160\U00000308\U0001f1e6", {4448, 127462}, {5, 9}}, {"\U00001160\U00000600", {4448, 1536}, {3, 5}}, {"\U00001160\U00000308\U00000600", {4448, 1536}, {5, 7}}, - {"\U00001160\U00000903", {4448}, {6}}, - {"\U00001160\U00000308\U00000903", {4448}, {8}}, + {"\U00001160\U00000a03", {4448}, {6}}, + 
{"\U00001160\U00000308\U00000a03", {4448}, {8}}, {"\U00001160\U00001100", {4448, 4352}, {3, 6}}, {"\U00001160\U00000308\U00001100", {4448, 4352}, {5, 8}}, {"\U00001160\U00001160", {4448}, {6}}, @@ -415,10 +541,24 @@ std::array, 602> data_utf8 = {{ {"\U00001160\U00000308\U0000ac00", {4448, 44032}, {5, 8}}, {"\U00001160\U0000ac01", {4448, 44033}, {3, 6}}, {"\U00001160\U00000308\U0000ac01", {4448, 44033}, {5, 8}}, + {"\U00001160\U00000900", {4448}, {6}}, + {"\U00001160\U00000308\U00000900", {4448}, {8}}, + {"\U00001160\U00000903", {4448}, {6}}, + {"\U00001160\U00000308\U00000903", {4448}, {8}}, + {"\U00001160\U00000904", {4448, 2308}, {3, 6}}, + {"\U00001160\U00000308\U00000904", {4448, 2308}, {5, 8}}, + {"\U00001160\U00000d4e", {4448, 3406}, {3, 6}}, + {"\U00001160\U00000308\U00000d4e", {4448, 3406}, {5, 8}}, + {"\U00001160\U00000915", {4448, 2325}, {3, 6}}, + {"\U00001160\U00000308\U00000915", {4448, 2325}, {5, 8}}, {"\U00001160\U0000231a", {4448, 8986}, {3, 6}}, {"\U00001160\U00000308\U0000231a", {4448, 8986}, {5, 8}}, {"\U00001160\U00000300", {4448}, {5}}, {"\U00001160\U00000308\U00000300", {4448}, {7}}, + {"\U00001160\U0000093c", {4448}, {6}}, + {"\U00001160\U00000308\U0000093c", {4448}, {8}}, + {"\U00001160\U0000094d", {4448}, {6}}, + {"\U00001160\U00000308\U0000094d", {4448}, {8}}, {"\U00001160\U0000200d", {4448}, {6}}, {"\U00001160\U00000308\U0000200d", {4448}, {8}}, {"\U00001160\U00000378", {4448, 888}, {3, 5}}, @@ -437,8 +577,8 @@ std::array, 602> data_utf8 = {{ {"\U000011a8\U00000308\U0001f1e6", {4520, 127462}, {5, 9}}, {"\U000011a8\U00000600", {4520, 1536}, {3, 5}}, {"\U000011a8\U00000308\U00000600", {4520, 1536}, {5, 7}}, - {"\U000011a8\U00000903", {4520}, {6}}, - {"\U000011a8\U00000308\U00000903", {4520}, {8}}, + {"\U000011a8\U00000a03", {4520}, {6}}, + {"\U000011a8\U00000308\U00000a03", {4520}, {8}}, {"\U000011a8\U00001100", {4520, 4352}, {3, 6}}, {"\U000011a8\U00000308\U00001100", {4520, 4352}, {5, 8}}, {"\U000011a8\U00001160", {4520, 4448}, {3, 6}}, @@ -449,10 +589,24 @@ std::array, 602> data_utf8 = {{ {"\U000011a8\U00000308\U0000ac00", {4520, 44032}, {5, 8}}, {"\U000011a8\U0000ac01", {4520, 44033}, {3, 6}}, {"\U000011a8\U00000308\U0000ac01", {4520, 44033}, {5, 8}}, + {"\U000011a8\U00000900", {4520}, {6}}, + {"\U000011a8\U00000308\U00000900", {4520}, {8}}, + {"\U000011a8\U00000903", {4520}, {6}}, + {"\U000011a8\U00000308\U00000903", {4520}, {8}}, + {"\U000011a8\U00000904", {4520, 2308}, {3, 6}}, + {"\U000011a8\U00000308\U00000904", {4520, 2308}, {5, 8}}, + {"\U000011a8\U00000d4e", {4520, 3406}, {3, 6}}, + {"\U000011a8\U00000308\U00000d4e", {4520, 3406}, {5, 8}}, + {"\U000011a8\U00000915", {4520, 2325}, {3, 6}}, + {"\U000011a8\U00000308\U00000915", {4520, 2325}, {5, 8}}, {"\U000011a8\U0000231a", {4520, 8986}, {3, 6}}, {"\U000011a8\U00000308\U0000231a", {4520, 8986}, {5, 8}}, {"\U000011a8\U00000300", {4520}, {5}}, {"\U000011a8\U00000308\U00000300", {4520}, {7}}, + {"\U000011a8\U0000093c", {4520}, {6}}, + {"\U000011a8\U00000308\U0000093c", {4520}, {8}}, + {"\U000011a8\U0000094d", {4520}, {6}}, + {"\U000011a8\U00000308\U0000094d", {4520}, {8}}, {"\U000011a8\U0000200d", {4520}, {6}}, {"\U000011a8\U00000308\U0000200d", {4520}, {8}}, {"\U000011a8\U00000378", {4520, 888}, {3, 5}}, @@ -471,8 +625,8 @@ std::array, 602> data_utf8 = {{ {"\U0000ac00\U00000308\U0001f1e6", {44032, 127462}, {5, 9}}, {"\U0000ac00\U00000600", {44032, 1536}, {3, 5}}, {"\U0000ac00\U00000308\U00000600", {44032, 1536}, {5, 7}}, - {"\U0000ac00\U00000903", {44032}, {6}}, - {"\U0000ac00\U00000308\U00000903", {44032}, 
{8}}, + {"\U0000ac00\U00000a03", {44032}, {6}}, + {"\U0000ac00\U00000308\U00000a03", {44032}, {8}}, {"\U0000ac00\U00001100", {44032, 4352}, {3, 6}}, {"\U0000ac00\U00000308\U00001100", {44032, 4352}, {5, 8}}, {"\U0000ac00\U00001160", {44032}, {6}}, @@ -483,10 +637,24 @@ std::array, 602> data_utf8 = {{ {"\U0000ac00\U00000308\U0000ac00", {44032, 44032}, {5, 8}}, {"\U0000ac00\U0000ac01", {44032, 44033}, {3, 6}}, {"\U0000ac00\U00000308\U0000ac01", {44032, 44033}, {5, 8}}, + {"\U0000ac00\U00000900", {44032}, {6}}, + {"\U0000ac00\U00000308\U00000900", {44032}, {8}}, + {"\U0000ac00\U00000903", {44032}, {6}}, + {"\U0000ac00\U00000308\U00000903", {44032}, {8}}, + {"\U0000ac00\U00000904", {44032, 2308}, {3, 6}}, + {"\U0000ac00\U00000308\U00000904", {44032, 2308}, {5, 8}}, + {"\U0000ac00\U00000d4e", {44032, 3406}, {3, 6}}, + {"\U0000ac00\U00000308\U00000d4e", {44032, 3406}, {5, 8}}, + {"\U0000ac00\U00000915", {44032, 2325}, {3, 6}}, + {"\U0000ac00\U00000308\U00000915", {44032, 2325}, {5, 8}}, {"\U0000ac00\U0000231a", {44032, 8986}, {3, 6}}, {"\U0000ac00\U00000308\U0000231a", {44032, 8986}, {5, 8}}, {"\U0000ac00\U00000300", {44032}, {5}}, {"\U0000ac00\U00000308\U00000300", {44032}, {7}}, + {"\U0000ac00\U0000093c", {44032}, {6}}, + {"\U0000ac00\U00000308\U0000093c", {44032}, {8}}, + {"\U0000ac00\U0000094d", {44032}, {6}}, + {"\U0000ac00\U00000308\U0000094d", {44032}, {8}}, {"\U0000ac00\U0000200d", {44032}, {6}}, {"\U0000ac00\U00000308\U0000200d", {44032}, {8}}, {"\U0000ac00\U00000378", {44032, 888}, {3, 5}}, @@ -505,8 +673,8 @@ std::array, 602> data_utf8 = {{ {"\U0000ac01\U00000308\U0001f1e6", {44033, 127462}, {5, 9}}, {"\U0000ac01\U00000600", {44033, 1536}, {3, 5}}, {"\U0000ac01\U00000308\U00000600", {44033, 1536}, {5, 7}}, - {"\U0000ac01\U00000903", {44033}, {6}}, - {"\U0000ac01\U00000308\U00000903", {44033}, {8}}, + {"\U0000ac01\U00000a03", {44033}, {6}}, + {"\U0000ac01\U00000308\U00000a03", {44033}, {8}}, {"\U0000ac01\U00001100", {44033, 4352}, {3, 6}}, {"\U0000ac01\U00000308\U00001100", {44033, 4352}, {5, 8}}, {"\U0000ac01\U00001160", {44033, 4448}, {3, 6}}, @@ -517,14 +685,268 @@ std::array, 602> data_utf8 = {{ {"\U0000ac01\U00000308\U0000ac00", {44033, 44032}, {5, 8}}, {"\U0000ac01\U0000ac01", {44033, 44033}, {3, 6}}, {"\U0000ac01\U00000308\U0000ac01", {44033, 44033}, {5, 8}}, + {"\U0000ac01\U00000900", {44033}, {6}}, + {"\U0000ac01\U00000308\U00000900", {44033}, {8}}, + {"\U0000ac01\U00000903", {44033}, {6}}, + {"\U0000ac01\U00000308\U00000903", {44033}, {8}}, + {"\U0000ac01\U00000904", {44033, 2308}, {3, 6}}, + {"\U0000ac01\U00000308\U00000904", {44033, 2308}, {5, 8}}, + {"\U0000ac01\U00000d4e", {44033, 3406}, {3, 6}}, + {"\U0000ac01\U00000308\U00000d4e", {44033, 3406}, {5, 8}}, + {"\U0000ac01\U00000915", {44033, 2325}, {3, 6}}, + {"\U0000ac01\U00000308\U00000915", {44033, 2325}, {5, 8}}, {"\U0000ac01\U0000231a", {44033, 8986}, {3, 6}}, {"\U0000ac01\U00000308\U0000231a", {44033, 8986}, {5, 8}}, {"\U0000ac01\U00000300", {44033}, {5}}, {"\U0000ac01\U00000308\U00000300", {44033}, {7}}, + {"\U0000ac01\U0000093c", {44033}, {6}}, + {"\U0000ac01\U00000308\U0000093c", {44033}, {8}}, + {"\U0000ac01\U0000094d", {44033}, {6}}, + {"\U0000ac01\U00000308\U0000094d", {44033}, {8}}, {"\U0000ac01\U0000200d", {44033}, {6}}, {"\U0000ac01\U00000308\U0000200d", {44033}, {8}}, {"\U0000ac01\U00000378", {44033, 888}, {3, 5}}, {"\U0000ac01\U00000308\U00000378", {44033, 888}, {5, 7}}, + {"\U00000900\U00000020", {2304, 32}, {3, 4}}, + {"\U00000900\U00000308\U00000020", {2304, 32}, {5, 6}}, + {"\U00000900\U0000000d", 
{2304, 13}, {3, 4}}, + {"\U00000900\U00000308\U0000000d", {2304, 13}, {5, 6}}, + {"\U00000900\U0000000a", {2304, 10}, {3, 4}}, + {"\U00000900\U00000308\U0000000a", {2304, 10}, {5, 6}}, + {"\U00000900\U00000001", {2304, 1}, {3, 4}}, + {"\U00000900\U00000308\U00000001", {2304, 1}, {5, 6}}, + {"\U00000900\U0000034f", {2304}, {5}}, + {"\U00000900\U00000308\U0000034f", {2304}, {7}}, + {"\U00000900\U0001f1e6", {2304, 127462}, {3, 7}}, + {"\U00000900\U00000308\U0001f1e6", {2304, 127462}, {5, 9}}, + {"\U00000900\U00000600", {2304, 1536}, {3, 5}}, + {"\U00000900\U00000308\U00000600", {2304, 1536}, {5, 7}}, + {"\U00000900\U00000a03", {2304}, {6}}, + {"\U00000900\U00000308\U00000a03", {2304}, {8}}, + {"\U00000900\U00001100", {2304, 4352}, {3, 6}}, + {"\U00000900\U00000308\U00001100", {2304, 4352}, {5, 8}}, + {"\U00000900\U00001160", {2304, 4448}, {3, 6}}, + {"\U00000900\U00000308\U00001160", {2304, 4448}, {5, 8}}, + {"\U00000900\U000011a8", {2304, 4520}, {3, 6}}, + {"\U00000900\U00000308\U000011a8", {2304, 4520}, {5, 8}}, + {"\U00000900\U0000ac00", {2304, 44032}, {3, 6}}, + {"\U00000900\U00000308\U0000ac00", {2304, 44032}, {5, 8}}, + {"\U00000900\U0000ac01", {2304, 44033}, {3, 6}}, + {"\U00000900\U00000308\U0000ac01", {2304, 44033}, {5, 8}}, + {"\U00000900\U00000900", {2304}, {6}}, + {"\U00000900\U00000308\U00000900", {2304}, {8}}, + {"\U00000900\U00000903", {2304}, {6}}, + {"\U00000900\U00000308\U00000903", {2304}, {8}}, + {"\U00000900\U00000904", {2304, 2308}, {3, 6}}, + {"\U00000900\U00000308\U00000904", {2304, 2308}, {5, 8}}, + {"\U00000900\U00000d4e", {2304, 3406}, {3, 6}}, + {"\U00000900\U00000308\U00000d4e", {2304, 3406}, {5, 8}}, + {"\U00000900\U00000915", {2304, 2325}, {3, 6}}, + {"\U00000900\U00000308\U00000915", {2304, 2325}, {5, 8}}, + {"\U00000900\U0000231a", {2304, 8986}, {3, 6}}, + {"\U00000900\U00000308\U0000231a", {2304, 8986}, {5, 8}}, + {"\U00000900\U00000300", {2304}, {5}}, + {"\U00000900\U00000308\U00000300", {2304}, {7}}, + {"\U00000900\U0000093c", {2304}, {6}}, + {"\U00000900\U00000308\U0000093c", {2304}, {8}}, + {"\U00000900\U0000094d", {2304}, {6}}, + {"\U00000900\U00000308\U0000094d", {2304}, {8}}, + {"\U00000900\U0000200d", {2304}, {6}}, + {"\U00000900\U00000308\U0000200d", {2304}, {8}}, + {"\U00000900\U00000378", {2304, 888}, {3, 5}}, + {"\U00000900\U00000308\U00000378", {2304, 888}, {5, 7}}, + {"\U00000903\U00000020", {2307, 32}, {3, 4}}, + {"\U00000903\U00000308\U00000020", {2307, 32}, {5, 6}}, + {"\U00000903\U0000000d", {2307, 13}, {3, 4}}, + {"\U00000903\U00000308\U0000000d", {2307, 13}, {5, 6}}, + {"\U00000903\U0000000a", {2307, 10}, {3, 4}}, + {"\U00000903\U00000308\U0000000a", {2307, 10}, {5, 6}}, + {"\U00000903\U00000001", {2307, 1}, {3, 4}}, + {"\U00000903\U00000308\U00000001", {2307, 1}, {5, 6}}, + {"\U00000903\U0000034f", {2307}, {5}}, + {"\U00000903\U00000308\U0000034f", {2307}, {7}}, + {"\U00000903\U0001f1e6", {2307, 127462}, {3, 7}}, + {"\U00000903\U00000308\U0001f1e6", {2307, 127462}, {5, 9}}, + {"\U00000903\U00000600", {2307, 1536}, {3, 5}}, + {"\U00000903\U00000308\U00000600", {2307, 1536}, {5, 7}}, + {"\U00000903\U00000a03", {2307}, {6}}, + {"\U00000903\U00000308\U00000a03", {2307}, {8}}, + {"\U00000903\U00001100", {2307, 4352}, {3, 6}}, + {"\U00000903\U00000308\U00001100", {2307, 4352}, {5, 8}}, + {"\U00000903\U00001160", {2307, 4448}, {3, 6}}, + {"\U00000903\U00000308\U00001160", {2307, 4448}, {5, 8}}, + {"\U00000903\U000011a8", {2307, 4520}, {3, 6}}, + {"\U00000903\U00000308\U000011a8", {2307, 4520}, {5, 8}}, + {"\U00000903\U0000ac00", {2307, 44032}, 
{3, 6}}, + {"\U00000903\U00000308\U0000ac00", {2307, 44032}, {5, 8}}, + {"\U00000903\U0000ac01", {2307, 44033}, {3, 6}}, + {"\U00000903\U00000308\U0000ac01", {2307, 44033}, {5, 8}}, + {"\U00000903\U00000900", {2307}, {6}}, + {"\U00000903\U00000308\U00000900", {2307}, {8}}, + {"\U00000903\U00000903", {2307}, {6}}, + {"\U00000903\U00000308\U00000903", {2307}, {8}}, + {"\U00000903\U00000904", {2307, 2308}, {3, 6}}, + {"\U00000903\U00000308\U00000904", {2307, 2308}, {5, 8}}, + {"\U00000903\U00000d4e", {2307, 3406}, {3, 6}}, + {"\U00000903\U00000308\U00000d4e", {2307, 3406}, {5, 8}}, + {"\U00000903\U00000915", {2307, 2325}, {3, 6}}, + {"\U00000903\U00000308\U00000915", {2307, 2325}, {5, 8}}, + {"\U00000903\U0000231a", {2307, 8986}, {3, 6}}, + {"\U00000903\U00000308\U0000231a", {2307, 8986}, {5, 8}}, + {"\U00000903\U00000300", {2307}, {5}}, + {"\U00000903\U00000308\U00000300", {2307}, {7}}, + {"\U00000903\U0000093c", {2307}, {6}}, + {"\U00000903\U00000308\U0000093c", {2307}, {8}}, + {"\U00000903\U0000094d", {2307}, {6}}, + {"\U00000903\U00000308\U0000094d", {2307}, {8}}, + {"\U00000903\U0000200d", {2307}, {6}}, + {"\U00000903\U00000308\U0000200d", {2307}, {8}}, + {"\U00000903\U00000378", {2307, 888}, {3, 5}}, + {"\U00000903\U00000308\U00000378", {2307, 888}, {5, 7}}, + {"\U00000904\U00000020", {2308, 32}, {3, 4}}, + {"\U00000904\U00000308\U00000020", {2308, 32}, {5, 6}}, + {"\U00000904\U0000000d", {2308, 13}, {3, 4}}, + {"\U00000904\U00000308\U0000000d", {2308, 13}, {5, 6}}, + {"\U00000904\U0000000a", {2308, 10}, {3, 4}}, + {"\U00000904\U00000308\U0000000a", {2308, 10}, {5, 6}}, + {"\U00000904\U00000001", {2308, 1}, {3, 4}}, + {"\U00000904\U00000308\U00000001", {2308, 1}, {5, 6}}, + {"\U00000904\U0000034f", {2308}, {5}}, + {"\U00000904\U00000308\U0000034f", {2308}, {7}}, + {"\U00000904\U0001f1e6", {2308, 127462}, {3, 7}}, + {"\U00000904\U00000308\U0001f1e6", {2308, 127462}, {5, 9}}, + {"\U00000904\U00000600", {2308, 1536}, {3, 5}}, + {"\U00000904\U00000308\U00000600", {2308, 1536}, {5, 7}}, + {"\U00000904\U00000a03", {2308}, {6}}, + {"\U00000904\U00000308\U00000a03", {2308}, {8}}, + {"\U00000904\U00001100", {2308, 4352}, {3, 6}}, + {"\U00000904\U00000308\U00001100", {2308, 4352}, {5, 8}}, + {"\U00000904\U00001160", {2308, 4448}, {3, 6}}, + {"\U00000904\U00000308\U00001160", {2308, 4448}, {5, 8}}, + {"\U00000904\U000011a8", {2308, 4520}, {3, 6}}, + {"\U00000904\U00000308\U000011a8", {2308, 4520}, {5, 8}}, + {"\U00000904\U0000ac00", {2308, 44032}, {3, 6}}, + {"\U00000904\U00000308\U0000ac00", {2308, 44032}, {5, 8}}, + {"\U00000904\U0000ac01", {2308, 44033}, {3, 6}}, + {"\U00000904\U00000308\U0000ac01", {2308, 44033}, {5, 8}}, + {"\U00000904\U00000900", {2308}, {6}}, + {"\U00000904\U00000308\U00000900", {2308}, {8}}, + {"\U00000904\U00000903", {2308}, {6}}, + {"\U00000904\U00000308\U00000903", {2308}, {8}}, + {"\U00000904\U00000904", {2308, 2308}, {3, 6}}, + {"\U00000904\U00000308\U00000904", {2308, 2308}, {5, 8}}, + {"\U00000904\U00000d4e", {2308, 3406}, {3, 6}}, + {"\U00000904\U00000308\U00000d4e", {2308, 3406}, {5, 8}}, + {"\U00000904\U00000915", {2308, 2325}, {3, 6}}, + {"\U00000904\U00000308\U00000915", {2308, 2325}, {5, 8}}, + {"\U00000904\U0000231a", {2308, 8986}, {3, 6}}, + {"\U00000904\U00000308\U0000231a", {2308, 8986}, {5, 8}}, + {"\U00000904\U00000300", {2308}, {5}}, + {"\U00000904\U00000308\U00000300", {2308}, {7}}, + {"\U00000904\U0000093c", {2308}, {6}}, + {"\U00000904\U00000308\U0000093c", {2308}, {8}}, + {"\U00000904\U0000094d", {2308}, {6}}, + {"\U00000904\U00000308\U0000094d", 
{2308}, {8}}, + {"\U00000904\U0000200d", {2308}, {6}}, + {"\U00000904\U00000308\U0000200d", {2308}, {8}}, + {"\U00000904\U00000378", {2308, 888}, {3, 5}}, + {"\U00000904\U00000308\U00000378", {2308, 888}, {5, 7}}, + {"\U00000d4e\U00000020", {3406}, {4}}, + {"\U00000d4e\U00000308\U00000020", {3406, 32}, {5, 6}}, + {"\U00000d4e\U0000000d", {3406, 13}, {3, 4}}, + {"\U00000d4e\U00000308\U0000000d", {3406, 13}, {5, 6}}, + {"\U00000d4e\U0000000a", {3406, 10}, {3, 4}}, + {"\U00000d4e\U00000308\U0000000a", {3406, 10}, {5, 6}}, + {"\U00000d4e\U00000001", {3406, 1}, {3, 4}}, + {"\U00000d4e\U00000308\U00000001", {3406, 1}, {5, 6}}, + {"\U00000d4e\U0000034f", {3406}, {5}}, + {"\U00000d4e\U00000308\U0000034f", {3406}, {7}}, + {"\U00000d4e\U0001f1e6", {3406}, {7}}, + {"\U00000d4e\U00000308\U0001f1e6", {3406, 127462}, {5, 9}}, + {"\U00000d4e\U00000600", {3406}, {5}}, + {"\U00000d4e\U00000308\U00000600", {3406, 1536}, {5, 7}}, + {"\U00000d4e\U00000a03", {3406}, {6}}, + {"\U00000d4e\U00000308\U00000a03", {3406}, {8}}, + {"\U00000d4e\U00001100", {3406}, {6}}, + {"\U00000d4e\U00000308\U00001100", {3406, 4352}, {5, 8}}, + {"\U00000d4e\U00001160", {3406}, {6}}, + {"\U00000d4e\U00000308\U00001160", {3406, 4448}, {5, 8}}, + {"\U00000d4e\U000011a8", {3406}, {6}}, + {"\U00000d4e\U00000308\U000011a8", {3406, 4520}, {5, 8}}, + {"\U00000d4e\U0000ac00", {3406}, {6}}, + {"\U00000d4e\U00000308\U0000ac00", {3406, 44032}, {5, 8}}, + {"\U00000d4e\U0000ac01", {3406}, {6}}, + {"\U00000d4e\U00000308\U0000ac01", {3406, 44033}, {5, 8}}, + {"\U00000d4e\U00000900", {3406}, {6}}, + {"\U00000d4e\U00000308\U00000900", {3406}, {8}}, + {"\U00000d4e\U00000903", {3406}, {6}}, + {"\U00000d4e\U00000308\U00000903", {3406}, {8}}, + {"\U00000d4e\U00000904", {3406}, {6}}, + {"\U00000d4e\U00000308\U00000904", {3406, 2308}, {5, 8}}, + {"\U00000d4e\U00000d4e", {3406}, {6}}, + {"\U00000d4e\U00000308\U00000d4e", {3406, 3406}, {5, 8}}, + {"\U00000d4e\U00000915", {3406}, {6}}, + {"\U00000d4e\U00000308\U00000915", {3406, 2325}, {5, 8}}, + {"\U00000d4e\U0000231a", {3406}, {6}}, + {"\U00000d4e\U00000308\U0000231a", {3406, 8986}, {5, 8}}, + {"\U00000d4e\U00000300", {3406}, {5}}, + {"\U00000d4e\U00000308\U00000300", {3406}, {7}}, + {"\U00000d4e\U0000093c", {3406}, {6}}, + {"\U00000d4e\U00000308\U0000093c", {3406}, {8}}, + {"\U00000d4e\U0000094d", {3406}, {6}}, + {"\U00000d4e\U00000308\U0000094d", {3406}, {8}}, + {"\U00000d4e\U0000200d", {3406}, {6}}, + {"\U00000d4e\U00000308\U0000200d", {3406}, {8}}, + {"\U00000d4e\U00000378", {3406}, {5}}, + {"\U00000d4e\U00000308\U00000378", {3406, 888}, {5, 7}}, + {"\U00000915\U00000020", {2325, 32}, {3, 4}}, + {"\U00000915\U00000308\U00000020", {2325, 32}, {5, 6}}, + {"\U00000915\U0000000d", {2325, 13}, {3, 4}}, + {"\U00000915\U00000308\U0000000d", {2325, 13}, {5, 6}}, + {"\U00000915\U0000000a", {2325, 10}, {3, 4}}, + {"\U00000915\U00000308\U0000000a", {2325, 10}, {5, 6}}, + {"\U00000915\U00000001", {2325, 1}, {3, 4}}, + {"\U00000915\U00000308\U00000001", {2325, 1}, {5, 6}}, + {"\U00000915\U0000034f", {2325}, {5}}, + {"\U00000915\U00000308\U0000034f", {2325}, {7}}, + {"\U00000915\U0001f1e6", {2325, 127462}, {3, 7}}, + {"\U00000915\U00000308\U0001f1e6", {2325, 127462}, {5, 9}}, + {"\U00000915\U00000600", {2325, 1536}, {3, 5}}, + {"\U00000915\U00000308\U00000600", {2325, 1536}, {5, 7}}, + {"\U00000915\U00000a03", {2325}, {6}}, + {"\U00000915\U00000308\U00000a03", {2325}, {8}}, + {"\U00000915\U00001100", {2325, 4352}, {3, 6}}, + {"\U00000915\U00000308\U00001100", {2325, 4352}, {5, 8}}, + {"\U00000915\U00001160", {2325, 
4448}, {3, 6}}, + {"\U00000915\U00000308\U00001160", {2325, 4448}, {5, 8}}, + {"\U00000915\U000011a8", {2325, 4520}, {3, 6}}, + {"\U00000915\U00000308\U000011a8", {2325, 4520}, {5, 8}}, + {"\U00000915\U0000ac00", {2325, 44032}, {3, 6}}, + {"\U00000915\U00000308\U0000ac00", {2325, 44032}, {5, 8}}, + {"\U00000915\U0000ac01", {2325, 44033}, {3, 6}}, + {"\U00000915\U00000308\U0000ac01", {2325, 44033}, {5, 8}}, + {"\U00000915\U00000900", {2325}, {6}}, + {"\U00000915\U00000308\U00000900", {2325}, {8}}, + {"\U00000915\U00000903", {2325}, {6}}, + {"\U00000915\U00000308\U00000903", {2325}, {8}}, + {"\U00000915\U00000904", {2325, 2308}, {3, 6}}, + {"\U00000915\U00000308\U00000904", {2325, 2308}, {5, 8}}, + {"\U00000915\U00000d4e", {2325, 3406}, {3, 6}}, + {"\U00000915\U00000308\U00000d4e", {2325, 3406}, {5, 8}}, + {"\U00000915\U00000915", {2325, 2325}, {3, 6}}, + {"\U00000915\U00000308\U00000915", {2325, 2325}, {5, 8}}, + {"\U00000915\U0000231a", {2325, 8986}, {3, 6}}, + {"\U00000915\U00000308\U0000231a", {2325, 8986}, {5, 8}}, + {"\U00000915\U00000300", {2325}, {5}}, + {"\U00000915\U00000308\U00000300", {2325}, {7}}, + {"\U00000915\U0000093c", {2325}, {6}}, + {"\U00000915\U00000308\U0000093c", {2325}, {8}}, + {"\U00000915\U0000094d", {2325}, {6}}, + {"\U00000915\U00000308\U0000094d", {2325}, {8}}, + {"\U00000915\U0000200d", {2325}, {6}}, + {"\U00000915\U00000308\U0000200d", {2325}, {8}}, + {"\U00000915\U00000378", {2325, 888}, {3, 5}}, + {"\U00000915\U00000308\U00000378", {2325, 888}, {5, 7}}, {"\U0000231a\U00000020", {8986, 32}, {3, 4}}, {"\U0000231a\U00000308\U00000020", {8986, 32}, {5, 6}}, {"\U0000231a\U0000000d", {8986, 13}, {3, 4}}, @@ -539,8 +961,8 @@ std::array, 602> data_utf8 = {{ {"\U0000231a\U00000308\U0001f1e6", {8986, 127462}, {5, 9}}, {"\U0000231a\U00000600", {8986, 1536}, {3, 5}}, {"\U0000231a\U00000308\U00000600", {8986, 1536}, {5, 7}}, - {"\U0000231a\U00000903", {8986}, {6}}, - {"\U0000231a\U00000308\U00000903", {8986}, {8}}, + {"\U0000231a\U00000a03", {8986}, {6}}, + {"\U0000231a\U00000308\U00000a03", {8986}, {8}}, {"\U0000231a\U00001100", {8986, 4352}, {3, 6}}, {"\U0000231a\U00000308\U00001100", {8986, 4352}, {5, 8}}, {"\U0000231a\U00001160", {8986, 4448}, {3, 6}}, @@ -551,10 +973,24 @@ std::array, 602> data_utf8 = {{ {"\U0000231a\U00000308\U0000ac00", {8986, 44032}, {5, 8}}, {"\U0000231a\U0000ac01", {8986, 44033}, {3, 6}}, {"\U0000231a\U00000308\U0000ac01", {8986, 44033}, {5, 8}}, + {"\U0000231a\U00000900", {8986}, {6}}, + {"\U0000231a\U00000308\U00000900", {8986}, {8}}, + {"\U0000231a\U00000903", {8986}, {6}}, + {"\U0000231a\U00000308\U00000903", {8986}, {8}}, + {"\U0000231a\U00000904", {8986, 2308}, {3, 6}}, + {"\U0000231a\U00000308\U00000904", {8986, 2308}, {5, 8}}, + {"\U0000231a\U00000d4e", {8986, 3406}, {3, 6}}, + {"\U0000231a\U00000308\U00000d4e", {8986, 3406}, {5, 8}}, + {"\U0000231a\U00000915", {8986, 2325}, {3, 6}}, + {"\U0000231a\U00000308\U00000915", {8986, 2325}, {5, 8}}, {"\U0000231a\U0000231a", {8986, 8986}, {3, 6}}, {"\U0000231a\U00000308\U0000231a", {8986, 8986}, {5, 8}}, {"\U0000231a\U00000300", {8986}, {5}}, {"\U0000231a\U00000308\U00000300", {8986}, {7}}, + {"\U0000231a\U0000093c", {8986}, {6}}, + {"\U0000231a\U00000308\U0000093c", {8986}, {8}}, + {"\U0000231a\U0000094d", {8986}, {6}}, + {"\U0000231a\U00000308\U0000094d", {8986}, {8}}, {"\U0000231a\U0000200d", {8986}, {6}}, {"\U0000231a\U00000308\U0000200d", {8986}, {8}}, {"\U0000231a\U00000378", {8986, 888}, {3, 5}}, @@ -573,8 +1009,8 @@ std::array, 602> data_utf8 = {{ {"\U00000300\U00000308\U0001f1e6", 
{768, 127462}, {4, 8}}, {"\U00000300\U00000600", {768, 1536}, {2, 4}}, {"\U00000300\U00000308\U00000600", {768, 1536}, {4, 6}}, - {"\U00000300\U00000903", {768}, {5}}, - {"\U00000300\U00000308\U00000903", {768}, {7}}, + {"\U00000300\U00000a03", {768}, {5}}, + {"\U00000300\U00000308\U00000a03", {768}, {7}}, {"\U00000300\U00001100", {768, 4352}, {2, 5}}, {"\U00000300\U00000308\U00001100", {768, 4352}, {4, 7}}, {"\U00000300\U00001160", {768, 4448}, {2, 5}}, @@ -585,14 +1021,124 @@ std::array, 602> data_utf8 = {{ {"\U00000300\U00000308\U0000ac00", {768, 44032}, {4, 7}}, {"\U00000300\U0000ac01", {768, 44033}, {2, 5}}, {"\U00000300\U00000308\U0000ac01", {768, 44033}, {4, 7}}, + {"\U00000300\U00000900", {768}, {5}}, + {"\U00000300\U00000308\U00000900", {768}, {7}}, + {"\U00000300\U00000903", {768}, {5}}, + {"\U00000300\U00000308\U00000903", {768}, {7}}, + {"\U00000300\U00000904", {768, 2308}, {2, 5}}, + {"\U00000300\U00000308\U00000904", {768, 2308}, {4, 7}}, + {"\U00000300\U00000d4e", {768, 3406}, {2, 5}}, + {"\U00000300\U00000308\U00000d4e", {768, 3406}, {4, 7}}, + {"\U00000300\U00000915", {768, 2325}, {2, 5}}, + {"\U00000300\U00000308\U00000915", {768, 2325}, {4, 7}}, {"\U00000300\U0000231a", {768, 8986}, {2, 5}}, {"\U00000300\U00000308\U0000231a", {768, 8986}, {4, 7}}, {"\U00000300\U00000300", {768}, {4}}, {"\U00000300\U00000308\U00000300", {768}, {6}}, + {"\U00000300\U0000093c", {768}, {5}}, + {"\U00000300\U00000308\U0000093c", {768}, {7}}, + {"\U00000300\U0000094d", {768}, {5}}, + {"\U00000300\U00000308\U0000094d", {768}, {7}}, {"\U00000300\U0000200d", {768}, {5}}, {"\U00000300\U00000308\U0000200d", {768}, {7}}, {"\U00000300\U00000378", {768, 888}, {2, 4}}, {"\U00000300\U00000308\U00000378", {768, 888}, {4, 6}}, + {"\U0000093c\U00000020", {2364, 32}, {3, 4}}, + {"\U0000093c\U00000308\U00000020", {2364, 32}, {5, 6}}, + {"\U0000093c\U0000000d", {2364, 13}, {3, 4}}, + {"\U0000093c\U00000308\U0000000d", {2364, 13}, {5, 6}}, + {"\U0000093c\U0000000a", {2364, 10}, {3, 4}}, + {"\U0000093c\U00000308\U0000000a", {2364, 10}, {5, 6}}, + {"\U0000093c\U00000001", {2364, 1}, {3, 4}}, + {"\U0000093c\U00000308\U00000001", {2364, 1}, {5, 6}}, + {"\U0000093c\U0000034f", {2364}, {5}}, + {"\U0000093c\U00000308\U0000034f", {2364}, {7}}, + {"\U0000093c\U0001f1e6", {2364, 127462}, {3, 7}}, + {"\U0000093c\U00000308\U0001f1e6", {2364, 127462}, {5, 9}}, + {"\U0000093c\U00000600", {2364, 1536}, {3, 5}}, + {"\U0000093c\U00000308\U00000600", {2364, 1536}, {5, 7}}, + {"\U0000093c\U00000a03", {2364}, {6}}, + {"\U0000093c\U00000308\U00000a03", {2364}, {8}}, + {"\U0000093c\U00001100", {2364, 4352}, {3, 6}}, + {"\U0000093c\U00000308\U00001100", {2364, 4352}, {5, 8}}, + {"\U0000093c\U00001160", {2364, 4448}, {3, 6}}, + {"\U0000093c\U00000308\U00001160", {2364, 4448}, {5, 8}}, + {"\U0000093c\U000011a8", {2364, 4520}, {3, 6}}, + {"\U0000093c\U00000308\U000011a8", {2364, 4520}, {5, 8}}, + {"\U0000093c\U0000ac00", {2364, 44032}, {3, 6}}, + {"\U0000093c\U00000308\U0000ac00", {2364, 44032}, {5, 8}}, + {"\U0000093c\U0000ac01", {2364, 44033}, {3, 6}}, + {"\U0000093c\U00000308\U0000ac01", {2364, 44033}, {5, 8}}, + {"\U0000093c\U00000900", {2364}, {6}}, + {"\U0000093c\U00000308\U00000900", {2364}, {8}}, + {"\U0000093c\U00000903", {2364}, {6}}, + {"\U0000093c\U00000308\U00000903", {2364}, {8}}, + {"\U0000093c\U00000904", {2364, 2308}, {3, 6}}, + {"\U0000093c\U00000308\U00000904", {2364, 2308}, {5, 8}}, + {"\U0000093c\U00000d4e", {2364, 3406}, {3, 6}}, + {"\U0000093c\U00000308\U00000d4e", {2364, 3406}, {5, 8}}, + 
{"\U0000093c\U00000915", {2364, 2325}, {3, 6}}, + {"\U0000093c\U00000308\U00000915", {2364, 2325}, {5, 8}}, + {"\U0000093c\U0000231a", {2364, 8986}, {3, 6}}, + {"\U0000093c\U00000308\U0000231a", {2364, 8986}, {5, 8}}, + {"\U0000093c\U00000300", {2364}, {5}}, + {"\U0000093c\U00000308\U00000300", {2364}, {7}}, + {"\U0000093c\U0000093c", {2364}, {6}}, + {"\U0000093c\U00000308\U0000093c", {2364}, {8}}, + {"\U0000093c\U0000094d", {2364}, {6}}, + {"\U0000093c\U00000308\U0000094d", {2364}, {8}}, + {"\U0000093c\U0000200d", {2364}, {6}}, + {"\U0000093c\U00000308\U0000200d", {2364}, {8}}, + {"\U0000093c\U00000378", {2364, 888}, {3, 5}}, + {"\U0000093c\U00000308\U00000378", {2364, 888}, {5, 7}}, + {"\U0000094d\U00000020", {2381, 32}, {3, 4}}, + {"\U0000094d\U00000308\U00000020", {2381, 32}, {5, 6}}, + {"\U0000094d\U0000000d", {2381, 13}, {3, 4}}, + {"\U0000094d\U00000308\U0000000d", {2381, 13}, {5, 6}}, + {"\U0000094d\U0000000a", {2381, 10}, {3, 4}}, + {"\U0000094d\U00000308\U0000000a", {2381, 10}, {5, 6}}, + {"\U0000094d\U00000001", {2381, 1}, {3, 4}}, + {"\U0000094d\U00000308\U00000001", {2381, 1}, {5, 6}}, + {"\U0000094d\U0000034f", {2381}, {5}}, + {"\U0000094d\U00000308\U0000034f", {2381}, {7}}, + {"\U0000094d\U0001f1e6", {2381, 127462}, {3, 7}}, + {"\U0000094d\U00000308\U0001f1e6", {2381, 127462}, {5, 9}}, + {"\U0000094d\U00000600", {2381, 1536}, {3, 5}}, + {"\U0000094d\U00000308\U00000600", {2381, 1536}, {5, 7}}, + {"\U0000094d\U00000a03", {2381}, {6}}, + {"\U0000094d\U00000308\U00000a03", {2381}, {8}}, + {"\U0000094d\U00001100", {2381, 4352}, {3, 6}}, + {"\U0000094d\U00000308\U00001100", {2381, 4352}, {5, 8}}, + {"\U0000094d\U00001160", {2381, 4448}, {3, 6}}, + {"\U0000094d\U00000308\U00001160", {2381, 4448}, {5, 8}}, + {"\U0000094d\U000011a8", {2381, 4520}, {3, 6}}, + {"\U0000094d\U00000308\U000011a8", {2381, 4520}, {5, 8}}, + {"\U0000094d\U0000ac00", {2381, 44032}, {3, 6}}, + {"\U0000094d\U00000308\U0000ac00", {2381, 44032}, {5, 8}}, + {"\U0000094d\U0000ac01", {2381, 44033}, {3, 6}}, + {"\U0000094d\U00000308\U0000ac01", {2381, 44033}, {5, 8}}, + {"\U0000094d\U00000900", {2381}, {6}}, + {"\U0000094d\U00000308\U00000900", {2381}, {8}}, + {"\U0000094d\U00000903", {2381}, {6}}, + {"\U0000094d\U00000308\U00000903", {2381}, {8}}, + {"\U0000094d\U00000904", {2381, 2308}, {3, 6}}, + {"\U0000094d\U00000308\U00000904", {2381, 2308}, {5, 8}}, + {"\U0000094d\U00000d4e", {2381, 3406}, {3, 6}}, + {"\U0000094d\U00000308\U00000d4e", {2381, 3406}, {5, 8}}, + {"\U0000094d\U00000915", {2381, 2325}, {3, 6}}, + {"\U0000094d\U00000308\U00000915", {2381, 2325}, {5, 8}}, + {"\U0000094d\U0000231a", {2381, 8986}, {3, 6}}, + {"\U0000094d\U00000308\U0000231a", {2381, 8986}, {5, 8}}, + {"\U0000094d\U00000300", {2381}, {5}}, + {"\U0000094d\U00000308\U00000300", {2381}, {7}}, + {"\U0000094d\U0000093c", {2381}, {6}}, + {"\U0000094d\U00000308\U0000093c", {2381}, {8}}, + {"\U0000094d\U0000094d", {2381}, {6}}, + {"\U0000094d\U00000308\U0000094d", {2381}, {8}}, + {"\U0000094d\U0000200d", {2381}, {6}}, + {"\U0000094d\U00000308\U0000200d", {2381}, {8}}, + {"\U0000094d\U00000378", {2381, 888}, {3, 5}}, + {"\U0000094d\U00000308\U00000378", {2381, 888}, {5, 7}}, {"\U0000200d\U00000020", {8205, 32}, {3, 4}}, {"\U0000200d\U00000308\U00000020", {8205, 32}, {5, 6}}, {"\U0000200d\U0000000d", {8205, 13}, {3, 4}}, @@ -607,8 +1153,8 @@ std::array, 602> data_utf8 = {{ {"\U0000200d\U00000308\U0001f1e6", {8205, 127462}, {5, 9}}, {"\U0000200d\U00000600", {8205, 1536}, {3, 5}}, {"\U0000200d\U00000308\U00000600", {8205, 1536}, {5, 7}}, - 
{"\U0000200d\U00000903", {8205}, {6}}, - {"\U0000200d\U00000308\U00000903", {8205}, {8}}, + {"\U0000200d\U00000a03", {8205}, {6}}, + {"\U0000200d\U00000308\U00000a03", {8205}, {8}}, {"\U0000200d\U00001100", {8205, 4352}, {3, 6}}, {"\U0000200d\U00000308\U00001100", {8205, 4352}, {5, 8}}, {"\U0000200d\U00001160", {8205, 4448}, {3, 6}}, @@ -619,10 +1165,24 @@ std::array, 602> data_utf8 = {{ {"\U0000200d\U00000308\U0000ac00", {8205, 44032}, {5, 8}}, {"\U0000200d\U0000ac01", {8205, 44033}, {3, 6}}, {"\U0000200d\U00000308\U0000ac01", {8205, 44033}, {5, 8}}, + {"\U0000200d\U00000900", {8205}, {6}}, + {"\U0000200d\U00000308\U00000900", {8205}, {8}}, + {"\U0000200d\U00000903", {8205}, {6}}, + {"\U0000200d\U00000308\U00000903", {8205}, {8}}, + {"\U0000200d\U00000904", {8205, 2308}, {3, 6}}, + {"\U0000200d\U00000308\U00000904", {8205, 2308}, {5, 8}}, + {"\U0000200d\U00000d4e", {8205, 3406}, {3, 6}}, + {"\U0000200d\U00000308\U00000d4e", {8205, 3406}, {5, 8}}, + {"\U0000200d\U00000915", {8205, 2325}, {3, 6}}, + {"\U0000200d\U00000308\U00000915", {8205, 2325}, {5, 8}}, {"\U0000200d\U0000231a", {8205, 8986}, {3, 6}}, {"\U0000200d\U00000308\U0000231a", {8205, 8986}, {5, 8}}, {"\U0000200d\U00000300", {8205}, {5}}, {"\U0000200d\U00000308\U00000300", {8205}, {7}}, + {"\U0000200d\U0000093c", {8205}, {6}}, + {"\U0000200d\U00000308\U0000093c", {8205}, {8}}, + {"\U0000200d\U0000094d", {8205}, {6}}, + {"\U0000200d\U00000308\U0000094d", {8205}, {8}}, {"\U0000200d\U0000200d", {8205}, {6}}, {"\U0000200d\U00000308\U0000200d", {8205}, {8}}, {"\U0000200d\U00000378", {8205, 888}, {3, 5}}, @@ -641,8 +1201,8 @@ std::array, 602> data_utf8 = {{ {"\U00000378\U00000308\U0001f1e6", {888, 127462}, {4, 8}}, {"\U00000378\U00000600", {888, 1536}, {2, 4}}, {"\U00000378\U00000308\U00000600", {888, 1536}, {4, 6}}, - {"\U00000378\U00000903", {888}, {5}}, - {"\U00000378\U00000308\U00000903", {888}, {7}}, + {"\U00000378\U00000a03", {888}, {5}}, + {"\U00000378\U00000308\U00000a03", {888}, {7}}, {"\U00000378\U00001100", {888, 4352}, {2, 5}}, {"\U00000378\U00000308\U00001100", {888, 4352}, {4, 7}}, {"\U00000378\U00001160", {888, 4448}, {2, 5}}, @@ -653,10 +1213,24 @@ std::array, 602> data_utf8 = {{ {"\U00000378\U00000308\U0000ac00", {888, 44032}, {4, 7}}, {"\U00000378\U0000ac01", {888, 44033}, {2, 5}}, {"\U00000378\U00000308\U0000ac01", {888, 44033}, {4, 7}}, + {"\U00000378\U00000900", {888}, {5}}, + {"\U00000378\U00000308\U00000900", {888}, {7}}, + {"\U00000378\U00000903", {888}, {5}}, + {"\U00000378\U00000308\U00000903", {888}, {7}}, + {"\U00000378\U00000904", {888, 2308}, {2, 5}}, + {"\U00000378\U00000308\U00000904", {888, 2308}, {4, 7}}, + {"\U00000378\U00000d4e", {888, 3406}, {2, 5}}, + {"\U00000378\U00000308\U00000d4e", {888, 3406}, {4, 7}}, + {"\U00000378\U00000915", {888, 2325}, {2, 5}}, + {"\U00000378\U00000308\U00000915", {888, 2325}, {4, 7}}, {"\U00000378\U0000231a", {888, 8986}, {2, 5}}, {"\U00000378\U00000308\U0000231a", {888, 8986}, {4, 7}}, {"\U00000378\U00000300", {888}, {4}}, {"\U00000378\U00000308\U00000300", {888}, {6}}, + {"\U00000378\U0000093c", {888}, {5}}, + {"\U00000378\U00000308\U0000093c", {888}, {7}}, + {"\U00000378\U0000094d", {888}, {5}}, + {"\U00000378\U00000308\U0000094d", {888}, {7}}, {"\U00000378\U0000200d", {888}, {5}}, {"\U00000378\U00000308\U0000200d", {888}, {7}}, {"\U00000378\U00000378", {888, 888}, {2, 4}}, @@ -684,7 +1258,18 @@ std::array, 602> data_utf8 = {{ {"\U0001f6d1\U0000200d\U0001f6d1", {128721}, {11}}, {"\U00000061\U0000200d\U0001f6d1", {97, 128721}, {4, 8}}, 
{"\U00002701\U0000200d\U00002701", {9985}, {9}}, - {"\U00000061\U0000200d\U00002701", {97, 9985}, {4, 7}}}}; + {"\U00000061\U0000200d\U00002701", {97, 9985}, {4, 7}}, + {"\U00000915\U00000924", {2325, 2340}, {3, 6}}, + {"\U00000915\U0000094d\U00000924", {2325}, {9}}, + {"\U00000915\U0000094d\U0000094d\U00000924", {2325}, {12}}, + {"\U00000915\U0000094d\U0000200d\U00000924", {2325}, {12}}, + {"\U00000915\U0000093c\U0000200d\U0000094d\U00000924", {2325}, {15}}, + {"\U00000915\U0000093c\U0000094d\U0000200d\U00000924", {2325}, {15}}, + {"\U00000915\U0000094d\U00000924\U0000094d\U0000092f", {2325}, {15}}, + {"\U00000915\U0000094d\U00000061", {2325, 97}, {6, 7}}, + {"\U00000061\U0000094d\U00000924", {97, 2340}, {4, 7}}, + {"\U0000003f\U0000094d\U00000924", {63, 2340}, {4, 7}}, + {"\U00000915\U0000094d\U0000094d\U00000924", {2325}, {12}}}}; /// The data for UTF-16. /// @@ -692,7 +1277,7 @@ std::array, 602> data_utf8 = {{ /// since the size of the code units differ the breaks can contain different /// values. #ifndef TEST_HAS_NO_WIDE_CHARACTERS -std::array, 602> data_utf16 = {{ +std::array, 1187> data_utf16 = {{ {L"\U00000020\U00000020", {32, 32}, {1, 2}}, {L"\U00000020\U00000308\U00000020", {32, 32}, {2, 3}}, {L"\U00000020\U0000000d", {32, 13}, {1, 2}}, @@ -707,8 +1292,8 @@ std::array, 602> data_utf16 = {{ {L"\U00000020\U00000308\U0001f1e6", {32, 127462}, {2, 4}}, {L"\U00000020\U00000600", {32, 1536}, {1, 2}}, {L"\U00000020\U00000308\U00000600", {32, 1536}, {2, 3}}, - {L"\U00000020\U00000903", {32}, {2}}, - {L"\U00000020\U00000308\U00000903", {32}, {3}}, + {L"\U00000020\U00000a03", {32}, {2}}, + {L"\U00000020\U00000308\U00000a03", {32}, {3}}, {L"\U00000020\U00001100", {32, 4352}, {1, 2}}, {L"\U00000020\U00000308\U00001100", {32, 4352}, {2, 3}}, {L"\U00000020\U00001160", {32, 4448}, {1, 2}}, @@ -719,10 +1304,24 @@ std::array, 602> data_utf16 = {{ {L"\U00000020\U00000308\U0000ac00", {32, 44032}, {2, 3}}, {L"\U00000020\U0000ac01", {32, 44033}, {1, 2}}, {L"\U00000020\U00000308\U0000ac01", {32, 44033}, {2, 3}}, + {L"\U00000020\U00000900", {32}, {2}}, + {L"\U00000020\U00000308\U00000900", {32}, {3}}, + {L"\U00000020\U00000903", {32}, {2}}, + {L"\U00000020\U00000308\U00000903", {32}, {3}}, + {L"\U00000020\U00000904", {32, 2308}, {1, 2}}, + {L"\U00000020\U00000308\U00000904", {32, 2308}, {2, 3}}, + {L"\U00000020\U00000d4e", {32, 3406}, {1, 2}}, + {L"\U00000020\U00000308\U00000d4e", {32, 3406}, {2, 3}}, + {L"\U00000020\U00000915", {32, 2325}, {1, 2}}, + {L"\U00000020\U00000308\U00000915", {32, 2325}, {2, 3}}, {L"\U00000020\U0000231a", {32, 8986}, {1, 2}}, {L"\U00000020\U00000308\U0000231a", {32, 8986}, {2, 3}}, {L"\U00000020\U00000300", {32}, {2}}, {L"\U00000020\U00000308\U00000300", {32}, {3}}, + {L"\U00000020\U0000093c", {32}, {2}}, + {L"\U00000020\U00000308\U0000093c", {32}, {3}}, + {L"\U00000020\U0000094d", {32}, {2}}, + {L"\U00000020\U00000308\U0000094d", {32}, {3}}, {L"\U00000020\U0000200d", {32}, {2}}, {L"\U00000020\U00000308\U0000200d", {32}, {3}}, {L"\U00000020\U00000378", {32, 888}, {1, 2}}, @@ -741,8 +1340,8 @@ std::array, 602> data_utf16 = {{ {L"\U0000000d\U00000308\U0001f1e6", {13, 776, 127462}, {1, 2, 4}}, {L"\U0000000d\U00000600", {13, 1536}, {1, 2}}, {L"\U0000000d\U00000308\U00000600", {13, 776, 1536}, {1, 2, 3}}, - {L"\U0000000d\U00000903", {13, 2307}, {1, 2}}, - {L"\U0000000d\U00000308\U00000903", {13, 776}, {1, 3}}, + {L"\U0000000d\U00000a03", {13, 2563}, {1, 2}}, + {L"\U0000000d\U00000308\U00000a03", {13, 776}, {1, 3}}, {L"\U0000000d\U00001100", {13, 4352}, {1, 2}}, 
{L"\U0000000d\U00000308\U00001100", {13, 776, 4352}, {1, 2, 3}}, {L"\U0000000d\U00001160", {13, 4448}, {1, 2}}, @@ -753,10 +1352,24 @@ std::array, 602> data_utf16 = {{ {L"\U0000000d\U00000308\U0000ac00", {13, 776, 44032}, {1, 2, 3}}, {L"\U0000000d\U0000ac01", {13, 44033}, {1, 2}}, {L"\U0000000d\U00000308\U0000ac01", {13, 776, 44033}, {1, 2, 3}}, + {L"\U0000000d\U00000900", {13, 2304}, {1, 2}}, + {L"\U0000000d\U00000308\U00000900", {13, 776}, {1, 3}}, + {L"\U0000000d\U00000903", {13, 2307}, {1, 2}}, + {L"\U0000000d\U00000308\U00000903", {13, 776}, {1, 3}}, + {L"\U0000000d\U00000904", {13, 2308}, {1, 2}}, + {L"\U0000000d\U00000308\U00000904", {13, 776, 2308}, {1, 2, 3}}, + {L"\U0000000d\U00000d4e", {13, 3406}, {1, 2}}, + {L"\U0000000d\U00000308\U00000d4e", {13, 776, 3406}, {1, 2, 3}}, + {L"\U0000000d\U00000915", {13, 2325}, {1, 2}}, + {L"\U0000000d\U00000308\U00000915", {13, 776, 2325}, {1, 2, 3}}, {L"\U0000000d\U0000231a", {13, 8986}, {1, 2}}, {L"\U0000000d\U00000308\U0000231a", {13, 776, 8986}, {1, 2, 3}}, {L"\U0000000d\U00000300", {13, 768}, {1, 2}}, {L"\U0000000d\U00000308\U00000300", {13, 776}, {1, 3}}, + {L"\U0000000d\U0000093c", {13, 2364}, {1, 2}}, + {L"\U0000000d\U00000308\U0000093c", {13, 776}, {1, 3}}, + {L"\U0000000d\U0000094d", {13, 2381}, {1, 2}}, + {L"\U0000000d\U00000308\U0000094d", {13, 776}, {1, 3}}, {L"\U0000000d\U0000200d", {13, 8205}, {1, 2}}, {L"\U0000000d\U00000308\U0000200d", {13, 776}, {1, 3}}, {L"\U0000000d\U00000378", {13, 888}, {1, 2}}, @@ -775,8 +1388,8 @@ std::array, 602> data_utf16 = {{ {L"\U0000000a\U00000308\U0001f1e6", {10, 776, 127462}, {1, 2, 4}}, {L"\U0000000a\U00000600", {10, 1536}, {1, 2}}, {L"\U0000000a\U00000308\U00000600", {10, 776, 1536}, {1, 2, 3}}, - {L"\U0000000a\U00000903", {10, 2307}, {1, 2}}, - {L"\U0000000a\U00000308\U00000903", {10, 776}, {1, 3}}, + {L"\U0000000a\U00000a03", {10, 2563}, {1, 2}}, + {L"\U0000000a\U00000308\U00000a03", {10, 776}, {1, 3}}, {L"\U0000000a\U00001100", {10, 4352}, {1, 2}}, {L"\U0000000a\U00000308\U00001100", {10, 776, 4352}, {1, 2, 3}}, {L"\U0000000a\U00001160", {10, 4448}, {1, 2}}, @@ -787,10 +1400,24 @@ std::array, 602> data_utf16 = {{ {L"\U0000000a\U00000308\U0000ac00", {10, 776, 44032}, {1, 2, 3}}, {L"\U0000000a\U0000ac01", {10, 44033}, {1, 2}}, {L"\U0000000a\U00000308\U0000ac01", {10, 776, 44033}, {1, 2, 3}}, + {L"\U0000000a\U00000900", {10, 2304}, {1, 2}}, + {L"\U0000000a\U00000308\U00000900", {10, 776}, {1, 3}}, + {L"\U0000000a\U00000903", {10, 2307}, {1, 2}}, + {L"\U0000000a\U00000308\U00000903", {10, 776}, {1, 3}}, + {L"\U0000000a\U00000904", {10, 2308}, {1, 2}}, + {L"\U0000000a\U00000308\U00000904", {10, 776, 2308}, {1, 2, 3}}, + {L"\U0000000a\U00000d4e", {10, 3406}, {1, 2}}, + {L"\U0000000a\U00000308\U00000d4e", {10, 776, 3406}, {1, 2, 3}}, + {L"\U0000000a\U00000915", {10, 2325}, {1, 2}}, + {L"\U0000000a\U00000308\U00000915", {10, 776, 2325}, {1, 2, 3}}, {L"\U0000000a\U0000231a", {10, 8986}, {1, 2}}, {L"\U0000000a\U00000308\U0000231a", {10, 776, 8986}, {1, 2, 3}}, {L"\U0000000a\U00000300", {10, 768}, {1, 2}}, {L"\U0000000a\U00000308\U00000300", {10, 776}, {1, 3}}, + {L"\U0000000a\U0000093c", {10, 2364}, {1, 2}}, + {L"\U0000000a\U00000308\U0000093c", {10, 776}, {1, 3}}, + {L"\U0000000a\U0000094d", {10, 2381}, {1, 2}}, + {L"\U0000000a\U00000308\U0000094d", {10, 776}, {1, 3}}, {L"\U0000000a\U0000200d", {10, 8205}, {1, 2}}, {L"\U0000000a\U00000308\U0000200d", {10, 776}, {1, 3}}, {L"\U0000000a\U00000378", {10, 888}, {1, 2}}, @@ -809,8 +1436,8 @@ std::array, 602> data_utf16 = {{ 
{L"\U00000001\U00000308\U0001f1e6", {1, 776, 127462}, {1, 2, 4}}, {L"\U00000001\U00000600", {1, 1536}, {1, 2}}, {L"\U00000001\U00000308\U00000600", {1, 776, 1536}, {1, 2, 3}}, - {L"\U00000001\U00000903", {1, 2307}, {1, 2}}, - {L"\U00000001\U00000308\U00000903", {1, 776}, {1, 3}}, + {L"\U00000001\U00000a03", {1, 2563}, {1, 2}}, + {L"\U00000001\U00000308\U00000a03", {1, 776}, {1, 3}}, {L"\U00000001\U00001100", {1, 4352}, {1, 2}}, {L"\U00000001\U00000308\U00001100", {1, 776, 4352}, {1, 2, 3}}, {L"\U00000001\U00001160", {1, 4448}, {1, 2}}, @@ -821,10 +1448,24 @@ std::array, 602> data_utf16 = {{ {L"\U00000001\U00000308\U0000ac00", {1, 776, 44032}, {1, 2, 3}}, {L"\U00000001\U0000ac01", {1, 44033}, {1, 2}}, {L"\U00000001\U00000308\U0000ac01", {1, 776, 44033}, {1, 2, 3}}, + {L"\U00000001\U00000900", {1, 2304}, {1, 2}}, + {L"\U00000001\U00000308\U00000900", {1, 776}, {1, 3}}, + {L"\U00000001\U00000903", {1, 2307}, {1, 2}}, + {L"\U00000001\U00000308\U00000903", {1, 776}, {1, 3}}, + {L"\U00000001\U00000904", {1, 2308}, {1, 2}}, + {L"\U00000001\U00000308\U00000904", {1, 776, 2308}, {1, 2, 3}}, + {L"\U00000001\U00000d4e", {1, 3406}, {1, 2}}, + {L"\U00000001\U00000308\U00000d4e", {1, 776, 3406}, {1, 2, 3}}, + {L"\U00000001\U00000915", {1, 2325}, {1, 2}}, + {L"\U00000001\U00000308\U00000915", {1, 776, 2325}, {1, 2, 3}}, {L"\U00000001\U0000231a", {1, 8986}, {1, 2}}, {L"\U00000001\U00000308\U0000231a", {1, 776, 8986}, {1, 2, 3}}, {L"\U00000001\U00000300", {1, 768}, {1, 2}}, {L"\U00000001\U00000308\U00000300", {1, 776}, {1, 3}}, + {L"\U00000001\U0000093c", {1, 2364}, {1, 2}}, + {L"\U00000001\U00000308\U0000093c", {1, 776}, {1, 3}}, + {L"\U00000001\U0000094d", {1, 2381}, {1, 2}}, + {L"\U00000001\U00000308\U0000094d", {1, 776}, {1, 3}}, {L"\U00000001\U0000200d", {1, 8205}, {1, 2}}, {L"\U00000001\U00000308\U0000200d", {1, 776}, {1, 3}}, {L"\U00000001\U00000378", {1, 888}, {1, 2}}, @@ -843,8 +1484,8 @@ std::array, 602> data_utf16 = {{ {L"\U0000034f\U00000308\U0001f1e6", {847, 127462}, {2, 4}}, {L"\U0000034f\U00000600", {847, 1536}, {1, 2}}, {L"\U0000034f\U00000308\U00000600", {847, 1536}, {2, 3}}, - {L"\U0000034f\U00000903", {847}, {2}}, - {L"\U0000034f\U00000308\U00000903", {847}, {3}}, + {L"\U0000034f\U00000a03", {847}, {2}}, + {L"\U0000034f\U00000308\U00000a03", {847}, {3}}, {L"\U0000034f\U00001100", {847, 4352}, {1, 2}}, {L"\U0000034f\U00000308\U00001100", {847, 4352}, {2, 3}}, {L"\U0000034f\U00001160", {847, 4448}, {1, 2}}, @@ -855,10 +1496,24 @@ std::array, 602> data_utf16 = {{ {L"\U0000034f\U00000308\U0000ac00", {847, 44032}, {2, 3}}, {L"\U0000034f\U0000ac01", {847, 44033}, {1, 2}}, {L"\U0000034f\U00000308\U0000ac01", {847, 44033}, {2, 3}}, + {L"\U0000034f\U00000900", {847}, {2}}, + {L"\U0000034f\U00000308\U00000900", {847}, {3}}, + {L"\U0000034f\U00000903", {847}, {2}}, + {L"\U0000034f\U00000308\U00000903", {847}, {3}}, + {L"\U0000034f\U00000904", {847, 2308}, {1, 2}}, + {L"\U0000034f\U00000308\U00000904", {847, 2308}, {2, 3}}, + {L"\U0000034f\U00000d4e", {847, 3406}, {1, 2}}, + {L"\U0000034f\U00000308\U00000d4e", {847, 3406}, {2, 3}}, + {L"\U0000034f\U00000915", {847, 2325}, {1, 2}}, + {L"\U0000034f\U00000308\U00000915", {847, 2325}, {2, 3}}, {L"\U0000034f\U0000231a", {847, 8986}, {1, 2}}, {L"\U0000034f\U00000308\U0000231a", {847, 8986}, {2, 3}}, {L"\U0000034f\U00000300", {847}, {2}}, {L"\U0000034f\U00000308\U00000300", {847}, {3}}, + {L"\U0000034f\U0000093c", {847}, {2}}, + {L"\U0000034f\U00000308\U0000093c", {847}, {3}}, + {L"\U0000034f\U0000094d", {847}, {2}}, + {L"\U0000034f\U00000308\U0000094d", 
{847}, {3}}, {L"\U0000034f\U0000200d", {847}, {2}}, {L"\U0000034f\U00000308\U0000200d", {847}, {3}}, {L"\U0000034f\U00000378", {847, 888}, {1, 2}}, @@ -877,8 +1532,8 @@ std::array, 602> data_utf16 = {{ {L"\U0001f1e6\U00000308\U0001f1e6", {127462, 127462}, {3, 5}}, {L"\U0001f1e6\U00000600", {127462, 1536}, {2, 3}}, {L"\U0001f1e6\U00000308\U00000600", {127462, 1536}, {3, 4}}, - {L"\U0001f1e6\U00000903", {127462}, {3}}, - {L"\U0001f1e6\U00000308\U00000903", {127462}, {4}}, + {L"\U0001f1e6\U00000a03", {127462}, {3}}, + {L"\U0001f1e6\U00000308\U00000a03", {127462}, {4}}, {L"\U0001f1e6\U00001100", {127462, 4352}, {2, 3}}, {L"\U0001f1e6\U00000308\U00001100", {127462, 4352}, {3, 4}}, {L"\U0001f1e6\U00001160", {127462, 4448}, {2, 3}}, @@ -889,10 +1544,24 @@ std::array, 602> data_utf16 = {{ {L"\U0001f1e6\U00000308\U0000ac00", {127462, 44032}, {3, 4}}, {L"\U0001f1e6\U0000ac01", {127462, 44033}, {2, 3}}, {L"\U0001f1e6\U00000308\U0000ac01", {127462, 44033}, {3, 4}}, + {L"\U0001f1e6\U00000900", {127462}, {3}}, + {L"\U0001f1e6\U00000308\U00000900", {127462}, {4}}, + {L"\U0001f1e6\U00000903", {127462}, {3}}, + {L"\U0001f1e6\U00000308\U00000903", {127462}, {4}}, + {L"\U0001f1e6\U00000904", {127462, 2308}, {2, 3}}, + {L"\U0001f1e6\U00000308\U00000904", {127462, 2308}, {3, 4}}, + {L"\U0001f1e6\U00000d4e", {127462, 3406}, {2, 3}}, + {L"\U0001f1e6\U00000308\U00000d4e", {127462, 3406}, {3, 4}}, + {L"\U0001f1e6\U00000915", {127462, 2325}, {2, 3}}, + {L"\U0001f1e6\U00000308\U00000915", {127462, 2325}, {3, 4}}, {L"\U0001f1e6\U0000231a", {127462, 8986}, {2, 3}}, {L"\U0001f1e6\U00000308\U0000231a", {127462, 8986}, {3, 4}}, {L"\U0001f1e6\U00000300", {127462}, {3}}, {L"\U0001f1e6\U00000308\U00000300", {127462}, {4}}, + {L"\U0001f1e6\U0000093c", {127462}, {3}}, + {L"\U0001f1e6\U00000308\U0000093c", {127462}, {4}}, + {L"\U0001f1e6\U0000094d", {127462}, {3}}, + {L"\U0001f1e6\U00000308\U0000094d", {127462}, {4}}, {L"\U0001f1e6\U0000200d", {127462}, {3}}, {L"\U0001f1e6\U00000308\U0000200d", {127462}, {4}}, {L"\U0001f1e6\U00000378", {127462, 888}, {2, 3}}, @@ -911,8 +1580,8 @@ std::array, 602> data_utf16 = {{ {L"\U00000600\U00000308\U0001f1e6", {1536, 127462}, {2, 4}}, {L"\U00000600\U00000600", {1536}, {2}}, {L"\U00000600\U00000308\U00000600", {1536, 1536}, {2, 3}}, - {L"\U00000600\U00000903", {1536}, {2}}, - {L"\U00000600\U00000308\U00000903", {1536}, {3}}, + {L"\U00000600\U00000a03", {1536}, {2}}, + {L"\U00000600\U00000308\U00000a03", {1536}, {3}}, {L"\U00000600\U00001100", {1536}, {2}}, {L"\U00000600\U00000308\U00001100", {1536, 4352}, {2, 3}}, {L"\U00000600\U00001160", {1536}, {2}}, @@ -923,48 +1592,76 @@ std::array, 602> data_utf16 = {{ {L"\U00000600\U00000308\U0000ac00", {1536, 44032}, {2, 3}}, {L"\U00000600\U0000ac01", {1536}, {2}}, {L"\U00000600\U00000308\U0000ac01", {1536, 44033}, {2, 3}}, + {L"\U00000600\U00000900", {1536}, {2}}, + {L"\U00000600\U00000308\U00000900", {1536}, {3}}, + {L"\U00000600\U00000903", {1536}, {2}}, + {L"\U00000600\U00000308\U00000903", {1536}, {3}}, + {L"\U00000600\U00000904", {1536}, {2}}, + {L"\U00000600\U00000308\U00000904", {1536, 2308}, {2, 3}}, + {L"\U00000600\U00000d4e", {1536}, {2}}, + {L"\U00000600\U00000308\U00000d4e", {1536, 3406}, {2, 3}}, + {L"\U00000600\U00000915", {1536}, {2}}, + {L"\U00000600\U00000308\U00000915", {1536, 2325}, {2, 3}}, {L"\U00000600\U0000231a", {1536}, {2}}, {L"\U00000600\U00000308\U0000231a", {1536, 8986}, {2, 3}}, {L"\U00000600\U00000300", {1536}, {2}}, {L"\U00000600\U00000308\U00000300", {1536}, {3}}, + {L"\U00000600\U0000093c", {1536}, {2}}, + 
{L"\U00000600\U00000308\U0000093c", {1536}, {3}}, + {L"\U00000600\U0000094d", {1536}, {2}}, + {L"\U00000600\U00000308\U0000094d", {1536}, {3}}, {L"\U00000600\U0000200d", {1536}, {2}}, {L"\U00000600\U00000308\U0000200d", {1536}, {3}}, {L"\U00000600\U00000378", {1536}, {2}}, {L"\U00000600\U00000308\U00000378", {1536, 888}, {2, 3}}, - {L"\U00000903\U00000020", {2307, 32}, {1, 2}}, - {L"\U00000903\U00000308\U00000020", {2307, 32}, {2, 3}}, - {L"\U00000903\U0000000d", {2307, 13}, {1, 2}}, - {L"\U00000903\U00000308\U0000000d", {2307, 13}, {2, 3}}, - {L"\U00000903\U0000000a", {2307, 10}, {1, 2}}, - {L"\U00000903\U00000308\U0000000a", {2307, 10}, {2, 3}}, - {L"\U00000903\U00000001", {2307, 1}, {1, 2}}, - {L"\U00000903\U00000308\U00000001", {2307, 1}, {2, 3}}, - {L"\U00000903\U0000034f", {2307}, {2}}, - {L"\U00000903\U00000308\U0000034f", {2307}, {3}}, - {L"\U00000903\U0001f1e6", {2307, 127462}, {1, 3}}, - {L"\U00000903\U00000308\U0001f1e6", {2307, 127462}, {2, 4}}, - {L"\U00000903\U00000600", {2307, 1536}, {1, 2}}, - {L"\U00000903\U00000308\U00000600", {2307, 1536}, {2, 3}}, - {L"\U00000903\U00000903", {2307}, {2}}, - {L"\U00000903\U00000308\U00000903", {2307}, {3}}, - {L"\U00000903\U00001100", {2307, 4352}, {1, 2}}, - {L"\U00000903\U00000308\U00001100", {2307, 4352}, {2, 3}}, - {L"\U00000903\U00001160", {2307, 4448}, {1, 2}}, - {L"\U00000903\U00000308\U00001160", {2307, 4448}, {2, 3}}, - {L"\U00000903\U000011a8", {2307, 4520}, {1, 2}}, - {L"\U00000903\U00000308\U000011a8", {2307, 4520}, {2, 3}}, - {L"\U00000903\U0000ac00", {2307, 44032}, {1, 2}}, - {L"\U00000903\U00000308\U0000ac00", {2307, 44032}, {2, 3}}, - {L"\U00000903\U0000ac01", {2307, 44033}, {1, 2}}, - {L"\U00000903\U00000308\U0000ac01", {2307, 44033}, {2, 3}}, - {L"\U00000903\U0000231a", {2307, 8986}, {1, 2}}, - {L"\U00000903\U00000308\U0000231a", {2307, 8986}, {2, 3}}, - {L"\U00000903\U00000300", {2307}, {2}}, - {L"\U00000903\U00000308\U00000300", {2307}, {3}}, - {L"\U00000903\U0000200d", {2307}, {2}}, - {L"\U00000903\U00000308\U0000200d", {2307}, {3}}, - {L"\U00000903\U00000378", {2307, 888}, {1, 2}}, - {L"\U00000903\U00000308\U00000378", {2307, 888}, {2, 3}}, + {L"\U00000a03\U00000020", {2563, 32}, {1, 2}}, + {L"\U00000a03\U00000308\U00000020", {2563, 32}, {2, 3}}, + {L"\U00000a03\U0000000d", {2563, 13}, {1, 2}}, + {L"\U00000a03\U00000308\U0000000d", {2563, 13}, {2, 3}}, + {L"\U00000a03\U0000000a", {2563, 10}, {1, 2}}, + {L"\U00000a03\U00000308\U0000000a", {2563, 10}, {2, 3}}, + {L"\U00000a03\U00000001", {2563, 1}, {1, 2}}, + {L"\U00000a03\U00000308\U00000001", {2563, 1}, {2, 3}}, + {L"\U00000a03\U0000034f", {2563}, {2}}, + {L"\U00000a03\U00000308\U0000034f", {2563}, {3}}, + {L"\U00000a03\U0001f1e6", {2563, 127462}, {1, 3}}, + {L"\U00000a03\U00000308\U0001f1e6", {2563, 127462}, {2, 4}}, + {L"\U00000a03\U00000600", {2563, 1536}, {1, 2}}, + {L"\U00000a03\U00000308\U00000600", {2563, 1536}, {2, 3}}, + {L"\U00000a03\U00000a03", {2563}, {2}}, + {L"\U00000a03\U00000308\U00000a03", {2563}, {3}}, + {L"\U00000a03\U00001100", {2563, 4352}, {1, 2}}, + {L"\U00000a03\U00000308\U00001100", {2563, 4352}, {2, 3}}, + {L"\U00000a03\U00001160", {2563, 4448}, {1, 2}}, + {L"\U00000a03\U00000308\U00001160", {2563, 4448}, {2, 3}}, + {L"\U00000a03\U000011a8", {2563, 4520}, {1, 2}}, + {L"\U00000a03\U00000308\U000011a8", {2563, 4520}, {2, 3}}, + {L"\U00000a03\U0000ac00", {2563, 44032}, {1, 2}}, + {L"\U00000a03\U00000308\U0000ac00", {2563, 44032}, {2, 3}}, + {L"\U00000a03\U0000ac01", {2563, 44033}, {1, 2}}, + {L"\U00000a03\U00000308\U0000ac01", {2563, 44033}, 
{2, 3}}, + {L"\U00000a03\U00000900", {2563}, {2}}, + {L"\U00000a03\U00000308\U00000900", {2563}, {3}}, + {L"\U00000a03\U00000903", {2563}, {2}}, + {L"\U00000a03\U00000308\U00000903", {2563}, {3}}, + {L"\U00000a03\U00000904", {2563, 2308}, {1, 2}}, + {L"\U00000a03\U00000308\U00000904", {2563, 2308}, {2, 3}}, + {L"\U00000a03\U00000d4e", {2563, 3406}, {1, 2}}, + {L"\U00000a03\U00000308\U00000d4e", {2563, 3406}, {2, 3}}, + {L"\U00000a03\U00000915", {2563, 2325}, {1, 2}}, + {L"\U00000a03\U00000308\U00000915", {2563, 2325}, {2, 3}}, + {L"\U00000a03\U0000231a", {2563, 8986}, {1, 2}}, + {L"\U00000a03\U00000308\U0000231a", {2563, 8986}, {2, 3}}, + {L"\U00000a03\U00000300", {2563}, {2}}, + {L"\U00000a03\U00000308\U00000300", {2563}, {3}}, + {L"\U00000a03\U0000093c", {2563}, {2}}, + {L"\U00000a03\U00000308\U0000093c", {2563}, {3}}, + {L"\U00000a03\U0000094d", {2563}, {2}}, + {L"\U00000a03\U00000308\U0000094d", {2563}, {3}}, + {L"\U00000a03\U0000200d", {2563}, {2}}, + {L"\U00000a03\U00000308\U0000200d", {2563}, {3}}, + {L"\U00000a03\U00000378", {2563, 888}, {1, 2}}, + {L"\U00000a03\U00000308\U00000378", {2563, 888}, {2, 3}}, {L"\U00001100\U00000020", {4352, 32}, {1, 2}}, {L"\U00001100\U00000308\U00000020", {4352, 32}, {2, 3}}, {L"\U00001100\U0000000d", {4352, 13}, {1, 2}}, @@ -979,8 +1676,8 @@ std::array, 602> data_utf16 = {{ {L"\U00001100\U00000308\U0001f1e6", {4352, 127462}, {2, 4}}, {L"\U00001100\U00000600", {4352, 1536}, {1, 2}}, {L"\U00001100\U00000308\U00000600", {4352, 1536}, {2, 3}}, - {L"\U00001100\U00000903", {4352}, {2}}, - {L"\U00001100\U00000308\U00000903", {4352}, {3}}, + {L"\U00001100\U00000a03", {4352}, {2}}, + {L"\U00001100\U00000308\U00000a03", {4352}, {3}}, {L"\U00001100\U00001100", {4352}, {2}}, {L"\U00001100\U00000308\U00001100", {4352, 4352}, {2, 3}}, {L"\U00001100\U00001160", {4352}, {2}}, @@ -991,10 +1688,24 @@ std::array, 602> data_utf16 = {{ {L"\U00001100\U00000308\U0000ac00", {4352, 44032}, {2, 3}}, {L"\U00001100\U0000ac01", {4352}, {2}}, {L"\U00001100\U00000308\U0000ac01", {4352, 44033}, {2, 3}}, + {L"\U00001100\U00000900", {4352}, {2}}, + {L"\U00001100\U00000308\U00000900", {4352}, {3}}, + {L"\U00001100\U00000903", {4352}, {2}}, + {L"\U00001100\U00000308\U00000903", {4352}, {3}}, + {L"\U00001100\U00000904", {4352, 2308}, {1, 2}}, + {L"\U00001100\U00000308\U00000904", {4352, 2308}, {2, 3}}, + {L"\U00001100\U00000d4e", {4352, 3406}, {1, 2}}, + {L"\U00001100\U00000308\U00000d4e", {4352, 3406}, {2, 3}}, + {L"\U00001100\U00000915", {4352, 2325}, {1, 2}}, + {L"\U00001100\U00000308\U00000915", {4352, 2325}, {2, 3}}, {L"\U00001100\U0000231a", {4352, 8986}, {1, 2}}, {L"\U00001100\U00000308\U0000231a", {4352, 8986}, {2, 3}}, {L"\U00001100\U00000300", {4352}, {2}}, {L"\U00001100\U00000308\U00000300", {4352}, {3}}, + {L"\U00001100\U0000093c", {4352}, {2}}, + {L"\U00001100\U00000308\U0000093c", {4352}, {3}}, + {L"\U00001100\U0000094d", {4352}, {2}}, + {L"\U00001100\U00000308\U0000094d", {4352}, {3}}, {L"\U00001100\U0000200d", {4352}, {2}}, {L"\U00001100\U00000308\U0000200d", {4352}, {3}}, {L"\U00001100\U00000378", {4352, 888}, {1, 2}}, @@ -1013,8 +1724,8 @@ std::array, 602> data_utf16 = {{ {L"\U00001160\U00000308\U0001f1e6", {4448, 127462}, {2, 4}}, {L"\U00001160\U00000600", {4448, 1536}, {1, 2}}, {L"\U00001160\U00000308\U00000600", {4448, 1536}, {2, 3}}, - {L"\U00001160\U00000903", {4448}, {2}}, - {L"\U00001160\U00000308\U00000903", {4448}, {3}}, + {L"\U00001160\U00000a03", {4448}, {2}}, + {L"\U00001160\U00000308\U00000a03", {4448}, {3}}, {L"\U00001160\U00001100", {4448, 4352}, {1, 
2}}, {L"\U00001160\U00000308\U00001100", {4448, 4352}, {2, 3}}, {L"\U00001160\U00001160", {4448}, {2}}, @@ -1025,10 +1736,24 @@ std::array, 602> data_utf16 = {{ {L"\U00001160\U00000308\U0000ac00", {4448, 44032}, {2, 3}}, {L"\U00001160\U0000ac01", {4448, 44033}, {1, 2}}, {L"\U00001160\U00000308\U0000ac01", {4448, 44033}, {2, 3}}, + {L"\U00001160\U00000900", {4448}, {2}}, + {L"\U00001160\U00000308\U00000900", {4448}, {3}}, + {L"\U00001160\U00000903", {4448}, {2}}, + {L"\U00001160\U00000308\U00000903", {4448}, {3}}, + {L"\U00001160\U00000904", {4448, 2308}, {1, 2}}, + {L"\U00001160\U00000308\U00000904", {4448, 2308}, {2, 3}}, + {L"\U00001160\U00000d4e", {4448, 3406}, {1, 2}}, + {L"\U00001160\U00000308\U00000d4e", {4448, 3406}, {2, 3}}, + {L"\U00001160\U00000915", {4448, 2325}, {1, 2}}, + {L"\U00001160\U00000308\U00000915", {4448, 2325}, {2, 3}}, {L"\U00001160\U0000231a", {4448, 8986}, {1, 2}}, {L"\U00001160\U00000308\U0000231a", {4448, 8986}, {2, 3}}, {L"\U00001160\U00000300", {4448}, {2}}, {L"\U00001160\U00000308\U00000300", {4448}, {3}}, + {L"\U00001160\U0000093c", {4448}, {2}}, + {L"\U00001160\U00000308\U0000093c", {4448}, {3}}, + {L"\U00001160\U0000094d", {4448}, {2}}, + {L"\U00001160\U00000308\U0000094d", {4448}, {3}}, {L"\U00001160\U0000200d", {4448}, {2}}, {L"\U00001160\U00000308\U0000200d", {4448}, {3}}, {L"\U00001160\U00000378", {4448, 888}, {1, 2}}, @@ -1047,8 +1772,8 @@ std::array, 602> data_utf16 = {{ {L"\U000011a8\U00000308\U0001f1e6", {4520, 127462}, {2, 4}}, {L"\U000011a8\U00000600", {4520, 1536}, {1, 2}}, {L"\U000011a8\U00000308\U00000600", {4520, 1536}, {2, 3}}, - {L"\U000011a8\U00000903", {4520}, {2}}, - {L"\U000011a8\U00000308\U00000903", {4520}, {3}}, + {L"\U000011a8\U00000a03", {4520}, {2}}, + {L"\U000011a8\U00000308\U00000a03", {4520}, {3}}, {L"\U000011a8\U00001100", {4520, 4352}, {1, 2}}, {L"\U000011a8\U00000308\U00001100", {4520, 4352}, {2, 3}}, {L"\U000011a8\U00001160", {4520, 4448}, {1, 2}}, @@ -1059,10 +1784,24 @@ std::array, 602> data_utf16 = {{ {L"\U000011a8\U00000308\U0000ac00", {4520, 44032}, {2, 3}}, {L"\U000011a8\U0000ac01", {4520, 44033}, {1, 2}}, {L"\U000011a8\U00000308\U0000ac01", {4520, 44033}, {2, 3}}, + {L"\U000011a8\U00000900", {4520}, {2}}, + {L"\U000011a8\U00000308\U00000900", {4520}, {3}}, + {L"\U000011a8\U00000903", {4520}, {2}}, + {L"\U000011a8\U00000308\U00000903", {4520}, {3}}, + {L"\U000011a8\U00000904", {4520, 2308}, {1, 2}}, + {L"\U000011a8\U00000308\U00000904", {4520, 2308}, {2, 3}}, + {L"\U000011a8\U00000d4e", {4520, 3406}, {1, 2}}, + {L"\U000011a8\U00000308\U00000d4e", {4520, 3406}, {2, 3}}, + {L"\U000011a8\U00000915", {4520, 2325}, {1, 2}}, + {L"\U000011a8\U00000308\U00000915", {4520, 2325}, {2, 3}}, {L"\U000011a8\U0000231a", {4520, 8986}, {1, 2}}, {L"\U000011a8\U00000308\U0000231a", {4520, 8986}, {2, 3}}, {L"\U000011a8\U00000300", {4520}, {2}}, {L"\U000011a8\U00000308\U00000300", {4520}, {3}}, + {L"\U000011a8\U0000093c", {4520}, {2}}, + {L"\U000011a8\U00000308\U0000093c", {4520}, {3}}, + {L"\U000011a8\U0000094d", {4520}, {2}}, + {L"\U000011a8\U00000308\U0000094d", {4520}, {3}}, {L"\U000011a8\U0000200d", {4520}, {2}}, {L"\U000011a8\U00000308\U0000200d", {4520}, {3}}, {L"\U000011a8\U00000378", {4520, 888}, {1, 2}}, @@ -1081,8 +1820,8 @@ std::array, 602> data_utf16 = {{ {L"\U0000ac00\U00000308\U0001f1e6", {44032, 127462}, {2, 4}}, {L"\U0000ac00\U00000600", {44032, 1536}, {1, 2}}, {L"\U0000ac00\U00000308\U00000600", {44032, 1536}, {2, 3}}, - {L"\U0000ac00\U00000903", {44032}, {2}}, - {L"\U0000ac00\U00000308\U00000903", {44032}, {3}}, + 
{L"\U0000ac00\U00000a03", {44032}, {2}}, + {L"\U0000ac00\U00000308\U00000a03", {44032}, {3}}, {L"\U0000ac00\U00001100", {44032, 4352}, {1, 2}}, {L"\U0000ac00\U00000308\U00001100", {44032, 4352}, {2, 3}}, {L"\U0000ac00\U00001160", {44032}, {2}}, @@ -1093,10 +1832,24 @@ std::array, 602> data_utf16 = {{ {L"\U0000ac00\U00000308\U0000ac00", {44032, 44032}, {2, 3}}, {L"\U0000ac00\U0000ac01", {44032, 44033}, {1, 2}}, {L"\U0000ac00\U00000308\U0000ac01", {44032, 44033}, {2, 3}}, + {L"\U0000ac00\U00000900", {44032}, {2}}, + {L"\U0000ac00\U00000308\U00000900", {44032}, {3}}, + {L"\U0000ac00\U00000903", {44032}, {2}}, + {L"\U0000ac00\U00000308\U00000903", {44032}, {3}}, + {L"\U0000ac00\U00000904", {44032, 2308}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000904", {44032, 2308}, {2, 3}}, + {L"\U0000ac00\U00000d4e", {44032, 3406}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000d4e", {44032, 3406}, {2, 3}}, + {L"\U0000ac00\U00000915", {44032, 2325}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000915", {44032, 2325}, {2, 3}}, {L"\U0000ac00\U0000231a", {44032, 8986}, {1, 2}}, {L"\U0000ac00\U00000308\U0000231a", {44032, 8986}, {2, 3}}, {L"\U0000ac00\U00000300", {44032}, {2}}, {L"\U0000ac00\U00000308\U00000300", {44032}, {3}}, + {L"\U0000ac00\U0000093c", {44032}, {2}}, + {L"\U0000ac00\U00000308\U0000093c", {44032}, {3}}, + {L"\U0000ac00\U0000094d", {44032}, {2}}, + {L"\U0000ac00\U00000308\U0000094d", {44032}, {3}}, {L"\U0000ac00\U0000200d", {44032}, {2}}, {L"\U0000ac00\U00000308\U0000200d", {44032}, {3}}, {L"\U0000ac00\U00000378", {44032, 888}, {1, 2}}, @@ -1115,8 +1868,8 @@ std::array, 602> data_utf16 = {{ {L"\U0000ac01\U00000308\U0001f1e6", {44033, 127462}, {2, 4}}, {L"\U0000ac01\U00000600", {44033, 1536}, {1, 2}}, {L"\U0000ac01\U00000308\U00000600", {44033, 1536}, {2, 3}}, - {L"\U0000ac01\U00000903", {44033}, {2}}, - {L"\U0000ac01\U00000308\U00000903", {44033}, {3}}, + {L"\U0000ac01\U00000a03", {44033}, {2}}, + {L"\U0000ac01\U00000308\U00000a03", {44033}, {3}}, {L"\U0000ac01\U00001100", {44033, 4352}, {1, 2}}, {L"\U0000ac01\U00000308\U00001100", {44033, 4352}, {2, 3}}, {L"\U0000ac01\U00001160", {44033, 4448}, {1, 2}}, @@ -1127,14 +1880,268 @@ std::array, 602> data_utf16 = {{ {L"\U0000ac01\U00000308\U0000ac00", {44033, 44032}, {2, 3}}, {L"\U0000ac01\U0000ac01", {44033, 44033}, {1, 2}}, {L"\U0000ac01\U00000308\U0000ac01", {44033, 44033}, {2, 3}}, + {L"\U0000ac01\U00000900", {44033}, {2}}, + {L"\U0000ac01\U00000308\U00000900", {44033}, {3}}, + {L"\U0000ac01\U00000903", {44033}, {2}}, + {L"\U0000ac01\U00000308\U00000903", {44033}, {3}}, + {L"\U0000ac01\U00000904", {44033, 2308}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000904", {44033, 2308}, {2, 3}}, + {L"\U0000ac01\U00000d4e", {44033, 3406}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000d4e", {44033, 3406}, {2, 3}}, + {L"\U0000ac01\U00000915", {44033, 2325}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000915", {44033, 2325}, {2, 3}}, {L"\U0000ac01\U0000231a", {44033, 8986}, {1, 2}}, {L"\U0000ac01\U00000308\U0000231a", {44033, 8986}, {2, 3}}, {L"\U0000ac01\U00000300", {44033}, {2}}, {L"\U0000ac01\U00000308\U00000300", {44033}, {3}}, + {L"\U0000ac01\U0000093c", {44033}, {2}}, + {L"\U0000ac01\U00000308\U0000093c", {44033}, {3}}, + {L"\U0000ac01\U0000094d", {44033}, {2}}, + {L"\U0000ac01\U00000308\U0000094d", {44033}, {3}}, {L"\U0000ac01\U0000200d", {44033}, {2}}, {L"\U0000ac01\U00000308\U0000200d", {44033}, {3}}, {L"\U0000ac01\U00000378", {44033, 888}, {1, 2}}, {L"\U0000ac01\U00000308\U00000378", {44033, 888}, {2, 3}}, + {L"\U00000900\U00000020", {2304, 32}, {1, 2}}, + 
{L"\U00000900\U00000308\U00000020", {2304, 32}, {2, 3}}, + {L"\U00000900\U0000000d", {2304, 13}, {1, 2}}, + {L"\U00000900\U00000308\U0000000d", {2304, 13}, {2, 3}}, + {L"\U00000900\U0000000a", {2304, 10}, {1, 2}}, + {L"\U00000900\U00000308\U0000000a", {2304, 10}, {2, 3}}, + {L"\U00000900\U00000001", {2304, 1}, {1, 2}}, + {L"\U00000900\U00000308\U00000001", {2304, 1}, {2, 3}}, + {L"\U00000900\U0000034f", {2304}, {2}}, + {L"\U00000900\U00000308\U0000034f", {2304}, {3}}, + {L"\U00000900\U0001f1e6", {2304, 127462}, {1, 3}}, + {L"\U00000900\U00000308\U0001f1e6", {2304, 127462}, {2, 4}}, + {L"\U00000900\U00000600", {2304, 1536}, {1, 2}}, + {L"\U00000900\U00000308\U00000600", {2304, 1536}, {2, 3}}, + {L"\U00000900\U00000a03", {2304}, {2}}, + {L"\U00000900\U00000308\U00000a03", {2304}, {3}}, + {L"\U00000900\U00001100", {2304, 4352}, {1, 2}}, + {L"\U00000900\U00000308\U00001100", {2304, 4352}, {2, 3}}, + {L"\U00000900\U00001160", {2304, 4448}, {1, 2}}, + {L"\U00000900\U00000308\U00001160", {2304, 4448}, {2, 3}}, + {L"\U00000900\U000011a8", {2304, 4520}, {1, 2}}, + {L"\U00000900\U00000308\U000011a8", {2304, 4520}, {2, 3}}, + {L"\U00000900\U0000ac00", {2304, 44032}, {1, 2}}, + {L"\U00000900\U00000308\U0000ac00", {2304, 44032}, {2, 3}}, + {L"\U00000900\U0000ac01", {2304, 44033}, {1, 2}}, + {L"\U00000900\U00000308\U0000ac01", {2304, 44033}, {2, 3}}, + {L"\U00000900\U00000900", {2304}, {2}}, + {L"\U00000900\U00000308\U00000900", {2304}, {3}}, + {L"\U00000900\U00000903", {2304}, {2}}, + {L"\U00000900\U00000308\U00000903", {2304}, {3}}, + {L"\U00000900\U00000904", {2304, 2308}, {1, 2}}, + {L"\U00000900\U00000308\U00000904", {2304, 2308}, {2, 3}}, + {L"\U00000900\U00000d4e", {2304, 3406}, {1, 2}}, + {L"\U00000900\U00000308\U00000d4e", {2304, 3406}, {2, 3}}, + {L"\U00000900\U00000915", {2304, 2325}, {1, 2}}, + {L"\U00000900\U00000308\U00000915", {2304, 2325}, {2, 3}}, + {L"\U00000900\U0000231a", {2304, 8986}, {1, 2}}, + {L"\U00000900\U00000308\U0000231a", {2304, 8986}, {2, 3}}, + {L"\U00000900\U00000300", {2304}, {2}}, + {L"\U00000900\U00000308\U00000300", {2304}, {3}}, + {L"\U00000900\U0000093c", {2304}, {2}}, + {L"\U00000900\U00000308\U0000093c", {2304}, {3}}, + {L"\U00000900\U0000094d", {2304}, {2}}, + {L"\U00000900\U00000308\U0000094d", {2304}, {3}}, + {L"\U00000900\U0000200d", {2304}, {2}}, + {L"\U00000900\U00000308\U0000200d", {2304}, {3}}, + {L"\U00000900\U00000378", {2304, 888}, {1, 2}}, + {L"\U00000900\U00000308\U00000378", {2304, 888}, {2, 3}}, + {L"\U00000903\U00000020", {2307, 32}, {1, 2}}, + {L"\U00000903\U00000308\U00000020", {2307, 32}, {2, 3}}, + {L"\U00000903\U0000000d", {2307, 13}, {1, 2}}, + {L"\U00000903\U00000308\U0000000d", {2307, 13}, {2, 3}}, + {L"\U00000903\U0000000a", {2307, 10}, {1, 2}}, + {L"\U00000903\U00000308\U0000000a", {2307, 10}, {2, 3}}, + {L"\U00000903\U00000001", {2307, 1}, {1, 2}}, + {L"\U00000903\U00000308\U00000001", {2307, 1}, {2, 3}}, + {L"\U00000903\U0000034f", {2307}, {2}}, + {L"\U00000903\U00000308\U0000034f", {2307}, {3}}, + {L"\U00000903\U0001f1e6", {2307, 127462}, {1, 3}}, + {L"\U00000903\U00000308\U0001f1e6", {2307, 127462}, {2, 4}}, + {L"\U00000903\U00000600", {2307, 1536}, {1, 2}}, + {L"\U00000903\U00000308\U00000600", {2307, 1536}, {2, 3}}, + {L"\U00000903\U00000a03", {2307}, {2}}, + {L"\U00000903\U00000308\U00000a03", {2307}, {3}}, + {L"\U00000903\U00001100", {2307, 4352}, {1, 2}}, + {L"\U00000903\U00000308\U00001100", {2307, 4352}, {2, 3}}, + {L"\U00000903\U00001160", {2307, 4448}, {1, 2}}, + {L"\U00000903\U00000308\U00001160", {2307, 4448}, {2, 3}}, + 
{L"\U00000903\U000011a8", {2307, 4520}, {1, 2}}, + {L"\U00000903\U00000308\U000011a8", {2307, 4520}, {2, 3}}, + {L"\U00000903\U0000ac00", {2307, 44032}, {1, 2}}, + {L"\U00000903\U00000308\U0000ac00", {2307, 44032}, {2, 3}}, + {L"\U00000903\U0000ac01", {2307, 44033}, {1, 2}}, + {L"\U00000903\U00000308\U0000ac01", {2307, 44033}, {2, 3}}, + {L"\U00000903\U00000900", {2307}, {2}}, + {L"\U00000903\U00000308\U00000900", {2307}, {3}}, + {L"\U00000903\U00000903", {2307}, {2}}, + {L"\U00000903\U00000308\U00000903", {2307}, {3}}, + {L"\U00000903\U00000904", {2307, 2308}, {1, 2}}, + {L"\U00000903\U00000308\U00000904", {2307, 2308}, {2, 3}}, + {L"\U00000903\U00000d4e", {2307, 3406}, {1, 2}}, + {L"\U00000903\U00000308\U00000d4e", {2307, 3406}, {2, 3}}, + {L"\U00000903\U00000915", {2307, 2325}, {1, 2}}, + {L"\U00000903\U00000308\U00000915", {2307, 2325}, {2, 3}}, + {L"\U00000903\U0000231a", {2307, 8986}, {1, 2}}, + {L"\U00000903\U00000308\U0000231a", {2307, 8986}, {2, 3}}, + {L"\U00000903\U00000300", {2307}, {2}}, + {L"\U00000903\U00000308\U00000300", {2307}, {3}}, + {L"\U00000903\U0000093c", {2307}, {2}}, + {L"\U00000903\U00000308\U0000093c", {2307}, {3}}, + {L"\U00000903\U0000094d", {2307}, {2}}, + {L"\U00000903\U00000308\U0000094d", {2307}, {3}}, + {L"\U00000903\U0000200d", {2307}, {2}}, + {L"\U00000903\U00000308\U0000200d", {2307}, {3}}, + {L"\U00000903\U00000378", {2307, 888}, {1, 2}}, + {L"\U00000903\U00000308\U00000378", {2307, 888}, {2, 3}}, + {L"\U00000904\U00000020", {2308, 32}, {1, 2}}, + {L"\U00000904\U00000308\U00000020", {2308, 32}, {2, 3}}, + {L"\U00000904\U0000000d", {2308, 13}, {1, 2}}, + {L"\U00000904\U00000308\U0000000d", {2308, 13}, {2, 3}}, + {L"\U00000904\U0000000a", {2308, 10}, {1, 2}}, + {L"\U00000904\U00000308\U0000000a", {2308, 10}, {2, 3}}, + {L"\U00000904\U00000001", {2308, 1}, {1, 2}}, + {L"\U00000904\U00000308\U00000001", {2308, 1}, {2, 3}}, + {L"\U00000904\U0000034f", {2308}, {2}}, + {L"\U00000904\U00000308\U0000034f", {2308}, {3}}, + {L"\U00000904\U0001f1e6", {2308, 127462}, {1, 3}}, + {L"\U00000904\U00000308\U0001f1e6", {2308, 127462}, {2, 4}}, + {L"\U00000904\U00000600", {2308, 1536}, {1, 2}}, + {L"\U00000904\U00000308\U00000600", {2308, 1536}, {2, 3}}, + {L"\U00000904\U00000a03", {2308}, {2}}, + {L"\U00000904\U00000308\U00000a03", {2308}, {3}}, + {L"\U00000904\U00001100", {2308, 4352}, {1, 2}}, + {L"\U00000904\U00000308\U00001100", {2308, 4352}, {2, 3}}, + {L"\U00000904\U00001160", {2308, 4448}, {1, 2}}, + {L"\U00000904\U00000308\U00001160", {2308, 4448}, {2, 3}}, + {L"\U00000904\U000011a8", {2308, 4520}, {1, 2}}, + {L"\U00000904\U00000308\U000011a8", {2308, 4520}, {2, 3}}, + {L"\U00000904\U0000ac00", {2308, 44032}, {1, 2}}, + {L"\U00000904\U00000308\U0000ac00", {2308, 44032}, {2, 3}}, + {L"\U00000904\U0000ac01", {2308, 44033}, {1, 2}}, + {L"\U00000904\U00000308\U0000ac01", {2308, 44033}, {2, 3}}, + {L"\U00000904\U00000900", {2308}, {2}}, + {L"\U00000904\U00000308\U00000900", {2308}, {3}}, + {L"\U00000904\U00000903", {2308}, {2}}, + {L"\U00000904\U00000308\U00000903", {2308}, {3}}, + {L"\U00000904\U00000904", {2308, 2308}, {1, 2}}, + {L"\U00000904\U00000308\U00000904", {2308, 2308}, {2, 3}}, + {L"\U00000904\U00000d4e", {2308, 3406}, {1, 2}}, + {L"\U00000904\U00000308\U00000d4e", {2308, 3406}, {2, 3}}, + {L"\U00000904\U00000915", {2308, 2325}, {1, 2}}, + {L"\U00000904\U00000308\U00000915", {2308, 2325}, {2, 3}}, + {L"\U00000904\U0000231a", {2308, 8986}, {1, 2}}, + {L"\U00000904\U00000308\U0000231a", {2308, 8986}, {2, 3}}, + {L"\U00000904\U00000300", {2308}, {2}}, + 
{L"\U00000904\U00000308\U00000300", {2308}, {3}}, + {L"\U00000904\U0000093c", {2308}, {2}}, + {L"\U00000904\U00000308\U0000093c", {2308}, {3}}, + {L"\U00000904\U0000094d", {2308}, {2}}, + {L"\U00000904\U00000308\U0000094d", {2308}, {3}}, + {L"\U00000904\U0000200d", {2308}, {2}}, + {L"\U00000904\U00000308\U0000200d", {2308}, {3}}, + {L"\U00000904\U00000378", {2308, 888}, {1, 2}}, + {L"\U00000904\U00000308\U00000378", {2308, 888}, {2, 3}}, + {L"\U00000d4e\U00000020", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000020", {3406, 32}, {2, 3}}, + {L"\U00000d4e\U0000000d", {3406, 13}, {1, 2}}, + {L"\U00000d4e\U00000308\U0000000d", {3406, 13}, {2, 3}}, + {L"\U00000d4e\U0000000a", {3406, 10}, {1, 2}}, + {L"\U00000d4e\U00000308\U0000000a", {3406, 10}, {2, 3}}, + {L"\U00000d4e\U00000001", {3406, 1}, {1, 2}}, + {L"\U00000d4e\U00000308\U00000001", {3406, 1}, {2, 3}}, + {L"\U00000d4e\U0000034f", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000034f", {3406}, {3}}, + {L"\U00000d4e\U0001f1e6", {3406}, {3}}, + {L"\U00000d4e\U00000308\U0001f1e6", {3406, 127462}, {2, 4}}, + {L"\U00000d4e\U00000600", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000600", {3406, 1536}, {2, 3}}, + {L"\U00000d4e\U00000a03", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000a03", {3406}, {3}}, + {L"\U00000d4e\U00001100", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00001100", {3406, 4352}, {2, 3}}, + {L"\U00000d4e\U00001160", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00001160", {3406, 4448}, {2, 3}}, + {L"\U00000d4e\U000011a8", {3406}, {2}}, + {L"\U00000d4e\U00000308\U000011a8", {3406, 4520}, {2, 3}}, + {L"\U00000d4e\U0000ac00", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000ac00", {3406, 44032}, {2, 3}}, + {L"\U00000d4e\U0000ac01", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000ac01", {3406, 44033}, {2, 3}}, + {L"\U00000d4e\U00000900", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000900", {3406}, {3}}, + {L"\U00000d4e\U00000903", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000903", {3406}, {3}}, + {L"\U00000d4e\U00000904", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000904", {3406, 2308}, {2, 3}}, + {L"\U00000d4e\U00000d4e", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000d4e", {3406, 3406}, {2, 3}}, + {L"\U00000d4e\U00000915", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000915", {3406, 2325}, {2, 3}}, + {L"\U00000d4e\U0000231a", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000231a", {3406, 8986}, {2, 3}}, + {L"\U00000d4e\U00000300", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000300", {3406}, {3}}, + {L"\U00000d4e\U0000093c", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000093c", {3406}, {3}}, + {L"\U00000d4e\U0000094d", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000094d", {3406}, {3}}, + {L"\U00000d4e\U0000200d", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000200d", {3406}, {3}}, + {L"\U00000d4e\U00000378", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000378", {3406, 888}, {2, 3}}, + {L"\U00000915\U00000020", {2325, 32}, {1, 2}}, + {L"\U00000915\U00000308\U00000020", {2325, 32}, {2, 3}}, + {L"\U00000915\U0000000d", {2325, 13}, {1, 2}}, + {L"\U00000915\U00000308\U0000000d", {2325, 13}, {2, 3}}, + {L"\U00000915\U0000000a", {2325, 10}, {1, 2}}, + {L"\U00000915\U00000308\U0000000a", {2325, 10}, {2, 3}}, + {L"\U00000915\U00000001", {2325, 1}, {1, 2}}, + {L"\U00000915\U00000308\U00000001", {2325, 1}, {2, 3}}, + {L"\U00000915\U0000034f", {2325}, {2}}, + {L"\U00000915\U00000308\U0000034f", {2325}, {3}}, + {L"\U00000915\U0001f1e6", {2325, 127462}, {1, 3}}, + {L"\U00000915\U00000308\U0001f1e6", {2325, 127462}, {2, 4}}, + {L"\U00000915\U00000600", {2325, 1536}, {1, 2}}, + 
{L"\U00000915\U00000308\U00000600", {2325, 1536}, {2, 3}}, + {L"\U00000915\U00000a03", {2325}, {2}}, + {L"\U00000915\U00000308\U00000a03", {2325}, {3}}, + {L"\U00000915\U00001100", {2325, 4352}, {1, 2}}, + {L"\U00000915\U00000308\U00001100", {2325, 4352}, {2, 3}}, + {L"\U00000915\U00001160", {2325, 4448}, {1, 2}}, + {L"\U00000915\U00000308\U00001160", {2325, 4448}, {2, 3}}, + {L"\U00000915\U000011a8", {2325, 4520}, {1, 2}}, + {L"\U00000915\U00000308\U000011a8", {2325, 4520}, {2, 3}}, + {L"\U00000915\U0000ac00", {2325, 44032}, {1, 2}}, + {L"\U00000915\U00000308\U0000ac00", {2325, 44032}, {2, 3}}, + {L"\U00000915\U0000ac01", {2325, 44033}, {1, 2}}, + {L"\U00000915\U00000308\U0000ac01", {2325, 44033}, {2, 3}}, + {L"\U00000915\U00000900", {2325}, {2}}, + {L"\U00000915\U00000308\U00000900", {2325}, {3}}, + {L"\U00000915\U00000903", {2325}, {2}}, + {L"\U00000915\U00000308\U00000903", {2325}, {3}}, + {L"\U00000915\U00000904", {2325, 2308}, {1, 2}}, + {L"\U00000915\U00000308\U00000904", {2325, 2308}, {2, 3}}, + {L"\U00000915\U00000d4e", {2325, 3406}, {1, 2}}, + {L"\U00000915\U00000308\U00000d4e", {2325, 3406}, {2, 3}}, + {L"\U00000915\U00000915", {2325, 2325}, {1, 2}}, + {L"\U00000915\U00000308\U00000915", {2325, 2325}, {2, 3}}, + {L"\U00000915\U0000231a", {2325, 8986}, {1, 2}}, + {L"\U00000915\U00000308\U0000231a", {2325, 8986}, {2, 3}}, + {L"\U00000915\U00000300", {2325}, {2}}, + {L"\U00000915\U00000308\U00000300", {2325}, {3}}, + {L"\U00000915\U0000093c", {2325}, {2}}, + {L"\U00000915\U00000308\U0000093c", {2325}, {3}}, + {L"\U00000915\U0000094d", {2325}, {2}}, + {L"\U00000915\U00000308\U0000094d", {2325}, {3}}, + {L"\U00000915\U0000200d", {2325}, {2}}, + {L"\U00000915\U00000308\U0000200d", {2325}, {3}}, + {L"\U00000915\U00000378", {2325, 888}, {1, 2}}, + {L"\U00000915\U00000308\U00000378", {2325, 888}, {2, 3}}, {L"\U0000231a\U00000020", {8986, 32}, {1, 2}}, {L"\U0000231a\U00000308\U00000020", {8986, 32}, {2, 3}}, {L"\U0000231a\U0000000d", {8986, 13}, {1, 2}}, @@ -1149,8 +2156,8 @@ std::array, 602> data_utf16 = {{ {L"\U0000231a\U00000308\U0001f1e6", {8986, 127462}, {2, 4}}, {L"\U0000231a\U00000600", {8986, 1536}, {1, 2}}, {L"\U0000231a\U00000308\U00000600", {8986, 1536}, {2, 3}}, - {L"\U0000231a\U00000903", {8986}, {2}}, - {L"\U0000231a\U00000308\U00000903", {8986}, {3}}, + {L"\U0000231a\U00000a03", {8986}, {2}}, + {L"\U0000231a\U00000308\U00000a03", {8986}, {3}}, {L"\U0000231a\U00001100", {8986, 4352}, {1, 2}}, {L"\U0000231a\U00000308\U00001100", {8986, 4352}, {2, 3}}, {L"\U0000231a\U00001160", {8986, 4448}, {1, 2}}, @@ -1161,10 +2168,24 @@ std::array, 602> data_utf16 = {{ {L"\U0000231a\U00000308\U0000ac00", {8986, 44032}, {2, 3}}, {L"\U0000231a\U0000ac01", {8986, 44033}, {1, 2}}, {L"\U0000231a\U00000308\U0000ac01", {8986, 44033}, {2, 3}}, + {L"\U0000231a\U00000900", {8986}, {2}}, + {L"\U0000231a\U00000308\U00000900", {8986}, {3}}, + {L"\U0000231a\U00000903", {8986}, {2}}, + {L"\U0000231a\U00000308\U00000903", {8986}, {3}}, + {L"\U0000231a\U00000904", {8986, 2308}, {1, 2}}, + {L"\U0000231a\U00000308\U00000904", {8986, 2308}, {2, 3}}, + {L"\U0000231a\U00000d4e", {8986, 3406}, {1, 2}}, + {L"\U0000231a\U00000308\U00000d4e", {8986, 3406}, {2, 3}}, + {L"\U0000231a\U00000915", {8986, 2325}, {1, 2}}, + {L"\U0000231a\U00000308\U00000915", {8986, 2325}, {2, 3}}, {L"\U0000231a\U0000231a", {8986, 8986}, {1, 2}}, {L"\U0000231a\U00000308\U0000231a", {8986, 8986}, {2, 3}}, {L"\U0000231a\U00000300", {8986}, {2}}, {L"\U0000231a\U00000308\U00000300", {8986}, {3}}, + {L"\U0000231a\U0000093c", {8986}, {2}}, + 
{L"\U0000231a\U00000308\U0000093c", {8986}, {3}}, + {L"\U0000231a\U0000094d", {8986}, {2}}, + {L"\U0000231a\U00000308\U0000094d", {8986}, {3}}, {L"\U0000231a\U0000200d", {8986}, {2}}, {L"\U0000231a\U00000308\U0000200d", {8986}, {3}}, {L"\U0000231a\U00000378", {8986, 888}, {1, 2}}, @@ -1183,8 +2204,8 @@ std::array, 602> data_utf16 = {{ {L"\U00000300\U00000308\U0001f1e6", {768, 127462}, {2, 4}}, {L"\U00000300\U00000600", {768, 1536}, {1, 2}}, {L"\U00000300\U00000308\U00000600", {768, 1536}, {2, 3}}, - {L"\U00000300\U00000903", {768}, {2}}, - {L"\U00000300\U00000308\U00000903", {768}, {3}}, + {L"\U00000300\U00000a03", {768}, {2}}, + {L"\U00000300\U00000308\U00000a03", {768}, {3}}, {L"\U00000300\U00001100", {768, 4352}, {1, 2}}, {L"\U00000300\U00000308\U00001100", {768, 4352}, {2, 3}}, {L"\U00000300\U00001160", {768, 4448}, {1, 2}}, @@ -1195,14 +2216,124 @@ std::array, 602> data_utf16 = {{ {L"\U00000300\U00000308\U0000ac00", {768, 44032}, {2, 3}}, {L"\U00000300\U0000ac01", {768, 44033}, {1, 2}}, {L"\U00000300\U00000308\U0000ac01", {768, 44033}, {2, 3}}, + {L"\U00000300\U00000900", {768}, {2}}, + {L"\U00000300\U00000308\U00000900", {768}, {3}}, + {L"\U00000300\U00000903", {768}, {2}}, + {L"\U00000300\U00000308\U00000903", {768}, {3}}, + {L"\U00000300\U00000904", {768, 2308}, {1, 2}}, + {L"\U00000300\U00000308\U00000904", {768, 2308}, {2, 3}}, + {L"\U00000300\U00000d4e", {768, 3406}, {1, 2}}, + {L"\U00000300\U00000308\U00000d4e", {768, 3406}, {2, 3}}, + {L"\U00000300\U00000915", {768, 2325}, {1, 2}}, + {L"\U00000300\U00000308\U00000915", {768, 2325}, {2, 3}}, {L"\U00000300\U0000231a", {768, 8986}, {1, 2}}, {L"\U00000300\U00000308\U0000231a", {768, 8986}, {2, 3}}, {L"\U00000300\U00000300", {768}, {2}}, {L"\U00000300\U00000308\U00000300", {768}, {3}}, + {L"\U00000300\U0000093c", {768}, {2}}, + {L"\U00000300\U00000308\U0000093c", {768}, {3}}, + {L"\U00000300\U0000094d", {768}, {2}}, + {L"\U00000300\U00000308\U0000094d", {768}, {3}}, {L"\U00000300\U0000200d", {768}, {2}}, {L"\U00000300\U00000308\U0000200d", {768}, {3}}, {L"\U00000300\U00000378", {768, 888}, {1, 2}}, {L"\U00000300\U00000308\U00000378", {768, 888}, {2, 3}}, + {L"\U0000093c\U00000020", {2364, 32}, {1, 2}}, + {L"\U0000093c\U00000308\U00000020", {2364, 32}, {2, 3}}, + {L"\U0000093c\U0000000d", {2364, 13}, {1, 2}}, + {L"\U0000093c\U00000308\U0000000d", {2364, 13}, {2, 3}}, + {L"\U0000093c\U0000000a", {2364, 10}, {1, 2}}, + {L"\U0000093c\U00000308\U0000000a", {2364, 10}, {2, 3}}, + {L"\U0000093c\U00000001", {2364, 1}, {1, 2}}, + {L"\U0000093c\U00000308\U00000001", {2364, 1}, {2, 3}}, + {L"\U0000093c\U0000034f", {2364}, {2}}, + {L"\U0000093c\U00000308\U0000034f", {2364}, {3}}, + {L"\U0000093c\U0001f1e6", {2364, 127462}, {1, 3}}, + {L"\U0000093c\U00000308\U0001f1e6", {2364, 127462}, {2, 4}}, + {L"\U0000093c\U00000600", {2364, 1536}, {1, 2}}, + {L"\U0000093c\U00000308\U00000600", {2364, 1536}, {2, 3}}, + {L"\U0000093c\U00000a03", {2364}, {2}}, + {L"\U0000093c\U00000308\U00000a03", {2364}, {3}}, + {L"\U0000093c\U00001100", {2364, 4352}, {1, 2}}, + {L"\U0000093c\U00000308\U00001100", {2364, 4352}, {2, 3}}, + {L"\U0000093c\U00001160", {2364, 4448}, {1, 2}}, + {L"\U0000093c\U00000308\U00001160", {2364, 4448}, {2, 3}}, + {L"\U0000093c\U000011a8", {2364, 4520}, {1, 2}}, + {L"\U0000093c\U00000308\U000011a8", {2364, 4520}, {2, 3}}, + {L"\U0000093c\U0000ac00", {2364, 44032}, {1, 2}}, + {L"\U0000093c\U00000308\U0000ac00", {2364, 44032}, {2, 3}}, + {L"\U0000093c\U0000ac01", {2364, 44033}, {1, 2}}, + {L"\U0000093c\U00000308\U0000ac01", {2364, 44033}, {2, 
3}}, + {L"\U0000093c\U00000900", {2364}, {2}}, + {L"\U0000093c\U00000308\U00000900", {2364}, {3}}, + {L"\U0000093c\U00000903", {2364}, {2}}, + {L"\U0000093c\U00000308\U00000903", {2364}, {3}}, + {L"\U0000093c\U00000904", {2364, 2308}, {1, 2}}, + {L"\U0000093c\U00000308\U00000904", {2364, 2308}, {2, 3}}, + {L"\U0000093c\U00000d4e", {2364, 3406}, {1, 2}}, + {L"\U0000093c\U00000308\U00000d4e", {2364, 3406}, {2, 3}}, + {L"\U0000093c\U00000915", {2364, 2325}, {1, 2}}, + {L"\U0000093c\U00000308\U00000915", {2364, 2325}, {2, 3}}, + {L"\U0000093c\U0000231a", {2364, 8986}, {1, 2}}, + {L"\U0000093c\U00000308\U0000231a", {2364, 8986}, {2, 3}}, + {L"\U0000093c\U00000300", {2364}, {2}}, + {L"\U0000093c\U00000308\U00000300", {2364}, {3}}, + {L"\U0000093c\U0000093c", {2364}, {2}}, + {L"\U0000093c\U00000308\U0000093c", {2364}, {3}}, + {L"\U0000093c\U0000094d", {2364}, {2}}, + {L"\U0000093c\U00000308\U0000094d", {2364}, {3}}, + {L"\U0000093c\U0000200d", {2364}, {2}}, + {L"\U0000093c\U00000308\U0000200d", {2364}, {3}}, + {L"\U0000093c\U00000378", {2364, 888}, {1, 2}}, + {L"\U0000093c\U00000308\U00000378", {2364, 888}, {2, 3}}, + {L"\U0000094d\U00000020", {2381, 32}, {1, 2}}, + {L"\U0000094d\U00000308\U00000020", {2381, 32}, {2, 3}}, + {L"\U0000094d\U0000000d", {2381, 13}, {1, 2}}, + {L"\U0000094d\U00000308\U0000000d", {2381, 13}, {2, 3}}, + {L"\U0000094d\U0000000a", {2381, 10}, {1, 2}}, + {L"\U0000094d\U00000308\U0000000a", {2381, 10}, {2, 3}}, + {L"\U0000094d\U00000001", {2381, 1}, {1, 2}}, + {L"\U0000094d\U00000308\U00000001", {2381, 1}, {2, 3}}, + {L"\U0000094d\U0000034f", {2381}, {2}}, + {L"\U0000094d\U00000308\U0000034f", {2381}, {3}}, + {L"\U0000094d\U0001f1e6", {2381, 127462}, {1, 3}}, + {L"\U0000094d\U00000308\U0001f1e6", {2381, 127462}, {2, 4}}, + {L"\U0000094d\U00000600", {2381, 1536}, {1, 2}}, + {L"\U0000094d\U00000308\U00000600", {2381, 1536}, {2, 3}}, + {L"\U0000094d\U00000a03", {2381}, {2}}, + {L"\U0000094d\U00000308\U00000a03", {2381}, {3}}, + {L"\U0000094d\U00001100", {2381, 4352}, {1, 2}}, + {L"\U0000094d\U00000308\U00001100", {2381, 4352}, {2, 3}}, + {L"\U0000094d\U00001160", {2381, 4448}, {1, 2}}, + {L"\U0000094d\U00000308\U00001160", {2381, 4448}, {2, 3}}, + {L"\U0000094d\U000011a8", {2381, 4520}, {1, 2}}, + {L"\U0000094d\U00000308\U000011a8", {2381, 4520}, {2, 3}}, + {L"\U0000094d\U0000ac00", {2381, 44032}, {1, 2}}, + {L"\U0000094d\U00000308\U0000ac00", {2381, 44032}, {2, 3}}, + {L"\U0000094d\U0000ac01", {2381, 44033}, {1, 2}}, + {L"\U0000094d\U00000308\U0000ac01", {2381, 44033}, {2, 3}}, + {L"\U0000094d\U00000900", {2381}, {2}}, + {L"\U0000094d\U00000308\U00000900", {2381}, {3}}, + {L"\U0000094d\U00000903", {2381}, {2}}, + {L"\U0000094d\U00000308\U00000903", {2381}, {3}}, + {L"\U0000094d\U00000904", {2381, 2308}, {1, 2}}, + {L"\U0000094d\U00000308\U00000904", {2381, 2308}, {2, 3}}, + {L"\U0000094d\U00000d4e", {2381, 3406}, {1, 2}}, + {L"\U0000094d\U00000308\U00000d4e", {2381, 3406}, {2, 3}}, + {L"\U0000094d\U00000915", {2381, 2325}, {1, 2}}, + {L"\U0000094d\U00000308\U00000915", {2381, 2325}, {2, 3}}, + {L"\U0000094d\U0000231a", {2381, 8986}, {1, 2}}, + {L"\U0000094d\U00000308\U0000231a", {2381, 8986}, {2, 3}}, + {L"\U0000094d\U00000300", {2381}, {2}}, + {L"\U0000094d\U00000308\U00000300", {2381}, {3}}, + {L"\U0000094d\U0000093c", {2381}, {2}}, + {L"\U0000094d\U00000308\U0000093c", {2381}, {3}}, + {L"\U0000094d\U0000094d", {2381}, {2}}, + {L"\U0000094d\U00000308\U0000094d", {2381}, {3}}, + {L"\U0000094d\U0000200d", {2381}, {2}}, + {L"\U0000094d\U00000308\U0000200d", {2381}, {3}}, + 
{L"\U0000094d\U00000378", {2381, 888}, {1, 2}}, + {L"\U0000094d\U00000308\U00000378", {2381, 888}, {2, 3}}, {L"\U0000200d\U00000020", {8205, 32}, {1, 2}}, {L"\U0000200d\U00000308\U00000020", {8205, 32}, {2, 3}}, {L"\U0000200d\U0000000d", {8205, 13}, {1, 2}}, @@ -1217,8 +2348,8 @@ std::array, 602> data_utf16 = {{ {L"\U0000200d\U00000308\U0001f1e6", {8205, 127462}, {2, 4}}, {L"\U0000200d\U00000600", {8205, 1536}, {1, 2}}, {L"\U0000200d\U00000308\U00000600", {8205, 1536}, {2, 3}}, - {L"\U0000200d\U00000903", {8205}, {2}}, - {L"\U0000200d\U00000308\U00000903", {8205}, {3}}, + {L"\U0000200d\U00000a03", {8205}, {2}}, + {L"\U0000200d\U00000308\U00000a03", {8205}, {3}}, {L"\U0000200d\U00001100", {8205, 4352}, {1, 2}}, {L"\U0000200d\U00000308\U00001100", {8205, 4352}, {2, 3}}, {L"\U0000200d\U00001160", {8205, 4448}, {1, 2}}, @@ -1229,10 +2360,24 @@ std::array, 602> data_utf16 = {{ {L"\U0000200d\U00000308\U0000ac00", {8205, 44032}, {2, 3}}, {L"\U0000200d\U0000ac01", {8205, 44033}, {1, 2}}, {L"\U0000200d\U00000308\U0000ac01", {8205, 44033}, {2, 3}}, + {L"\U0000200d\U00000900", {8205}, {2}}, + {L"\U0000200d\U00000308\U00000900", {8205}, {3}}, + {L"\U0000200d\U00000903", {8205}, {2}}, + {L"\U0000200d\U00000308\U00000903", {8205}, {3}}, + {L"\U0000200d\U00000904", {8205, 2308}, {1, 2}}, + {L"\U0000200d\U00000308\U00000904", {8205, 2308}, {2, 3}}, + {L"\U0000200d\U00000d4e", {8205, 3406}, {1, 2}}, + {L"\U0000200d\U00000308\U00000d4e", {8205, 3406}, {2, 3}}, + {L"\U0000200d\U00000915", {8205, 2325}, {1, 2}}, + {L"\U0000200d\U00000308\U00000915", {8205, 2325}, {2, 3}}, {L"\U0000200d\U0000231a", {8205, 8986}, {1, 2}}, {L"\U0000200d\U00000308\U0000231a", {8205, 8986}, {2, 3}}, {L"\U0000200d\U00000300", {8205}, {2}}, {L"\U0000200d\U00000308\U00000300", {8205}, {3}}, + {L"\U0000200d\U0000093c", {8205}, {2}}, + {L"\U0000200d\U00000308\U0000093c", {8205}, {3}}, + {L"\U0000200d\U0000094d", {8205}, {2}}, + {L"\U0000200d\U00000308\U0000094d", {8205}, {3}}, {L"\U0000200d\U0000200d", {8205}, {2}}, {L"\U0000200d\U00000308\U0000200d", {8205}, {3}}, {L"\U0000200d\U00000378", {8205, 888}, {1, 2}}, @@ -1251,8 +2396,8 @@ std::array, 602> data_utf16 = {{ {L"\U00000378\U00000308\U0001f1e6", {888, 127462}, {2, 4}}, {L"\U00000378\U00000600", {888, 1536}, {1, 2}}, {L"\U00000378\U00000308\U00000600", {888, 1536}, {2, 3}}, - {L"\U00000378\U00000903", {888}, {2}}, - {L"\U00000378\U00000308\U00000903", {888}, {3}}, + {L"\U00000378\U00000a03", {888}, {2}}, + {L"\U00000378\U00000308\U00000a03", {888}, {3}}, {L"\U00000378\U00001100", {888, 4352}, {1, 2}}, {L"\U00000378\U00000308\U00001100", {888, 4352}, {2, 3}}, {L"\U00000378\U00001160", {888, 4448}, {1, 2}}, @@ -1263,10 +2408,24 @@ std::array, 602> data_utf16 = {{ {L"\U00000378\U00000308\U0000ac00", {888, 44032}, {2, 3}}, {L"\U00000378\U0000ac01", {888, 44033}, {1, 2}}, {L"\U00000378\U00000308\U0000ac01", {888, 44033}, {2, 3}}, + {L"\U00000378\U00000900", {888}, {2}}, + {L"\U00000378\U00000308\U00000900", {888}, {3}}, + {L"\U00000378\U00000903", {888}, {2}}, + {L"\U00000378\U00000308\U00000903", {888}, {3}}, + {L"\U00000378\U00000904", {888, 2308}, {1, 2}}, + {L"\U00000378\U00000308\U00000904", {888, 2308}, {2, 3}}, + {L"\U00000378\U00000d4e", {888, 3406}, {1, 2}}, + {L"\U00000378\U00000308\U00000d4e", {888, 3406}, {2, 3}}, + {L"\U00000378\U00000915", {888, 2325}, {1, 2}}, + {L"\U00000378\U00000308\U00000915", {888, 2325}, {2, 3}}, {L"\U00000378\U0000231a", {888, 8986}, {1, 2}}, {L"\U00000378\U00000308\U0000231a", {888, 8986}, {2, 3}}, {L"\U00000378\U00000300", {888}, {2}}, 
{L"\U00000378\U00000308\U00000300", {888}, {3}}, + {L"\U00000378\U0000093c", {888}, {2}}, + {L"\U00000378\U00000308\U0000093c", {888}, {3}}, + {L"\U00000378\U0000094d", {888}, {2}}, + {L"\U00000378\U00000308\U0000094d", {888}, {3}}, {L"\U00000378\U0000200d", {888}, {2}}, {L"\U00000378\U00000308\U0000200d", {888}, {3}}, {L"\U00000378\U00000378", {888, 888}, {1, 2}}, @@ -1294,14 +2453,25 @@ std::array, 602> data_utf16 = {{ {L"\U0001f6d1\U0000200d\U0001f6d1", {128721}, {5}}, {L"\U00000061\U0000200d\U0001f6d1", {97, 128721}, {2, 4}}, {L"\U00002701\U0000200d\U00002701", {9985}, {3}}, - {L"\U00000061\U0000200d\U00002701", {97, 9985}, {2, 3}}}}; + {L"\U00000061\U0000200d\U00002701", {97, 9985}, {2, 3}}, + {L"\U00000915\U00000924", {2325, 2340}, {1, 2}}, + {L"\U00000915\U0000094d\U00000924", {2325}, {3}}, + {L"\U00000915\U0000094d\U0000094d\U00000924", {2325}, {4}}, + {L"\U00000915\U0000094d\U0000200d\U00000924", {2325}, {4}}, + {L"\U00000915\U0000093c\U0000200d\U0000094d\U00000924", {2325}, {5}}, + {L"\U00000915\U0000093c\U0000094d\U0000200d\U00000924", {2325}, {5}}, + {L"\U00000915\U0000094d\U00000924\U0000094d\U0000092f", {2325}, {5}}, + {L"\U00000915\U0000094d\U00000061", {2325, 97}, {2, 3}}, + {L"\U00000061\U0000094d\U00000924", {97, 2340}, {2, 3}}, + {L"\U0000003f\U0000094d\U00000924", {63, 2340}, {2, 3}}, + {L"\U00000915\U0000094d\U0000094d\U00000924", {2325}, {4}}}}; /// The data for UTF-8. /// /// Note that most of the data for the UTF-16 and UTF-32 are identical. However /// since the size of the code units differ the breaks can contain different /// values. -std::array, 602> data_utf32 = {{ +std::array, 1187> data_utf32 = {{ {L"\U00000020\U00000020", {32, 32}, {1, 2}}, {L"\U00000020\U00000308\U00000020", {32, 32}, {2, 3}}, {L"\U00000020\U0000000d", {32, 13}, {1, 2}}, @@ -1316,8 +2486,8 @@ std::array, 602> data_utf32 = {{ {L"\U00000020\U00000308\U0001f1e6", {32, 127462}, {2, 3}}, {L"\U00000020\U00000600", {32, 1536}, {1, 2}}, {L"\U00000020\U00000308\U00000600", {32, 1536}, {2, 3}}, - {L"\U00000020\U00000903", {32}, {2}}, - {L"\U00000020\U00000308\U00000903", {32}, {3}}, + {L"\U00000020\U00000a03", {32}, {2}}, + {L"\U00000020\U00000308\U00000a03", {32}, {3}}, {L"\U00000020\U00001100", {32, 4352}, {1, 2}}, {L"\U00000020\U00000308\U00001100", {32, 4352}, {2, 3}}, {L"\U00000020\U00001160", {32, 4448}, {1, 2}}, @@ -1328,10 +2498,24 @@ std::array, 602> data_utf32 = {{ {L"\U00000020\U00000308\U0000ac00", {32, 44032}, {2, 3}}, {L"\U00000020\U0000ac01", {32, 44033}, {1, 2}}, {L"\U00000020\U00000308\U0000ac01", {32, 44033}, {2, 3}}, + {L"\U00000020\U00000900", {32}, {2}}, + {L"\U00000020\U00000308\U00000900", {32}, {3}}, + {L"\U00000020\U00000903", {32}, {2}}, + {L"\U00000020\U00000308\U00000903", {32}, {3}}, + {L"\U00000020\U00000904", {32, 2308}, {1, 2}}, + {L"\U00000020\U00000308\U00000904", {32, 2308}, {2, 3}}, + {L"\U00000020\U00000d4e", {32, 3406}, {1, 2}}, + {L"\U00000020\U00000308\U00000d4e", {32, 3406}, {2, 3}}, + {L"\U00000020\U00000915", {32, 2325}, {1, 2}}, + {L"\U00000020\U00000308\U00000915", {32, 2325}, {2, 3}}, {L"\U00000020\U0000231a", {32, 8986}, {1, 2}}, {L"\U00000020\U00000308\U0000231a", {32, 8986}, {2, 3}}, {L"\U00000020\U00000300", {32}, {2}}, {L"\U00000020\U00000308\U00000300", {32}, {3}}, + {L"\U00000020\U0000093c", {32}, {2}}, + {L"\U00000020\U00000308\U0000093c", {32}, {3}}, + {L"\U00000020\U0000094d", {32}, {2}}, + {L"\U00000020\U00000308\U0000094d", {32}, {3}}, {L"\U00000020\U0000200d", {32}, {2}}, {L"\U00000020\U00000308\U0000200d", {32}, {3}}, 
{L"\U00000020\U00000378", {32, 888}, {1, 2}}, @@ -1350,8 +2534,8 @@ std::array, 602> data_utf32 = {{ {L"\U0000000d\U00000308\U0001f1e6", {13, 776, 127462}, {1, 2, 3}}, {L"\U0000000d\U00000600", {13, 1536}, {1, 2}}, {L"\U0000000d\U00000308\U00000600", {13, 776, 1536}, {1, 2, 3}}, - {L"\U0000000d\U00000903", {13, 2307}, {1, 2}}, - {L"\U0000000d\U00000308\U00000903", {13, 776}, {1, 3}}, + {L"\U0000000d\U00000a03", {13, 2563}, {1, 2}}, + {L"\U0000000d\U00000308\U00000a03", {13, 776}, {1, 3}}, {L"\U0000000d\U00001100", {13, 4352}, {1, 2}}, {L"\U0000000d\U00000308\U00001100", {13, 776, 4352}, {1, 2, 3}}, {L"\U0000000d\U00001160", {13, 4448}, {1, 2}}, @@ -1362,10 +2546,24 @@ std::array, 602> data_utf32 = {{ {L"\U0000000d\U00000308\U0000ac00", {13, 776, 44032}, {1, 2, 3}}, {L"\U0000000d\U0000ac01", {13, 44033}, {1, 2}}, {L"\U0000000d\U00000308\U0000ac01", {13, 776, 44033}, {1, 2, 3}}, + {L"\U0000000d\U00000900", {13, 2304}, {1, 2}}, + {L"\U0000000d\U00000308\U00000900", {13, 776}, {1, 3}}, + {L"\U0000000d\U00000903", {13, 2307}, {1, 2}}, + {L"\U0000000d\U00000308\U00000903", {13, 776}, {1, 3}}, + {L"\U0000000d\U00000904", {13, 2308}, {1, 2}}, + {L"\U0000000d\U00000308\U00000904", {13, 776, 2308}, {1, 2, 3}}, + {L"\U0000000d\U00000d4e", {13, 3406}, {1, 2}}, + {L"\U0000000d\U00000308\U00000d4e", {13, 776, 3406}, {1, 2, 3}}, + {L"\U0000000d\U00000915", {13, 2325}, {1, 2}}, + {L"\U0000000d\U00000308\U00000915", {13, 776, 2325}, {1, 2, 3}}, {L"\U0000000d\U0000231a", {13, 8986}, {1, 2}}, {L"\U0000000d\U00000308\U0000231a", {13, 776, 8986}, {1, 2, 3}}, {L"\U0000000d\U00000300", {13, 768}, {1, 2}}, {L"\U0000000d\U00000308\U00000300", {13, 776}, {1, 3}}, + {L"\U0000000d\U0000093c", {13, 2364}, {1, 2}}, + {L"\U0000000d\U00000308\U0000093c", {13, 776}, {1, 3}}, + {L"\U0000000d\U0000094d", {13, 2381}, {1, 2}}, + {L"\U0000000d\U00000308\U0000094d", {13, 776}, {1, 3}}, {L"\U0000000d\U0000200d", {13, 8205}, {1, 2}}, {L"\U0000000d\U00000308\U0000200d", {13, 776}, {1, 3}}, {L"\U0000000d\U00000378", {13, 888}, {1, 2}}, @@ -1384,8 +2582,8 @@ std::array, 602> data_utf32 = {{ {L"\U0000000a\U00000308\U0001f1e6", {10, 776, 127462}, {1, 2, 3}}, {L"\U0000000a\U00000600", {10, 1536}, {1, 2}}, {L"\U0000000a\U00000308\U00000600", {10, 776, 1536}, {1, 2, 3}}, - {L"\U0000000a\U00000903", {10, 2307}, {1, 2}}, - {L"\U0000000a\U00000308\U00000903", {10, 776}, {1, 3}}, + {L"\U0000000a\U00000a03", {10, 2563}, {1, 2}}, + {L"\U0000000a\U00000308\U00000a03", {10, 776}, {1, 3}}, {L"\U0000000a\U00001100", {10, 4352}, {1, 2}}, {L"\U0000000a\U00000308\U00001100", {10, 776, 4352}, {1, 2, 3}}, {L"\U0000000a\U00001160", {10, 4448}, {1, 2}}, @@ -1396,10 +2594,24 @@ std::array, 602> data_utf32 = {{ {L"\U0000000a\U00000308\U0000ac00", {10, 776, 44032}, {1, 2, 3}}, {L"\U0000000a\U0000ac01", {10, 44033}, {1, 2}}, {L"\U0000000a\U00000308\U0000ac01", {10, 776, 44033}, {1, 2, 3}}, + {L"\U0000000a\U00000900", {10, 2304}, {1, 2}}, + {L"\U0000000a\U00000308\U00000900", {10, 776}, {1, 3}}, + {L"\U0000000a\U00000903", {10, 2307}, {1, 2}}, + {L"\U0000000a\U00000308\U00000903", {10, 776}, {1, 3}}, + {L"\U0000000a\U00000904", {10, 2308}, {1, 2}}, + {L"\U0000000a\U00000308\U00000904", {10, 776, 2308}, {1, 2, 3}}, + {L"\U0000000a\U00000d4e", {10, 3406}, {1, 2}}, + {L"\U0000000a\U00000308\U00000d4e", {10, 776, 3406}, {1, 2, 3}}, + {L"\U0000000a\U00000915", {10, 2325}, {1, 2}}, + {L"\U0000000a\U00000308\U00000915", {10, 776, 2325}, {1, 2, 3}}, {L"\U0000000a\U0000231a", {10, 8986}, {1, 2}}, {L"\U0000000a\U00000308\U0000231a", {10, 776, 8986}, {1, 2, 3}}, 
{L"\U0000000a\U00000300", {10, 768}, {1, 2}}, {L"\U0000000a\U00000308\U00000300", {10, 776}, {1, 3}}, + {L"\U0000000a\U0000093c", {10, 2364}, {1, 2}}, + {L"\U0000000a\U00000308\U0000093c", {10, 776}, {1, 3}}, + {L"\U0000000a\U0000094d", {10, 2381}, {1, 2}}, + {L"\U0000000a\U00000308\U0000094d", {10, 776}, {1, 3}}, {L"\U0000000a\U0000200d", {10, 8205}, {1, 2}}, {L"\U0000000a\U00000308\U0000200d", {10, 776}, {1, 3}}, {L"\U0000000a\U00000378", {10, 888}, {1, 2}}, @@ -1418,8 +2630,8 @@ std::array, 602> data_utf32 = {{ {L"\U00000001\U00000308\U0001f1e6", {1, 776, 127462}, {1, 2, 3}}, {L"\U00000001\U00000600", {1, 1536}, {1, 2}}, {L"\U00000001\U00000308\U00000600", {1, 776, 1536}, {1, 2, 3}}, - {L"\U00000001\U00000903", {1, 2307}, {1, 2}}, - {L"\U00000001\U00000308\U00000903", {1, 776}, {1, 3}}, + {L"\U00000001\U00000a03", {1, 2563}, {1, 2}}, + {L"\U00000001\U00000308\U00000a03", {1, 776}, {1, 3}}, {L"\U00000001\U00001100", {1, 4352}, {1, 2}}, {L"\U00000001\U00000308\U00001100", {1, 776, 4352}, {1, 2, 3}}, {L"\U00000001\U00001160", {1, 4448}, {1, 2}}, @@ -1430,10 +2642,24 @@ std::array, 602> data_utf32 = {{ {L"\U00000001\U00000308\U0000ac00", {1, 776, 44032}, {1, 2, 3}}, {L"\U00000001\U0000ac01", {1, 44033}, {1, 2}}, {L"\U00000001\U00000308\U0000ac01", {1, 776, 44033}, {1, 2, 3}}, + {L"\U00000001\U00000900", {1, 2304}, {1, 2}}, + {L"\U00000001\U00000308\U00000900", {1, 776}, {1, 3}}, + {L"\U00000001\U00000903", {1, 2307}, {1, 2}}, + {L"\U00000001\U00000308\U00000903", {1, 776}, {1, 3}}, + {L"\U00000001\U00000904", {1, 2308}, {1, 2}}, + {L"\U00000001\U00000308\U00000904", {1, 776, 2308}, {1, 2, 3}}, + {L"\U00000001\U00000d4e", {1, 3406}, {1, 2}}, + {L"\U00000001\U00000308\U00000d4e", {1, 776, 3406}, {1, 2, 3}}, + {L"\U00000001\U00000915", {1, 2325}, {1, 2}}, + {L"\U00000001\U00000308\U00000915", {1, 776, 2325}, {1, 2, 3}}, {L"\U00000001\U0000231a", {1, 8986}, {1, 2}}, {L"\U00000001\U00000308\U0000231a", {1, 776, 8986}, {1, 2, 3}}, {L"\U00000001\U00000300", {1, 768}, {1, 2}}, {L"\U00000001\U00000308\U00000300", {1, 776}, {1, 3}}, + {L"\U00000001\U0000093c", {1, 2364}, {1, 2}}, + {L"\U00000001\U00000308\U0000093c", {1, 776}, {1, 3}}, + {L"\U00000001\U0000094d", {1, 2381}, {1, 2}}, + {L"\U00000001\U00000308\U0000094d", {1, 776}, {1, 3}}, {L"\U00000001\U0000200d", {1, 8205}, {1, 2}}, {L"\U00000001\U00000308\U0000200d", {1, 776}, {1, 3}}, {L"\U00000001\U00000378", {1, 888}, {1, 2}}, @@ -1452,8 +2678,8 @@ std::array, 602> data_utf32 = {{ {L"\U0000034f\U00000308\U0001f1e6", {847, 127462}, {2, 3}}, {L"\U0000034f\U00000600", {847, 1536}, {1, 2}}, {L"\U0000034f\U00000308\U00000600", {847, 1536}, {2, 3}}, - {L"\U0000034f\U00000903", {847}, {2}}, - {L"\U0000034f\U00000308\U00000903", {847}, {3}}, + {L"\U0000034f\U00000a03", {847}, {2}}, + {L"\U0000034f\U00000308\U00000a03", {847}, {3}}, {L"\U0000034f\U00001100", {847, 4352}, {1, 2}}, {L"\U0000034f\U00000308\U00001100", {847, 4352}, {2, 3}}, {L"\U0000034f\U00001160", {847, 4448}, {1, 2}}, @@ -1464,10 +2690,24 @@ std::array, 602> data_utf32 = {{ {L"\U0000034f\U00000308\U0000ac00", {847, 44032}, {2, 3}}, {L"\U0000034f\U0000ac01", {847, 44033}, {1, 2}}, {L"\U0000034f\U00000308\U0000ac01", {847, 44033}, {2, 3}}, + {L"\U0000034f\U00000900", {847}, {2}}, + {L"\U0000034f\U00000308\U00000900", {847}, {3}}, + {L"\U0000034f\U00000903", {847}, {2}}, + {L"\U0000034f\U00000308\U00000903", {847}, {3}}, + {L"\U0000034f\U00000904", {847, 2308}, {1, 2}}, + {L"\U0000034f\U00000308\U00000904", {847, 2308}, {2, 3}}, + {L"\U0000034f\U00000d4e", {847, 3406}, {1, 2}}, + 
{L"\U0000034f\U00000308\U00000d4e", {847, 3406}, {2, 3}}, + {L"\U0000034f\U00000915", {847, 2325}, {1, 2}}, + {L"\U0000034f\U00000308\U00000915", {847, 2325}, {2, 3}}, {L"\U0000034f\U0000231a", {847, 8986}, {1, 2}}, {L"\U0000034f\U00000308\U0000231a", {847, 8986}, {2, 3}}, {L"\U0000034f\U00000300", {847}, {2}}, {L"\U0000034f\U00000308\U00000300", {847}, {3}}, + {L"\U0000034f\U0000093c", {847}, {2}}, + {L"\U0000034f\U00000308\U0000093c", {847}, {3}}, + {L"\U0000034f\U0000094d", {847}, {2}}, + {L"\U0000034f\U00000308\U0000094d", {847}, {3}}, {L"\U0000034f\U0000200d", {847}, {2}}, {L"\U0000034f\U00000308\U0000200d", {847}, {3}}, {L"\U0000034f\U00000378", {847, 888}, {1, 2}}, @@ -1486,8 +2726,8 @@ std::array, 602> data_utf32 = {{ {L"\U0001f1e6\U00000308\U0001f1e6", {127462, 127462}, {2, 3}}, {L"\U0001f1e6\U00000600", {127462, 1536}, {1, 2}}, {L"\U0001f1e6\U00000308\U00000600", {127462, 1536}, {2, 3}}, - {L"\U0001f1e6\U00000903", {127462}, {2}}, - {L"\U0001f1e6\U00000308\U00000903", {127462}, {3}}, + {L"\U0001f1e6\U00000a03", {127462}, {2}}, + {L"\U0001f1e6\U00000308\U00000a03", {127462}, {3}}, {L"\U0001f1e6\U00001100", {127462, 4352}, {1, 2}}, {L"\U0001f1e6\U00000308\U00001100", {127462, 4352}, {2, 3}}, {L"\U0001f1e6\U00001160", {127462, 4448}, {1, 2}}, @@ -1498,10 +2738,24 @@ std::array, 602> data_utf32 = {{ {L"\U0001f1e6\U00000308\U0000ac00", {127462, 44032}, {2, 3}}, {L"\U0001f1e6\U0000ac01", {127462, 44033}, {1, 2}}, {L"\U0001f1e6\U00000308\U0000ac01", {127462, 44033}, {2, 3}}, + {L"\U0001f1e6\U00000900", {127462}, {2}}, + {L"\U0001f1e6\U00000308\U00000900", {127462}, {3}}, + {L"\U0001f1e6\U00000903", {127462}, {2}}, + {L"\U0001f1e6\U00000308\U00000903", {127462}, {3}}, + {L"\U0001f1e6\U00000904", {127462, 2308}, {1, 2}}, + {L"\U0001f1e6\U00000308\U00000904", {127462, 2308}, {2, 3}}, + {L"\U0001f1e6\U00000d4e", {127462, 3406}, {1, 2}}, + {L"\U0001f1e6\U00000308\U00000d4e", {127462, 3406}, {2, 3}}, + {L"\U0001f1e6\U00000915", {127462, 2325}, {1, 2}}, + {L"\U0001f1e6\U00000308\U00000915", {127462, 2325}, {2, 3}}, {L"\U0001f1e6\U0000231a", {127462, 8986}, {1, 2}}, {L"\U0001f1e6\U00000308\U0000231a", {127462, 8986}, {2, 3}}, {L"\U0001f1e6\U00000300", {127462}, {2}}, {L"\U0001f1e6\U00000308\U00000300", {127462}, {3}}, + {L"\U0001f1e6\U0000093c", {127462}, {2}}, + {L"\U0001f1e6\U00000308\U0000093c", {127462}, {3}}, + {L"\U0001f1e6\U0000094d", {127462}, {2}}, + {L"\U0001f1e6\U00000308\U0000094d", {127462}, {3}}, {L"\U0001f1e6\U0000200d", {127462}, {2}}, {L"\U0001f1e6\U00000308\U0000200d", {127462}, {3}}, {L"\U0001f1e6\U00000378", {127462, 888}, {1, 2}}, @@ -1520,8 +2774,8 @@ std::array, 602> data_utf32 = {{ {L"\U00000600\U00000308\U0001f1e6", {1536, 127462}, {2, 3}}, {L"\U00000600\U00000600", {1536}, {2}}, {L"\U00000600\U00000308\U00000600", {1536, 1536}, {2, 3}}, - {L"\U00000600\U00000903", {1536}, {2}}, - {L"\U00000600\U00000308\U00000903", {1536}, {3}}, + {L"\U00000600\U00000a03", {1536}, {2}}, + {L"\U00000600\U00000308\U00000a03", {1536}, {3}}, {L"\U00000600\U00001100", {1536}, {2}}, {L"\U00000600\U00000308\U00001100", {1536, 4352}, {2, 3}}, {L"\U00000600\U00001160", {1536}, {2}}, @@ -1532,48 +2786,76 @@ std::array, 602> data_utf32 = {{ {L"\U00000600\U00000308\U0000ac00", {1536, 44032}, {2, 3}}, {L"\U00000600\U0000ac01", {1536}, {2}}, {L"\U00000600\U00000308\U0000ac01", {1536, 44033}, {2, 3}}, + {L"\U00000600\U00000900", {1536}, {2}}, + {L"\U00000600\U00000308\U00000900", {1536}, {3}}, + {L"\U00000600\U00000903", {1536}, {2}}, + {L"\U00000600\U00000308\U00000903", {1536}, {3}}, + 
{L"\U00000600\U00000904", {1536}, {2}}, + {L"\U00000600\U00000308\U00000904", {1536, 2308}, {2, 3}}, + {L"\U00000600\U00000d4e", {1536}, {2}}, + {L"\U00000600\U00000308\U00000d4e", {1536, 3406}, {2, 3}}, + {L"\U00000600\U00000915", {1536}, {2}}, + {L"\U00000600\U00000308\U00000915", {1536, 2325}, {2, 3}}, {L"\U00000600\U0000231a", {1536}, {2}}, {L"\U00000600\U00000308\U0000231a", {1536, 8986}, {2, 3}}, {L"\U00000600\U00000300", {1536}, {2}}, {L"\U00000600\U00000308\U00000300", {1536}, {3}}, + {L"\U00000600\U0000093c", {1536}, {2}}, + {L"\U00000600\U00000308\U0000093c", {1536}, {3}}, + {L"\U00000600\U0000094d", {1536}, {2}}, + {L"\U00000600\U00000308\U0000094d", {1536}, {3}}, {L"\U00000600\U0000200d", {1536}, {2}}, {L"\U00000600\U00000308\U0000200d", {1536}, {3}}, {L"\U00000600\U00000378", {1536}, {2}}, {L"\U00000600\U00000308\U00000378", {1536, 888}, {2, 3}}, - {L"\U00000903\U00000020", {2307, 32}, {1, 2}}, - {L"\U00000903\U00000308\U00000020", {2307, 32}, {2, 3}}, - {L"\U00000903\U0000000d", {2307, 13}, {1, 2}}, - {L"\U00000903\U00000308\U0000000d", {2307, 13}, {2, 3}}, - {L"\U00000903\U0000000a", {2307, 10}, {1, 2}}, - {L"\U00000903\U00000308\U0000000a", {2307, 10}, {2, 3}}, - {L"\U00000903\U00000001", {2307, 1}, {1, 2}}, - {L"\U00000903\U00000308\U00000001", {2307, 1}, {2, 3}}, - {L"\U00000903\U0000034f", {2307}, {2}}, - {L"\U00000903\U00000308\U0000034f", {2307}, {3}}, - {L"\U00000903\U0001f1e6", {2307, 127462}, {1, 2}}, - {L"\U00000903\U00000308\U0001f1e6", {2307, 127462}, {2, 3}}, - {L"\U00000903\U00000600", {2307, 1536}, {1, 2}}, - {L"\U00000903\U00000308\U00000600", {2307, 1536}, {2, 3}}, - {L"\U00000903\U00000903", {2307}, {2}}, - {L"\U00000903\U00000308\U00000903", {2307}, {3}}, - {L"\U00000903\U00001100", {2307, 4352}, {1, 2}}, - {L"\U00000903\U00000308\U00001100", {2307, 4352}, {2, 3}}, - {L"\U00000903\U00001160", {2307, 4448}, {1, 2}}, - {L"\U00000903\U00000308\U00001160", {2307, 4448}, {2, 3}}, - {L"\U00000903\U000011a8", {2307, 4520}, {1, 2}}, - {L"\U00000903\U00000308\U000011a8", {2307, 4520}, {2, 3}}, - {L"\U00000903\U0000ac00", {2307, 44032}, {1, 2}}, - {L"\U00000903\U00000308\U0000ac00", {2307, 44032}, {2, 3}}, - {L"\U00000903\U0000ac01", {2307, 44033}, {1, 2}}, - {L"\U00000903\U00000308\U0000ac01", {2307, 44033}, {2, 3}}, - {L"\U00000903\U0000231a", {2307, 8986}, {1, 2}}, - {L"\U00000903\U00000308\U0000231a", {2307, 8986}, {2, 3}}, - {L"\U00000903\U00000300", {2307}, {2}}, - {L"\U00000903\U00000308\U00000300", {2307}, {3}}, - {L"\U00000903\U0000200d", {2307}, {2}}, - {L"\U00000903\U00000308\U0000200d", {2307}, {3}}, - {L"\U00000903\U00000378", {2307, 888}, {1, 2}}, - {L"\U00000903\U00000308\U00000378", {2307, 888}, {2, 3}}, + {L"\U00000a03\U00000020", {2563, 32}, {1, 2}}, + {L"\U00000a03\U00000308\U00000020", {2563, 32}, {2, 3}}, + {L"\U00000a03\U0000000d", {2563, 13}, {1, 2}}, + {L"\U00000a03\U00000308\U0000000d", {2563, 13}, {2, 3}}, + {L"\U00000a03\U0000000a", {2563, 10}, {1, 2}}, + {L"\U00000a03\U00000308\U0000000a", {2563, 10}, {2, 3}}, + {L"\U00000a03\U00000001", {2563, 1}, {1, 2}}, + {L"\U00000a03\U00000308\U00000001", {2563, 1}, {2, 3}}, + {L"\U00000a03\U0000034f", {2563}, {2}}, + {L"\U00000a03\U00000308\U0000034f", {2563}, {3}}, + {L"\U00000a03\U0001f1e6", {2563, 127462}, {1, 2}}, + {L"\U00000a03\U00000308\U0001f1e6", {2563, 127462}, {2, 3}}, + {L"\U00000a03\U00000600", {2563, 1536}, {1, 2}}, + {L"\U00000a03\U00000308\U00000600", {2563, 1536}, {2, 3}}, + {L"\U00000a03\U00000a03", {2563}, {2}}, + {L"\U00000a03\U00000308\U00000a03", {2563}, {3}}, + 
{L"\U00000a03\U00001100", {2563, 4352}, {1, 2}}, + {L"\U00000a03\U00000308\U00001100", {2563, 4352}, {2, 3}}, + {L"\U00000a03\U00001160", {2563, 4448}, {1, 2}}, + {L"\U00000a03\U00000308\U00001160", {2563, 4448}, {2, 3}}, + {L"\U00000a03\U000011a8", {2563, 4520}, {1, 2}}, + {L"\U00000a03\U00000308\U000011a8", {2563, 4520}, {2, 3}}, + {L"\U00000a03\U0000ac00", {2563, 44032}, {1, 2}}, + {L"\U00000a03\U00000308\U0000ac00", {2563, 44032}, {2, 3}}, + {L"\U00000a03\U0000ac01", {2563, 44033}, {1, 2}}, + {L"\U00000a03\U00000308\U0000ac01", {2563, 44033}, {2, 3}}, + {L"\U00000a03\U00000900", {2563}, {2}}, + {L"\U00000a03\U00000308\U00000900", {2563}, {3}}, + {L"\U00000a03\U00000903", {2563}, {2}}, + {L"\U00000a03\U00000308\U00000903", {2563}, {3}}, + {L"\U00000a03\U00000904", {2563, 2308}, {1, 2}}, + {L"\U00000a03\U00000308\U00000904", {2563, 2308}, {2, 3}}, + {L"\U00000a03\U00000d4e", {2563, 3406}, {1, 2}}, + {L"\U00000a03\U00000308\U00000d4e", {2563, 3406}, {2, 3}}, + {L"\U00000a03\U00000915", {2563, 2325}, {1, 2}}, + {L"\U00000a03\U00000308\U00000915", {2563, 2325}, {2, 3}}, + {L"\U00000a03\U0000231a", {2563, 8986}, {1, 2}}, + {L"\U00000a03\U00000308\U0000231a", {2563, 8986}, {2, 3}}, + {L"\U00000a03\U00000300", {2563}, {2}}, + {L"\U00000a03\U00000308\U00000300", {2563}, {3}}, + {L"\U00000a03\U0000093c", {2563}, {2}}, + {L"\U00000a03\U00000308\U0000093c", {2563}, {3}}, + {L"\U00000a03\U0000094d", {2563}, {2}}, + {L"\U00000a03\U00000308\U0000094d", {2563}, {3}}, + {L"\U00000a03\U0000200d", {2563}, {2}}, + {L"\U00000a03\U00000308\U0000200d", {2563}, {3}}, + {L"\U00000a03\U00000378", {2563, 888}, {1, 2}}, + {L"\U00000a03\U00000308\U00000378", {2563, 888}, {2, 3}}, {L"\U00001100\U00000020", {4352, 32}, {1, 2}}, {L"\U00001100\U00000308\U00000020", {4352, 32}, {2, 3}}, {L"\U00001100\U0000000d", {4352, 13}, {1, 2}}, @@ -1588,8 +2870,8 @@ std::array, 602> data_utf32 = {{ {L"\U00001100\U00000308\U0001f1e6", {4352, 127462}, {2, 3}}, {L"\U00001100\U00000600", {4352, 1536}, {1, 2}}, {L"\U00001100\U00000308\U00000600", {4352, 1536}, {2, 3}}, - {L"\U00001100\U00000903", {4352}, {2}}, - {L"\U00001100\U00000308\U00000903", {4352}, {3}}, + {L"\U00001100\U00000a03", {4352}, {2}}, + {L"\U00001100\U00000308\U00000a03", {4352}, {3}}, {L"\U00001100\U00001100", {4352}, {2}}, {L"\U00001100\U00000308\U00001100", {4352, 4352}, {2, 3}}, {L"\U00001100\U00001160", {4352}, {2}}, @@ -1600,10 +2882,24 @@ std::array, 602> data_utf32 = {{ {L"\U00001100\U00000308\U0000ac00", {4352, 44032}, {2, 3}}, {L"\U00001100\U0000ac01", {4352}, {2}}, {L"\U00001100\U00000308\U0000ac01", {4352, 44033}, {2, 3}}, + {L"\U00001100\U00000900", {4352}, {2}}, + {L"\U00001100\U00000308\U00000900", {4352}, {3}}, + {L"\U00001100\U00000903", {4352}, {2}}, + {L"\U00001100\U00000308\U00000903", {4352}, {3}}, + {L"\U00001100\U00000904", {4352, 2308}, {1, 2}}, + {L"\U00001100\U00000308\U00000904", {4352, 2308}, {2, 3}}, + {L"\U00001100\U00000d4e", {4352, 3406}, {1, 2}}, + {L"\U00001100\U00000308\U00000d4e", {4352, 3406}, {2, 3}}, + {L"\U00001100\U00000915", {4352, 2325}, {1, 2}}, + {L"\U00001100\U00000308\U00000915", {4352, 2325}, {2, 3}}, {L"\U00001100\U0000231a", {4352, 8986}, {1, 2}}, {L"\U00001100\U00000308\U0000231a", {4352, 8986}, {2, 3}}, {L"\U00001100\U00000300", {4352}, {2}}, {L"\U00001100\U00000308\U00000300", {4352}, {3}}, + {L"\U00001100\U0000093c", {4352}, {2}}, + {L"\U00001100\U00000308\U0000093c", {4352}, {3}}, + {L"\U00001100\U0000094d", {4352}, {2}}, + {L"\U00001100\U00000308\U0000094d", {4352}, {3}}, {L"\U00001100\U0000200d", {4352}, {2}}, 
{L"\U00001100\U00000308\U0000200d", {4352}, {3}}, {L"\U00001100\U00000378", {4352, 888}, {1, 2}}, @@ -1622,8 +2918,8 @@ std::array, 602> data_utf32 = {{ {L"\U00001160\U00000308\U0001f1e6", {4448, 127462}, {2, 3}}, {L"\U00001160\U00000600", {4448, 1536}, {1, 2}}, {L"\U00001160\U00000308\U00000600", {4448, 1536}, {2, 3}}, - {L"\U00001160\U00000903", {4448}, {2}}, - {L"\U00001160\U00000308\U00000903", {4448}, {3}}, + {L"\U00001160\U00000a03", {4448}, {2}}, + {L"\U00001160\U00000308\U00000a03", {4448}, {3}}, {L"\U00001160\U00001100", {4448, 4352}, {1, 2}}, {L"\U00001160\U00000308\U00001100", {4448, 4352}, {2, 3}}, {L"\U00001160\U00001160", {4448}, {2}}, @@ -1634,10 +2930,24 @@ std::array, 602> data_utf32 = {{ {L"\U00001160\U00000308\U0000ac00", {4448, 44032}, {2, 3}}, {L"\U00001160\U0000ac01", {4448, 44033}, {1, 2}}, {L"\U00001160\U00000308\U0000ac01", {4448, 44033}, {2, 3}}, + {L"\U00001160\U00000900", {4448}, {2}}, + {L"\U00001160\U00000308\U00000900", {4448}, {3}}, + {L"\U00001160\U00000903", {4448}, {2}}, + {L"\U00001160\U00000308\U00000903", {4448}, {3}}, + {L"\U00001160\U00000904", {4448, 2308}, {1, 2}}, + {L"\U00001160\U00000308\U00000904", {4448, 2308}, {2, 3}}, + {L"\U00001160\U00000d4e", {4448, 3406}, {1, 2}}, + {L"\U00001160\U00000308\U00000d4e", {4448, 3406}, {2, 3}}, + {L"\U00001160\U00000915", {4448, 2325}, {1, 2}}, + {L"\U00001160\U00000308\U00000915", {4448, 2325}, {2, 3}}, {L"\U00001160\U0000231a", {4448, 8986}, {1, 2}}, {L"\U00001160\U00000308\U0000231a", {4448, 8986}, {2, 3}}, {L"\U00001160\U00000300", {4448}, {2}}, {L"\U00001160\U00000308\U00000300", {4448}, {3}}, + {L"\U00001160\U0000093c", {4448}, {2}}, + {L"\U00001160\U00000308\U0000093c", {4448}, {3}}, + {L"\U00001160\U0000094d", {4448}, {2}}, + {L"\U00001160\U00000308\U0000094d", {4448}, {3}}, {L"\U00001160\U0000200d", {4448}, {2}}, {L"\U00001160\U00000308\U0000200d", {4448}, {3}}, {L"\U00001160\U00000378", {4448, 888}, {1, 2}}, @@ -1656,8 +2966,8 @@ std::array, 602> data_utf32 = {{ {L"\U000011a8\U00000308\U0001f1e6", {4520, 127462}, {2, 3}}, {L"\U000011a8\U00000600", {4520, 1536}, {1, 2}}, {L"\U000011a8\U00000308\U00000600", {4520, 1536}, {2, 3}}, - {L"\U000011a8\U00000903", {4520}, {2}}, - {L"\U000011a8\U00000308\U00000903", {4520}, {3}}, + {L"\U000011a8\U00000a03", {4520}, {2}}, + {L"\U000011a8\U00000308\U00000a03", {4520}, {3}}, {L"\U000011a8\U00001100", {4520, 4352}, {1, 2}}, {L"\U000011a8\U00000308\U00001100", {4520, 4352}, {2, 3}}, {L"\U000011a8\U00001160", {4520, 4448}, {1, 2}}, @@ -1668,10 +2978,24 @@ std::array, 602> data_utf32 = {{ {L"\U000011a8\U00000308\U0000ac00", {4520, 44032}, {2, 3}}, {L"\U000011a8\U0000ac01", {4520, 44033}, {1, 2}}, {L"\U000011a8\U00000308\U0000ac01", {4520, 44033}, {2, 3}}, + {L"\U000011a8\U00000900", {4520}, {2}}, + {L"\U000011a8\U00000308\U00000900", {4520}, {3}}, + {L"\U000011a8\U00000903", {4520}, {2}}, + {L"\U000011a8\U00000308\U00000903", {4520}, {3}}, + {L"\U000011a8\U00000904", {4520, 2308}, {1, 2}}, + {L"\U000011a8\U00000308\U00000904", {4520, 2308}, {2, 3}}, + {L"\U000011a8\U00000d4e", {4520, 3406}, {1, 2}}, + {L"\U000011a8\U00000308\U00000d4e", {4520, 3406}, {2, 3}}, + {L"\U000011a8\U00000915", {4520, 2325}, {1, 2}}, + {L"\U000011a8\U00000308\U00000915", {4520, 2325}, {2, 3}}, {L"\U000011a8\U0000231a", {4520, 8986}, {1, 2}}, {L"\U000011a8\U00000308\U0000231a", {4520, 8986}, {2, 3}}, {L"\U000011a8\U00000300", {4520}, {2}}, {L"\U000011a8\U00000308\U00000300", {4520}, {3}}, + {L"\U000011a8\U0000093c", {4520}, {2}}, + {L"\U000011a8\U00000308\U0000093c", {4520}, {3}}, + 
{L"\U000011a8\U0000094d", {4520}, {2}}, + {L"\U000011a8\U00000308\U0000094d", {4520}, {3}}, {L"\U000011a8\U0000200d", {4520}, {2}}, {L"\U000011a8\U00000308\U0000200d", {4520}, {3}}, {L"\U000011a8\U00000378", {4520, 888}, {1, 2}}, @@ -1690,8 +3014,8 @@ std::array, 602> data_utf32 = {{ {L"\U0000ac00\U00000308\U0001f1e6", {44032, 127462}, {2, 3}}, {L"\U0000ac00\U00000600", {44032, 1536}, {1, 2}}, {L"\U0000ac00\U00000308\U00000600", {44032, 1536}, {2, 3}}, - {L"\U0000ac00\U00000903", {44032}, {2}}, - {L"\U0000ac00\U00000308\U00000903", {44032}, {3}}, + {L"\U0000ac00\U00000a03", {44032}, {2}}, + {L"\U0000ac00\U00000308\U00000a03", {44032}, {3}}, {L"\U0000ac00\U00001100", {44032, 4352}, {1, 2}}, {L"\U0000ac00\U00000308\U00001100", {44032, 4352}, {2, 3}}, {L"\U0000ac00\U00001160", {44032}, {2}}, @@ -1702,10 +3026,24 @@ std::array, 602> data_utf32 = {{ {L"\U0000ac00\U00000308\U0000ac00", {44032, 44032}, {2, 3}}, {L"\U0000ac00\U0000ac01", {44032, 44033}, {1, 2}}, {L"\U0000ac00\U00000308\U0000ac01", {44032, 44033}, {2, 3}}, + {L"\U0000ac00\U00000900", {44032}, {2}}, + {L"\U0000ac00\U00000308\U00000900", {44032}, {3}}, + {L"\U0000ac00\U00000903", {44032}, {2}}, + {L"\U0000ac00\U00000308\U00000903", {44032}, {3}}, + {L"\U0000ac00\U00000904", {44032, 2308}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000904", {44032, 2308}, {2, 3}}, + {L"\U0000ac00\U00000d4e", {44032, 3406}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000d4e", {44032, 3406}, {2, 3}}, + {L"\U0000ac00\U00000915", {44032, 2325}, {1, 2}}, + {L"\U0000ac00\U00000308\U00000915", {44032, 2325}, {2, 3}}, {L"\U0000ac00\U0000231a", {44032, 8986}, {1, 2}}, {L"\U0000ac00\U00000308\U0000231a", {44032, 8986}, {2, 3}}, {L"\U0000ac00\U00000300", {44032}, {2}}, {L"\U0000ac00\U00000308\U00000300", {44032}, {3}}, + {L"\U0000ac00\U0000093c", {44032}, {2}}, + {L"\U0000ac00\U00000308\U0000093c", {44032}, {3}}, + {L"\U0000ac00\U0000094d", {44032}, {2}}, + {L"\U0000ac00\U00000308\U0000094d", {44032}, {3}}, {L"\U0000ac00\U0000200d", {44032}, {2}}, {L"\U0000ac00\U00000308\U0000200d", {44032}, {3}}, {L"\U0000ac00\U00000378", {44032, 888}, {1, 2}}, @@ -1724,8 +3062,8 @@ std::array, 602> data_utf32 = {{ {L"\U0000ac01\U00000308\U0001f1e6", {44033, 127462}, {2, 3}}, {L"\U0000ac01\U00000600", {44033, 1536}, {1, 2}}, {L"\U0000ac01\U00000308\U00000600", {44033, 1536}, {2, 3}}, - {L"\U0000ac01\U00000903", {44033}, {2}}, - {L"\U0000ac01\U00000308\U00000903", {44033}, {3}}, + {L"\U0000ac01\U00000a03", {44033}, {2}}, + {L"\U0000ac01\U00000308\U00000a03", {44033}, {3}}, {L"\U0000ac01\U00001100", {44033, 4352}, {1, 2}}, {L"\U0000ac01\U00000308\U00001100", {44033, 4352}, {2, 3}}, {L"\U0000ac01\U00001160", {44033, 4448}, {1, 2}}, @@ -1736,14 +3074,268 @@ std::array, 602> data_utf32 = {{ {L"\U0000ac01\U00000308\U0000ac00", {44033, 44032}, {2, 3}}, {L"\U0000ac01\U0000ac01", {44033, 44033}, {1, 2}}, {L"\U0000ac01\U00000308\U0000ac01", {44033, 44033}, {2, 3}}, + {L"\U0000ac01\U00000900", {44033}, {2}}, + {L"\U0000ac01\U00000308\U00000900", {44033}, {3}}, + {L"\U0000ac01\U00000903", {44033}, {2}}, + {L"\U0000ac01\U00000308\U00000903", {44033}, {3}}, + {L"\U0000ac01\U00000904", {44033, 2308}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000904", {44033, 2308}, {2, 3}}, + {L"\U0000ac01\U00000d4e", {44033, 3406}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000d4e", {44033, 3406}, {2, 3}}, + {L"\U0000ac01\U00000915", {44033, 2325}, {1, 2}}, + {L"\U0000ac01\U00000308\U00000915", {44033, 2325}, {2, 3}}, {L"\U0000ac01\U0000231a", {44033, 8986}, {1, 2}}, {L"\U0000ac01\U00000308\U0000231a", {44033, 8986}, {2, 3}}, 
{L"\U0000ac01\U00000300", {44033}, {2}}, {L"\U0000ac01\U00000308\U00000300", {44033}, {3}}, + {L"\U0000ac01\U0000093c", {44033}, {2}}, + {L"\U0000ac01\U00000308\U0000093c", {44033}, {3}}, + {L"\U0000ac01\U0000094d", {44033}, {2}}, + {L"\U0000ac01\U00000308\U0000094d", {44033}, {3}}, {L"\U0000ac01\U0000200d", {44033}, {2}}, {L"\U0000ac01\U00000308\U0000200d", {44033}, {3}}, {L"\U0000ac01\U00000378", {44033, 888}, {1, 2}}, {L"\U0000ac01\U00000308\U00000378", {44033, 888}, {2, 3}}, + {L"\U00000900\U00000020", {2304, 32}, {1, 2}}, + {L"\U00000900\U00000308\U00000020", {2304, 32}, {2, 3}}, + {L"\U00000900\U0000000d", {2304, 13}, {1, 2}}, + {L"\U00000900\U00000308\U0000000d", {2304, 13}, {2, 3}}, + {L"\U00000900\U0000000a", {2304, 10}, {1, 2}}, + {L"\U00000900\U00000308\U0000000a", {2304, 10}, {2, 3}}, + {L"\U00000900\U00000001", {2304, 1}, {1, 2}}, + {L"\U00000900\U00000308\U00000001", {2304, 1}, {2, 3}}, + {L"\U00000900\U0000034f", {2304}, {2}}, + {L"\U00000900\U00000308\U0000034f", {2304}, {3}}, + {L"\U00000900\U0001f1e6", {2304, 127462}, {1, 2}}, + {L"\U00000900\U00000308\U0001f1e6", {2304, 127462}, {2, 3}}, + {L"\U00000900\U00000600", {2304, 1536}, {1, 2}}, + {L"\U00000900\U00000308\U00000600", {2304, 1536}, {2, 3}}, + {L"\U00000900\U00000a03", {2304}, {2}}, + {L"\U00000900\U00000308\U00000a03", {2304}, {3}}, + {L"\U00000900\U00001100", {2304, 4352}, {1, 2}}, + {L"\U00000900\U00000308\U00001100", {2304, 4352}, {2, 3}}, + {L"\U00000900\U00001160", {2304, 4448}, {1, 2}}, + {L"\U00000900\U00000308\U00001160", {2304, 4448}, {2, 3}}, + {L"\U00000900\U000011a8", {2304, 4520}, {1, 2}}, + {L"\U00000900\U00000308\U000011a8", {2304, 4520}, {2, 3}}, + {L"\U00000900\U0000ac00", {2304, 44032}, {1, 2}}, + {L"\U00000900\U00000308\U0000ac00", {2304, 44032}, {2, 3}}, + {L"\U00000900\U0000ac01", {2304, 44033}, {1, 2}}, + {L"\U00000900\U00000308\U0000ac01", {2304, 44033}, {2, 3}}, + {L"\U00000900\U00000900", {2304}, {2}}, + {L"\U00000900\U00000308\U00000900", {2304}, {3}}, + {L"\U00000900\U00000903", {2304}, {2}}, + {L"\U00000900\U00000308\U00000903", {2304}, {3}}, + {L"\U00000900\U00000904", {2304, 2308}, {1, 2}}, + {L"\U00000900\U00000308\U00000904", {2304, 2308}, {2, 3}}, + {L"\U00000900\U00000d4e", {2304, 3406}, {1, 2}}, + {L"\U00000900\U00000308\U00000d4e", {2304, 3406}, {2, 3}}, + {L"\U00000900\U00000915", {2304, 2325}, {1, 2}}, + {L"\U00000900\U00000308\U00000915", {2304, 2325}, {2, 3}}, + {L"\U00000900\U0000231a", {2304, 8986}, {1, 2}}, + {L"\U00000900\U00000308\U0000231a", {2304, 8986}, {2, 3}}, + {L"\U00000900\U00000300", {2304}, {2}}, + {L"\U00000900\U00000308\U00000300", {2304}, {3}}, + {L"\U00000900\U0000093c", {2304}, {2}}, + {L"\U00000900\U00000308\U0000093c", {2304}, {3}}, + {L"\U00000900\U0000094d", {2304}, {2}}, + {L"\U00000900\U00000308\U0000094d", {2304}, {3}}, + {L"\U00000900\U0000200d", {2304}, {2}}, + {L"\U00000900\U00000308\U0000200d", {2304}, {3}}, + {L"\U00000900\U00000378", {2304, 888}, {1, 2}}, + {L"\U00000900\U00000308\U00000378", {2304, 888}, {2, 3}}, + {L"\U00000903\U00000020", {2307, 32}, {1, 2}}, + {L"\U00000903\U00000308\U00000020", {2307, 32}, {2, 3}}, + {L"\U00000903\U0000000d", {2307, 13}, {1, 2}}, + {L"\U00000903\U00000308\U0000000d", {2307, 13}, {2, 3}}, + {L"\U00000903\U0000000a", {2307, 10}, {1, 2}}, + {L"\U00000903\U00000308\U0000000a", {2307, 10}, {2, 3}}, + {L"\U00000903\U00000001", {2307, 1}, {1, 2}}, + {L"\U00000903\U00000308\U00000001", {2307, 1}, {2, 3}}, + {L"\U00000903\U0000034f", {2307}, {2}}, + {L"\U00000903\U00000308\U0000034f", {2307}, {3}}, + 
{L"\U00000903\U0001f1e6", {2307, 127462}, {1, 2}}, + {L"\U00000903\U00000308\U0001f1e6", {2307, 127462}, {2, 3}}, + {L"\U00000903\U00000600", {2307, 1536}, {1, 2}}, + {L"\U00000903\U00000308\U00000600", {2307, 1536}, {2, 3}}, + {L"\U00000903\U00000a03", {2307}, {2}}, + {L"\U00000903\U00000308\U00000a03", {2307}, {3}}, + {L"\U00000903\U00001100", {2307, 4352}, {1, 2}}, + {L"\U00000903\U00000308\U00001100", {2307, 4352}, {2, 3}}, + {L"\U00000903\U00001160", {2307, 4448}, {1, 2}}, + {L"\U00000903\U00000308\U00001160", {2307, 4448}, {2, 3}}, + {L"\U00000903\U000011a8", {2307, 4520}, {1, 2}}, + {L"\U00000903\U00000308\U000011a8", {2307, 4520}, {2, 3}}, + {L"\U00000903\U0000ac00", {2307, 44032}, {1, 2}}, + {L"\U00000903\U00000308\U0000ac00", {2307, 44032}, {2, 3}}, + {L"\U00000903\U0000ac01", {2307, 44033}, {1, 2}}, + {L"\U00000903\U00000308\U0000ac01", {2307, 44033}, {2, 3}}, + {L"\U00000903\U00000900", {2307}, {2}}, + {L"\U00000903\U00000308\U00000900", {2307}, {3}}, + {L"\U00000903\U00000903", {2307}, {2}}, + {L"\U00000903\U00000308\U00000903", {2307}, {3}}, + {L"\U00000903\U00000904", {2307, 2308}, {1, 2}}, + {L"\U00000903\U00000308\U00000904", {2307, 2308}, {2, 3}}, + {L"\U00000903\U00000d4e", {2307, 3406}, {1, 2}}, + {L"\U00000903\U00000308\U00000d4e", {2307, 3406}, {2, 3}}, + {L"\U00000903\U00000915", {2307, 2325}, {1, 2}}, + {L"\U00000903\U00000308\U00000915", {2307, 2325}, {2, 3}}, + {L"\U00000903\U0000231a", {2307, 8986}, {1, 2}}, + {L"\U00000903\U00000308\U0000231a", {2307, 8986}, {2, 3}}, + {L"\U00000903\U00000300", {2307}, {2}}, + {L"\U00000903\U00000308\U00000300", {2307}, {3}}, + {L"\U00000903\U0000093c", {2307}, {2}}, + {L"\U00000903\U00000308\U0000093c", {2307}, {3}}, + {L"\U00000903\U0000094d", {2307}, {2}}, + {L"\U00000903\U00000308\U0000094d", {2307}, {3}}, + {L"\U00000903\U0000200d", {2307}, {2}}, + {L"\U00000903\U00000308\U0000200d", {2307}, {3}}, + {L"\U00000903\U00000378", {2307, 888}, {1, 2}}, + {L"\U00000903\U00000308\U00000378", {2307, 888}, {2, 3}}, + {L"\U00000904\U00000020", {2308, 32}, {1, 2}}, + {L"\U00000904\U00000308\U00000020", {2308, 32}, {2, 3}}, + {L"\U00000904\U0000000d", {2308, 13}, {1, 2}}, + {L"\U00000904\U00000308\U0000000d", {2308, 13}, {2, 3}}, + {L"\U00000904\U0000000a", {2308, 10}, {1, 2}}, + {L"\U00000904\U00000308\U0000000a", {2308, 10}, {2, 3}}, + {L"\U00000904\U00000001", {2308, 1}, {1, 2}}, + {L"\U00000904\U00000308\U00000001", {2308, 1}, {2, 3}}, + {L"\U00000904\U0000034f", {2308}, {2}}, + {L"\U00000904\U00000308\U0000034f", {2308}, {3}}, + {L"\U00000904\U0001f1e6", {2308, 127462}, {1, 2}}, + {L"\U00000904\U00000308\U0001f1e6", {2308, 127462}, {2, 3}}, + {L"\U00000904\U00000600", {2308, 1536}, {1, 2}}, + {L"\U00000904\U00000308\U00000600", {2308, 1536}, {2, 3}}, + {L"\U00000904\U00000a03", {2308}, {2}}, + {L"\U00000904\U00000308\U00000a03", {2308}, {3}}, + {L"\U00000904\U00001100", {2308, 4352}, {1, 2}}, + {L"\U00000904\U00000308\U00001100", {2308, 4352}, {2, 3}}, + {L"\U00000904\U00001160", {2308, 4448}, {1, 2}}, + {L"\U00000904\U00000308\U00001160", {2308, 4448}, {2, 3}}, + {L"\U00000904\U000011a8", {2308, 4520}, {1, 2}}, + {L"\U00000904\U00000308\U000011a8", {2308, 4520}, {2, 3}}, + {L"\U00000904\U0000ac00", {2308, 44032}, {1, 2}}, + {L"\U00000904\U00000308\U0000ac00", {2308, 44032}, {2, 3}}, + {L"\U00000904\U0000ac01", {2308, 44033}, {1, 2}}, + {L"\U00000904\U00000308\U0000ac01", {2308, 44033}, {2, 3}}, + {L"\U00000904\U00000900", {2308}, {2}}, + {L"\U00000904\U00000308\U00000900", {2308}, {3}}, + {L"\U00000904\U00000903", {2308}, {2}}, + 
{L"\U00000904\U00000308\U00000903", {2308}, {3}}, + {L"\U00000904\U00000904", {2308, 2308}, {1, 2}}, + {L"\U00000904\U00000308\U00000904", {2308, 2308}, {2, 3}}, + {L"\U00000904\U00000d4e", {2308, 3406}, {1, 2}}, + {L"\U00000904\U00000308\U00000d4e", {2308, 3406}, {2, 3}}, + {L"\U00000904\U00000915", {2308, 2325}, {1, 2}}, + {L"\U00000904\U00000308\U00000915", {2308, 2325}, {2, 3}}, + {L"\U00000904\U0000231a", {2308, 8986}, {1, 2}}, + {L"\U00000904\U00000308\U0000231a", {2308, 8986}, {2, 3}}, + {L"\U00000904\U00000300", {2308}, {2}}, + {L"\U00000904\U00000308\U00000300", {2308}, {3}}, + {L"\U00000904\U0000093c", {2308}, {2}}, + {L"\U00000904\U00000308\U0000093c", {2308}, {3}}, + {L"\U00000904\U0000094d", {2308}, {2}}, + {L"\U00000904\U00000308\U0000094d", {2308}, {3}}, + {L"\U00000904\U0000200d", {2308}, {2}}, + {L"\U00000904\U00000308\U0000200d", {2308}, {3}}, + {L"\U00000904\U00000378", {2308, 888}, {1, 2}}, + {L"\U00000904\U00000308\U00000378", {2308, 888}, {2, 3}}, + {L"\U00000d4e\U00000020", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000020", {3406, 32}, {2, 3}}, + {L"\U00000d4e\U0000000d", {3406, 13}, {1, 2}}, + {L"\U00000d4e\U00000308\U0000000d", {3406, 13}, {2, 3}}, + {L"\U00000d4e\U0000000a", {3406, 10}, {1, 2}}, + {L"\U00000d4e\U00000308\U0000000a", {3406, 10}, {2, 3}}, + {L"\U00000d4e\U00000001", {3406, 1}, {1, 2}}, + {L"\U00000d4e\U00000308\U00000001", {3406, 1}, {2, 3}}, + {L"\U00000d4e\U0000034f", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000034f", {3406}, {3}}, + {L"\U00000d4e\U0001f1e6", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0001f1e6", {3406, 127462}, {2, 3}}, + {L"\U00000d4e\U00000600", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000600", {3406, 1536}, {2, 3}}, + {L"\U00000d4e\U00000a03", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000a03", {3406}, {3}}, + {L"\U00000d4e\U00001100", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00001100", {3406, 4352}, {2, 3}}, + {L"\U00000d4e\U00001160", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00001160", {3406, 4448}, {2, 3}}, + {L"\U00000d4e\U000011a8", {3406}, {2}}, + {L"\U00000d4e\U00000308\U000011a8", {3406, 4520}, {2, 3}}, + {L"\U00000d4e\U0000ac00", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000ac00", {3406, 44032}, {2, 3}}, + {L"\U00000d4e\U0000ac01", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000ac01", {3406, 44033}, {2, 3}}, + {L"\U00000d4e\U00000900", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000900", {3406}, {3}}, + {L"\U00000d4e\U00000903", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000903", {3406}, {3}}, + {L"\U00000d4e\U00000904", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000904", {3406, 2308}, {2, 3}}, + {L"\U00000d4e\U00000d4e", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000d4e", {3406, 3406}, {2, 3}}, + {L"\U00000d4e\U00000915", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000915", {3406, 2325}, {2, 3}}, + {L"\U00000d4e\U0000231a", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000231a", {3406, 8986}, {2, 3}}, + {L"\U00000d4e\U00000300", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000300", {3406}, {3}}, + {L"\U00000d4e\U0000093c", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000093c", {3406}, {3}}, + {L"\U00000d4e\U0000094d", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000094d", {3406}, {3}}, + {L"\U00000d4e\U0000200d", {3406}, {2}}, + {L"\U00000d4e\U00000308\U0000200d", {3406}, {3}}, + {L"\U00000d4e\U00000378", {3406}, {2}}, + {L"\U00000d4e\U00000308\U00000378", {3406, 888}, {2, 3}}, + {L"\U00000915\U00000020", {2325, 32}, {1, 2}}, + {L"\U00000915\U00000308\U00000020", {2325, 32}, {2, 3}}, + {L"\U00000915\U0000000d", {2325, 13}, {1, 2}}, 
+ {L"\U00000915\U00000308\U0000000d", {2325, 13}, {2, 3}}, + {L"\U00000915\U0000000a", {2325, 10}, {1, 2}}, + {L"\U00000915\U00000308\U0000000a", {2325, 10}, {2, 3}}, + {L"\U00000915\U00000001", {2325, 1}, {1, 2}}, + {L"\U00000915\U00000308\U00000001", {2325, 1}, {2, 3}}, + {L"\U00000915\U0000034f", {2325}, {2}}, + {L"\U00000915\U00000308\U0000034f", {2325}, {3}}, + {L"\U00000915\U0001f1e6", {2325, 127462}, {1, 2}}, + {L"\U00000915\U00000308\U0001f1e6", {2325, 127462}, {2, 3}}, + {L"\U00000915\U00000600", {2325, 1536}, {1, 2}}, + {L"\U00000915\U00000308\U00000600", {2325, 1536}, {2, 3}}, + {L"\U00000915\U00000a03", {2325}, {2}}, + {L"\U00000915\U00000308\U00000a03", {2325}, {3}}, + {L"\U00000915\U00001100", {2325, 4352}, {1, 2}}, + {L"\U00000915\U00000308\U00001100", {2325, 4352}, {2, 3}}, + {L"\U00000915\U00001160", {2325, 4448}, {1, 2}}, + {L"\U00000915\U00000308\U00001160", {2325, 4448}, {2, 3}}, + {L"\U00000915\U000011a8", {2325, 4520}, {1, 2}}, + {L"\U00000915\U00000308\U000011a8", {2325, 4520}, {2, 3}}, + {L"\U00000915\U0000ac00", {2325, 44032}, {1, 2}}, + {L"\U00000915\U00000308\U0000ac00", {2325, 44032}, {2, 3}}, + {L"\U00000915\U0000ac01", {2325, 44033}, {1, 2}}, + {L"\U00000915\U00000308\U0000ac01", {2325, 44033}, {2, 3}}, + {L"\U00000915\U00000900", {2325}, {2}}, + {L"\U00000915\U00000308\U00000900", {2325}, {3}}, + {L"\U00000915\U00000903", {2325}, {2}}, + {L"\U00000915\U00000308\U00000903", {2325}, {3}}, + {L"\U00000915\U00000904", {2325, 2308}, {1, 2}}, + {L"\U00000915\U00000308\U00000904", {2325, 2308}, {2, 3}}, + {L"\U00000915\U00000d4e", {2325, 3406}, {1, 2}}, + {L"\U00000915\U00000308\U00000d4e", {2325, 3406}, {2, 3}}, + {L"\U00000915\U00000915", {2325, 2325}, {1, 2}}, + {L"\U00000915\U00000308\U00000915", {2325, 2325}, {2, 3}}, + {L"\U00000915\U0000231a", {2325, 8986}, {1, 2}}, + {L"\U00000915\U00000308\U0000231a", {2325, 8986}, {2, 3}}, + {L"\U00000915\U00000300", {2325}, {2}}, + {L"\U00000915\U00000308\U00000300", {2325}, {3}}, + {L"\U00000915\U0000093c", {2325}, {2}}, + {L"\U00000915\U00000308\U0000093c", {2325}, {3}}, + {L"\U00000915\U0000094d", {2325}, {2}}, + {L"\U00000915\U00000308\U0000094d", {2325}, {3}}, + {L"\U00000915\U0000200d", {2325}, {2}}, + {L"\U00000915\U00000308\U0000200d", {2325}, {3}}, + {L"\U00000915\U00000378", {2325, 888}, {1, 2}}, + {L"\U00000915\U00000308\U00000378", {2325, 888}, {2, 3}}, {L"\U0000231a\U00000020", {8986, 32}, {1, 2}}, {L"\U0000231a\U00000308\U00000020", {8986, 32}, {2, 3}}, {L"\U0000231a\U0000000d", {8986, 13}, {1, 2}}, @@ -1758,8 +3350,8 @@ std::array, 602> data_utf32 = {{ {L"\U0000231a\U00000308\U0001f1e6", {8986, 127462}, {2, 3}}, {L"\U0000231a\U00000600", {8986, 1536}, {1, 2}}, {L"\U0000231a\U00000308\U00000600", {8986, 1536}, {2, 3}}, - {L"\U0000231a\U00000903", {8986}, {2}}, - {L"\U0000231a\U00000308\U00000903", {8986}, {3}}, + {L"\U0000231a\U00000a03", {8986}, {2}}, + {L"\U0000231a\U00000308\U00000a03", {8986}, {3}}, {L"\U0000231a\U00001100", {8986, 4352}, {1, 2}}, {L"\U0000231a\U00000308\U00001100", {8986, 4352}, {2, 3}}, {L"\U0000231a\U00001160", {8986, 4448}, {1, 2}}, @@ -1770,10 +3362,24 @@ std::array, 602> data_utf32 = {{ {L"\U0000231a\U00000308\U0000ac00", {8986, 44032}, {2, 3}}, {L"\U0000231a\U0000ac01", {8986, 44033}, {1, 2}}, {L"\U0000231a\U00000308\U0000ac01", {8986, 44033}, {2, 3}}, + {L"\U0000231a\U00000900", {8986}, {2}}, + {L"\U0000231a\U00000308\U00000900", {8986}, {3}}, + {L"\U0000231a\U00000903", {8986}, {2}}, + {L"\U0000231a\U00000308\U00000903", {8986}, {3}}, + {L"\U0000231a\U00000904", {8986, 2308}, 
{1, 2}}, + {L"\U0000231a\U00000308\U00000904", {8986, 2308}, {2, 3}}, + {L"\U0000231a\U00000d4e", {8986, 3406}, {1, 2}}, + {L"\U0000231a\U00000308\U00000d4e", {8986, 3406}, {2, 3}}, + {L"\U0000231a\U00000915", {8986, 2325}, {1, 2}}, + {L"\U0000231a\U00000308\U00000915", {8986, 2325}, {2, 3}}, {L"\U0000231a\U0000231a", {8986, 8986}, {1, 2}}, {L"\U0000231a\U00000308\U0000231a", {8986, 8986}, {2, 3}}, {L"\U0000231a\U00000300", {8986}, {2}}, {L"\U0000231a\U00000308\U00000300", {8986}, {3}}, + {L"\U0000231a\U0000093c", {8986}, {2}}, + {L"\U0000231a\U00000308\U0000093c", {8986}, {3}}, + {L"\U0000231a\U0000094d", {8986}, {2}}, + {L"\U0000231a\U00000308\U0000094d", {8986}, {3}}, {L"\U0000231a\U0000200d", {8986}, {2}}, {L"\U0000231a\U00000308\U0000200d", {8986}, {3}}, {L"\U0000231a\U00000378", {8986, 888}, {1, 2}}, @@ -1792,8 +3398,8 @@ std::array, 602> data_utf32 = {{ {L"\U00000300\U00000308\U0001f1e6", {768, 127462}, {2, 3}}, {L"\U00000300\U00000600", {768, 1536}, {1, 2}}, {L"\U00000300\U00000308\U00000600", {768, 1536}, {2, 3}}, - {L"\U00000300\U00000903", {768}, {2}}, - {L"\U00000300\U00000308\U00000903", {768}, {3}}, + {L"\U00000300\U00000a03", {768}, {2}}, + {L"\U00000300\U00000308\U00000a03", {768}, {3}}, {L"\U00000300\U00001100", {768, 4352}, {1, 2}}, {L"\U00000300\U00000308\U00001100", {768, 4352}, {2, 3}}, {L"\U00000300\U00001160", {768, 4448}, {1, 2}}, @@ -1804,14 +3410,124 @@ std::array, 602> data_utf32 = {{ {L"\U00000300\U00000308\U0000ac00", {768, 44032}, {2, 3}}, {L"\U00000300\U0000ac01", {768, 44033}, {1, 2}}, {L"\U00000300\U00000308\U0000ac01", {768, 44033}, {2, 3}}, + {L"\U00000300\U00000900", {768}, {2}}, + {L"\U00000300\U00000308\U00000900", {768}, {3}}, + {L"\U00000300\U00000903", {768}, {2}}, + {L"\U00000300\U00000308\U00000903", {768}, {3}}, + {L"\U00000300\U00000904", {768, 2308}, {1, 2}}, + {L"\U00000300\U00000308\U00000904", {768, 2308}, {2, 3}}, + {L"\U00000300\U00000d4e", {768, 3406}, {1, 2}}, + {L"\U00000300\U00000308\U00000d4e", {768, 3406}, {2, 3}}, + {L"\U00000300\U00000915", {768, 2325}, {1, 2}}, + {L"\U00000300\U00000308\U00000915", {768, 2325}, {2, 3}}, {L"\U00000300\U0000231a", {768, 8986}, {1, 2}}, {L"\U00000300\U00000308\U0000231a", {768, 8986}, {2, 3}}, {L"\U00000300\U00000300", {768}, {2}}, {L"\U00000300\U00000308\U00000300", {768}, {3}}, + {L"\U00000300\U0000093c", {768}, {2}}, + {L"\U00000300\U00000308\U0000093c", {768}, {3}}, + {L"\U00000300\U0000094d", {768}, {2}}, + {L"\U00000300\U00000308\U0000094d", {768}, {3}}, {L"\U00000300\U0000200d", {768}, {2}}, {L"\U00000300\U00000308\U0000200d", {768}, {3}}, {L"\U00000300\U00000378", {768, 888}, {1, 2}}, {L"\U00000300\U00000308\U00000378", {768, 888}, {2, 3}}, + {L"\U0000093c\U00000020", {2364, 32}, {1, 2}}, + {L"\U0000093c\U00000308\U00000020", {2364, 32}, {2, 3}}, + {L"\U0000093c\U0000000d", {2364, 13}, {1, 2}}, + {L"\U0000093c\U00000308\U0000000d", {2364, 13}, {2, 3}}, + {L"\U0000093c\U0000000a", {2364, 10}, {1, 2}}, + {L"\U0000093c\U00000308\U0000000a", {2364, 10}, {2, 3}}, + {L"\U0000093c\U00000001", {2364, 1}, {1, 2}}, + {L"\U0000093c\U00000308\U00000001", {2364, 1}, {2, 3}}, + {L"\U0000093c\U0000034f", {2364}, {2}}, + {L"\U0000093c\U00000308\U0000034f", {2364}, {3}}, + {L"\U0000093c\U0001f1e6", {2364, 127462}, {1, 2}}, + {L"\U0000093c\U00000308\U0001f1e6", {2364, 127462}, {2, 3}}, + {L"\U0000093c\U00000600", {2364, 1536}, {1, 2}}, + {L"\U0000093c\U00000308\U00000600", {2364, 1536}, {2, 3}}, + {L"\U0000093c\U00000a03", {2364}, {2}}, + {L"\U0000093c\U00000308\U00000a03", {2364}, {3}}, + 
{L"\U0000093c\U00001100", {2364, 4352}, {1, 2}}, + {L"\U0000093c\U00000308\U00001100", {2364, 4352}, {2, 3}}, + {L"\U0000093c\U00001160", {2364, 4448}, {1, 2}}, + {L"\U0000093c\U00000308\U00001160", {2364, 4448}, {2, 3}}, + {L"\U0000093c\U000011a8", {2364, 4520}, {1, 2}}, + {L"\U0000093c\U00000308\U000011a8", {2364, 4520}, {2, 3}}, + {L"\U0000093c\U0000ac00", {2364, 44032}, {1, 2}}, + {L"\U0000093c\U00000308\U0000ac00", {2364, 44032}, {2, 3}}, + {L"\U0000093c\U0000ac01", {2364, 44033}, {1, 2}}, + {L"\U0000093c\U00000308\U0000ac01", {2364, 44033}, {2, 3}}, + {L"\U0000093c\U00000900", {2364}, {2}}, + {L"\U0000093c\U00000308\U00000900", {2364}, {3}}, + {L"\U0000093c\U00000903", {2364}, {2}}, + {L"\U0000093c\U00000308\U00000903", {2364}, {3}}, + {L"\U0000093c\U00000904", {2364, 2308}, {1, 2}}, + {L"\U0000093c\U00000308\U00000904", {2364, 2308}, {2, 3}}, + {L"\U0000093c\U00000d4e", {2364, 3406}, {1, 2}}, + {L"\U0000093c\U00000308\U00000d4e", {2364, 3406}, {2, 3}}, + {L"\U0000093c\U00000915", {2364, 2325}, {1, 2}}, + {L"\U0000093c\U00000308\U00000915", {2364, 2325}, {2, 3}}, + {L"\U0000093c\U0000231a", {2364, 8986}, {1, 2}}, + {L"\U0000093c\U00000308\U0000231a", {2364, 8986}, {2, 3}}, + {L"\U0000093c\U00000300", {2364}, {2}}, + {L"\U0000093c\U00000308\U00000300", {2364}, {3}}, + {L"\U0000093c\U0000093c", {2364}, {2}}, + {L"\U0000093c\U00000308\U0000093c", {2364}, {3}}, + {L"\U0000093c\U0000094d", {2364}, {2}}, + {L"\U0000093c\U00000308\U0000094d", {2364}, {3}}, + {L"\U0000093c\U0000200d", {2364}, {2}}, + {L"\U0000093c\U00000308\U0000200d", {2364}, {3}}, + {L"\U0000093c\U00000378", {2364, 888}, {1, 2}}, + {L"\U0000093c\U00000308\U00000378", {2364, 888}, {2, 3}}, + {L"\U0000094d\U00000020", {2381, 32}, {1, 2}}, + {L"\U0000094d\U00000308\U00000020", {2381, 32}, {2, 3}}, + {L"\U0000094d\U0000000d", {2381, 13}, {1, 2}}, + {L"\U0000094d\U00000308\U0000000d", {2381, 13}, {2, 3}}, + {L"\U0000094d\U0000000a", {2381, 10}, {1, 2}}, + {L"\U0000094d\U00000308\U0000000a", {2381, 10}, {2, 3}}, + {L"\U0000094d\U00000001", {2381, 1}, {1, 2}}, + {L"\U0000094d\U00000308\U00000001", {2381, 1}, {2, 3}}, + {L"\U0000094d\U0000034f", {2381}, {2}}, + {L"\U0000094d\U00000308\U0000034f", {2381}, {3}}, + {L"\U0000094d\U0001f1e6", {2381, 127462}, {1, 2}}, + {L"\U0000094d\U00000308\U0001f1e6", {2381, 127462}, {2, 3}}, + {L"\U0000094d\U00000600", {2381, 1536}, {1, 2}}, + {L"\U0000094d\U00000308\U00000600", {2381, 1536}, {2, 3}}, + {L"\U0000094d\U00000a03", {2381}, {2}}, + {L"\U0000094d\U00000308\U00000a03", {2381}, {3}}, + {L"\U0000094d\U00001100", {2381, 4352}, {1, 2}}, + {L"\U0000094d\U00000308\U00001100", {2381, 4352}, {2, 3}}, + {L"\U0000094d\U00001160", {2381, 4448}, {1, 2}}, + {L"\U0000094d\U00000308\U00001160", {2381, 4448}, {2, 3}}, + {L"\U0000094d\U000011a8", {2381, 4520}, {1, 2}}, + {L"\U0000094d\U00000308\U000011a8", {2381, 4520}, {2, 3}}, + {L"\U0000094d\U0000ac00", {2381, 44032}, {1, 2}}, + {L"\U0000094d\U00000308\U0000ac00", {2381, 44032}, {2, 3}}, + {L"\U0000094d\U0000ac01", {2381, 44033}, {1, 2}}, + {L"\U0000094d\U00000308\U0000ac01", {2381, 44033}, {2, 3}}, + {L"\U0000094d\U00000900", {2381}, {2}}, + {L"\U0000094d\U00000308\U00000900", {2381}, {3}}, + {L"\U0000094d\U00000903", {2381}, {2}}, + {L"\U0000094d\U00000308\U00000903", {2381}, {3}}, + {L"\U0000094d\U00000904", {2381, 2308}, {1, 2}}, + {L"\U0000094d\U00000308\U00000904", {2381, 2308}, {2, 3}}, + {L"\U0000094d\U00000d4e", {2381, 3406}, {1, 2}}, + {L"\U0000094d\U00000308\U00000d4e", {2381, 3406}, {2, 3}}, + {L"\U0000094d\U00000915", {2381, 2325}, {1, 
2}}, + {L"\U0000094d\U00000308\U00000915", {2381, 2325}, {2, 3}}, + {L"\U0000094d\U0000231a", {2381, 8986}, {1, 2}}, + {L"\U0000094d\U00000308\U0000231a", {2381, 8986}, {2, 3}}, + {L"\U0000094d\U00000300", {2381}, {2}}, + {L"\U0000094d\U00000308\U00000300", {2381}, {3}}, + {L"\U0000094d\U0000093c", {2381}, {2}}, + {L"\U0000094d\U00000308\U0000093c", {2381}, {3}}, + {L"\U0000094d\U0000094d", {2381}, {2}}, + {L"\U0000094d\U00000308\U0000094d", {2381}, {3}}, + {L"\U0000094d\U0000200d", {2381}, {2}}, + {L"\U0000094d\U00000308\U0000200d", {2381}, {3}}, + {L"\U0000094d\U00000378", {2381, 888}, {1, 2}}, + {L"\U0000094d\U00000308\U00000378", {2381, 888}, {2, 3}}, {L"\U0000200d\U00000020", {8205, 32}, {1, 2}}, {L"\U0000200d\U00000308\U00000020", {8205, 32}, {2, 3}}, {L"\U0000200d\U0000000d", {8205, 13}, {1, 2}}, @@ -1826,8 +3542,8 @@ std::array, 602> data_utf32 = {{ {L"\U0000200d\U00000308\U0001f1e6", {8205, 127462}, {2, 3}}, {L"\U0000200d\U00000600", {8205, 1536}, {1, 2}}, {L"\U0000200d\U00000308\U00000600", {8205, 1536}, {2, 3}}, - {L"\U0000200d\U00000903", {8205}, {2}}, - {L"\U0000200d\U00000308\U00000903", {8205}, {3}}, + {L"\U0000200d\U00000a03", {8205}, {2}}, + {L"\U0000200d\U00000308\U00000a03", {8205}, {3}}, {L"\U0000200d\U00001100", {8205, 4352}, {1, 2}}, {L"\U0000200d\U00000308\U00001100", {8205, 4352}, {2, 3}}, {L"\U0000200d\U00001160", {8205, 4448}, {1, 2}}, @@ -1838,10 +3554,24 @@ std::array, 602> data_utf32 = {{ {L"\U0000200d\U00000308\U0000ac00", {8205, 44032}, {2, 3}}, {L"\U0000200d\U0000ac01", {8205, 44033}, {1, 2}}, {L"\U0000200d\U00000308\U0000ac01", {8205, 44033}, {2, 3}}, + {L"\U0000200d\U00000900", {8205}, {2}}, + {L"\U0000200d\U00000308\U00000900", {8205}, {3}}, + {L"\U0000200d\U00000903", {8205}, {2}}, + {L"\U0000200d\U00000308\U00000903", {8205}, {3}}, + {L"\U0000200d\U00000904", {8205, 2308}, {1, 2}}, + {L"\U0000200d\U00000308\U00000904", {8205, 2308}, {2, 3}}, + {L"\U0000200d\U00000d4e", {8205, 3406}, {1, 2}}, + {L"\U0000200d\U00000308\U00000d4e", {8205, 3406}, {2, 3}}, + {L"\U0000200d\U00000915", {8205, 2325}, {1, 2}}, + {L"\U0000200d\U00000308\U00000915", {8205, 2325}, {2, 3}}, {L"\U0000200d\U0000231a", {8205, 8986}, {1, 2}}, {L"\U0000200d\U00000308\U0000231a", {8205, 8986}, {2, 3}}, {L"\U0000200d\U00000300", {8205}, {2}}, {L"\U0000200d\U00000308\U00000300", {8205}, {3}}, + {L"\U0000200d\U0000093c", {8205}, {2}}, + {L"\U0000200d\U00000308\U0000093c", {8205}, {3}}, + {L"\U0000200d\U0000094d", {8205}, {2}}, + {L"\U0000200d\U00000308\U0000094d", {8205}, {3}}, {L"\U0000200d\U0000200d", {8205}, {2}}, {L"\U0000200d\U00000308\U0000200d", {8205}, {3}}, {L"\U0000200d\U00000378", {8205, 888}, {1, 2}}, @@ -1860,8 +3590,8 @@ std::array, 602> data_utf32 = {{ {L"\U00000378\U00000308\U0001f1e6", {888, 127462}, {2, 3}}, {L"\U00000378\U00000600", {888, 1536}, {1, 2}}, {L"\U00000378\U00000308\U00000600", {888, 1536}, {2, 3}}, - {L"\U00000378\U00000903", {888}, {2}}, - {L"\U00000378\U00000308\U00000903", {888}, {3}}, + {L"\U00000378\U00000a03", {888}, {2}}, + {L"\U00000378\U00000308\U00000a03", {888}, {3}}, {L"\U00000378\U00001100", {888, 4352}, {1, 2}}, {L"\U00000378\U00000308\U00001100", {888, 4352}, {2, 3}}, {L"\U00000378\U00001160", {888, 4448}, {1, 2}}, @@ -1872,10 +3602,24 @@ std::array, 602> data_utf32 = {{ {L"\U00000378\U00000308\U0000ac00", {888, 44032}, {2, 3}}, {L"\U00000378\U0000ac01", {888, 44033}, {1, 2}}, {L"\U00000378\U00000308\U0000ac01", {888, 44033}, {2, 3}}, + {L"\U00000378\U00000900", {888}, {2}}, + {L"\U00000378\U00000308\U00000900", {888}, {3}}, + 
{L"\U00000378\U00000903", {888}, {2}}, + {L"\U00000378\U00000308\U00000903", {888}, {3}}, + {L"\U00000378\U00000904", {888, 2308}, {1, 2}}, + {L"\U00000378\U00000308\U00000904", {888, 2308}, {2, 3}}, + {L"\U00000378\U00000d4e", {888, 3406}, {1, 2}}, + {L"\U00000378\U00000308\U00000d4e", {888, 3406}, {2, 3}}, + {L"\U00000378\U00000915", {888, 2325}, {1, 2}}, + {L"\U00000378\U00000308\U00000915", {888, 2325}, {2, 3}}, {L"\U00000378\U0000231a", {888, 8986}, {1, 2}}, {L"\U00000378\U00000308\U0000231a", {888, 8986}, {2, 3}}, {L"\U00000378\U00000300", {888}, {2}}, {L"\U00000378\U00000308\U00000300", {888}, {3}}, + {L"\U00000378\U0000093c", {888}, {2}}, + {L"\U00000378\U00000308\U0000093c", {888}, {3}}, + {L"\U00000378\U0000094d", {888}, {2}}, + {L"\U00000378\U00000308\U0000094d", {888}, {3}}, {L"\U00000378\U0000200d", {888}, {2}}, {L"\U00000378\U00000308\U0000200d", {888}, {3}}, {L"\U00000378\U00000378", {888, 888}, {1, 2}}, @@ -1903,7 +3647,18 @@ std::array, 602> data_utf32 = {{ {L"\U0001f6d1\U0000200d\U0001f6d1", {128721}, {3}}, {L"\U00000061\U0000200d\U0001f6d1", {97, 128721}, {2, 3}}, {L"\U00002701\U0000200d\U00002701", {9985}, {3}}, - {L"\U00000061\U0000200d\U00002701", {97, 9985}, {2, 3}}}}; + {L"\U00000061\U0000200d\U00002701", {97, 9985}, {2, 3}}, + {L"\U00000915\U00000924", {2325, 2340}, {1, 2}}, + {L"\U00000915\U0000094d\U00000924", {2325}, {3}}, + {L"\U00000915\U0000094d\U0000094d\U00000924", {2325}, {4}}, + {L"\U00000915\U0000094d\U0000200d\U00000924", {2325}, {4}}, + {L"\U00000915\U0000093c\U0000200d\U0000094d\U00000924", {2325}, {5}}, + {L"\U00000915\U0000093c\U0000094d\U0000200d\U00000924", {2325}, {5}}, + {L"\U00000915\U0000094d\U00000924\U0000094d\U0000092f", {2325}, {5}}, + {L"\U00000915\U0000094d\U00000061", {2325, 97}, {2, 3}}, + {L"\U00000061\U0000094d\U00000924", {97, 2340}, {2, 3}}, + {L"\U0000003f\U0000094d\U00000924", {63, 2340}, {2, 3}}, + {L"\U00000915\U0000094d\U0000094d\U00000924", {2325}, {4}}}}; #endif // TEST_HAS_NO_WIDE_CHARACTERS #endif // LIBCXX_TEST_STD_UTILITIES_FORMAT_FORMAT_STRING_FORMAT_STRING_STD_EXTENDED_GRAPHEME_CLUSTER_H diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp index 5f9873b51ac0d..c1fdd1f2098d2 100644 --- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp @@ -53,6 +53,21 @@ static_assert(count_entries(cluster::__property::__LVT) == 10773); static_assert(count_entries(cluster::__property::__ZWJ) == 1); static_assert(count_entries(cluster::__property::__Extended_Pictographic) == 3537); +namespace inCB = std::__indic_conjunct_break; +constexpr int count_entries(inCB::__property property) { + return std::transform_reduce( + std::begin(inCB::__entries), std::end(inCB::__entries), 0, std::plus{}, [property](auto entry) { + if (static_cast(entry & 0b11) != property) + return 0; + + return 1 + static_cast((entry >> 2) & 0b1'1111'1111); + }); +} + +static_assert(count_entries(inCB::__property::__Linker) == 6); +static_assert(count_entries(inCB::__property::__Consonant) == 240); +static_assert(count_entries(inCB::__property::__Extend) == 884); + } // namespace template diff --git a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp 
b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp
index bc75d04740da0..5edf22eaacf31 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/fstream.cons/path.pass.cpp
@@ -15,17 +15,50 @@
 // template <class charT, class traits = char_traits<charT> >
 // class basic_fstream
 
-// explicit basic_fstream(const filesystem::path& s,
-//                        ios_base::openmode mode = ios_base::in|ios_base::out);
+// template <class T>
+// explicit basic_fstream(const T& s, ios_base::openmode mode = ios_base::in | ios_base::out); // Since C++17
+// Constraints: is_same_v<T, filesystem::path> is true
 
 #include <fstream>
 #include <filesystem>
 #include <cassert>
+#include <type_traits>
 
 #include "test_macros.h"
+#include "test_iterators.h"
 #include "platform_support.h"
 
 namespace fs = std::filesystem;
 
+template <class CharT>
+constexpr bool test_non_convert_to_path() {
+  // String types
+  static_assert(!std::is_constructible_v<std::basic_fstream<CharT>, std::basic_string_view<CharT>>);
+  static_assert(!std::is_constructible_v<std::basic_fstream<CharT>, const std::basic_string_view<CharT>>);
+
+  // Char* pointers
+  if constexpr (!std::is_same_v<CharT, char>)
+    static_assert(!std::is_constructible_v<std::basic_fstream<CharT>, const CharT*>);
+
+  // Iterators
+  static_assert(!std::is_convertible_v<cpp17_input_iterator<const CharT*>, std::basic_fstream<CharT>>);
+
+  return true;
+}
+
+static_assert(test_non_convert_to_path<char>());
+
+#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_HAS_OPEN_WITH_WCHAR)
+static_assert(test_non_convert_to_path<wchar_t>());
+#endif // !TEST_HAS_NO_WIDE_CHARACTERS && !TEST_HAS_OPEN_WITH_WCHAR
+
+#ifndef TEST_HAS_NO_CHAR8_T
+static_assert(test_non_convert_to_path<char8_t>());
+#endif // TEST_HAS_NO_CHAR8_T
+
+static_assert(test_non_convert_to_path<char16_t>());
+static_assert(test_non_convert_to_path<char32_t>());
+
 int main(int, char**) {
   fs::path p = get_temp_file_name();
   {
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp
index cfbb8419fe1c5..2f27fd8e6e93d 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp
+++ b/libcxx/test/std/input.output/file.streams/fstreams/ifstream.cons/path.pass.cpp
@@ -17,8 +17,9 @@
 // template <class charT, class traits = char_traits<charT> >
 // class basic_ifstream
 
-// explicit basic_ifstream(const filesystem::path& s,
-//                         ios_base::openmode mode = ios_base::in);
+// template <class T>
+// explicit basic_ifstream(const T& s, ios_base::openmode mode = ios_base::in); // Since C++17
+// Constraints: is_same_v<T, filesystem::path> is true
 
 #include <cassert>
 #include <filesystem>
@@ -26,9 +27,39 @@
 #include <type_traits>
 
 #include "test_macros.h"
+#include "test_iterators.h"
 
 namespace fs = std::filesystem;
 
+template <class CharT>
+constexpr bool test_non_convert_to_path() {
+  // String types
+  static_assert(!std::is_constructible_v<std::basic_ifstream<CharT>, std::basic_string_view<CharT>>);
+  static_assert(!std::is_constructible_v<std::basic_ifstream<CharT>, const std::basic_string_view<CharT>>);
+
+  // Char* pointers
+  if constexpr (!std::is_same_v<CharT, char>)
+    static_assert(!std::is_constructible_v<std::basic_ifstream<CharT>, const CharT*>);
+
+  // Iterators
+  static_assert(!std::is_convertible_v<cpp17_input_iterator<const CharT*>, std::basic_ifstream<CharT>>);
+
+  return true;
+}
+
+static_assert(test_non_convert_to_path<char>());
+
+#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_HAS_OPEN_WITH_WCHAR)
+static_assert(test_non_convert_to_path<wchar_t>());
+#endif // !TEST_HAS_NO_WIDE_CHARACTERS && !TEST_HAS_OPEN_WITH_WCHAR
+
+#ifndef TEST_HAS_NO_CHAR8_T
+static_assert(test_non_convert_to_path<char8_t>());
+#endif // TEST_HAS_NO_CHAR8_T
+
+static_assert(test_non_convert_to_path<char16_t>());
+static_assert(test_non_convert_to_path<char32_t>());
+
 int main(int, char**) {
   {
     fs::path p;
diff --git a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp
index 316ed776a48b5..e55adfd83fc3c 100644
--- a/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp
+++
b/libcxx/test/std/input.output/file.streams/fstreams/ofstream.cons/path.pass.cpp
@@ -15,7 +15,9 @@
 // template <class charT, class traits = char_traits<charT> >
 // class basic_ofstream
 
-// explicit basic_ofstream(const filesystem::path& s, ios_base::openmode mode = ios_base::out);
+// template <class T>
+// explicit basic_ofstream(const T& s, ios_base::openmode mode = ios_base::out); // Since C++17
+// Constraints: is_same_v<T, filesystem::path> is true
 
 #include <fstream>
 #include <filesystem>
@@ -24,9 +26,39 @@
 #include "platform_support.h"
 #include "test_macros.h"
+#include "test_iterators.h"
 
 namespace fs = std::filesystem;
 
+template <class CharT>
+constexpr bool test_non_convert_to_path() {
+  // String types
+  static_assert(!std::is_constructible_v<std::basic_ofstream<CharT>, std::basic_string_view<CharT>>);
+  static_assert(!std::is_constructible_v<std::basic_ofstream<CharT>, const std::basic_string_view<CharT>>);
+
+  // Char* pointers
+  if constexpr (!std::is_same_v<CharT, char>)
+    static_assert(!std::is_constructible_v<std::basic_ofstream<CharT>, const CharT*>);
+
+  // Iterators
+  static_assert(!std::is_convertible_v<cpp17_input_iterator<const CharT*>, std::basic_ofstream<CharT>>);
+
+  return true;
+}
+
+static_assert(test_non_convert_to_path<char>());
+
+#if !defined(TEST_HAS_NO_WIDE_CHARACTERS) && !defined(TEST_HAS_OPEN_WITH_WCHAR)
+static_assert(test_non_convert_to_path<wchar_t>());
+#endif // !TEST_HAS_NO_WIDE_CHARACTERS && !TEST_HAS_OPEN_WITH_WCHAR
+
+#ifndef TEST_HAS_NO_CHAR8_T
+static_assert(test_non_convert_to_path<char8_t>());
+#endif // TEST_HAS_NO_CHAR8_T
+
+static_assert(test_non_convert_to_path<char16_t>());
+static_assert(test_non_convert_to_path<char32_t>());
+
 int main(int, char**) {
   fs::path p = get_temp_file_name();
   {
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.formatted.print/locale-specific_form.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.formatted.print/locale-specific_form.pass.cpp
index 6b62e2f1754de..2e19e38e2ed04 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.formatted.print/locale-specific_form.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.formatted.print/locale-specific_form.pass.cpp
@@ -26,6 +26,7 @@
 //   void print(ostream& os, format_string<Args...> fmt, Args&&... args);
 // template<class... Args>
 //   void println(ostream& os, format_string<Args...> fmt, Args&&... args);
+//   void println(ostream& os); // since C++26
 //
 // void vprint_unicode(ostream& os, string_view fmt, format_args args);
 // void vprint_nonunicode(ostream& os, string_view fmt, format_args args);
@@ -67,7 +68,7 @@ test(std::stringstream& stream, std::string expected, test_format_string(args)...);
   std::string out = stream.str();
   TEST_REQUIRE(out == expected,
@@ -111,6 +112,7 @@ static void test(std::string expected, std::locale loc, test_format_string
   string_type do_truename() const override { return "gültig"; }
   string_type do_falsename() const override { return "ungültig"; }
@@ -2188,12 +2190,47 @@ static void test_floating_point() {
   test_floating_point_default_precision();
 }
 
+static void test_println_blank_line(std::stringstream& stream) {
+  std::string expected{'\n'};
+  stream.str("");
+
+  std::println(stream);
+  std::string out = stream.str();
+  TEST_REQUIRE(out == expected,
+               TEST_WRITE_CONCATENATED("\nExpected output (blank line) ", expected, "\nActual output ", out, '\n'));
+}
+
+static void test_println_blank_line(std::locale loc) {
+  std::stringstream stream;
+  stream.imbue(loc);
+  test_println_blank_line(stream);
+}
+
+static void test_println_blank_line() {
+  std::locale::global(std::locale(LOCALE_en_US_UTF_8));
+  assert(std::locale().name() == LOCALE_en_US_UTF_8);
+  std::stringstream stream;
+  test_println_blank_line(stream);
+
+  std::locale loc = std::locale(std::locale(), new numpunct<char>());
+  std::locale::global(loc);
+  test_println_blank_line(std::locale(LOCALE_en_US_UTF_8));
+
+#ifndef TEST_HAS_NO_UNICODE
+
+  std::locale loc_unicode = std::locale(std::locale(), new numpunct_unicode<char>());
+  test_println_blank_line(loc_unicode);
+
+#endif // TEST_HAS_NO_UNICODE
+}
+
 int main(int, char**) {
   test_bool();
   test_integer();
   test_floating_point<float>();
   test_floating_point<double>();
   test_floating_point<long double>();
+  test_println_blank_line();
 
   return 0;
 }
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.formatted.print/println.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.formatted.print/println.pass.cpp
index 479a3de0a93c8..19a02638a9da1 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.formatted.print/println.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.formatted.print/println.pass.cpp
@@ -17,6 +17,7 @@
 // template<class... Args>
 //   void println(ostream& os, format_string<Args...> fmt, Args&&... args);
+//   void println(ostream& os); // since C++26
 
 // [ostream.formatted.print]/3
 // If the function is vprint_unicode and os is a stream that refers to
@@ -55,8 +56,20 @@ auto test_exception = [](std::string_view, std::string_view, Args
 // The exceptions are tested by other functions that don't use the basic-format-string as fmt argument.
 };
 
+void test_println_blank_line() {
+  std::string expected{'\n'};
+
+  std::stringstream sstr;
+  std::println(sstr);
+
+  std::string out = sstr.str();
+  TEST_REQUIRE(out == expected,
+               TEST_WRITE_CONCATENATED("\nExpected output (blank line) ", expected, "\nActual output ", out, '\n'));
+}
+
 int main(int, char**) {
   print_tests(test_file, test_exception);
+  test_println_blank_line();
 
   return 0;
 }
diff --git a/libcxx/test/std/input.output/iostream.format/print.fun/no_file_description.pass.cpp b/libcxx/test/std/input.output/iostream.format/print.fun/no_file_description.pass.cpp
index f502616b677b7..ffa48c5e745d6 100644
--- a/libcxx/test/std/input.output/iostream.format/print.fun/no_file_description.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/print.fun/no_file_description.pass.cpp
@@ -25,8 +25,10 @@
 // template<class... Args>
 //   void print(FILE* stream, format_string<Args...> fmt, Args&&... args);
+// void println(); // Since C++26
 // template<class... Args>
 //   void println(FILE* stream, format_string<Args...> fmt, Args&&... args);
+// void println(FILE* stream); // Since C++26
 // void vprint_unicode(FILE* stream, string_view fmt, format_args args);
 // void vprint_nonunicode(FILE* stream, string_view fmt, format_args args);
@@ -63,6 +65,20 @@ static void test_println() {
   assert(std::string_view(buffer.data(), pos) == "hello world!\n");
 }
 
+static void test_println_blank_line() {
+  std::array<char, 100> buffer{0};
+
+  FILE* file = fmemopen(buffer.data(), buffer.size(), "wb");
+  assert(file);
+
+  std::println(file);
+  long pos = std::ftell(file);
+  std::fclose(file);
+
+  assert(pos > 0);
+  assert(std::string_view(buffer.data(), pos) == "\n");
+}
+
 static void test_vprint_unicode() {
   std::array<char, 100> buffer{0};
@@ -96,6 +112,7 @@ static void test_vprint_nonunicode() {
 int main(int, char**) {
   test_print();
   test_println();
+  test_println_blank_line();
   test_vprint_unicode();
   test_vprint_nonunicode();
diff --git a/libcxx/test/std/input.output/iostream.format/print.fun/println.blank_line.sh.cpp b/libcxx/test/std/input.output/iostream.format/print.fun/println.blank_line.sh.cpp
new file mode 100644
index 0000000000000..a262c287108a4
--- /dev/null
+++ b/libcxx/test/std/input.output/iostream.format/print.fun/println.blank_line.sh.cpp
@@ -0,0 +1,50 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// UNSUPPORTED: no-filesystem
+// UNSUPPORTED: executor-has-no-bash
+// UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
+
+// FIXME PRINT How to test println on Windows?
+// XFAIL: msvc, target={{.+}}-windows-gnu
+
+// XFAIL: availability-fp_to_chars-missing
+
+// <print>
+
+// void println();
+
+// Testing this properly is quite hard; the function unconditionally
+// writes to stdout. When stdout is redirected to a file it is no longer
+// considered a terminal. The function is a small wrapper around
+//
+// template<class... Args>
+//   void println(FILE* stream, format_string<Args...> fmt, Args&&... args);
+//
+// So do minimal tests for this function and rely on the FILE* overload
+// to do more testing.
+//
+// The testing is based on the testing for std::cout.
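[Editor's note: the comment above relies on the FILE* overload for the real coverage. As an illustrative aside, not part of the patch, a minimal standalone check of the new zero-argument println(FILE*) can capture output without touching stdout by using POSIX fmemopen, the same technique no_file_description.pass.cpp uses above. The buffer size and the plain main-based harness are arbitrary choices for this sketch.]

// Sketch only: assumes a POSIX system (fmemopen) and a C++26 library
// providing the no-format-args println(FILE*) overload added by this patch.
#include <array>
#include <cassert>
#include <cstdio>
#include <print>
#include <string_view>

int main() {
  std::array<char, 16> buffer{};

  // fmemopen turns the in-memory buffer into a FILE*, so nothing is
  // written to the real stdout.
  std::FILE* file = fmemopen(buffer.data(), buffer.size(), "wb");
  assert(file);

  std::println(file); // writes exactly one '\n'
  long pos = std::ftell(file);
  std::fclose(file);

  assert(pos == 1);
  assert(std::string_view(buffer.data(), pos) == "\n");
  return 0;
}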
+ +// TODO PRINT Use lit builtin echo + +// FILE_DEPENDENCIES: echo.sh +// RUN: %{build} +// RUN: %{exec} bash echo.sh -ne "println blank line test: \n" > %t.expected +// RUN: %{exec} "%t.exe" > %t.actual +// RUN: diff -u %t.actual %t.expected + +#include + +int main(int, char**) { + // On some configurations the `diff -u` test fails if we print a single blank line character `\n`, so we print some text first. + std::print("println blank line test: "); + std::println(); + + return 0; +} diff --git a/libcxx/test/std/input.output/iostream.format/print.fun/println.file.pass.cpp b/libcxx/test/std/input.output/iostream.format/print.fun/println.file.pass.cpp index 07272ebb57e5f..2f088e7a7db5f 100644 --- a/libcxx/test/std/input.output/iostream.format/print.fun/println.file.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/print.fun/println.file.pass.cpp @@ -129,6 +129,29 @@ static void test_new_line() { } } +static void test_println_blank_line() { + // Text does newline translation. + { + FILE* file = fopen(filename.c_str(), "w"); + assert(file); + + std::println(file); +#ifndef _WIN32 + assert(std::ftell(file) == 1); +#else + assert(std::ftell(file) == 2); +#endif + } + // Binary no newline translation. + { + FILE* file = fopen(filename.c_str(), "wb"); + assert(file); + + std::println(file); + assert(std::ftell(file) == 1); + } +} + int main(int, char**) { print_tests(test_file, test_exception); @@ -137,6 +160,7 @@ int main(int, char**) { #endif test_read_only(); test_new_line(); + test_println_blank_line(); return 0; } diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp index fa4d9baa28372..aeb09a30b4259 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp @@ -17,7 +17,6 @@ /* Constant Value __cpp_lib_bind_back 202202L [C++23] - 202306L [C++26] __cpp_lib_bind_front 201907L [C++20] 202306L [C++26] __cpp_lib_boyer_moore_searcher 201603L [C++17] @@ -337,17 +336,11 @@ #elif TEST_STD_VER == 23 -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_bind_back -# error "__cpp_lib_bind_back should be defined in c++23" -# endif -# if __cpp_lib_bind_back != 202202L -# error "__cpp_lib_bind_back should have the value 202202L in c++23" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_bind_back -# error "__cpp_lib_bind_back should not be defined because it is unimplemented in libc++!" -# endif +# ifndef __cpp_lib_bind_back +# error "__cpp_lib_bind_back should be defined in c++23" +# endif +# if __cpp_lib_bind_back != 202202L +# error "__cpp_lib_bind_back should have the value 202202L in c++23" # endif # ifndef __cpp_lib_bind_front @@ -447,17 +440,11 @@ #elif TEST_STD_VER > 23 -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_bind_back -# error "__cpp_lib_bind_back should be defined in c++26" -# endif -# if __cpp_lib_bind_back != 202306L -# error "__cpp_lib_bind_back should have the value 202306L in c++26" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_bind_back -# error "__cpp_lib_bind_back should not be defined because it is unimplemented in libc++!" 
-# endif +# ifndef __cpp_lib_bind_back +# error "__cpp_lib_bind_back should be defined in c++26" +# endif +# if __cpp_lib_bind_back != 202202L +# error "__cpp_lib_bind_back should have the value 202202L in c++26" # endif # ifndef __cpp_lib_bind_front diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index 5055786c2d458..3ec548f56cea1 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -39,7 +39,6 @@ __cpp_lib_atomic_wait 201907L [C++20] __cpp_lib_barrier 201907L [C++20] __cpp_lib_bind_back 202202L [C++23] - 202306L [C++26] __cpp_lib_bind_front 201907L [C++20] 202306L [C++26] __cpp_lib_bit_cast 201806L [C++20] @@ -4605,17 +4604,11 @@ # endif # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_bind_back -# error "__cpp_lib_bind_back should be defined in c++23" -# endif -# if __cpp_lib_bind_back != 202202L -# error "__cpp_lib_bind_back should have the value 202202L in c++23" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_bind_back -# error "__cpp_lib_bind_back should not be defined because it is unimplemented in libc++!" -# endif +# ifndef __cpp_lib_bind_back +# error "__cpp_lib_bind_back should be defined in c++23" +# endif +# if __cpp_lib_bind_back != 202202L +# error "__cpp_lib_bind_back should have the value 202202L in c++23" # endif # ifndef __cpp_lib_bind_front @@ -6240,17 +6233,11 @@ # endif # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_bind_back -# error "__cpp_lib_bind_back should be defined in c++26" -# endif -# if __cpp_lib_bind_back != 202306L -# error "__cpp_lib_bind_back should have the value 202306L in c++26" -# endif -# else // _LIBCPP_VERSION -# ifdef __cpp_lib_bind_back -# error "__cpp_lib_bind_back should not be defined because it is unimplemented in libc++!" 
-# endif +# ifndef __cpp_lib_bind_back +# error "__cpp_lib_bind_back should be defined in c++26" +# endif +# if __cpp_lib_bind_back != 202202L +# error "__cpp_lib_bind_back should have the value 202202L in c++26" # endif # ifndef __cpp_lib_bind_front diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp index e99a85a873981..fbc0bb4ace3ac 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/ctor.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wbuffer_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/overflow.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/overflow.pass.cpp index 5a5a3f5a3462b..91ebd9eb9b041 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/overflow.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/overflow.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wbuffer_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp index 926f661a2d44e..28e54cec366e4 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/pbackfail.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wbuffer_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/rdbuf.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/rdbuf.pass.cpp index cdbabafcdf5a7..b53516b4df77b 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/rdbuf.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/rdbuf.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wbuffer_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/seekoff.pass.cpp 
b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/seekoff.pass.cpp index 87d9061dd6df5..1947e811c5536 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/seekoff.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/seekoff.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wbuffer_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/state.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/state.pass.cpp index fe77d9b3bcc85..6ae7c9d66d1ed 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/state.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/state.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wbuffer_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/test.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/test.pass.cpp index 01a112e03e8e9..1a45036215c46 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/test.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/test.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wbuffer_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/underflow.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/underflow.pass.cpp index 0e6f77fff9d1f..65ec28cdd977c 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/underflow.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.buffer/underflow.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wbuffer_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp index 9627d2ca3312b..5585ec86d771a 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS 
-D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wstring_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt.pass.cpp index 76ccd1ac9f6de..90dd81885ac21 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wstring_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp index 454576d8035d3..e55b21169d54b 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_codecvt_state.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wstring_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_copy.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_copy.pass.cpp index 3d7d8c601c1bf..27f878463b97f 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_copy.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_copy.pass.cpp @@ -12,7 +12,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wstring_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_err_string.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_err_string.pass.cpp index e5da324196ff1..937a276af04f5 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_err_string.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/ctor_err_string.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wstring_convert diff --git 
a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/depr.verify.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/depr.verify.cpp index c520d34a23802..f8bd156bdd5f6 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/depr.verify.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/depr.verify.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT + // UNSUPPORTED: c++03, c++11, c++14, c++26 // UNSUPPORTED: no-wide-characters diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp index f745cbc202d62..59939e1cf7c65 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wstring_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp index d7e1989e6a172..f59730c5b29d8 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/state.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wstring_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp index 19ffdd57d2ecc..8c5348477aee1 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp +++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // wstring_convert diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp index 1987a06a9048d..bec4f7a1d8029 100644 --- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp +++ 
b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/types.pass.cpp @@ -8,7 +8,7 @@ // -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // template, diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp new file mode 100644 index 0000000000000..d85c8ba52622a --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// const time_zone* current_zone(); + +#include +#include +#include +#include + +#include "test_macros.h" +#include "assert_macros.h" +#include "concat_macros.h" + +#ifdef _WIN32 +static void set_tz(std::string zone) { + // Note Windows does not have setenv, only putenv + // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/putenv-s-wputenv-s?view=msvc-170 + // Unlike POSIX it does not mention the string of putenv becomes part + // of the environment. + + int status = _putenv_s("TZ", zone.c_str(), 1); + assert(status == 0); +} + +#else +static void set_tz(const std::string& zone) { + int status = setenv("TZ", zone.c_str(), 1); + assert(status == 0); +} +#endif + +static void test_zone(const std::string& zone) { + set_tz(zone); + const std::chrono::time_zone* tz = std::chrono::current_zone(); + assert(tz); + assert(tz->name() == zone); +} + +static void test_link(const std::string& link, std::string_view zone) { + set_tz(link); + const std::chrono::time_zone* tz = std::chrono::current_zone(); + assert(tz); + assert(tz->name() == zone); +} + +int main(int, const char**) { + const std::chrono::time_zone* tz = std::chrono::current_zone(); + // Returns a valid time zone, the value depends on the OS settings. + assert(tz); + // setting the environment to an invalid value returns the value of + // the OS setting. + set_tz("This is not a time zone"); + assert(tz == std::chrono::current_zone()); + + const std::chrono::tzdb& db = std::chrono::get_tzdb(); + for (const auto& zone : db.zones) + test_zone(std::string{zone.name()}); + + for (const auto& link : db.links) + test_link(std::string{link.name()}, link.target()); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp new file mode 100644 index 0000000000000..c3142a86bf9d6 --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp @@ -0,0 +1,62 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// const time_zone* locate_zone(string_view tz_name); + +#include +#include +#include + +#include "test_macros.h" +#include "assert_macros.h" +#include "concat_macros.h" + +static void test_zone(std::string_view zone) { + const std::chrono::time_zone* tz = std::chrono::locate_zone(zone); + assert(tz); + assert(tz->name() == zone); +} + +static void test_link(std::string_view link, std::string_view zone) { + const std::chrono::time_zone* tz = std::chrono::locate_zone(link); + assert(tz); + assert(tz->name() == zone); +} + +static void test_exception([[maybe_unused]] std::string_view zone) { + TEST_VALIDATE_EXCEPTION( + std::runtime_error, + [&]([[maybe_unused]] const std::runtime_error& e) { + std::string_view what{"tzdb: requested time zone not found"}; + TEST_LIBCPP_REQUIRE( + e.what() == what, + TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); + }, + TEST_IGNORE_NODISCARD std::chrono::locate_zone(zone)); +} + +int main(int, const char**) { + const std::chrono::tzdb& db = std::chrono::get_tzdb(); + for (const auto& zone : db.zones) + test_zone(zone.name()); + + for (const auto& link : db.links) + test_link(link.name(), link.target()); + + test_exception("This is not a time zone"); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp new file mode 100644 index 0000000000000..7b4218cc8421b --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp @@ -0,0 +1,79 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// struct tzdb + +// const time_zone* current_zone() const; + +#include +#include +#include +#include + +#include "test_macros.h" +#include "assert_macros.h" +#include "concat_macros.h" + +#ifdef _WIN32 +static void set_tz(std::string zone) { + // Note Windows does not have setenv, only putenv + // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/putenv-s-wputenv-s?view=msvc-170 + // Unlike POSIX it does not mention the string of putenv becomes part + // of the environment. 
+
+  int status = _putenv_s("TZ", zone.c_str());
+  assert(status == 0);
+}
+
+#else
+static void set_tz(const std::string& zone) {
+  int status = setenv("TZ", zone.c_str(), 1);
+  assert(status == 0);
+}
+#endif
+
+static void test_zone(const std::string& zone) {
+  set_tz(zone);
+  const std::chrono::time_zone* tz = std::chrono::get_tzdb().current_zone();
+  assert(tz);
+  assert(tz->name() == zone);
+}
+
+static void test_link(const std::string& link, std::string_view zone) {
+  set_tz(link);
+  const std::chrono::time_zone* tz = std::chrono::get_tzdb().current_zone();
+  assert(tz);
+  assert(tz->name() == zone);
+}
+
+int main(int, const char**) {
+  const std::chrono::time_zone* tz = std::chrono::get_tzdb().current_zone();
+  // Returns a valid time zone; the value depends on the OS settings.
+  assert(tz);
+  // Setting the environment to an invalid value falls back to the
+  // OS setting.
+  set_tz("This is not a time zone");
+  assert(tz == std::chrono::get_tzdb().current_zone());
+
+  const std::chrono::tzdb& db = std::chrono::get_tzdb();
+  for (const auto& zone : db.zones)
+    test_zone(std::string{zone.name()});
+
+  for (const auto& link : db.links)
+    test_link(std::string{link.name()}, link.target());
+
+  return 0;
+}
diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp
new file mode 100644
index 0000000000000..12987f6c89d80
--- /dev/null
+++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp
@@ -0,0 +1,64 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: no-filesystem, no-localization, no-tzdb
+
+// XFAIL: libcpp-has-no-incomplete-tzdb
+// XFAIL: availability-tzdb-missing
+
+// <chrono>
+
+// struct tzdb
+
+// const time_zone* locate_zone(string_view tz_name) const;
+
+#include <cassert>
+#include <chrono>
+#include <stdexcept>
+#include <string_view>
+
+#include "test_macros.h"
+#include "assert_macros.h"
+#include "concat_macros.h"
+
+static void test_zone(std::string_view zone) {
+  const std::chrono::time_zone* tz = std::chrono::get_tzdb().locate_zone(zone);
+  assert(tz);
+  assert(tz->name() == zone);
+}
+
+static void test_link(std::string_view link, std::string_view zone) {
+  const std::chrono::time_zone* tz = std::chrono::get_tzdb().locate_zone(link);
+  assert(tz);
+  assert(tz->name() == zone);
+}
+
+static void test_exception([[maybe_unused]] std::string_view zone) {
+  TEST_VALIDATE_EXCEPTION(
+      std::runtime_error,
+      [&]([[maybe_unused]] const std::runtime_error& e) {
+        std::string_view what{"tzdb: requested time zone not found"};
+        TEST_LIBCPP_REQUIRE(
+            e.what() == what,
+            TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n'));
+      },
+      TEST_IGNORE_NODISCARD std::chrono::get_tzdb().locate_zone(zone));
+}
+
+int main(int, const char**) {
+  const std::chrono::tzdb& db = std::chrono::get_tzdb();
+  for (const auto& zone : db.zones)
+    test_zone(zone.name());
+
+  for (const auto& link : db.links)
+    test_link(link.name(), link.target());
+
+  test_exception("This is not a time zone");
+
+  return 0;
+}
diff --git a/libcxx/test/std/time/time.zone/time.zone.info/time.zone.info.sys/sys_info.members.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.info/time.zone.info.sys/sys_info.members.pass.cpp
new file mode 100644
index 0000000000000..2510792c2280b
--- /dev/null
+++ b/libcxx/test/std/time/time.zone/time.zone.info/time.zone.info.sys/sys_info.members.pass.cpp
@@ -0,0 +1,48 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+// XFAIL: libcpp-has-no-incomplete-tzdb
+
+// <chrono>
+
+// struct sys_info {
+//   sys_seconds begin;
+//   sys_seconds end;
+//   seconds offset;
+//   minutes save;
+//   string abbrev;
+// };
+
+// Validates whether:
+// - The members are present as non-const members.
+// - The struct is an aggregate.
+
+#include <chrono>
+#include <string>
+#include <type_traits>
+
+int main(int, const char**) {
+  static_assert(std::is_aggregate_v<std::chrono::sys_info>);
+
+  std::chrono::sys_info sys_info{
+      .begin  = std::chrono::sys_seconds::min(),
+      .end    = std::chrono::sys_seconds::max(),
+      .offset = std::chrono::seconds(0),
+      .save   = std::chrono::minutes(0),
+      .abbrev = "UTC"};
+
+  [[maybe_unused]] std::chrono::sys_seconds& begin = sys_info.begin;
+  [[maybe_unused]] std::chrono::sys_seconds& end   = sys_info.end;
+  [[maybe_unused]] std::chrono::seconds& offset    = sys_info.offset;
+  [[maybe_unused]] std::chrono::minutes& save      = sys_info.save;
+  [[maybe_unused]] std::string& abbrev             = sys_info.abbrev;
+
+  return 0;
+}
diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp
new file mode 100644
index 0000000000000..2ad408968589e
--- /dev/null
+++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/get_info.sys_time.pass.cpp
@@ -0,0 +1,1374 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: no-filesystem, no-localization, no-tzdb
+
+// XFAIL: libcpp-has-no-incomplete-tzdb
+// XFAIL: availability-tzdb-missing
+
+// <chrono>
+
+// class time_zone;
+
+// template <class _Duration>
+//   sys_info get_info(const sys_time<_Duration>& time) const;
+
+// This test uses the system provided database. This makes the test portable,
+// but may cause failures when the database information changes. Historic data
+// may change if new facts are uncovered, future data may change when regions
+// change their time zone or daylight saving time. To reduce such issues, most
+// tests do not look into the future. All tests list the data on which they
+// are based; this makes debugging failures easier, including checking whether
+// the provided data has changed.
+//
+//
+// The data in the tests can be validated by using the zdump tool. For
+// example
+//   zdump -v Asia/Hong_Kong
+// shows all transitions in the Hong Kong time zone. Or
+//   zdump -c1970,1980 -v Asia/Hong_Kong
+// shows all transitions in Hong Kong between 1970 and 1980.
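+//
+// As an illustration only (not verbatim output; the exact columns vary per
+// zdump version), each transition reported by zdump -v has roughly the shape
+//   <zone>  <UT second before transition> = <local time> <abbrev> isdst=<0|1>
+//   <zone>  <UT second at transition>     = <local time> <abbrev> isdst=<0|1>
+// i.e. a pair of lines: the last second of the old sys_info and the first
+// second of the new one.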
+
+#include <chrono>
+#include <format>
+#include <string>
+#include <string_view>
+
+#include "test_macros.h"
+#include "assert_macros.h"
+#include "concat_macros.h"
+
+/***** ***** HELPERS ***** *****/
+
+[[nodiscard]] static std::chrono::sys_seconds to_sys_seconds(
+    std::chrono::year year,
+    std::chrono::month month,
+    std::chrono::day day,
+    std::chrono::hours h   = std::chrono::hours(0),
+    std::chrono::minutes m = std::chrono::minutes{0},
+    std::chrono::seconds s = std::chrono::seconds{0}) {
+  std::chrono::year_month_day result{year, month, day};
+
+  return std::chrono::time_point_cast<std::chrono::seconds>(static_cast<std::chrono::sys_days>(result)) + h + m + s;
+}
+
+static void assert_equal(const std::chrono::sys_info& lhs, const std::chrono::sys_info& rhs) {
+  TEST_REQUIRE(lhs.begin == rhs.begin,
+               TEST_WRITE_CONCATENATED("\nBegin:\nExpected output ", lhs.begin, "\nActual output ", rhs.begin, '\n'));
+  TEST_REQUIRE(lhs.end == rhs.end,
+               TEST_WRITE_CONCATENATED("\nEnd:\nExpected output ", lhs.end, "\nActual output ", rhs.end, '\n'));
+  TEST_REQUIRE(
+      lhs.offset == rhs.offset,
+      TEST_WRITE_CONCATENATED("\nOffset:\nExpected output ", lhs.offset, "\nActual output ", rhs.offset, '\n'));
+  TEST_REQUIRE(lhs.save == rhs.save,
+               TEST_WRITE_CONCATENATED("\nSave:\nExpected output ", lhs.save, "\nActual output ", rhs.save, '\n'));
+  TEST_REQUIRE(
+      lhs.abbrev == rhs.abbrev,
+      TEST_WRITE_CONCATENATED("\nAbbrev:\nExpected output ", lhs.abbrev, "\nActual output ", rhs.abbrev, '\n'));
+}
+
+static void assert_equal(std::string_view expected, const std::chrono::sys_info& value) {
+  // Note: the output of operator<< is implementation-defined; use this
+  // format to keep the test portable.
+  std::string result = std::format(
+      "[{}, {}) {:%T} {:%Q%q} {}",
+      value.begin,
+      value.end,
+      std::chrono::hh_mm_ss{value.offset},
+      value.save,
+      value.abbrev);
+
+  TEST_REQUIRE(expected == result,
+               TEST_WRITE_CONCATENATED("\nExpected output ", expected, "\nActual output ", result, '\n'));
+}
+
+static void
+assert_range(std::string_view expected, const std::chrono::sys_info& begin, const std::chrono::sys_info& end) {
+  assert_equal(expected, begin);
+  assert_equal(expected, end);
+}
+
+static void assert_cycle(
+    std::string_view expected_1,
+    const std::chrono::sys_info& begin_1,
+    const std::chrono::sys_info& end_1,
+    std::string_view expected_2,
+    const std::chrono::sys_info& begin_2,
+    const std::chrono::sys_info& end_2) {
+  assert_range(expected_1, begin_1, end_1);
+  assert_range(expected_2, begin_2, end_2);
+}
+
+/***** ***** TESTS ***** *****/
+
+static void test_gmt() {
+  // Simple zone always valid, no rule entries, lookup using a link.
+  // L Etc/GMT GMT
+  // Z Etc/GMT 0 - GMT
+
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("GMT");
+
+  assert_equal(
+      std::chrono::sys_info(
+          std::chrono::sys_seconds::min(),
+          std::chrono::sys_seconds::max(),
+          std::chrono::seconds(0),
+          std::chrono::minutes(0),
+          "GMT"),
+      tz->get_info(std::chrono::sys_seconds::min()));
+  assert_equal(
+      std::chrono::sys_info(
+          std::chrono::sys_seconds::min(),
+          std::chrono::sys_seconds::max(),
+          std::chrono::seconds(0),
+          std::chrono::minutes(0),
+          "GMT"),
+      tz->get_info(std::chrono::sys_seconds(std::chrono::seconds{0})));
+
+  assert_equal(
+      std::chrono::sys_info(
+          std::chrono::sys_seconds::min(),
+          std::chrono::sys_seconds::max(),
+          std::chrono::seconds(0),
+          std::chrono::minutes(0),
+          "GMT"),
+      tz->get_info(std::chrono::sys_seconds::max() - std::chrono::seconds{1})); // max is not valid
+}
+
+static void test_durations() {
+  // Doesn't test a location; instead tests whether different duration
+  // specializations work.
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("GMT");
+
+  // Using the GMT zone means every call gives the same result.
+  std::chrono::sys_info expected(
+      std::chrono::sys_seconds::min(),
+      std::chrono::sys_seconds::max(),
+      std::chrono::seconds(0),
+      std::chrono::minutes(0),
+      "GMT");
+
+  assert_equal(expected, tz->get_info(std::chrono::sys_time<std::chrono::nanoseconds>{}));
+  assert_equal(expected, tz->get_info(std::chrono::sys_time<std::chrono::microseconds>{}));
+  assert_equal(expected, tz->get_info(std::chrono::sys_time<std::chrono::milliseconds>{}));
+  assert_equal(expected, tz->get_info(std::chrono::sys_time<std::chrono::seconds>{}));
+  assert_equal(expected, tz->get_info(std::chrono::sys_time<std::chrono::minutes>{}));
+  assert_equal(expected, tz->get_info(std::chrono::sys_time<std::chrono::hours>{}));
+  assert_equal(expected, tz->get_info(std::chrono::sys_time<std::chrono::days>{}));
+  assert_equal(expected, tz->get_info(std::chrono::sys_time<std::chrono::weeks>{}));
+  assert_equal(expected, tz->get_info(std::chrono::sys_time<std::chrono::months>{}));
+  assert_equal(expected, tz->get_info(std::chrono::sys_time<std::chrono::years>{}));
+}
+
+static void test_indian_kerguelen() {
+  // One change, no rules, no DST changes.
+
+  // Z Indian/Kerguelen 0 - -00 1950
+  // 5 - +05
+
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("Indian/Kerguelen");
+
+  std::chrono::sys_seconds transition =
+      to_sys_seconds(std::chrono::year(1950), std::chrono::January, std::chrono::day(1));
+
+  assert_equal(
+      std::chrono::sys_info(
+          std::chrono::sys_seconds::min(), //
+          transition,                      //
+          std::chrono::seconds(0),         //
+          std::chrono::minutes(0),         //
+          "-00"),                          //
+      tz->get_info(std::chrono::sys_seconds::min()));
+
+  assert_equal(
+      std::chrono::sys_info(
+          std::chrono::sys_seconds::min(), //
+          transition,                      //
+          std::chrono::seconds(0),         //
+          std::chrono::minutes(0),         //
+          "-00"),                          //
+      tz->get_info(transition - std::chrono::seconds{1}));
+
+  assert_equal(
+      std::chrono::sys_info(
+          transition,                      //
+          std::chrono::sys_seconds::max(), //
+          std::chrono::hours(5),           //
+          std::chrono::minutes(0),         //
+          "+05"),                          //
+      tz->get_info(transition));
+}
+
+static void test_antarctica_syowa() {
+  // One change, no rules, no DST changes.
+  // This change uses an ON field with a day number.
+  //
+  // There don't seem to be rule-less zones that use last day or a
+  // constrained day.
+
+  // Z Antarctica/Syowa 0 - -00 1957 Ja 29
+  // 3 - +03
+
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("Antarctica/Syowa");
+
+  std::chrono::sys_seconds transition =
+      to_sys_seconds(std::chrono::year(1957), std::chrono::January, std::chrono::day(29));
+
+  assert_equal(
+      std::chrono::sys_info(
+          std::chrono::sys_seconds::min(), //
+          transition,                      //
+          std::chrono::seconds(0),         //
+          std::chrono::minutes(0),         //
+          "-00"),                          //
+      tz->get_info(std::chrono::sys_seconds::min()));
+
+  assert_equal(
+      std::chrono::sys_info(
+          std::chrono::sys_seconds::min(), //
+          transition,                      //
+          std::chrono::seconds(0),         //
+          std::chrono::minutes(0),         //
+          "-00"),                          //
+      tz->get_info(transition - std::chrono::seconds(1)));
+
+  assert_equal(
+      std::chrono::sys_info(
+          transition,                      //
+          std::chrono::sys_seconds::max(), //
+          std::chrono::hours(3),           //
+          std::chrono::minutes(0),         //
+          "+03"),                          //
+      tz->get_info(transition));
+}
+
+static void test_asia_hong_kong() {
+  // A more typical entry: first some hard-coded entries and then at the
+  // end a rules-based entry.
This rule is valid for its entire period + // + // Z Asia/Hong_Kong 7:36:42 - LMT 1904 O 30 0:36:42 + // 8 - HKT 1941 Jun 15 3 + // 8 1 HKST 1941 O 1 4 + // 8 0:30 HKWT 1941 D 25 + // 9 - JST 1945 N 18 2 + // 8 HK HK%sT + // + // R HK 1946 o - Ap 21 0 1 S + // R HK 1946 o - D 1 3:30s 0 - + // R HK 1947 o - Ap 13 3:30s 1 S + // R HK 1947 o - N 30 3:30s 0 - + // R HK 1948 o - May 2 3:30s 1 S + // R HK 1948 1952 - O Su>=28 3:30s 0 - + // R HK 1949 1953 - Ap Su>=1 3:30 1 S + // R HK 1953 1964 - O Su>=31 3:30 0 - + // R HK 1954 1964 - Mar Su>=18 3:30 1 S + // R HK 1965 1976 - Ap Su>=16 3:30 1 S + // R HK 1965 1976 - O Su>=16 3:30 0 - + // R HK 1973 o - D 30 3:30 1 S + // R HK 1979 o - May 13 3:30 1 S + // R HK 1979 o - O 21 3:30 0 - + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("Asia/Hong_Kong"); + + assert_equal( + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1904y, std::chrono::October, 29d, 17h), // 7:36:42 - LMT 1904 O 30 0:36:42 + 7h + 36min + 42s, + 0min, + "LMT"), + tz->get_info(std::chrono::sys_seconds::min())); + + assert_equal( + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1904y, std::chrono::October, 29d, 17h), // 7:36:42 - LMT 1904 O 30 0:36:42 + 7h + 36min + 42s, + 0min, + "LMT"), + tz->get_info(to_sys_seconds(1904y, std::chrono::October, 29d, 16h, 59min, 59s))); + + assert_range("[1904-10-29 17:00:00, 1941-06-14 19:00:00) 08:00:00 0min HKT", // 8 - HKT 1941 Jun 15 3 + tz->get_info(to_sys_seconds(1904y, std::chrono::October, 29d, 17h)), + tz->get_info(to_sys_seconds(1941y, std::chrono::June, 14d, 18h, 59min, 59s))); + + assert_range("[1941-06-14 19:00:00, 1941-09-30 19:00:00) 09:00:00 60min HKST", // 8 1 HKST 1941 O 1 4 + tz->get_info(to_sys_seconds(1941y, std::chrono::June, 14d, 19h)), + tz->get_info(to_sys_seconds(1941y, std::chrono::September, 30d, 18h, 59min, 59s))); + + assert_range("[1941-09-30 19:00:00, 1941-12-24 15:30:00) 08:30:00 30min HKWT", // 8 0:30 HKWT 1941 D 25 + tz->get_info(to_sys_seconds(1941y, std::chrono::September, 30d, 19h)), + tz->get_info(to_sys_seconds(1941y, std::chrono::December, 24d, 15h, 29min, 59s))); + + assert_range("[1941-12-24 15:30:00, 1945-11-17 17:00:00) 09:00:00 0min JST", // 9 - JST 1945 N 18 2 + tz->get_info(to_sys_seconds(1941y, std::chrono::December, 24d, 15h, 30min)), + tz->get_info(to_sys_seconds(1945y, std::chrono::November, 17d, 16h, 59min, 59s))); + + assert_range("[1945-11-17 17:00:00, 1946-04-20 16:00:00) 08:00:00 0min HKT", // 8 HK%sT + tz->get_info(to_sys_seconds(1945y, std::chrono::November, 17d, 17h)), + tz->get_info(to_sys_seconds(1946y, std::chrono::April, 20d, 15h, 59min, 59s))); + + assert_cycle( // 8 HK%sT + "[1946-04-20 16:00:00, 1946-11-30 19:30:00) 09:00:00 60min HKST", + tz->get_info(to_sys_seconds(1946y, std::chrono::April, 20d, 16h)), // 1946 o Ap 21 0 1 S + tz->get_info(to_sys_seconds(1946y, std::chrono::November, 30d, 19h, 29min, 59s)), // 1946 o D 1 3:30s 0 - + "[1946-11-30 19:30:00, 1947-04-12 19:30:00) 08:00:00 0min HKT", + tz->get_info(to_sys_seconds(1946y, std::chrono::November, 30d, 19h, 30min)), // 1946 o D 1 3:30s 0 - + tz->get_info(to_sys_seconds(1947y, std::chrono::April, 12d, 19h, 29min, 59s))); // 1947 o Ap 13 3:30s 1 S + + assert_cycle( // 8 HK%sT + "[1947-04-12 19:30:00, 1947-11-29 19:30:00) 09:00:00 60min HKST", + tz->get_info(to_sys_seconds(1947y, std::chrono::April, 12d, 19h, 30min)), // 1947 o Ap 13 3:30s 1 S + tz->get_info(to_sys_seconds(1947y, std::chrono::November, 
29d, 19h, 29min, 59s)), // 1947 o N 30 3:30s 0 - + "[1947-11-29 19:30:00, 1948-05-01 19:30:00) 08:00:00 0min HKT", + tz->get_info(to_sys_seconds(1947y, std::chrono::November, 29d, 19h, 30min)), // 1947 o N 30 3:30s 0 - + tz->get_info(to_sys_seconds(1948y, std::chrono::May, 1d, 19h, 29min, 59s))); // 1948 o May 2 3:30s 1 S + + assert_cycle( // 8 HK%sT + "[1948-05-01 19:30:00, 1948-10-30 19:30:00) 09:00:00 60min HKST", + tz->get_info(to_sys_seconds(1948y, std::chrono::May, 1d, 19h, 30min)), // 1948 o May 2 3:30s 1 S + tz->get_info(to_sys_seconds(1948y, std::chrono::October, 30d, 19h, 29min, 59s)), // 1948 1952 O Su>=28 3:30s 0 - + "[1948-10-30 19:30:00, 1949-04-02 19:30:00) 08:00:00 0min HKT", + tz->get_info(to_sys_seconds(1948y, std::chrono::October, 30d, 19h, 30min)), // 1948 1952 O Su>=28 3:30s 0 - + tz->get_info(to_sys_seconds(1949y, std::chrono::April, 2d, 19h, 29min, 59s))); // 1949 1953 Ap Su>=1 3:30 1 S + + assert_cycle( // 8 HK%sT + "[1949-04-02 19:30:00, 1949-10-29 19:30:00) 09:00:00 60min HKST", + tz->get_info(to_sys_seconds(1949y, std::chrono::April, 2d, 19h, 30min)), // 1949 1953 Ap Su>=1 3:30 1 S + tz->get_info(to_sys_seconds(1949y, std::chrono::October, 29d, 19h, 29min, 59s)), // 1948 1952 O Su>=28 3:30s 0 + "[1949-10-29 19:30:00, 1950-04-01 19:30:00) 08:00:00 0min HKT", + tz->get_info(to_sys_seconds(1949y, std::chrono::October, 29d, 19h, 30min)), // 1948 1952 O Su>=28 3:30s 0 + tz->get_info(to_sys_seconds(1950y, std::chrono::April, 1d, 19h, 29min, 59s))); // 1949 1953 Ap Su>=1 3:30 1 S + + assert_range( + "[1953-10-31 18:30:00, 1954-03-20 19:30:00) 08:00:00 0min HKT", + tz->get_info(to_sys_seconds(1953y, std::chrono::October, 31d, 18h, 30min)), // 1953 1964 - O Su>=31 3:30 0 - + tz->get_info(to_sys_seconds(1954y, std::chrono::March, 20d, 19h, 29min, 59s))); // 1954 1964 - Mar Su>=18 3:30 1 S + + assert_cycle( // 8 HK%sT + "[1953-04-04 19:30:00, 1953-10-31 18:30:00) 09:00:00 60min HKST", + tz->get_info(to_sys_seconds(1953y, std::chrono::April, 4d, 19h, 30min)), // 1949 1953 Ap Su>=1 3:30 1 S + tz->get_info(to_sys_seconds(1953y, std::chrono::October, 31d, 18h, 29min, 59s)), // 1953 1964 - O Su>=31 3:30 0 - + "[1953-10-31 18:30:00, 1954-03-20 19:30:00) 08:00:00 0min HKT", + tz->get_info(to_sys_seconds(1953y, std::chrono::October, 31d, 18h, 30min)), // 1953 1964 - O Su>=31 3:30 0 - + tz->get_info(to_sys_seconds(1954y, std::chrono::March, 20d, 19h, 29min, 59s))); // 1954 1964 - Mar Su>=18 3:30 1 S + + assert_cycle( // 8 HK%sT + "[1972-04-15 19:30:00, 1972-10-21 18:30:00) 09:00:00 60min HKST", + tz->get_info(to_sys_seconds(1972y, std::chrono::April, 19d, 19h, 30min)), // 1965 1976 - Ap Su>=16 3:30 1 S + tz->get_info(to_sys_seconds(1972y, std::chrono::October, 21d, 18h, 29min, 59s)), // 1965 1976 - O Su>=16 3:30 0 - + "[1972-10-21 18:30:00, 1973-04-21 19:30:00) 08:00:00 0min HKT", + tz->get_info(to_sys_seconds(1972y, std::chrono::October, 21d, 18h, 30min)), // 1965 1976 - O Su>=16 3:30 0 - + tz->get_info(to_sys_seconds(1973y, std::chrono::April, 21d, 19h, 29min, 59s))); // 1965 1976 - Ap Su>=16 3:30 1 S + + assert_range( // 8 HK%sT + "[1973-04-21 19:30:00, 1973-10-20 18:30:00) 09:00:00 60min HKST", + tz->get_info(to_sys_seconds(1973y, std::chrono::April, 21d, 19h, 30min)), // 1965 1976 - Ap Su>=16 3:30 1 S + tz->get_info(to_sys_seconds(1973y, std::chrono::October, 20d, 18h, 29min, 59s))); // 1965 1976 - O Su>=16 3:30 0 - + + assert_range( // 8 HK%sT, test "1973 o - D 30 3:30 1 S" + "[1973-10-20 18:30:00, 1973-12-29 19:30:00) 08:00:00 0min HKT", + tz->get_info(to_sys_seconds(1973y, 
std::chrono::October, 20d, 18h, 30min)), // 1965 1976 - O Su>=16 3:30
+      tz->get_info(to_sys_seconds(1973y, std::chrono::December, 29d, 19h, 29min, 59s))); // 1973 o - D 30 3:30 1 S
+
+  assert_range( // 8 HK%sT
+      "[1973-12-29 19:30:00, 1974-10-19 18:30:00) 09:00:00 60min HKST",
+      tz->get_info(to_sys_seconds(1973y, std::chrono::December, 29d, 19h, 30min)), // 1973 o - D 30 3:30 1 S
+      tz->get_info(to_sys_seconds(1974y, std::chrono::October, 19d, 18h, 29min, 59s))); // 1965 1976 - O Su>=16 3:30
+
+  assert_range( // 8 HK%sT, between 1973 and 1979 no rule is active so falls back to default
+      "[1976-04-17 19:30:00, 1976-10-16 18:30:00) 09:00:00 60min HKST",
+      tz->get_info(to_sys_seconds(1976y, std::chrono::April, 17d, 19h, 30min)), // 1965 1976 - Ap Su>=16 3:30 1 S
+      tz->get_info(to_sys_seconds(1976y, std::chrono::October, 16d, 18h, 29min, 59s))); // 1965 1976 - O Su>=16 3:30 0 -
+
+  assert_range( // 8 HK%sT, between 1973 and 1979 no rule is active so falls back to default
+      "[1976-10-16 18:30:00, 1979-05-12 19:30:00) 08:00:00 0min HKT",
+      tz->get_info(to_sys_seconds(1976y, std::chrono::October, 16d, 18h, 30min)), // 1965 1976 - O Su>=16 3:30 0 -
+      tz->get_info(to_sys_seconds(1979y, std::chrono::May, 12d, 19h, 29min, 59s))); // 1979 o - May 13 3:30 1 S
+
+  assert_range( // 8 HK%sT
+      "[1979-05-12 19:30:00, 1979-10-20 18:30:00) 09:00:00 60min HKST",
+      tz->get_info(to_sys_seconds(1979y, std::chrono::May, 12d, 19h, 30min)), // 1979 o - May 13 3:30 1 S
+      tz->get_info(to_sys_seconds(1979y, std::chrono::October, 20d, 18h, 29min, 59s))); // 1979 o - O 21 3:30 0 -
+
+  assert_equal(
+      std::chrono::sys_info(
+          to_sys_seconds(1979y, std::chrono::October, 20d, 18h, 30min),
+          std::chrono::sys_seconds::max(),
+          8h,
+          std::chrono::minutes(0),
+          "HKT"),
+      tz->get_info(to_sys_seconds(1979y, std::chrono::October, 20d, 18h, 30min)));
+
+  assert_equal(
+      std::chrono::sys_info(
+          to_sys_seconds(1979y, std::chrono::October, 20d, 18h, 30min),
+          std::chrono::sys_seconds::max(),
+          8h,
+          std::chrono::minutes(0),
+          "HKT"),
+      tz->get_info(std::chrono::sys_seconds::max() - std::chrono::seconds{1})); // max is not valid
+}
+
+static void test_europe_berlin() {
+  // A more typical entry: first some hard-coded entries and then at the
+  // end a rules-based entry.
This rule is valid for its entire period + // + + // Z Europe/Berlin 0:53:28 - LMT 1893 Ap + // 1 c CE%sT 1945 May 24 2 + // 1 So CE%sT 1946 + // 1 DE CE%sT 1980 + // 1 E CE%sT + // + // R c 1916 o - Ap 30 23 1 S + // R c 1916 o - O 1 1 0 - + // R c 1917 1918 - Ap M>=15 2s 1 S + // R c 1917 1918 - S M>=15 2s 0 - + // R c 1940 o - Ap 1 2s 1 S + // R c 1942 o - N 2 2s 0 - + // R c 1943 o - Mar 29 2s 1 S + // R c 1943 o - O 4 2s 0 - + // R c 1944 1945 - Ap M>=1 2s 1 S + // R c 1944 o - O 2 2s 0 - + // R c 1945 o - S 16 2s 0 - + // R c 1977 1980 - Ap Su>=1 2s 1 S + // R c 1977 o - S lastSu 2s 0 - + // R c 1978 o - O 1 2s 0 - + // R c 1979 1995 - S lastSu 2s 0 - + // R c 1981 ma - Mar lastSu 2s 1 S + // R c 1996 ma - O lastSu 2s 0 - + // + // R So 1945 o - May 24 2 2 M + // R So 1945 o - S 24 3 1 S + // R So 1945 o - N 18 2s 0 - + // + // R DE 1946 o - Ap 14 2s 1 S + // R DE 1946 o - O 7 2s 0 - + // R DE 1947 1949 - O Su>=1 2s 0 - + // R DE 1947 o - Ap 6 3s 1 S + // R DE 1947 o - May 11 2s 2 M + // R DE 1947 o - Jun 29 3 1 S + // R DE 1948 o - Ap 18 2s 1 S + // R DE 1949 o - Ap 10 2s 1 S + // + // R E 1977 1980 - Ap Su>=1 1u 1 S + // R E 1977 o - S lastSu 1u 0 - + // R E 1978 o - O 1 1u 0 - + // R E 1979 1995 - S lastSu 1u 0 - + // R E 1981 ma - Mar lastSu 1u 1 S + // R E 1996 ma - O lastSu 1u 0 - + // + // Note the European Union decided to stop the seasonal change in + // 2021. In 2023 seasonal changes are still in effect. + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("Europe/Berlin"); + + assert_equal( + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1893y, std::chrono::March, 31d, 23h, 6min, 32s), // 0:53:28 - LMT 1893 Ap + 53min + 28s, + 0min, + "LMT"), + tz->get_info(std::chrono::sys_seconds::min())); + + assert_equal( + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1893y, std::chrono::March, 31d, 23h, 6min, 32s), // 0:53:28 - LMT 1893 Ap + 53min + 28s, + 0min, + "LMT"), + tz->get_info(to_sys_seconds(1893y, std::chrono::March, 31d, 23h, 6min, 31s))); + + assert_range( + // 1 CE%sT before 1916 o - Ap 30 23 1 S + "[1893-03-31 23:06:32, 1916-04-30 22:00:00) 01:00:00 0min CET", + tz->get_info(to_sys_seconds(1893y, std::chrono::March, 31d, 23h, 6min, 32s)), + tz->get_info(to_sys_seconds(1916y, std::chrono::April, 30d, 21h, 59min, 59s))); + + assert_cycle( + // 1 CE%sT + "[1916-04-30 22:00:00, 1916-09-30 23:00:00) 02:00:00 60min CEST", + tz->get_info(to_sys_seconds(1916y, std::chrono::April, 30d, 22h)), // 1916 o - Ap 30 23 1 S + tz->get_info(to_sys_seconds(1916y, std::chrono::September, 30d, 22h, 59min, 59s)), // o - O 1 1 0 - + "[1916-09-30 23:00:00, 1917-04-16 01:00:00) 01:00:00 0min CET", + tz->get_info(to_sys_seconds(1916y, std::chrono::September, 30d, 23h)), // o - O 1 1 0 - + tz->get_info(to_sys_seconds(1917y, std::chrono::April, 16d, 0h, 59min, 59s))); // 1917 1918 - Ap M>=15 2s 1 S + + assert_cycle( + // 1 CE%sT + "[1917-04-16 01:00:00, 1917-09-17 01:00:00) 02:00:00 60min CEST", + tz->get_info(to_sys_seconds(1917y, std::chrono::April, 16d, 1h)), // 1917 1918 Ap M>=15 2s 1 S + tz->get_info(to_sys_seconds(1917y, std::chrono::September, 17d, 0h, 59min, 59s)), // 1917 1918 S M>=15 2s 0 - + "[1917-09-17 01:00:00, 1918-04-15 01:00:00) 01:00:00 0min CET", + tz->get_info(to_sys_seconds(1917y, std::chrono::September, 17d, 1h)), // 1917 1918 S M>=15 2s 0 - + tz->get_info(to_sys_seconds(1918y, std::chrono::April, 15d, 0h, 59min, 59s))); // 1917 1918 Ap M>=15 2s 1 S + + 
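  // A note on the tzdata notation in the comments (summarized from the zic
+  // documentation, as a reading aid for the rules above): the suffix on a
+  // rule's AT field selects the clock used for the transition time; "s"
+  // means local standard time, "u" means universal time, and no suffix
+  // means local wall-clock time.
+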
assert_cycle(
+      // 1 CE%sT (The cycle is more than 1 year)
+      "[1918-04-15 01:00:00, 1918-09-16 01:00:00) 02:00:00 60min CEST",
+      tz->get_info(to_sys_seconds(1918y, std::chrono::April, 15d, 1h)), // 1917 1918 Ap M>=15 2s 1 S
+      tz->get_info(to_sys_seconds(1918y, std::chrono::September, 16d, 0h, 59min, 59s)), // 1917 1918 S M>=15 2s 0 -
+      "[1918-09-16 01:00:00, 1940-04-01 01:00:00) 01:00:00 0min CET",
+      tz->get_info(to_sys_seconds(1918y, std::chrono::September, 16d, 1h)), // 1917 1918 S M>=15 2s 0 -
+      tz->get_info(to_sys_seconds(1940y, std::chrono::April, 1d, 0h, 59min, 59s))); // 1940 o Ap 1 2s 1 S
+
+  assert_cycle(
+      // 1 CE%sT (The cycle is more than 1 year)
+      "[1940-04-01 01:00:00, 1942-11-02 01:00:00) 02:00:00 60min CEST",
+      tz->get_info(to_sys_seconds(1940y, std::chrono::April, 1d, 1h)), // 1940 o Ap 1 2s 1 S
+      tz->get_info(to_sys_seconds(1942y, std::chrono::November, 2d, 0h, 59min, 59s)), // 1942 o N 2 2s 0 -
+      "[1942-11-02 01:00:00, 1943-03-29 01:00:00) 01:00:00 0min CET",
+      tz->get_info(to_sys_seconds(1942y, std::chrono::November, 2d, 1h)), // 1942 o N 2 2s 0 -
+      tz->get_info(to_sys_seconds(1943y, std::chrono::March, 29d, 0h, 59min, 59s))); // 1943 o Mar 29 2s 1 S
+
+  assert_range(
+      // Here the zone changes from c (C-Eur) to So (SovietZone).
+      // The rule c ends on 1945-09-16, but the continuation ends earlier,
+      // at the zone change date/time. There is a tricky part in the time:
+      // "1 c CE%sT" has an offset of 1 hour, and at the moment the rule
+      // ends there is a save of 60 minutes. This means the
+      // local offset to UTC is 2 hours. The rule ends at
+      // 1945-05-24 02:00:00 local time, which is
+      // 1945-05-24 00:00:00 UTC.
+      "[1945-04-02 01:00:00, 1945-05-24 00:00:00) 02:00:00 60min CEST",
+      tz->get_info(to_sys_seconds(1945y, std::chrono::April, 2d, 1h)), // 1 CE%sT & 1945 Ap M>=1 2s 1 S
+      tz->get_info(to_sys_seconds(1945y, std::chrono::May, 23d, 23h, 59min, 59s))); // 1 c CE%sT & 1945 May 24 2
+
+  assert_range( // --
+      "[1945-05-24 00:00:00, 1945-09-24 00:00:00) 03:00:00 120min CEMT",
+      tz->get_info(to_sys_seconds(1945y, std::chrono::May, 24d)), // 1 c CE%sT & 1945 May 24 2
+      tz->get_info(to_sys_seconds(1945y, std::chrono::September, 23d, 23h, 59min, 59s))); // 1945 o S 24 3 1 S
+
+  assert_range(
+      // 1 c CE%sT 1945 May 24 2
+      "[1945-09-24 00:00:00, 1945-11-18 01:00:00) 02:00:00 60min CEST",
+      tz->get_info(to_sys_seconds(1945y, std::chrono::September, 24d)), // 1945 o S 24 3 1 S
+      tz->get_info(to_sys_seconds(1945y, std::chrono::November, 18d, 0h, 59min, 59s))); // 1945 o N 18 2s 0 -
+  assert_range( // --
+      // Merges 2 continuations
+      "[1945-11-18 01:00:00, 1946-04-14 01:00:00) 01:00:00 0min CET",
+      tz->get_info(to_sys_seconds(1945y, std::chrono::November, 18d, 1h)), // 1 c CE%sT & 1945 o N 18 2s 0 -
+      tz->get_info(to_sys_seconds(1946y, std::chrono::April, 14d, 0h, 59min, 59s))); // 1 So CE%sT & 1946 o Ap 14 2s 1 S
+
+  assert_range(
+      // 1 DE CE%sT 1980
+      "[1946-04-14 01:00:00, 1946-10-07 01:00:00) 02:00:00 60min CEST",
+      tz->get_info(to_sys_seconds(1946y, std::chrono::April, 14d, 1h)), // 1946 o Ap 14 2s 1 S
+      tz->get_info(to_sys_seconds(1946y, std::chrono::October, 7d, 0h, 59min, 59s))); // 1946 o O 7 2s 0 -
+
+  // Note 1947 is an interesting year with 4 rules
+  // R DE 1947 1949 - O Su>=1 2s 0 -
+  // R DE 1947 o - Ap 6 3s 1 S
+  // R DE 1947 o - May 11 2s 2 M
+  // R DE 1947 o - Jun 29 3 1 S
+  assert_range(
+      // 1 DE CE%sT 1980
+      "[1946-10-07 01:00:00, 1947-04-06 02:00:00) 01:00:00 0min CET",
+      tz->get_info(to_sys_seconds(1946y, std::chrono::October, 7d, 1h)), // 1946 o O 7 2s 0 -
+
tz->get_info(to_sys_seconds(1947y, std::chrono::April, 6d, 1h, 59min, 59s))); // 1947 o Ap 6 3s 1 S + + assert_range( + // 1 DE CE%sT 1980 + "[1947-04-06 02:00:00, 1947-05-11 01:00:00) 02:00:00 60min CEST", + tz->get_info(to_sys_seconds(1947y, std::chrono::April, 6d, 2h)), // 1947 o Ap 6 3s 1 S + tz->get_info(to_sys_seconds(1947y, std::chrono::May, 11d, 0h, 59min, 59s))); // 1947 o May 11 2s 2 M + + assert_range( + // 1 DE CE%sT 1980 + "[1947-05-11 01:00:00, 1947-06-29 00:00:00) 03:00:00 120min CEMT", + tz->get_info(to_sys_seconds(1947y, std::chrono::May, 11d, 1h)), // 1947 o May 11 2s 2 M + tz->get_info(to_sys_seconds(1947y, std::chrono::June, 28d, 23h, 59min, 59s))); // 1947 o Jun 29 3 1 S + + assert_cycle( + // 1 DE CE%sT 1980 + "[1947-06-29 00:00:00, 1947-10-05 01:00:00) 02:00:00 60min CEST", + tz->get_info(to_sys_seconds(1947y, std::chrono::June, 29d)), // 1947 o Jun 29 3 1 S + tz->get_info(to_sys_seconds(1947y, std::chrono::October, 5d, 0h, 59min, 59s)), // 1947 1949 O Su>=1 2s 0 - + "[1947-10-05 01:00:00, 1948-04-18 01:00:00) 01:00:00 0min CET", + tz->get_info(to_sys_seconds(1947y, std::chrono::October, 5d, 1h)), // 1947 1949 O Su>=1 2s 0 - + tz->get_info(to_sys_seconds(1948y, std::chrono::April, 18d, 0h, 59min, 59s))); // 1948 o Ap 18 2s 1 S + + assert_cycle( + // 1 DE CE%sT 1980 + "[1948-04-18 01:00:00, 1948-10-03 01:00:00) 02:00:00 60min CEST", + tz->get_info(to_sys_seconds(1948y, std::chrono::April, 18d, 1h)), // 1948 o Ap 18 2s 1 S + tz->get_info(to_sys_seconds(1948y, std::chrono::October, 3d, 0h, 59min, 59s)), // 1947 1949 O Su>=1 2s 0 - + "[1948-10-03 01:00:00, 1949-04-10 01:00:00) 01:00:00 0min CET", + tz->get_info(to_sys_seconds(1948y, std::chrono::October, 3d, 1h)), // 1947 1949 O Su>=1 2s 0 - + tz->get_info(to_sys_seconds(1949y, std::chrono::April, 10d, 0h, 59min, 59s))); // 1949 o Ap 10 2s 1 S + + assert_cycle( // Note the end time is in a different continuation. 
+ "[1949-04-10 01:00:00, 1949-10-02 01:00:00) 02:00:00 60min CEST", // 1 DE CE%sT 1980 + tz->get_info(to_sys_seconds(1949y, std::chrono::April, 10d, 1h)), // 1949 o Ap 10 2s 1 S + tz->get_info(to_sys_seconds(1949y, std::chrono::October, 2d, 0h, 59min, 59s)), // 1947 1949 O Su>=1 2s 0 - + "[1949-10-02 01:00:00, 1980-04-06 01:00:00) 01:00:00 0min CET", + tz->get_info(to_sys_seconds(1949y, std::chrono::October, 2d, 1h)), // 1947 1949 O Su>=1 2s 0 - + tz->get_info( // 1 E CE%sT + to_sys_seconds(1980y, std::chrono::April, 6d, 0h, 59min, 59s))); // 1977 1980 Ap Su>=1 1u 1 S + + assert_cycle( + // 1 E CE%sT + "[2020-03-29 01:00:00, 2020-10-25 01:00:00) 02:00:00 60min CEST", + tz->get_info(to_sys_seconds(2020y, std::chrono::March, 29d, 1h)), // 1981 ma Mar lastSu 1u 1 S + tz->get_info(to_sys_seconds(2020y, std::chrono::October, 25d, 0h, 59min, 59s)), // 1996 ma O lastSu 1u 0 - + "[2020-10-25 01:00:00, 2021-03-28 01:00:00) 01:00:00 0min CET", + tz->get_info(to_sys_seconds(2020y, std::chrono::October, 25d, 1h)), // 1996 ma O lastSu 1u 0 - + tz->get_info(to_sys_seconds(2021y, std::chrono::March, 28d, 0h, 59min, 59s))); // 1981 ma Mar lastSu 1u 1 S + + assert_cycle( + // 1 E CE%sT + "[2021-03-28 01:00:00, 2021-10-31 01:00:00) 02:00:00 60min CEST", + tz->get_info(to_sys_seconds(2021y, std::chrono::March, 28d, 1h)), // 1981 ma Mar lastSu 1u 1 S + tz->get_info(to_sys_seconds(2021y, std::chrono::October, 31d, 0h, 59min, 59s)), // 1996 ma O lastSu 1u 0 - + "[2021-10-31 01:00:00, 2022-03-27 01:00:00) 01:00:00 0min CET", + tz->get_info(to_sys_seconds(2021y, std::chrono::October, 31d, 1h)), // 1996 ma O lastSu 1u 0 - + tz->get_info(to_sys_seconds(2022y, std::chrono::March, 27d, 0h, 59min, 59s))); // 1981 ma Mar lastSu 1u 1 S +} + +static void test_america_st_johns() { + // A more typical entry, + // Uses letters both when DST is ative and not and has multiple + // letters. Uses negetive offsets. 
+ // Switches several times between their own and Canadian rules + // Switches the stdoff from -3:30:52 to -3:30 while observing the same rule + + // Z America/St_Johns -3:30:52 - LMT 1884 + // -3:30:52 j N%sT 1918 + // -3:30:52 C N%sT 1919 + // -3:30:52 j N%sT 1935 Mar 30 + // -3:30 j N%sT 1942 May 11 + // -3:30 C N%sT 1946 + // -3:30 j N%sT 2011 N + // -3:30 C N%sT + // + // R j 1917 o - Ap 8 2 1 D + // R j 1917 o - S 17 2 0 S + // R j 1919 o - May 5 23 1 D + // R j 1919 o - Au 12 23 0 S + // R j 1920 1935 - May Su>=1 23 1 D + // R j 1920 1935 - O lastSu 23 0 S + // R j 1936 1941 - May M>=9 0 1 D + // R j 1936 1941 - O M>=2 0 0 S + // R j 1946 1950 - May Su>=8 2 1 D + // R j 1946 1950 - O Su>=2 2 0 S + // R j 1951 1986 - Ap lastSu 2 1 D + // R j 1951 1959 - S lastSu 2 0 S + // R j 1960 1986 - O lastSu 2 0 S + // R j 1987 o - Ap Su>=1 0:1 1 D + // R j 1987 2006 - O lastSu 0:1 0 S + // R j 1988 o - Ap Su>=1 0:1 2 DD + // R j 1989 2006 - Ap Su>=1 0:1 1 D + // R j 2007 2011 - Mar Su>=8 0:1 1 D + // R j 2007 2010 - N Su>=1 0:1 0 S + // + // R C 1918 o - Ap 14 2 1 D + // R C 1918 o - O 27 2 0 S + // R C 1942 o - F 9 2 1 W + // R C 1945 o - Au 14 23u 1 P + // R C 1945 o - S 30 2 0 S + // R C 1974 1986 - Ap lastSu 2 1 D + // R C 1974 2006 - O lastSu 2 0 S + // R C 1987 2006 - Ap Su>=1 2 1 D + // R C 2007 ma - Mar Su>=8 2 1 D + // R C 2007 ma - N Su>=1 2 0 S + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("America/St_Johns"); + + assert_equal( // -- + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1884y, std::chrono::January, 1d, 3h, 30min, 52s), // -3:30:52 - LMT 1884 + -(3h + 30min + 52s), + 0min, + "LMT"), + tz->get_info(std::chrono::sys_seconds::min())); + + assert_equal( // -- + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1884y, std::chrono::January, 1d, 3h, 30min, 52s), // -3:30:52 - LMT 1884 + -(3h + 30min + 52s), + 0min, + "LMT"), + tz->get_info(to_sys_seconds(1884y, std::chrono::January, 1d, 3h, 30min, 51s))); + + assert_range( // -3:30:52 j N%sT 1918 + "[1884-01-01 03:30:52, 1917-04-08 05:30:52) -03:30:52 0min NST", + tz->get_info(to_sys_seconds(1884y, std::chrono::January, 1d, 3h, 30min, 52s)), // no rule active + tz->get_info(to_sys_seconds(1917y, std::chrono::April, 8d, 5h, 30min, 51s))); // 1917 o Ap 8 2 1 D + + assert_range( // -3:30:52 j N%sT 1918 + "[1917-04-08 05:30:52, 1917-09-17 04:30:52) -02:30:52 60min NDT", + tz->get_info(to_sys_seconds(1917y, std::chrono::April, 8d, 5h, 30min, 52s)), // 1917 o Ap 8 2 1 D + tz->get_info(to_sys_seconds(1917y, std::chrono::September, 17d, 4h, 30min, 51s))); // 1917 o S 17 2 0 S + + assert_range("[1917-09-17 04:30:52, 1918-04-14 05:30:52) -03:30:52 0min NST", + tz->get_info( // -3:30:52 j N%sT 1918 + to_sys_seconds(1917y, std::chrono::September, 17d, 4h, 30min, 52s)), // 1917 o S 17 2 0 S + tz->get_info( // -3:30:52 C N%sT 1919 + to_sys_seconds(1918y, std::chrono::April, 14d, 5h, 30min, 51s))); // 1918 o Ap 14 2 1 D + + assert_range( // -3:30:52 C N%sT 1919 + "[1918-04-14 05:30:52, 1918-10-27 04:30:52) -02:30:52 60min NDT", + tz->get_info(to_sys_seconds(1918y, std::chrono::April, 14d, 5h, 30min, 52s)), // 1918 o Ap 14 2 1 D + tz->get_info(to_sys_seconds(1918y, std::chrono::October, 27d, 4h, 30min, 51s))); // 1918 o O 27 2 0 S + + assert_range("[1918-10-27 04:30:52, 1919-05-06 02:30:52) -03:30:52 0min NST", + tz->get_info( // -3:30:52 C N%sT 1919 + to_sys_seconds(1918y, std::chrono::October, 27d, 4h, 30min, 52s)), // 1918 
o O 27 2 0 S
+               tz->get_info( // -3:30:52 j N%sT 1935 Mar 30
+                   to_sys_seconds(1919y, std::chrono::May, 6d, 2h, 30min, 51s))); // 1919 o May 5 23 1 D
+
+  assert_range( // -3:30:52 j N%sT 1935 Mar 30
+      "[1934-10-29 01:30:52, 1935-03-30 03:30:52) -03:30:52 0min NST",
+      tz->get_info(to_sys_seconds(1934y, std::chrono::October, 29d, 1h, 30min, 52s)), // 1920 1935 O lastSu 23 0 S
+      tz->get_info(to_sys_seconds(1935y, std::chrono::March, 30d, 3h, 30min, 51s))); // 1920 1935 May Su>=1 23 1 D
+
+  assert_range( // -3:30 j N%sT 1942 May 11
+      // Changed the stdoff while the same rule remains active.
+      "[1935-03-30 03:30:52, 1935-05-06 02:30:00) -03:30:00 0min NST",
+      tz->get_info(to_sys_seconds(1935y, std::chrono::March, 30d, 3h, 30min, 52s)), // 1920 1935 O lastSu 23 0 S
+      tz->get_info(to_sys_seconds(1935y, std::chrono::May, 6d, 2h, 29min, 59s))); // 1920 1935 May Su>=1 23 1 D
+
+  assert_range( // -3:30 j N%sT 1942 May 11
+      "[1935-05-06 02:30:00, 1935-10-28 01:30:00) -02:30:00 60min NDT",
+      tz->get_info(to_sys_seconds(1935y, std::chrono::May, 6d, 2h, 30min, 0s)), // 1920 1935 May Su>=1 23 1 D
+      tz->get_info(to_sys_seconds(1935y, std::chrono::October, 28d, 1h, 29min, 59s))); // 1920 1935 O lastSu 23 0 S
+
+  assert_range( // -3:30 j N%sT 1942 May 11
+      "[1941-10-06 02:30:00, 1942-05-11 03:30:00) -03:30:00 0min NST",
+      tz->get_info(to_sys_seconds(1941y, std::chrono::October, 6d, 2h, 30min, 0s)), // 1936 1941 O M>=2 0 0 S
+      tz->get_info(to_sys_seconds(1942y, std::chrono::May, 11d, 3h, 29min, 59s))); // 1946 1950 May Su>=8 2 1 D
+
+  assert_range( // -3:30 C N%sT 1946
+      "[1942-05-11 03:30:00, 1945-08-14 23:00:00) -02:30:00 60min NWT",
+      tz->get_info(to_sys_seconds(1942y, std::chrono::May, 11d, 3h, 30min, 0s)), // 1942 o F 9 2 1 W
+      tz->get_info(to_sys_seconds(1945y, std::chrono::August, 14d, 22h, 59min, 59s))); // 1945 o Au 14 23u 1 P
+
+  assert_range( // -3:30 C N%sT 1946
+      "[1945-08-14 23:00:00, 1945-09-30 04:30:00) -02:30:00 60min NPT",
+      tz->get_info(to_sys_seconds(1945y, std::chrono::August, 14d, 23h, 0min, 0s)), // 1945 o Au 14 23u 1 P
+      tz->get_info(to_sys_seconds(1945y, std::chrono::September, 30d, 4h, 29min, 59s))); // 1945 o S 30 2 0 S
+
+  assert_range(
+      "[1945-09-30 04:30:00, 1946-05-12 05:30:00) -03:30:00 0min NST",
+      tz->get_info(
+          to_sys_seconds(1945y, std::chrono::September, 30d, 4h, 30min, 0s)), // -3:30 C N%sT 1946 & 1945 o S 30 2 0 S
+      tz->get_info(to_sys_seconds(
+          1946y, std::chrono::May, 12d, 5h, 29min, 59s))); // -3:30 j N%sT 2011 N & 1946 1950 May Su>=8 2 1 D
+
+  assert_range( // -3:30 j N%sT 2011 N
+      "[1988-04-03 03:31:00, 1988-10-30 01:31:00) -01:30:00 120min NDDT",
+      tz->get_info(to_sys_seconds(1988y, std::chrono::April, 3d, 3h, 31min, 0s)), // 1988 o Ap Su>=1 0:1 2 DD
+      tz->get_info(to_sys_seconds(1988y, std::chrono::October, 30d, 1h, 30min, 59s))); // 1987 2006 O lastSu 0:1 0 S
+
+  assert_range("[2011-03-13 03:31:00, 2011-11-06 04:30:00) -02:30:00 60min NDT",
+               tz->get_info( // -3:30 j N%sT 2011 N
+                   to_sys_seconds(2011y, std::chrono::March, 13d, 3h, 31min, 0s)), // 2007 2011 Mar Su>=8 0:1 1 D
+               tz->get_info( // -3:30 C N%sT
+                   to_sys_seconds(2011y, std::chrono::November, 6d, 4h, 29min, 59s))); // 2007 ma N Su>=1 2 0 S
+}
+
+static void test_get_at_standard_time_universal() {
+  // Z Asia/Barnaul 5:35 - LMT 1919 D 10
+  // ...
+  // 7 R +07/+08 1995 May 28
+  // 6 R +06/+07 2011 Mar 27 2s
+  // ...
+  //
+  // ...
+ // R R 1985 2010 - Mar lastSu 2s 1 S + // R R 1996 2010 - O lastSu 2s 0 - + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("Asia/Barnaul"); + + assert_equal( + std::chrono::sys_info( + to_sys_seconds(2010y, std::chrono::October, 30d, 20h), + to_sys_seconds(2011y, std::chrono::March, 26d, 20h), + 6h, + 0min, + "+06"), + tz->get_info(to_sys_seconds(2010y, std::chrono::October, 31d, 10h))); +} + +static void test_get_at_standard_time_standard() { + // Z Africa/Bissau -1:2:20 - LMT 1912 Ja 1 1u + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("Africa/Bissau"); + + assert_equal( + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + to_sys_seconds(1912y, std::chrono::January, 1d, 1h), + -(1h + 2min + 20s), + 0min, + "LMT"), + tz->get_info(std::chrono::sys_seconds::min())); +} + +static void test_get_at_save_universal() { + // Z America/Tijuana -7:48:4 - LMT 1922 Ja 1 0:11:56 + // -7 - MST 1924 + // -8 - PST 1927 Jun 10 23 + // -7 - MST 1930 N 15 + // -8 - PST 1931 Ap + // -8 1 PDT 1931 S 30 + // -8 - PST 1942 Ap 24 + // -8 1 PWT 1945 Au 14 23u + // ... + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("America/Tijuana"); + + assert_equal( + std::chrono::sys_info( + to_sys_seconds(1942y, std::chrono::April, 24d, 8h), + to_sys_seconds(1945y, std::chrono::August, 14d, 23h), + -7h, + 60min, + "PWT"), + tz->get_info(to_sys_seconds(1942y, std::chrono::April, 24d, 8h))); +} + +static void test_get_at_rule_standard() { + // Z Antarctica/Macquarie 0 - -00 1899 N + // 10 - AEST 1916 O 1 2 + // 10 1 AEDT 1917 F + // 10 AU AE%sT 1919 Ap 1 0s + // ... + // + // R AU 1917 o - Ja 1 2s 1 D + // R AU 1917 o - Mar lastSu 2s 0 S + // R AU 1942 o - Ja 1 2s 1 D + // ... + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("Antarctica/Macquarie"); + + // Another rule where the S propagates? + assert_equal( + std::chrono::sys_info( + to_sys_seconds(1916y, std::chrono::September, 30d, 16h), + to_sys_seconds(1917y, std::chrono::March, 24d, 16h), + 11h, + 60min, + "AEDT"), + tz->get_info(to_sys_seconds(1916y, std::chrono::September, 30d, 16h))); +} + +static void test_get_at_rule_universal() { + // Z America/Nuuk -3:26:56 - LMT 1916 Jul 28 + // -3 - -03 1980 Ap 6 2 + // -3 E -03/-02 2023 O 29 1u + // -2 E -02/-01 + // + // R E 1977 1980 - Ap Su>=1 1u 1 S + // R E 1977 o - S lastSu 1u 0 - + // R E 1978 o - O 1 1u 0 - + // R E 1979 1995 - S lastSu 1u 0 - + // R E 1981 ma - Mar lastSu 1u 1 S + // R E 1996 ma - O lastSu 1u 0 - + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("America/Nuuk"); + + assert_equal( + std::chrono::sys_info( + to_sys_seconds(1980y, std::chrono::April, 6d, 5h), + to_sys_seconds(1980y, std::chrono::September, 28d, 1h), + -2h, + 60min, + "-02"), + tz->get_info(to_sys_seconds(1980y, std::chrono::April, 6d, 5h))); +} + +static void test_format_with_alternatives_west() { + // Z America/Nuuk -3:26:56 - LMT 1916 Jul 28 + // -3 - -03 1980 Ap 6 2 + // -3 E -03/-02 2023 O 29 1u + // -2 E -02/-01 + // + // ... 
+ // R E 1981 ma - Mar lastSu 1u 1 S + // R E 1996 ma - O lastSu 1u 0 - + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("America/Nuuk"); + + assert_cycle( // -3 E -03/-02 + "[2019-10-27 01:00:00, 2020-03-29 01:00:00) -03:00:00 0min -03", + tz->get_info(to_sys_seconds(2019y, std::chrono::October, 27d, 1h)), // 1981 ma Mar lastSu 1u 1 S + tz->get_info(to_sys_seconds(2020y, std::chrono::March, 29d, 0h, 59min, 59s)), // 1996 ma O lastSu 1u 0 - + "[2020-03-29 01:00:00, 2020-10-25 01:00:00) -02:00:00 60min -02", + tz->get_info(to_sys_seconds(2020y, std::chrono::March, 29d, 1h)), // 1996 ma O lastSu 1u 0 - + tz->get_info(to_sys_seconds(2020y, std::chrono::October, 25d, 0h, 59min, 59s))); // 1981 ma Mar lastSu 1u 1 S +} + +static void test_format_with_alternatives_east() { + // Z Asia/Barnaul 5:35 - LMT 1919 D 10 + // ... + // 6 R +06/+07 2011 Mar 27 2s + // ... + // + // ... + // R R 1985 2010 - Mar lastSu 2s 1 S + // R R 1996 2010 - O lastSu 2s 0 - + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("Asia/Barnaul"); + + assert_cycle( // 6 R +06/+07 2011 Mar 27 2s + "[2000-03-25 20:00:00, 2000-10-28 20:00:00) 07:00:00 60min +07", + tz->get_info(to_sys_seconds(2000y, std::chrono::March, 25d, 20h)), // 1985 2010 Mar lastSu 2s 1 S + tz->get_info(to_sys_seconds(2000y, std::chrono::October, 28d, 19h, 59min, 59s)), // 1996 2010 O lastSu 2s 0 - + "[2000-10-28 20:00:00, 2001-03-24 20:00:00) 06:00:00 0min +06", + tz->get_info(to_sys_seconds(2000y, std::chrono::October, 28d, 20h)), // 1996 2010 O lastSu 2s 0 - + tz->get_info(to_sys_seconds(2001y, std::chrono::March, 24d, 19h, 59min, 59s))); // 1985 2010 Mar lastSu 2s 1 S +} + +static void test_africa_algiers() { + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("Africa/Algiers"); + + assert_equal( + std::chrono::sys_info( + to_sys_seconds(1977y, std::chrono::October, 20d, 23h), + to_sys_seconds(1978y, std::chrono::March, 24d), + 1h, + std::chrono::minutes(0), + "CET"), + tz->get_info(to_sys_seconds(1977y, std::chrono::October, 20d, 23h))); + + assert_range("[1977-05-06 00:00:00, 1977-10-20 23:00:00) 01:00:00 60min WEST", // 0 d WE%sT 1977 O 21 + tz->get_info(to_sys_seconds(1977y, std::chrono::May, 6d)), + tz->get_info(to_sys_seconds(1977y, std::chrono::October, 20d, 22h, 59min, 59s))); + + assert_range("[1977-10-20 23:00:00, 1978-03-24 00:00:00) 01:00:00 0min CET", // 1 d CE%sT 1979 O 26 + tz->get_info(to_sys_seconds(1977y, std::chrono::October, 20d, 23h)), + tz->get_info(to_sys_seconds(1978y, std::chrono::March, 23d, 23h, 59min, 59s))); +} + +static void test_africa_casablanca() { + // Z Africa/Casablanca -0:30:20 - LMT 1913 O 26 + // 0 M +00/+01 1984 Mar 16 + // 1 - +01 1986 + // 0 M +00/+01 2018 O 28 3 + // 1 M +01/+00 + // + // ... + // R M 2013 2018 - O lastSu 3 0 - + // R M 2014 2018 - Mar lastSu 2 1 - + // R M 2014 o - Jun 28 3 0 - + // R M 2014 o - Au 2 2 1 - + // R M 2015 o - Jun 14 3 0 - + // R M 2015 o - Jul 19 2 1 - + // R M 2016 o - Jun 5 3 0 - + // R M 2016 o - Jul 10 2 1 - + // R M 2017 o - May 21 3 0 - + // R M 2017 o - Jul 2 2 1 - + // R M 2018 o - May 13 3 0 - + // R M 2018 o - Jun 17 2 1 - + // R M 2019 o - May 5 3 -1 - + // R M 2019 o - Jun 9 2 0 - + // R M 2020 o - Ap 19 3 -1 - + // ... 
+
+  using namespace std::literals::chrono_literals;
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("Africa/Casablanca");
+
+  assert_range("[2018-06-17 02:00:00, 2018-10-28 02:00:00) 01:00:00 60min +01",
+               tz->get_info(to_sys_seconds(2018y, std::chrono::June, 17d, 2h)),
+               tz->get_info(to_sys_seconds(2018y, std::chrono::October, 28d, 1h, 59min, 59s)));
+
+  assert_range("[2018-10-28 02:00:00, 2019-05-05 02:00:00) 01:00:00 0min +01",
+               tz->get_info( // 1 M +01/+00 & R M 2018 o - Jun 17 2 1 -
+                   to_sys_seconds(2018y, std::chrono::October, 28d, 2h)),
+               tz->get_info( // 1 M +01/+00 & R M 2019 o - May 5 3 -1 -
+                   to_sys_seconds(2019y, std::chrono::May, 5d, 1h, 59min, 59s)));
+
+  // 1 M +01/+00
+  // Note the SAVE contains a negative value
+  assert_range("[2019-05-05 02:00:00, 2019-06-09 02:00:00) 00:00:00 -60min +00",
+               tz->get_info(to_sys_seconds(2019y, std::chrono::May, 5d, 2h)), // R M 2019 o - May 5 3 -1 -
+               tz->get_info(to_sys_seconds(2019y, std::chrono::June, 9d, 1h, 59min, 59s))); // R M 2019 o - Jun 9 2 0 -
+
+  assert_range("[2019-06-09 02:00:00, 2020-04-19 02:00:00) 01:00:00 0min +01",
+               tz->get_info( // 1 M +01/+00 & R M 2019 o - Jun 9 2 0 -
+                   to_sys_seconds(2019y, std::chrono::June, 9d, 2h)),
+               tz->get_info( // 1 M +01/+00 & R M 2020 o - Ap 19 3 -1 -
+                   to_sys_seconds(2020y, std::chrono::April, 19d, 1h, 59min, 59s))); //
+}
+
+static void test_africa_ceuta() {
+  // Z Africa/Ceuta -0:21:16 - LMT 1900 D 31 23:38:44
+  // 0 - WET 1918 May 6 23
+  // 0 1 WEST 1918 O 7 23
+  // 0 - WET 1924
+  // 0 s WE%sT 1929
+  // 0 - WET 1967
+  // 0 Sp WE%sT 1984 Mar 16
+  // 1 - CET 1986
+  // 1 E CE%sT
+  //
+  // ...
+  // R s 1926 o - Ap 17 23 1 S
+  // R s 1926 1929 - O Sa>=1 24s 0 -
+  // R s 1927 o - Ap 9 23 1 S
+  // R s 1928 o - Ap 15 0 1 S
+  // R s 1929 o - Ap 20 23 1 S
+  // R s 1937 o - Jun 16 23 1 S
+  // ...
+  //
+  // R Sp 1967 o - Jun 3 12 1 S
+  // R Sp 1967 o - O 1 0 0 -
+  // R Sp 1974 o - Jun 24 0 1 S
+  // R Sp 1974 o - S 1 0 0 -
+  // R Sp 1976 1977 - May 1 0 1 S
+  // R Sp 1976 o - Au 1 0 0 -
+  // R Sp 1977 o - S 28 0 0 -
+  // R Sp 1978 o - Jun 1 0 1 S
+  // R Sp 1978 o - Au 4 0 0 -
+
+  using namespace std::literals::chrono_literals;
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("Africa/Ceuta");
+
+  assert_range(
+      "[1928-10-07 00:00:00, 1967-06-03 12:00:00) 00:00:00 0min WET",
+      tz->get_info(to_sys_seconds(1928y, std::chrono::October, 7d)), // 0 s WE%sT 1929 & 1926 1929 O Sa>=1 24s 0 -
+      tz->get_info( // No transitions in "0 - WET 1967"
+          to_sys_seconds(1967y, std::chrono::June, 3d, 11h, 59min, 59s))); // 0 - WET 1967 & 1967 o Jun 3 12 1 S
+}
+
+static void test_africa_freetown() {
+  // Z Africa/Freetown -0:53 - LMT 1882
+  // -0:53 - FMT 1913 Jul
+  // -1 SL %s 1939 S 5
+  // -1 - -01 1941 D 6 24
+  // 0 - GMT
+  //
+  // R SL 1932 o - D 1 0 0:20 -0040
+  // R SL 1933 1938 - Mar 31 24 0 -01
+  // R SL 1933 1939 - Au 31 24 0:20 -0040
+  // R SL 1939 o - May 31 24 0 -01
+
+  using namespace std::literals::chrono_literals;
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("Africa/Freetown");
+
+  // When a continuation has a named rule, the transition time determined by
+  // the active rule can be wrong. The next continuation may set the clock to an
+  // earlier time. That case is tested for San Luis. This tests that the rule
+  // is not used when the rule is not a named rule.
+  //
+  // Fixes:
+  // Expected output [1882-01-01 00:53:00, 1913-07-01 00:53:00) -00:53:00 0min FMT
+  // Actual output   [1882-01-01 00:53:00, 1913-07-01 00:46:00) -00:53:00 0min FMT
+
+  assert_range("[1882-01-01 00:53:00, 1913-07-01 00:53:00) -00:53:00 0min FMT",
+               tz->get_info(to_sys_seconds(1882y, std::chrono::January, 1d, 0h, 53min)), // -0:53 - FMT 1913 Jul
+               tz->get_info( // -1 SL %s 1939 S 5 & before first rule
+                   to_sys_seconds(1913y, std::chrono::July, 1d, 0h, 52min, 59s)));
+
+  // Tests whether the "-1 SL %s 1939 S 5" until gets the proper local time
+  // adjustment.
+  assert_range("[1939-09-01 01:00:00, 1939-09-05 00:40:00) -00:40:00 20min -0040",
+               tz->get_info( // -1 SL %s 1939 S 5 & R SL 1933 1939 - Au 31 24 0:20 -0040
+                   to_sys_seconds(1939y, std::chrono::September, 1d, 1h)),
+               tz->get_info( // -1 - -01 1941 D 6 24
+                   to_sys_seconds(1939y, std::chrono::September, 5d, 0h, 39min, 59s)));
+}
+
+static void test_africa_windhoek() {
+  // Tests the LETTER/S used before the first rule per
+  // https://data.iana.org/time-zones/tz-how-to.html
+  // If switching to a named rule before any transition has happened,
+  // assume standard time (SAVE zero), and use the LETTER data from
+  // the earliest transition with a SAVE of zero.
+
+  // Z Africa/Windhoek 1:8:24 - LMT 1892 F 8
+  // 1:30 - +0130 1903 Mar
+  // 2 - SAST 1942 S 20 2
+  // 2 1 SAST 1943 Mar 21 2
+  // 2 - SAST 1990 Mar 21
+  // 2 NA %s
+  //
+  // R NA 1994 o - Mar 21 0 -1 WAT
+  // R NA 1994 2017 - S Su>=1 2 0 CAT
+  // R NA 1995 2017 - Ap Su>=1 2 -1 WAT
+
+  using namespace std::literals::chrono_literals;
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("Africa/Windhoek");
+
+  assert_range(
+      "[1990-03-20 22:00:00, 1994-03-20 22:00:00) 02:00:00 0min CAT",
+      tz->get_info(to_sys_seconds(1990y, std::chrono::March, 20d, 22h)),
+      tz->get_info(to_sys_seconds(1994y, std::chrono::March, 20d, 21h, 59min, 59s)));
+}
+
+static void test_america_adak() {
+  // Z America/Adak 12:13:22 - LMT 1867 O 19 12:44:35
+  // ...
+  // -11 u B%sT 1983 O 30 2
+  // -10 u AH%sT 1983 N 30
+  // -10 u H%sT
+  //
+  // ...
+  // R u 1945 o - S 30 2 0 S
+  // R u 1967 2006 - O lastSu 2 0 S
+  // R u 1967 1973 - Ap lastSu 2 1 D
+  // R u 1974 o - Ja 6 2 1 D
+  // R u 1975 o - F lastSu 2 1 D
+  // R u 1976 1986 - Ap lastSu 2 1 D
+  // R u 1987 2006 - Ap Su>=1 2 1 D
+  // ...
+
+  using namespace std::literals::chrono_literals;
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("America/Adak");
+
+  assert_range(
+      "[1983-10-30 12:00:00, 1983-11-30 10:00:00) -10:00:00 0min AHST",
+      tz->get_info(to_sys_seconds(1983y, std::chrono::October, 30d, 12h)), // -11 u B%sT 1983 O 30 2
+      tz->get_info(to_sys_seconds(1983y, std::chrono::November, 30d, 9h, 59min, 59s))); // -10 u AH%sT 1983 N 30
+}
+
+static void test_america_asuncion() {
+  // R y 2013 ma - Mar Su>=22 0 0 -
+  // Z America/Asuncion -3:50:40 - LMT 1890
+  // -3:50:40 - AMT 1931 O 10
+  // -4 - -04 1972 O
+  // -3 - -03 1974 Ap
+  // -4 y -04/-03
+  //
+  // R y 1975 1988 - O 1 0 1 -
+  // R y 1975 1978 - Mar 1 0 0 -
+  // R y 1979 1991 - Ap 1 0 0 -
+  // ...
+
+  using namespace std::literals::chrono_literals;
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("America/Asuncion");
+
+  assert_range("[1974-04-01 03:00:00, 1975-10-01 04:00:00) -04:00:00 0min -04",
+               tz->get_info(to_sys_seconds(1974y, std::chrono::April, 1d, 3h)),
+               tz->get_info(to_sys_seconds(1975y, std::chrono::October, 1d, 3h, 59min, 59s)));
+
+  assert_range("[1975-10-01 04:00:00, 1976-03-01 03:00:00) -03:00:00 60min -03",
+               tz->get_info(to_sys_seconds(1975y, std::chrono::October, 1d, 4h)),
+               tz->get_info(to_sys_seconds(1976y, std::chrono::March, 1d, 2h, 59min, 59s)));
+}
+
+static void test_america_ciudad_juarez() {
+  // Z America/Ciudad_Juarez -7:5:56 - LMT 1922 Ja 1 7u
+  // -7 - MST 1927 Jun 10 23
+  // -6 - CST 1930 N 15
+  // -7 m MST 1932 Ap
+  // -6 - CST 1996
+  // -6 m C%sT 1998
+  // ...
+  //
+  // R m 1939 o - F 5 0 1 D
+  // R m 1939 o - Jun 25 0 0 S
+  // R m 1940 o - D 9 0 1 D
+  // R m 1941 o - Ap 1 0 0 S
+  // R m 1943 o - D 16 0 1 W
+  // R m 1944 o - May 1 0 0 S
+  // R m 1950 o - F 12 0 1 D
+  // R m 1950 o - Jul 30 0 0 S
+  // R m 1996 2000 - Ap Su>=1 2 1 D
+  // R m 1996 2000 - O lastSu 2 0 S
+  // ...
+
+  using namespace std::literals::chrono_literals;
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("America/Ciudad_Juarez");
+
+  // 1996 has a similar issue; instead of __time, the __until ends before
+  // the first rule in 1939. Between the two usages of RULE Mexico
+  // a different continuation RULE is active.
+  assert_range("[1996-04-07 08:00:00, 1996-10-27 07:00:00) -05:00:00 60min CDT",
+               tz->get_info(to_sys_seconds(1996y, std::chrono::April, 7d, 8h)),
+               tz->get_info(to_sys_seconds(1996y, std::chrono::October, 27d, 6h, 59min, 59s)));
+}
+
+static void test_america_argentina_buenos_aires() {
+  // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
+  // -4:16:48 - CMT 1920 May
+  // -4 - -04 1930 D
+  // -4 A -04/-03 1969 O 5
+  // -3 A -03/-02 1999 O 3
+  // -4 A -04/-03 2000 Mar 3
+  // -3 A -03/-02
+  //
+  // ...
+  // R A 1989 1992 - O Su>=15 0 1 -
+  // R A 1999 o - O Su>=1 0 1 -
+  // R A 2000 o - Mar 3 0 0 -
+  // R A 2007 o - D 30 0 1 -
+  // ...
+
+  // The 1999 switch uses the same rule, but with a different stdoff.
+  // R A 1999 o - O Su>=1 0 1 -
+  //   stdoff -3 -> 1999-10-03 03:00:00
+  //   stdoff -4 -> 1999-10-03 04:00:00
+  // This generates an invalid entry, which is evaluated as a transition.
+  // Looking at the zdump-like output in libc++, this generates jumps in
+  // the UTC time.
+
+  using namespace std::literals::chrono_literals;
+  const std::chrono::time_zone* tz = std::chrono::locate_zone("America/Argentina/Buenos_Aires");
+
+  assert_range("[1999-10-03 03:00:00, 2000-03-03 03:00:00) -03:00:00 60min -03",
+               tz->get_info(to_sys_seconds(1999y, std::chrono::October, 3d, 3h)),
+               tz->get_info(to_sys_seconds(2000y, std::chrono::March, 3d, 2h, 59min, 59s)));
+  assert_range("[2000-03-03 03:00:00, 2007-12-30 03:00:00) -03:00:00 0min -03",
+               tz->get_info(to_sys_seconds(2000y, std::chrono::March, 3d, 3h)),
+               tz->get_info(to_sys_seconds(2007y, std::chrono::December, 30d, 2h, 59min, 59s)));
+}
+
+static void test_america_argentina_la_rioja() {
+  // Z America/Argentina/La_Rioja -4:27:24 - LMT 1894 O 31
+  // ...
+  // -4 A -04/-03 1969 O 5
+  // -3 A -03/-02 1991 Mar
+  // -4 - -04 1991 May 7
+  // -3 A -03/-02 1999 O 3
+  // ...
+  //
+  // ...
+  // R A 1988 o - D 1 0 1 -
+  // R A 1989 1993 - Mar Su>=1 0 0 -
+  // R A 1989 1992 - O Su>=15 0 1 -
+  // R A 1999 o - O Su>=1 0 1 -
+  // ...
+ + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("America/Argentina/La_Rioja"); + + assert_range("[1990-10-21 03:00:00, 1991-03-01 02:00:00) -02:00:00 60min -02", + tz->get_info(to_sys_seconds(1990y, std::chrono::October, 21d, 3h)), + tz->get_info(to_sys_seconds(1991y, std::chrono::March, 1d, 1h, 59min, 59s))); +} + +static void test_america_argentina_san_luis() { + // Z America/Argentina/San_Luis -4:25:24 - LMT 1894 O 31 + // ... + // -4 A -04/-03 1969 O 5 + // -3 A -03/-02 1990 + // -3 1 -02 1990 Mar 14 + // -4 - -04 1990 O 15 + // -4 1 -03 1991 Mar + // -4 - -04 1991 Jun + // -3 - -03 1999 O 3 + // -4 1 -03 2000 Mar 3 + // -4 - -04 2004 Jul 25 + // -3 A -03/-02 2008 Ja 21 + // -4 Sa -04/-03 2009 O 11 + // -3 - -03 + // + // ... + // R A 1988 o - D 1 0 1 - + // R A 1989 1993 - Mar Su>=1 0 0 - + // R A 1989 1992 - O Su>=15 0 1 - + // R A 1999 o - O Su>=1 0 1 - + // R A 2000 o - Mar 3 0 0 - + // R A 2007 o - D 30 0 1 - + // R A 2008 2009 - Mar Su>=15 0 0 - + // R A 2008 o - O Su>=15 0 1 - + // + // R Sa 2008 2009 - Mar Su>=8 0 0 - + // R Sa 2007 2008 - O Su>=8 0 1 - + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("America/Argentina/San_Luis"); + + assert_range("[1989-10-15 03:00:00, 1990-03-14 02:00:00) -02:00:00 60min -02", + tz->get_info( // -3 A -03/-02 1990 & R A 1989 1992 - O Su>=15 0 1 - + to_sys_seconds(1989y, std::chrono::October, 15d, 3h)), + tz->get_info( // UNTIL -3 1 -02 1990 Mar 14 + to_sys_seconds(1990y, std::chrono::March, 14d, 1h, 59min, 59s))); + + assert_range("[2008-01-21 02:00:00, 2008-03-09 03:00:00) -03:00:00 60min -03", + tz->get_info(to_sys_seconds(2008y, std::chrono::January, 21d, 2h)), + tz->get_info(to_sys_seconds(2008y, std::chrono::March, 9d, 2h, 59min, 59s))); +} + +static void test_america_indiana_knox() { + // Z America/Indiana/Knox -5:46:30 - LMT 1883 N 18 12:13:30 + // -6 u C%sT 1947 + // -6 St C%sT 1962 Ap 29 2 + // -5 - EST 1963 O 27 2 + // -6 u C%sT 1991 O 27 2 + // -5 - EST 2006 Ap 2 2 + // -6 u C%sT + // + // ... + // R u 1976 1986 - Ap lastSu 2 1 D + // R u 1987 2006 - Ap Su>=1 2 1 D + // R u 2007 ma - Mar Su>=8 2 1 D + // R u 2007 ma - N Su>=1 2 0 S + + using namespace std::literals::chrono_literals; + const std::chrono::time_zone* tz = std::chrono::locate_zone("America/Indiana/Knox"); + + // The continuations + // -5 - EST + // -6 u C%sT + // have different offsets. The start time of the first active rule in + // RULE u should use the offset at the end of -5 - EST. 
+ assert_range("[2006-04-02 07:00:00, 2006-10-29 07:00:00) -05:00:00 60min CDT", + tz->get_info(to_sys_seconds(2006y, std::chrono::April, 2d, 7h)), + tz->get_info(to_sys_seconds(2006y, std::chrono::October, 29d, 6h, 59min, 59s))); +} + +int main(int, const char**) { + // Basic tests + test_gmt(); + test_durations(); + test_indian_kerguelen(); + test_antarctica_syowa(); + test_asia_hong_kong(); + test_europe_berlin(); + + test_america_st_johns(); + + // Small tests for not-yet tested conditions + test_get_at_standard_time_universal(); + test_get_at_standard_time_standard(); + test_get_at_save_universal(); + test_get_at_rule_standard(); + test_get_at_rule_universal(); + + test_format_with_alternatives_west(); + test_format_with_alternatives_east(); + + // Tests based on bugs found + test_africa_algiers(); + test_africa_casablanca(); + test_africa_ceuta(); + test_africa_freetown(); + test_africa_windhoek(); + test_america_adak(); + test_america_argentina_buenos_aires(); + test_america_argentina_la_rioja(); + test_america_argentina_san_luis(); + test_america_auncion(); + test_america_ciudad_juarez(); + test_america_indiana_knox(); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp new file mode 100644 index 0000000000000..05328e2256c79 --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp @@ -0,0 +1,129 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb, has-no-zdump + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// TODO TZDB Investigate +// XFAIL: target={{armv(7|8)l-linux-gnueabihf}} + +#include +#include +#include +#include + +#include "filesystem_test_helper.h" +#include "assert_macros.h" +#include "concat_macros.h" + +// The year range to validate. The dates used in practice are expected to be +// inside the tested range. +constexpr std::chrono::year first{1800}; +constexpr std::chrono::year last{2100}; + +// A custom sys_info class that also stores the name of the time zone. +// Its formatter matches the output of zdump. +struct sys_info : public std::chrono::sys_info { + sys_info(std::string_view name_, std::chrono::sys_info info) : std::chrono::sys_info{info}, name{name_} {} + + std::string name; +}; + +template <> +struct std::formatter { + template + constexpr typename ParseContext::iterator parse(ParseContext& ctx) { + return ctx.begin(); + } + + template + typename FormatContext::iterator format(const sys_info& info, FormatContext& ctx) const { + using namespace std::literals::chrono_literals; + + // Every "sys_info" entry of zdump consists of 2 lines. 
+    // - 1 for first second of the range
+    // - 1 for last second of the range
+    // For example:
+    // Africa/Casablanca  Sun Mar 25 02:00:00 2018 UT = Sun Mar 25 03:00:00 2018 +01 isdst=1 gmtoff=3600
+    // Africa/Casablanca  Sun May 13 01:59:59 2018 UT = Sun May 13 02:59:59 2018 +01 isdst=1 gmtoff=3600
+
+    if (info.begin != std::chrono::sys_seconds::min())
+      ctx.advance_to(std::format_to(
+          ctx.out(),
+          "{} {:%a %b %e %H:%M:%S %Y} UT = {:%a %b %e %H:%M:%S %Y} {} isdst={:d} gmtoff={:%Q}\n",
+          info.name,
+          info.begin,
+          info.begin + info.offset,
+          info.abbrev,
+          info.save != 0s,
+          info.offset));
+
+    if (info.end != std::chrono::sys_seconds::max())
+      ctx.advance_to(std::format_to(
+          ctx.out(),
+          "{} {:%a %b %e %H:%M:%S %Y} UT = {:%a %b %e %H:%M:%S %Y} {} isdst={:d} gmtoff={:%Q}\n",
+          info.name,
+          info.end - 1s,
+          info.end - 1s + info.offset,
+          info.abbrev,
+          info.save != 0s,
+          info.offset));
+
+    return ctx.out();
+  }
+};
+
+void process(std::ostream& stream, const std::chrono::time_zone& zone) {
+  using namespace std::literals::chrono_literals;
+
+  constexpr auto begin = std::chrono::time_point_cast<std::chrono::seconds>(
+      static_cast<std::chrono::sys_days>(std::chrono::year_month_day{first, std::chrono::January, 1d}));
+  constexpr auto end = std::chrono::time_point_cast<std::chrono::seconds>(
+      static_cast<std::chrono::sys_days>(std::chrono::year_month_day{last, std::chrono::January, 1d}));
+
+  std::chrono::sys_seconds s = begin;
+  do {
+    sys_info info{zone.name(), zone.get_info(s)};
+
+    if (info.end >= end)
+      info.end = std::chrono::sys_seconds::max();
+
+    stream << std::format("{}", info);
+    s = info.end;
+  } while (s != std::chrono::sys_seconds::max());
+}
+
+// This test compares the output of zdump against the output based on the
+// standard library implementation. It tests all available time zones and
+// validates them. The specification of how to use the IANA database is limited
+// and the real database contains quite a number of "interesting" cases.
+int main(int, const char**) {
+  scoped_test_env env;
+  const std::string file = env.create_file("zdump.txt");
+
+  const std::chrono::tzdb& tzdb = std::chrono::get_tzdb();
+  for (const auto& zone : tzdb.zones) {
+    std::stringstream libcxx;
+    process(libcxx, zone);
+
+    int result = std::system(std::format("zdump -V -c{},{} {} > {}", first, last, zone.name(), file).c_str());
+    assert(result == 0);
+
+    std::stringstream zdump;
+    zdump << std::ifstream(file).rdbuf();
+
+    TEST_REQUIRE(
+        libcxx.str() == zdump.str(),
+        TEST_WRITE_CONCATENATED("\nTZ=", zone.name(), "\nlibc++\n", libcxx.str(), "|\n\nzdump\n", zdump.str(), "|"));
+  }
+
+  return 0;
+}
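For a single zone, the comparison performed above can be reproduced by hand: running `zdump -V -c1800,2100 Africa/Casablanca` (the same flags and year range the test passes to std::system) prints the two-line-per-range format quoted in the formatter comment, which is exactly what process() emits for that zone.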
 
 // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
 
-// const char* what() const noexcept override;
+// Make sure std::bad_expected_access<E> inherits from std::bad_expected_access<void>.
 
 #include <expected>
-#include <utility>
+#include <type_traits>
 
-template <class T>
-concept WhatNoexcept =
-    requires(const T& t) {
-      { t.what() } noexcept;
-    };
+struct Foo {};
 
-struct foo{};
-
-static_assert(!WhatNoexcept<foo>);
-static_assert(WhatNoexcept<std::bad_expected_access<int>>);
-static_assert(WhatNoexcept<std::bad_expected_access<foo>>);
+static_assert(std::is_base_of_v<std::bad_expected_access<void>, std::bad_expected_access<int>>);
+static_assert(std::is_base_of_v<std::bad_expected_access<void>, std::bad_expected_access<Foo>>);
diff --git a/libcxx/test/std/utilities/expected/expected.bad/void-specialization.pass.cpp b/libcxx/test/std/utilities/expected/expected.bad/void-specialization.pass.cpp
new file mode 100644
index 0000000000000..092e1153103c8
--- /dev/null
+++ b/libcxx/test/std/utilities/expected/expected.bad/void-specialization.pass.cpp
@@ -0,0 +1,83 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// template<>
+// class bad_expected_access<void> : public exception {
+// protected:
+//   bad_expected_access() noexcept;
+//   bad_expected_access(const bad_expected_access&) noexcept;
+//   bad_expected_access(bad_expected_access&&) noexcept;
+//   bad_expected_access& operator=(const bad_expected_access&) noexcept;
+//   bad_expected_access& operator=(bad_expected_access&&) noexcept;
+//   ~bad_expected_access();
+//
+// public:
+//   const char* what() const noexcept override;
+// };
+
+#include <cassert>
+#include <exception>
+#include <expected>
+#include <type_traits>
+#include <utility>
+
+#include "test_macros.h"
+
+struct Inherit : std::bad_expected_access<void> {};
+
+int main(int, char**) {
+  // base class
+  static_assert(std::is_base_of_v<std::exception, std::bad_expected_access<void>>);
+
+  // default constructor
+  {
+    Inherit exc;
+    ASSERT_NOEXCEPT(Inherit());
+  }
+
+  // copy constructor
+  {
+    Inherit exc;
+    Inherit copy(exc);
+    ASSERT_NOEXCEPT(Inherit(exc));
+  }
+
+  // move constructor
+  {
+    Inherit exc;
+    Inherit copy(std::move(exc));
+    ASSERT_NOEXCEPT(Inherit(std::move(exc)));
+  }
+
+  // copy assignment
+  {
+    Inherit exc;
+    Inherit copy;
+    [[maybe_unused]] Inherit& result = (copy = exc);
+    ASSERT_NOEXCEPT(copy = exc);
+  }
+
+  // move assignment
+  {
+    Inherit exc;
+    Inherit copy;
+    [[maybe_unused]] Inherit& result = (copy = std::move(exc));
+    ASSERT_NOEXCEPT(copy = std::move(exc));
+  }
+
+  // what()
+  {
+    Inherit exc;
+    char const* what = exc.what();
+    assert(what != nullptr);
+    ASSERT_NOEXCEPT(exc.what());
+  }
+
+  return 0;
+}
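The bad_expected_access<void> base class exercised above is what allows a caller to catch any bad_expected_access without naming the error type. A minimal sketch of that use, not taken from the patch (it assumes only a C++23 standard library with std::expected):

    #include <expected>
    #include <iostream>

    int main(int, char**) {
      std::expected<int, char> e = std::unexpected{'x'};
      try {
        (void)e.value(); // throws std::bad_expected_access<char>
      } catch (const std::bad_expected_access<void>& ex) {
        std::cout << ex.what() << '\n'; // the <void> base catches every error type
      }
      return 0;
    }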
diff --git a/libcxx/test/std/utilities/expected/expected.bad/what.pass.cpp b/libcxx/test/std/utilities/expected/expected.bad/what.pass.cpp
new file mode 100644
index 0000000000000..bc5e356161a74
--- /dev/null
+++ b/libcxx/test/std/utilities/expected/expected.bad/what.pass.cpp
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// const char* what() const noexcept override;
+
+#include <cassert>
+#include <expected>
+#include <utility>
+
+#include "test_macros.h"
+
+struct Foo {};
+
+int main(int, char**) {
+  {
+    std::bad_expected_access<int> const exc(99);
+    char const* what = exc.what();
+    assert(what != nullptr);
+    ASSERT_NOEXCEPT(exc.what());
+  }
+  {
+    std::bad_expected_access<Foo> const exc(Foo{});
+    char const* what = exc.what();
+    assert(what != nullptr);
+    ASSERT_NOEXCEPT(exc.what());
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/function.objects/func.bind.partial/bind_back.pass.cpp b/libcxx/test/std/utilities/function.objects/func.bind.partial/bind_back.pass.cpp
new file mode 100644
index 0000000000000..01a96348d50c5
--- /dev/null
+++ b/libcxx/test/std/utilities/function.objects/func.bind.partial/bind_back.pass.cpp
@@ -0,0 +1,381 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <functional>
+
+// template <class F, class... Args>
+//   constexpr unspecified bind_back(F&& f, Args&&... args);
+
+#include <functional>
+
+#include <cassert>
+#include <concepts>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+#include "callable_types.h"
+#include "types.h"
+
+constexpr void test_basic_bindings() {
+  { // Bind arguments, call without arguments
+    {
+      auto f = std::bind_back(MakeTuple{});
+      assert(f() == std::make_tuple());
+    }
+    {
+      auto f = std::bind_back(MakeTuple{}, Elem<1>{});
+      assert(f() == std::make_tuple(Elem<1>{}));
+    }
+    {
+      auto f = std::bind_back(MakeTuple{}, Elem<1>{}, Elem<2>{});
+      assert(f() == std::make_tuple(Elem<1>{}, Elem<2>{}));
+    }
+    {
+      auto f = std::bind_back(MakeTuple{}, Elem<1>{}, Elem<2>{}, Elem<3>{});
+      assert(f() == std::make_tuple(Elem<1>{}, Elem<2>{}, Elem<3>{}));
+    }
+  }
+
+  { // Bind no arguments, call with arguments
+    {
+      auto f = std::bind_back(MakeTuple{});
+      assert(f(Elem<1>{}) == std::make_tuple(Elem<1>{}));
+    }
+    {
+      auto f = std::bind_back(MakeTuple{});
+      assert(f(Elem<1>{}, Elem<2>{}) == std::make_tuple(Elem<1>{}, Elem<2>{}));
+    }
+    {
+      auto f = std::bind_back(MakeTuple{});
+      assert(f(Elem<1>{}, Elem<2>{}, Elem<3>{}) == std::make_tuple(Elem<1>{}, Elem<2>{}, Elem<3>{}));
+    }
+  }
+
+  { // Bind arguments, call with arguments
+    {
+      auto f = std::bind_back(MakeTuple{}, Elem<1>{});
+      assert(f(Elem<10>{}) == std::make_tuple(Elem<10>{}, Elem<1>{}));
+    }
+    {
+      auto f = std::bind_back(MakeTuple{}, Elem<1>{}, Elem<2>{});
+      assert(f(Elem<10>{}) == std::make_tuple(Elem<10>{}, Elem<1>{}, Elem<2>{}));
+    }
+    {
+      auto f = std::bind_back(MakeTuple{}, Elem<1>{}, Elem<2>{}, Elem<3>{});
+      assert(f(Elem<10>{}) == std::make_tuple(Elem<10>{}, Elem<1>{}, Elem<2>{}, Elem<3>{}));
+    }
+
+    {
+      auto f = std::bind_back(MakeTuple{}, Elem<1>{});
+      assert(f(Elem<10>{}, Elem<11>{}) == std::make_tuple(Elem<10>{}, Elem<11>{}, Elem<1>{}));
+    }
+    {
+      auto f = std::bind_back(MakeTuple{}, Elem<1>{}, Elem<2>{});
+      assert(f(Elem<10>{}, Elem<11>{}) == std::make_tuple(Elem<10>{}, Elem<11>{}, Elem<1>{}, Elem<2>{}));
+    }
+    {
+      auto f = std::bind_back(MakeTuple{}, Elem<1>{}, Elem<2>{}, Elem<3>{});
+      assert(f(Elem<10>{}, Elem<11>{}) == std::make_tuple(Elem<10>{}, Elem<11>{}, Elem<1>{}, Elem<2>{}, Elem<3>{}));
+    }
+    {
+      auto f = std::bind_back(MakeTuple{}, Elem<1>{}, Elem<2>{}, Elem<3>{});
+      assert(f(Elem<10>{}, Elem<11>{}, Elem<12>{}) ==
+             std::make_tuple(Elem<10>{}, Elem<11>{}, Elem<12>{}, Elem<1>{}, Elem<2>{}, Elem<3>{}));
+    }
+  }
+
+  { // Basic tests with fundamental types
+    int n = 2;
+    int m = 1;
+    int sum = 0;
+    auto add = [](int x, int y) { return x + y; };
+    auto add_n = [](int a, int b, int c, int d, int e, int f) { return a + b + c + d + e + f; };
+    auto add_ref = [&](int x, int y) -> int& { return sum = x + y; };
+    auto add_rref = [&](int x, int y) -> int&& { return std::move(sum = x + y); };
+
+    auto a = std::bind_back(add, m, n);
+    assert(a() == 3);
+
+    auto b = std::bind_back(add_n, m, n, m, m, m, m);
+    assert(b() == 7);
+
+    auto c = std::bind_back(add_n, n, m);
+    assert(c(1, 1, 1, 1) == 7);
+
+    auto d = std::bind_back(add_ref, n, m);
+    std::same_as<int&> decltype(auto) dresult(d());
+    assert(dresult == 3);
+
+    auto e = std::bind_back(add_rref, n, m);
+    std::same_as<int&&> decltype(auto) eresult(e());
+    assert(eresult == 3);
+
+    auto f = std::bind_back(add, n);
+    assert(f(3) == 5);
+
+    auto g = std::bind_back(add, n, 1);
+    assert(g() == 3);
+
+    auto h = std::bind_back(add_n, 1, 1, 1);
+    assert(h(2, 2, 2) == 9);
+
+    auto i = std::bind_back(add_ref, n);
+    std::same_as<int&> decltype(auto) iresult(i(5));
+    assert(iresult == 7);
+
+    auto j = std::bind_back(add_rref, m);
+    std::same_as<int&&> decltype(auto) jresult(j(4));
+    assert(jresult == 5);
+  }
+}
+
+constexpr void test_edge_cases() {
+  { // Make sure we don't treat std::reference_wrapper specially.
+    auto sub = [](std::reference_wrapper<int> a, std::reference_wrapper<int> b) { return a.get() - b.get(); };
+
+    int i = 1;
+    int j = 2;
+    auto f = std::bind_back(sub, std::ref(i));
+    assert(f(std::ref(j)) == 1);
+  }
+
+  { // Make sure we can call a function that's a pointer to a member function.
+    struct MemberFunction {
+      constexpr int foo(int x, int y) { return x * y; }
+    };
+
+    MemberFunction value;
+    auto fn = std::bind_back(&MemberFunction::foo, 2, 3);
+    assert(fn(value) == 6);
+  }
+
+  { // Make sure we can call a function that's a pointer to a member object.
+    struct MemberObject {
+      int obj;
+    };
+
+    MemberObject value{.obj = 3};
+    auto fn = std::bind_back(&MemberObject::obj);
+    assert(fn(value) == 3);
+  }
+}
+
+constexpr void test_passing_arguments() {
+  { // Make sure that we copy the bound arguments into the unspecified-type.
+    auto add = [](int x, int y) { return x + y; };
+    int n = 2;
+    auto f = std::bind_back(add, n, 1);
+    n = 100;
+    assert(f() == 3);
+  }
+
+  { // Make sure we pass the bound arguments to the function object
+    // with the right value category.
+    {
+      auto was_copied = [](CopyMoveInfo info) { return info.copy_kind == CopyMoveInfo::copy; };
+      CopyMoveInfo info;
+      auto f = std::bind_back(was_copied, info);
+      assert(f());
+    }
+
+    {
+      auto was_moved = [](CopyMoveInfo info) { return info.copy_kind == CopyMoveInfo::move; };
+      CopyMoveInfo info;
+      auto f = std::bind_back(was_moved, info);
+      assert(std::move(f)());
+    }
+  }
+}
+
+constexpr void test_function_objects() {
+  { // Make sure we call the correctly cv-ref qualified operator()
+    // based on the value category of the bind_back unspecified-type.
+    struct X {
+      constexpr int operator()() & { return 1; }
+      constexpr int operator()() const& { return 2; }
+      constexpr int operator()() && { return 3; }
+      constexpr int operator()() const&& { return 4; }
+    };
+
+    auto f = std::bind_back(X{});
+    using F = decltype(f);
+    assert(static_cast<F&>(f)() == 1);
+    assert(static_cast<const F&>(f)() == 2);
+    assert(static_cast<F&&>(f)() == 3);
+    assert(static_cast<const F&&>(f)() == 4);
+  }
+
+  // Make sure the `bind_back` unspecified-type does not model invocable
+  // when the call would select a differently-qualified operator().
+  //
+  // For example, if the call to `operator()() &` is ill-formed, the call to the unspecified-type
+  // should be ill-formed and not fall back to the `operator()() const&` overload.
+  { // Make sure we delete the & overload when the underlying call isn't valid.
+    {
+      struct X {
+        void operator()() & = delete;
+        void operator()() const&;
+        void operator()() &&;
+        void operator()() const&&;
+      };
+
+      using F = decltype(std::bind_back(X{}));
+      static_assert(!std::invocable<F&>);
+      static_assert(std::invocable<const F&>);
+      static_assert(std::invocable<F>);
+      static_assert(std::invocable<const F>);
+    }
+
+    // There's no way to make sure we delete the const& overload when the underlying call isn't valid,
+    // so we can't check this one.
+
+    { // Make sure we delete the && overload when the underlying call isn't valid.
+      struct X {
+        void operator()() &;
+        void operator()() const&;
+        void operator()() && = delete;
+        void operator()() const&&;
+      };
+
+      using F = decltype(std::bind_back(X{}));
+      static_assert(std::invocable<F&>);
+      static_assert(std::invocable<const F&>);
+      static_assert(!std::invocable<F>);
+      static_assert(std::invocable<const F>);
+    }
+
+    { // Make sure we delete the const&& overload when the underlying call isn't valid.
+      struct X {
+        void operator()() &;
+        void operator()() const&;
+        void operator()() &&;
+        void operator()() const&& = delete;
+      };
+
+      using F = decltype(std::bind_back(X{}));
+      static_assert(std::invocable<F&>);
+      static_assert(std::invocable<const F&>);
+      static_assert(std::invocable<F>);
+      static_assert(!std::invocable<const F>);
+    }
+  }
+
+  { // Extra value category tests
+    struct X {};
+
+    {
+      struct Y {
+        void operator()(X&&) const&;
+        void operator()(X&&) && = delete;
+      };
+
+      using F = decltype(std::bind_back(Y{}));
+      static_assert(std::invocable<F&, X>);
+      static_assert(!std::invocable<F, X>);
+    }
+
+    {
+      struct Y {
+        void operator()(const X&) const;
+        void operator()(X&&) const = delete;
+      };
+
+      using F = decltype(std::bind_back(Y{}, X{}));
+      static_assert(std::invocable<F&>);
+      static_assert(!std::invocable<F>);
+    }
+  }
+}
+
+constexpr void test_return_type() {
+  { // Test properties of the constructor of the unspecified-type returned by bind_back.
+    { // Test move constructor when function is move only.
+      MoveOnlyCallable value(true);
+      auto f = std::bind_back(std::move(value), 1);
+      assert(f());
+      assert(f(1, 2, 3));
+
+      auto f1 = std::move(f);
+      assert(!f());
+      assert(f1());
+      assert(f1(1, 2, 3));
+
+      using F = decltype(f);
+      static_assert(std::is_move_constructible<F>::value);
+      static_assert(!std::is_copy_constructible<F>::value);
+      static_assert(!std::is_move_assignable<F>::value);
+      static_assert(!std::is_copy_assignable<F>::value);
+    }
+
+    { // Test move constructor when function is copyable but not assignable.
+      CopyCallable value(true);
+      auto f = std::bind_back(value, 1);
+      assert(f());
+      assert(f(1, 2, 3));
+
+      auto f1 = std::move(f);
+      assert(!f());
+      assert(f1());
+      assert(f1(1, 2, 3));
+
+      auto f2 = std::bind_back(std::move(value), 1);
+      assert(f1());
+      assert(f2());
+      assert(f2(1, 2, 3));
+
+      using F = decltype(f);
+      static_assert(std::is_move_constructible<F>::value);
+      static_assert(std::is_copy_constructible<F>::value);
+      static_assert(!std::is_move_assignable<F>::value);
+      static_assert(!std::is_copy_assignable<F>::value);
+    }
+
+    { // Test constructors when function is copy assignable.
+      using F = decltype(std::bind_back(std::declval<CopyAssignableWrapper>(), 1));
+      static_assert(std::is_move_constructible<F>::value);
+      static_assert(std::is_copy_constructible<F>::value);
+      static_assert(std::is_move_assignable<F>::value);
+      static_assert(std::is_copy_assignable<F>::value);
+    }
+
+    { // Test constructors when function is move assignable only.
+      using F = decltype(std::bind_back(std::declval<MoveAssignableWrapper>(), 1));
+      static_assert(std::is_move_constructible<F>::value);
+      static_assert(!std::is_copy_constructible<F>::value);
+      static_assert(std::is_move_assignable<F>::value);
+      static_assert(!std::is_copy_assignable<F>::value);
+    }
+  }
+
+  { // Make sure bind_back's unspecified type's operator() is SFINAE-friendly.
+    using F = decltype(std::bind_back(std::declval<int (*)(int, int)>(), 1));
+    static_assert(!std::is_invocable<F>::value);
+    static_assert(std::is_invocable<F, int>::value);
+    static_assert(!std::is_invocable<F, void*>::value);
+    static_assert(!std::is_invocable<F, int, int>::value);
+  }
+}
+
+constexpr bool test() {
+  test_basic_bindings();
+  test_edge_cases();
+  test_passing_arguments();
+  test_function_objects();
+  test_return_type();
+
+  return true;
+}
+
+int main(int, char**) {
+  test();
+  static_assert(test());
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/function.objects/func.bind.partial/bind_back.verify.cpp b/libcxx/test/std/utilities/function.objects/func.bind.partial/bind_back.verify.cpp
new file mode 100644
index 0000000000000..eb100c15f580d
--- /dev/null
+++ b/libcxx/test/std/utilities/function.objects/func.bind.partial/bind_back.verify.cpp
@@ -0,0 +1,85 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <functional>
+
+// template <class F, class... Args>
+//   constexpr unspecified bind_back(F&& f, Args&&... args);
+
+#include <functional>
+
+#include "types.h"
+
+constexpr int pass(int n) { return n; }
+
+void test() {
+  { // Test calling constexpr function from non-constexpr `bind_back` result
+    auto f1 = std::bind_back(pass, 1);
+    static_assert(f1() == 1); // expected-error {{static assertion expression is not an integral constant expression}}
+  }
+
+  { // Test calling `bind_back` with template function
+    auto f1 = std::bind_back(do_nothing, 2);
+    // expected-error@-1 {{no matching function for call to 'bind_back'}}
+  }
+
+  { // Mandates: is_constructible_v<decay_t<F>, F>
+    struct F {
+      F() = default;
+      F(const F&) = default;
+      F(F&) = delete;
+
+      void operator()() {}
+    };
+
+    F f;
+    auto f1 = std::bind_back(f);
+    // expected-error-re@*:* {{static assertion failed{{.*}}bind_back requires decay_t<F> to be constructible from F}}
+  }
+
+  { // Mandates: is_move_constructible_v<decay_t<F>>
+    struct F {
+      F() = default;
+      F(const F&) = default;
+      F(F&&) = delete;
+
+      void operator()() {}
+    };
+
+    F f;
+    auto f1 = std::bind_back(f);
+    // expected-error-re@*:* {{static assertion failed{{.*}}bind_back requires decay_t<F> to be move constructible}}
+  }
+
+  { // Mandates: (is_constructible_v<decay_t<Args>, Args> && ...)
+    struct Arg {
+      Arg() = default;
+      Arg(const Arg&) = default;
+      Arg(Arg&) = delete;
+    };
+
+    Arg x;
+    auto f = std::bind_back([](const Arg&) {}, x);
+    // expected-error-re@*:* {{static assertion failed{{.*}}bind_back requires all decay_t<Args> to be constructible from respective Args}}
+    // expected-error@*:* {{no matching constructor for initialization}}
+  }
+
+  { // Mandates: (is_move_constructible_v<decay_t<Args>> && ...)
+    struct Arg {
+      Arg() = default;
+      Arg(const Arg&) = default;
+      Arg(Arg&&) = delete;
+    };
+
+    Arg x;
+    auto f = std::bind_back([](Arg&) {}, x);
+    // expected-error-re@*:* {{static assertion failed{{.*}}bind_back requires all decay_t<Args> to be move constructible}}
+  }
+}
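The Mandates violations diagnosed above all stem from bind_back storing decayed copies of its bound arguments. A small sketch of that copy semantics, not part of the patch (it assumes only a C++23 standard library):

    #include <cassert>
    #include <functional>

    int main(int, char**) {
      auto mul = [](int x, int y) { return x * y; };
      int n = 10;
      auto times_n = std::bind_back(mul, n); // stores a copy: decay_t<int&> is int
      n = 0;                                 // does not affect the stored copy
      assert(times_n(4) == 40);
      return 0;
    }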
diff --git a/libcxx/test/std/utilities/function.objects/func.bind.partial/types.h b/libcxx/test/std/utilities/function.objects/func.bind.partial/types.h
new file mode 100644
index 0000000000000..76ed4d478baac
--- /dev/null
+++ b/libcxx/test/std/utilities/function.objects/func.bind.partial/types.h
@@ -0,0 +1,43 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TEST_STD_UTILITIES_FUNCTION_OBJECTS_FUNC_BIND_PARTIAL_TYPES_H
+#define TEST_STD_UTILITIES_FUNCTION_OBJECTS_FUNC_BIND_PARTIAL_TYPES_H
+
+#include <tuple>
+#include <utility>
+
+struct MakeTuple {
+  template <class... Args>
+  constexpr auto operator()(Args&&... args) const {
+    return std::make_tuple(std::forward<Args>(args)...);
+  }
+};
+
+template <int X>
+struct Elem {
+  template <int Y>
+  constexpr bool operator==(const Elem<Y>&) const {
+    return X == Y;
+  }
+};
+
+struct CopyMoveInfo {
+  enum { none, copy, move } copy_kind;
+
+  constexpr CopyMoveInfo() : copy_kind(none) {}
+  constexpr CopyMoveInfo(const CopyMoveInfo&) : copy_kind(copy) {}
+  constexpr CopyMoveInfo(CopyMoveInfo&&) : copy_kind(move) {}
+};
+
+template <class T>
+T do_nothing(T t) {
+  return t;
+}
+
+#endif // TEST_STD_UTILITIES_FUNCTION_OBJECTS_FUNC_BIND_PARTIAL_TYPES_H
diff --git a/libcxx/test/std/utilities/function.objects/func.bind_front/bind_front.pass.cpp b/libcxx/test/std/utilities/function.objects/func.bind_front/bind_front.pass.cpp
index 6eb4e4a46e82f..0dee6a95f60a7 100644
--- a/libcxx/test/std/utilities/function.objects/func.bind_front/bind_front.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.bind_front/bind_front.pass.cpp
@@ -15,6 +15,7 @@
 #include <functional>
 
 #include <cassert>
+#include <concepts>
 #include <tuple>
 #include <type_traits>
 #include <utility>
@@ -142,12 +143,13 @@ constexpr bool test() {
 
   // Basic tests with fundamental types
   {
-    int n = 2;
-    int m = 1;
-    auto add = [](int x, int y) { return x + y; };
-    auto addN = [](int a, int b, int c, int d, int e, int f) {
-      return a + b + c + d + e + f;
-    };
+    int n = 2;
+    int m = 1;
+    int sum = 0;
+    auto add = [](int x, int y) { return x + y; };
+    auto addN = [](int a, int b, int c, int d, int e, int f) { return a + b + c + d + e + f; };
+    auto add_ref = [&](int x, int y) -> int& { return sum = x + y; };
+    auto add_rref = [&](int x, int y) -> int&& { return std::move(sum = x + y); };
 
     auto a = std::bind_front(add, m, n);
     assert(a() == 3);
@@ -158,6 +160,14 @@ constexpr bool test() {
     auto c = std::bind_front(addN, n, m);
     assert(c(1, 1, 1, 1) == 7);
 
+    auto d = std::bind_front(add_ref, n, m);
+    std::same_as<int&> decltype(auto) dresult(d());
+    assert(dresult == 3);
+
+    auto e = std::bind_front(add_rref, n, m);
+    std::same_as<int&&> decltype(auto) eresult(e());
+    assert(eresult == 3);
+
     auto f = std::bind_front(add, n);
     assert(f(3) == 5);
 
@@ -166,6 +176,14 @@ constexpr bool test() {
 
     auto h = std::bind_front(addN, 1, 1, 1);
     assert(h(2, 2, 2) == 9);
+
+    auto i = std::bind_front(add_ref, n);
+    std::same_as<int&> decltype(auto) iresult(i(5));
+    assert(iresult == 7);
+
+    auto j = std::bind_front(add_rref, m);
+    std::same_as<int&&> decltype(auto) jresult(j(4));
+    assert(jresult == 5);
   }
 
   // Make sure we don't treat std::reference_wrapper specially.
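Taken together, bind_front binds the leading arguments of a call and bind_back the trailing ones. A minimal usage sketch, not part of the patch (it assumes only a C++23 standard library; std::plus and the strings are arbitrary):

    #include <cassert>
    #include <functional>
    #include <string>

    int main(int, char**) {
      auto shout = std::bind_back(std::plus<std::string>{}, std::string{"!"});    // y is fixed
      auto greet = std::bind_front(std::plus<std::string>{}, std::string{"hi, "}); // x is fixed
      assert(shout("hi") == "hi!");
      assert(greet("bob") == "hi, bob");
      return 0;
    }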
diff --git a/libcxx/test/support/test_macros.h b/libcxx/test/support/test_macros.h index 24f69c758f365..7b2dcbb52d0c8 100644 --- a/libcxx/test/support/test_macros.h +++ b/libcxx/test/support/test_macros.h @@ -385,6 +385,10 @@ inline Tp const& DoNotOptimize(Tp const& value) { # define TEST_HAS_NO_UNICODE #endif +#if defined(_LIBCPP_HAS_OPEN_WITH_WCHAR) +# define TEST_HAS_OPEN_WITH_WCHAR +#endif + #if defined(_LIBCPP_HAS_NO_INT128) || defined(_MSVC_STL_VERSION) # define TEST_HAS_NO_INT128 #endif diff --git a/libcxx/utils/CMakeLists.txt b/libcxx/utils/CMakeLists.txt index 19bb9851c8674..7a573535e1ef8 100644 --- a/libcxx/utils/CMakeLists.txt +++ b/libcxx/utils/CMakeLists.txt @@ -48,6 +48,13 @@ add_custom_target(libcxx-generate-width-estimation-table "${LIBCXX_SOURCE_DIR}/include/__format/width_estimation_table.h" COMMENT "Generate the width estimation header") +add_custom_target(libcxx-indic-conjunct-break-table + COMMAND + "${Python3_EXECUTABLE}" + "${LIBCXX_SOURCE_DIR}/utils/generate_indic_conjunct_break_table.py" + "${LIBCXX_SOURCE_DIR}/include/__format/indic_conjunct_break_table.h" + COMMENT "Generate the Indic Conjunct Break header") + add_custom_target(libcxx-generate-iwyu-mapping COMMAND "${Python3_EXECUTABLE}" @@ -63,5 +70,6 @@ add_custom_target(libcxx-generate-files libcxx-generate-extended-grapheme-cluster-tests libcxx-generate-escaped-output-table libcxx-generate-width-estimation-table + libcxx-indic-conjunct-break-table libcxx-generate-iwyu-mapping COMMENT "Create all the auto-generated files in libc++ and its tests.") diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile index db88da20b977a..c77f6c435baf4 100644 --- a/libcxx/utils/ci/Dockerfile +++ b/libcxx/utils/ci/Dockerfile @@ -72,33 +72,32 @@ RUN sudo apt-get update \ RUN sudo apt-get update \ && sudo apt-get install -y \ - python3 \ - python3-distutils \ - python3-psutil \ - git \ - gdb \ - ccache \ - gpg \ - wget \ bash \ + ccache \ curl \ - python3 \ - python3-dev \ - libpython3-dev \ - uuid-dev \ - libncurses5-dev \ - swig3.0 \ - libxml2-dev \ - libedit-dev \ + gdb \ + git \ + gpg \ language-pack-en \ language-pack-fr \ language-pack-ja \ language-pack-ru \ language-pack-zh-hans \ + libedit-dev \ + libncurses5-dev \ + libpython3-dev \ + libxml2-dev \ lsb-release \ - wget \ - unzip \ + make \ + python3 \ + python3-dev \ + python3-distutils \ + python3-psutil \ software-properties-common \ + swig4.0 \ + unzip \ + uuid-dev \ + wget \ && sudo rm -rf /var/lib/apt/lists/* diff --git a/libcxx/utils/ci/oss-fuzz.sh b/libcxx/utils/ci/oss-fuzz.sh index e5723406a9ff3..03b59b294041f 100755 --- a/libcxx/utils/ci/oss-fuzz.sh +++ b/libcxx/utils/ci/oss-fuzz.sh @@ -23,7 +23,7 @@ for test in libcxx/test/libcxx/fuzzing/*.pass.cpp; do exe="$(basename ${test})" exe="${exe%.pass.cpp}" ${CXX} ${CXXFLAGS} \ - -std=c++14 \ + -std=c++20 \ -DLIBCPP_OSS_FUZZ \ -D_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS \ -nostdinc++ -cxx-isystem ${INSTALL}/include/c++/v1 \ diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index 2905745355b68..a6f3eb174308b 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -374,7 +374,7 @@ bootstrapping-build) -B "${BUILD_DIR}" \ -GNinja -DCMAKE_MAKE_PROGRAM="${NINJA}" \ -DCMAKE_CXX_COMPILER_LAUNCHER="ccache" \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX="${INSTALL_DIR}" \ -DLLVM_ENABLE_PROJECTS="clang" \ -DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi;libunwind" \ diff --git 
a/libcxx/utils/data/unicode/DerivedCoreProperties.txt b/libcxx/utils/data/unicode/DerivedCoreProperties.txt index 8b482b5c10ae4..220c55685d4b0 100644 --- a/libcxx/utils/data/unicode/DerivedCoreProperties.txt +++ b/libcxx/utils/data/unicode/DerivedCoreProperties.txt @@ -1,6 +1,6 @@ -# DerivedCoreProperties-15.0.0.txt -# Date: 2022-08-05, 22:17:05 GMT -# © 2022 Unicode®, Inc. +# DerivedCoreProperties-15.1.0.txt +# Date: 2023-08-07, 15:21:24 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # @@ -1397,11 +1397,12 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 2B740..2B81D ; Alphabetic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Alphabetic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Alphabetic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; Alphabetic # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 137765 +# Total code points: 138387 # ================================================ @@ -6853,11 +6854,12 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2B740..2B81D ; ID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; ID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; ID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; ID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136345 +# Total code points: 136967 # ================================================ @@ -7438,6 +7440,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1FE0..1FEC ; ID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA 1FF2..1FF4 ; ID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6..1FFC ; ID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +200C..200D ; ID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER 203F..2040 ; ID_Continue # Pc [2] UNDERTIE..CHARACTER TIE 2054 ; ID_Continue # Pc INVERTED UNDERTIE 2071 ; ID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I @@ -7504,6 +7507,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 309D..309E ; ID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK 309F ; ID_Continue # Lo HIRAGANA DIGRAPH YORI 30A1..30FA ; ID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FB ; ID_Continue # Po KATAKANA MIDDLE DOT 30FC..30FE ; ID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA 
VOICED ITERATION MARK 30FF ; ID_Continue # Lo KATAKANA DIGRAPH KOTO 3105..312F ; ID_Continue # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN @@ -7683,6 +7687,7 @@ FF10..FF19 ; ID_Continue # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NIN FF21..FF3A ; ID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z FF3F ; ID_Continue # Pc FULLWIDTH LOW LINE FF41..FF5A ; ID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF65 ; ID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT FF66..FF6F ; ID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU FF70 ; ID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK FF71..FF9D ; ID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N @@ -8207,12 +8212,13 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 2B740..2B81D ; ID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; ID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; ID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; ID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; ID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 139482 +# Total code points: 140108 # ================================================ @@ -8962,11 +8968,12 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 2B740..2B81D ; XID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; XID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; XID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; XID_Start # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136322 +# Total code points: 136944 # ================================================ @@ -9543,6 +9550,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1FE0..1FEC ; XID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA 1FF2..1FF4 ; XID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6..1FFC ; XID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +200C..200D ; XID_Continue # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER 203F..2040 ; XID_Continue # Pc [2] UNDERTIE..CHARACTER TIE 2054 ; XID_Continue # Pc INVERTED UNDERTIE 2071 ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I @@ -9608,6 +9616,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 309D..309E ; XID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION 
MARK 309F ; XID_Continue # Lo HIRAGANA DIGRAPH YORI 30A1..30FA ; XID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FB ; XID_Continue # Po KATAKANA MIDDLE DOT 30FC..30FE ; XID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK 30FF ; XID_Continue # Lo KATAKANA DIGRAPH KOTO 3105..312F ; XID_Continue # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN @@ -9793,6 +9802,7 @@ FF10..FF19 ; XID_Continue # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NI FF21..FF3A ; XID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z FF3F ; XID_Continue # Pc FULLWIDTH LOW LINE FF41..FF5A ; XID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF65 ; XID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT FF66..FF6F ; XID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU FF70 ; XID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK FF71..FF9D ; XID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N @@ -10317,12 +10327,13 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 2B740..2B81D ; XID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; XID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; XID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; XID_Continue # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; XID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 139463 +# Total code points: 140089 # ================================================ @@ -10335,6 +10346,15 @@ E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTO # - FFF9..FFFB (Interlinear annotation format characters) # - 13430..13440 (Egyptian hieroglyph format characters) # - Prepended_Concatenation_Mark (Exceptional format characters that should be visible) +# +# There are currently no stability guarantees for DICP. However, the +# values of DICP interact with the derivation of XID_Continue +# and NFKC_CF, for which there are stability guarantees. +# Maintainers of this property should note that in the +# unlikely case that the DICP value changes for an existing character +# which is also XID_Continue=Yes, then exceptions must be put +# in place to ensure that the NFKC_CF mapping value for that +# existing character does not change. 
00AD ; Default_Ignorable_Code_Point # Cf SOFT HYPHEN 034F ; Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER @@ -11602,7 +11622,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 2E80..2E99 ; Grapheme_Base # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP 2E9B..2EF3 ; Grapheme_Base # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE 2F00..2FD5 ; Grapheme_Base # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE -2FF0..2FFB ; Grapheme_Base # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +2FF0..2FFF ; Grapheme_Base # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION 3000 ; Grapheme_Base # Zs IDEOGRAPHIC SPACE 3001..3003 ; Grapheme_Base # Po [3] IDEOGRAPHIC COMMA..DITTO MARK 3004 ; Grapheme_Base # So JAPANESE INDUSTRIAL STANDARD SYMBOL @@ -11657,6 +11677,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 3196..319F ; Grapheme_Base # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31A0..31BF ; Grapheme_Base # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH 31C0..31E3 ; Grapheme_Base # So [36] CJK STROKE T..CJK STROKE Q +31EF ; Grapheme_Base # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0..31FF ; Grapheme_Base # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; Grapheme_Base # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 3220..3229 ; Grapheme_Base # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN @@ -12497,11 +12518,12 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 2B740..2B81D ; Grapheme_Base # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Grapheme_Base # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Grapheme_Base # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; Grapheme_Base # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Grapheme_Base # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 146986 +# Total code points: 147613 # ================================================ @@ -12572,4 +12594,239 @@ ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK # Total code points: 65 +# ================================================ + +# Derived Property: Indic_Conjunct_Break +# Generated from the Grapheme_Cluster_Break, Indic_Syllabic_Category, +# Canonical_Combining_Class, and Script properties as described in UAX #44: +# https://www.unicode.org/reports/tr44/. + +# All code points not explicitly listed for Indic_Conjunct_Break +# have the value None. 
+ +# @missing: 0000..10FFFF; InCB; None + +# ================================================ + +# Indic_Conjunct_Break=Linker + +094D ; InCB; Linker # Mn DEVANAGARI SIGN VIRAMA +09CD ; InCB; Linker # Mn BENGALI SIGN VIRAMA +0ACD ; InCB; Linker # Mn GUJARATI SIGN VIRAMA +0B4D ; InCB; Linker # Mn ORIYA SIGN VIRAMA +0C4D ; InCB; Linker # Mn TELUGU SIGN VIRAMA +0D4D ; InCB; Linker # Mn MALAYALAM SIGN VIRAMA + +# Total code points: 6 + +# ================================================ + +# Indic_Conjunct_Break=Consonant + +0915..0939 ; InCB; Consonant # Lo [37] DEVANAGARI LETTER KA..DEVANAGARI LETTER HA +0958..095F ; InCB; Consonant # Lo [8] DEVANAGARI LETTER QA..DEVANAGARI LETTER YYA +0978..097F ; InCB; Consonant # Lo [8] DEVANAGARI LETTER MARWARI DDA..DEVANAGARI LETTER BBA +0995..09A8 ; InCB; Consonant # Lo [20] BENGALI LETTER KA..BENGALI LETTER NA +09AA..09B0 ; InCB; Consonant # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; InCB; Consonant # Lo BENGALI LETTER LA +09B6..09B9 ; InCB; Consonant # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09DC..09DD ; InCB; Consonant # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF ; InCB; Consonant # Lo BENGALI LETTER YYA +09F0..09F1 ; InCB; Consonant # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +0A95..0AA8 ; InCB; Consonant # Lo [20] GUJARATI LETTER KA..GUJARATI LETTER NA +0AAA..0AB0 ; InCB; Consonant # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; InCB; Consonant # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; InCB; Consonant # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0AF9 ; InCB; Consonant # Lo GUJARATI LETTER ZHA +0B15..0B28 ; InCB; Consonant # Lo [20] ORIYA LETTER KA..ORIYA LETTER NA +0B2A..0B30 ; InCB; Consonant # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; InCB; Consonant # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; InCB; Consonant # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B5C..0B5D ; InCB; Consonant # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F ; InCB; Consonant # Lo ORIYA LETTER YYA +0B71 ; InCB; Consonant # Lo ORIYA LETTER WA +0C15..0C28 ; InCB; Consonant # Lo [20] TELUGU LETTER KA..TELUGU LETTER NA +0C2A..0C39 ; InCB; Consonant # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C58..0C5A ; InCB; Consonant # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0D15..0D3A ; InCB; Consonant # Lo [38] MALAYALAM LETTER KA..MALAYALAM LETTER TTTA + +# Total code points: 240 + +# ================================================ + +# Indic_Conjunct_Break=Extend + +0300..034E ; InCB; Extend # Mn [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW +0350..036F ; InCB; Extend # Mn [32] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING LATIN SMALL LETTER X +0483..0487 ; InCB; Extend # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0591..05BD ; InCB; Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; InCB; Extend # Mn HEBREW POINT RAFE +05C1..05C2 ; InCB; Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; InCB; Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; InCB; Extend # Mn HEBREW POINT QAMATS QATAN +0610..061A ; InCB; Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..065F ; InCB; Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; InCB; Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; InCB; Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; InCB; Extend # Mn [6] 
ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E7..06E8 ; InCB; Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; InCB; Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +0711 ; InCB; Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; InCB; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07EB..07F3 ; InCB; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; InCB; Extend # Mn NKO DANTAYALAN +0816..0819 ; InCB; Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; InCB; Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; InCB; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; InCB; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; InCB; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0898..089F ; InCB; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08CA..08E1 ; InCB; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..08FF ; InCB; Extend # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA +093C ; InCB; Extend # Mn DEVANAGARI SIGN NUKTA +0951..0954 ; InCB; Extend # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT +09BC ; InCB; Extend # Mn BENGALI SIGN NUKTA +09FE ; InCB; Extend # Mn BENGALI SANDHI MARK +0A3C ; InCB; Extend # Mn GURMUKHI SIGN NUKTA +0ABC ; InCB; Extend # Mn GUJARATI SIGN NUKTA +0B3C ; InCB; Extend # Mn ORIYA SIGN NUKTA +0C3C ; InCB; Extend # Mn TELUGU SIGN NUKTA +0C55..0C56 ; InCB; Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0CBC ; InCB; Extend # Mn KANNADA SIGN NUKTA +0D3B..0D3C ; InCB; Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0E38..0E3A ; InCB; Extend # Mn [3] THAI CHARACTER SARA U..THAI CHARACTER PHINTHU +0E48..0E4B ; InCB; Extend # Mn [4] THAI CHARACTER MAI EK..THAI CHARACTER MAI CHATTAWA +0EB8..0EBA ; InCB; Extend # Mn [3] LAO VOWEL SIGN U..LAO SIGN PALI VIRAMA +0EC8..0ECB ; InCB; Extend # Mn [4] LAO TONE MAI EK..LAO TONE MAI CATAWA +0F18..0F19 ; InCB; Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; InCB; Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; InCB; Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; InCB; Extend # Mn TIBETAN MARK TSA -PHRU +0F71..0F72 ; InCB; Extend # Mn [2] TIBETAN VOWEL SIGN AA..TIBETAN VOWEL SIGN I +0F74 ; InCB; Extend # Mn TIBETAN VOWEL SIGN U +0F7A..0F7D ; InCB; Extend # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO +0F80 ; InCB; Extend # Mn TIBETAN VOWEL SIGN REVERSED I +0F82..0F84 ; InCB; Extend # Mn [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA +0F86..0F87 ; InCB; Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0FC6 ; InCB; Extend # Mn TIBETAN SYMBOL PADMA GDAN +1037 ; InCB; Extend # Mn MYANMAR SIGN DOT BELOW +1039..103A ; InCB; Extend # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +108D ; InCB; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +135D..135F ; InCB; Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1714 ; InCB; Extend # Mn TAGALOG SIGN VIRAMA +17D2 ; InCB; Extend # Mn KHMER SIGN COENG +17DD ; InCB; Extend # Mn KHMER SIGN ATTHACAN +18A9 ; InCB; Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1939..193B ; InCB; Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; InCB; Extend # Mn [2] 
BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
+1A60 ; InCB; Extend # Mn TAI THAM SIGN SAKOT
+1A75..1A7C ; InCB; Extend # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN
+1A7F ; InCB; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
+1AB0..1ABD ; InCB; Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
+1ABF..1ACE ; InCB; Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
+1B34 ; InCB; Extend # Mn BALINESE SIGN REREKAN
+1B6B..1B73 ; InCB; Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
+1BAB ; InCB; Extend # Mn SUNDANESE SIGN VIRAMA
+1BE6 ; InCB; Extend # Mn BATAK SIGN TOMPI
+1C37 ; InCB; Extend # Mn LEPCHA SIGN NUKTA
+1CD0..1CD2 ; InCB; Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
+1CD4..1CE0 ; InCB; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
+1CE2..1CE8 ; InCB; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
+1CED ; InCB; Extend # Mn VEDIC SIGN TIRYAK
+1CF4 ; InCB; Extend # Mn VEDIC TONE CANDRA ABOVE
+1CF8..1CF9 ; InCB; Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
+1DC0..1DFF ; InCB; Extend # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+200D ; InCB; Extend # Cf ZERO WIDTH JOINER
+20D0..20DC ; InCB; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
+20E1 ; InCB; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
+20E5..20F0 ; InCB; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
+2CEF..2CF1 ; InCB; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
+2D7F ; InCB; Extend # Mn TIFINAGH CONSONANT JOINER
+2DE0..2DFF ; InCB; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
+302A..302D ; InCB; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
+302E..302F ; InCB; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
+3099..309A ; InCB; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+A66F ; InCB; Extend # Mn COMBINING CYRILLIC VZMET
+A674..A67D ; InCB; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK
+A69E..A69F ; InCB; Extend # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
+A6F0..A6F1 ; InCB; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
+A82C ; InCB; Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
+A8E0..A8F1 ; InCB; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
+A92B..A92D ; InCB; Extend # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU
+A9B3 ; InCB; Extend # Mn JAVANESE SIGN CECAK TELU
+AAB0 ; InCB; Extend # Mn TAI VIET MAI KANG
+AAB2..AAB4 ; InCB; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
+AAB7..AAB8 ; InCB; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
+AABE..AABF ; InCB; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
+AAC1 ; InCB; Extend # Mn TAI VIET TONE MAI THO
+AAF6 ; InCB; Extend # Mn MEETEI MAYEK VIRAMA
+ABED ; InCB; Extend # Mn MEETEI MAYEK APUN IYEK
+FB1E ; InCB; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA
+FE20..FE2F ; InCB; Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
+101FD ; InCB; Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
+102E0 ; InCB; Extend # Mn COPTIC EPACT THOUSANDS MARK
+10376..1037A ; InCB; Extend # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII
+10A0D ; InCB; Extend # Mn KHAROSHTHI SIGN DOUBLE RING BELOW
+10A0F ; InCB; Extend # Mn KHAROSHTHI SIGN VISARGA
+10A38..10A3A ; InCB; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
+10A3F ; InCB; Extend # Mn KHAROSHTHI VIRAMA
+10AE5..10AE6 ; InCB; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
+10D24..10D27 ; InCB; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
+10EAB..10EAC ; InCB; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
+10EFD..10EFF ; InCB; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10F46..10F50 ; InCB; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
+10F82..10F85 ; InCB; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
+11070 ; InCB; Extend # Mn BRAHMI SIGN OLD TAMIL VIRAMA
+1107F ; InCB; Extend # Mn BRAHMI NUMBER JOINER
+110BA ; InCB; Extend # Mn KAITHI SIGN NUKTA
+11100..11102 ; InCB; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
+11133..11134 ; InCB; Extend # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA
+11173 ; InCB; Extend # Mn MAHAJANI SIGN NUKTA
+111CA ; InCB; Extend # Mn SHARADA SIGN NUKTA
+11236 ; InCB; Extend # Mn KHOJKI SIGN NUKTA
+112E9..112EA ; InCB; Extend # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA
+1133B..1133C ; InCB; Extend # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA
+11366..1136C ; InCB; Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
+11370..11374 ; InCB; Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+11446 ; InCB; Extend # Mn NEWA SIGN NUKTA
+1145E ; InCB; Extend # Mn NEWA SANDHI MARK
+114C3 ; InCB; Extend # Mn TIRHUTA SIGN NUKTA
+115C0 ; InCB; Extend # Mn SIDDHAM SIGN NUKTA
+116B7 ; InCB; Extend # Mn TAKRI SIGN NUKTA
+1172B ; InCB; Extend # Mn AHOM SIGN KILLER
+1183A ; InCB; Extend # Mn DOGRA SIGN NUKTA
+1193E ; InCB; Extend # Mn DIVES AKURU VIRAMA
+11943 ; InCB; Extend # Mn DIVES AKURU SIGN NUKTA
+11A34 ; InCB; Extend # Mn ZANABAZAR SQUARE SIGN VIRAMA
+11A47 ; InCB; Extend # Mn ZANABAZAR SQUARE SUBJOINER
+11A99 ; InCB; Extend # Mn SOYOMBO SUBJOINER
+11D42 ; InCB; Extend # Mn MASARAM GONDI SIGN NUKTA
+11D44..11D45 ; InCB; Extend # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA
+11D97 ; InCB; Extend # Mn GUNJALA GONDI VIRAMA
+11F42 ; InCB; Extend # Mn KAWI CONJOINER
+16AF0..16AF4 ; InCB; Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
+16B30..16B36 ; InCB; Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
+1BC9E ; InCB; Extend # Mn DUPLOYAN DOUBLE MARK
+1D165 ; InCB; Extend # Mc MUSICAL SYMBOL COMBINING STEM
+1D167..1D169 ; InCB; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
+1D16E..1D172 ; InCB; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
+1D17B..1D182 ; InCB; Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
+1D185..1D18B ; InCB; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
+1D1AA..1D1AD ; InCB; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
+1D242..1D244 ; InCB; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
+1E000..1E006 ; InCB; Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
+1E008..1E018 ; InCB; Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
+1E01B..1E021 ; InCB; Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
+1E023..1E024 ; InCB; Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
+1E026..1E02A ; InCB; Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
+1E08F ; InCB; Extend # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+1E130..1E136 ; InCB; Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
+1E2AE ; InCB; Extend # Mn TOTO SIGN RISING TONE
+1E2EC..1E2EF ; InCB; Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
+1E4EC..1E4EF ; InCB; Extend # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
+1E8D0..1E8D6 ; InCB; Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
+1E944..1E94A ; InCB; Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
+
+# Total code points: 884
+
 # EOF
diff --git a/libcxx/utils/data/unicode/DerivedGeneralCategory.txt b/libcxx/utils/data/unicode/DerivedGeneralCategory.txt
index c6013ef25d834..285ffa8fb83ac 100644
--- a/libcxx/utils/data/unicode/DerivedGeneralCategory.txt
+++ b/libcxx/utils/data/unicode/DerivedGeneralCategory.txt
@@ -1,6 +1,6 @@
-# DerivedGeneralCategory-15.0.0.txt
-# Date: 2022-04-26, 23:14:35 GMT
-# © 2022 Unicode®, Inc.
+# DerivedGeneralCategory-15.1.0.txt
+# Date: 2023-07-28, 23:34:02 GMT
+# © 2023 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use, see https://www.unicode.org/terms_of_use.html
 #
@@ -284,13 +284,12 @@
 2E9A ; Cn # <reserved-2E9A>
 2EF4..2EFF ; Cn # [12] <reserved-2EF4>..<reserved-2EFF>
 2FD6..2FEF ; Cn # [26] <reserved-2FD6>..<reserved-2FEF>
-2FFC..2FFF ; Cn # [4] <reserved-2FFC>..<reserved-2FFF>
 3040 ; Cn # <reserved-3040>
 3097..3098 ; Cn # [2] <reserved-3097>..<reserved-3098>
 3100..3104 ; Cn # [5] <reserved-3100>..<reserved-3104>
 3130 ; Cn # <reserved-3130>
 318F ; Cn # <reserved-318F>
-31E4..31EF ; Cn # [12] <reserved-31E4>..<reserved-31EF>
+31E4..31EE ; Cn # [11] <reserved-31E4>..<reserved-31EE>
 321F ; Cn # <reserved-321F>
 A48D..A48F ; Cn # [3] <reserved-A48D>..<reserved-A48F>
 A4C7..A4CF ; Cn # [9] <reserved-A4C7>..<reserved-A4CF>
@@ -713,7 +712,8 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF>
 2B73A..2B73F ; Cn # [6] <reserved-2B73A>..<reserved-2B73F>
 2B81E..2B81F ; Cn # [2] <reserved-2B81E>..<reserved-2B81F>
 2CEA2..2CEAF ; Cn # [14] <reserved-2CEA2>..<reserved-2CEAF>
-2EBE1..2F7FF ; Cn # [3103] <reserved-2EBE1>..<reserved-2F7FF>
+2EBE1..2EBEF ; Cn # [15] <reserved-2EBE1>..<reserved-2EBEF>
+2EE5E..2F7FF ; Cn # [2466] <reserved-2EE5E>..<reserved-2F7FF>
 2FA1E..2FFFF ; Cn # [1506] <reserved-2FA1E>..<reserved-2FFFF>
 3134B..3134F ; Cn # [5] <reserved-3134B>..<reserved-3134F>
 323B0..E0000 ; Cn # [711761] <reserved-323B0>..<reserved-E0000>
@@ -723,7 +723,7 @@ E01F0..EFFFF ; Cn # [65040] <reserved-E01F0>..<reserved-EFFFF>
 FFFFE..FFFFF ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
 10FFFE..10FFFF; Cn # [2] <noncharacter-10FFFE>..<noncharacter-10FFFF>
-# Total code points: 825345
+# Total code points: 824718
 # ================================================
@@ -2649,11 +2649,12 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
 2B740..2B81D ; Lo # [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
 2B820..2CEA1 ; Lo # [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
 2CEB0..2EBE0 ; Lo # [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
+2EBF0..2EE5D ; Lo # [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
 2F800..2FA1D ; Lo # [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# Total code points: 131612
+# Total code points: 132234
 # ================================================
@@ -4092,7 +4093,7 @@ FFE3 ; Sk # FULLWIDTH MACRON
 2E80..2E99 ; So # [26] CJK RADICAL REPEAT..CJK RADICAL RAP
 2E9B..2EF3 ; So # [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
 2F00..2FD5 ; So # [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
-2FF0..2FFB ; So # [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
+2FF0..2FFF ; So # [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION
 3004 ; So # JAPANESE INDUSTRIAL STANDARD SYMBOL
 3012..3013 ; So # [2] POSTAL MARK..GETA MARK
 3020 ; So # POSTAL MARK FACE
@@ -4101,6 +4102,7 @@ FFE3 ; Sk # FULLWIDTH MACRON
 3190..3191 ; So # [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
 3196..319F ; So # [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
 31C0..31E3 ; So # [36] CJK STROKE T..CJK STROKE Q
+31EF ; So # IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
 3200..321E ; So # [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
 322A..3247 ; So # [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
 3250 ; So # PARTNERSHIP SIGN
@@ -4191,7 +4193,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
 1FB94..1FBCA ; So # [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
-# Total code points: 6634
+# Total code points: 6639
 # ================================================
diff --git a/libcxx/utils/data/unicode/EastAsianWidth.txt b/libcxx/utils/data/unicode/EastAsianWidth.txt
index 38b7076c02f78..02df4df475cbe 100644
--- a/libcxx/utils/data/unicode/EastAsianWidth.txt
+++ b/libcxx/utils/data/unicode/EastAsianWidth.txt
@@ -1,11 +1,11 @@
-# EastAsianWidth-15.0.0.txt
-# Date: 2022-05-24, 17:40:20 GMT [KW, LI]
-# © 2022 Unicode®, Inc.
+# EastAsianWidth-15.1.0.txt
+# Date: 2023-07-28, 23:34:08 GMT
+# © 2023 Unicode®, Inc.
 # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
 # For terms of use, see https://www.unicode.org/terms_of_use.html
 #
 # Unicode Character Database
-# For documentation, see https://www.unicode.org/reports/tr44/
+# For documentation, see https://www.unicode.org/reports/tr44/
 #
 # East_Asian_Width Property
 #
@@ -30,2590 +30,2592 @@
 # Character ranges are specified as for other property files in the
 # Unicode Character Database.
 #
-# For legacy reasons, there are no spaces before or after the semicolon
-# which separates the two fields. The comments following the number sign
-# "#" list the General_Category property value or the L& alias of the
-# derived value LC, the Unicode character name or names, and, in lines
-# with ranges of code points, the code point count in square brackets.
+# The comments following the number sign "#" list the General_Category
+# property value or the L& alias of the derived value LC, the Unicode
+# character name or names, and, in lines with ranges of code points,
+# the code point count in square brackets.
 #
 # For more information, see UAX #11: East Asian Width,
 # at https://www.unicode.org/reports/tr11/
 #
 # @missing: 0000..10FFFF; N
-0000..001F;N # Cc [32] <control-0000>..<control-001F>
-0020;Na # Zs SPACE
-0021..0023;Na # Po [3] EXCLAMATION MARK..NUMBER SIGN
-0024;Na # Sc DOLLAR SIGN
-0025..0027;Na # Po [3] PERCENT SIGN..APOSTROPHE
-0028;Na # Ps LEFT PARENTHESIS
-0029;Na # Pe RIGHT PARENTHESIS
-002A;Na # Po ASTERISK
-002B;Na # Sm PLUS SIGN
-002C;Na # Po COMMA
-002D;Na # Pd HYPHEN-MINUS
-002E..002F;Na # Po [2] FULL STOP..SOLIDUS
-0030..0039;Na # Nd [10] DIGIT ZERO..DIGIT NINE
-003A..003B;Na # Po [2] COLON..SEMICOLON
-003C..003E;Na # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN
-003F..0040;Na # Po [2] QUESTION MARK..COMMERCIAL AT
-0041..005A;Na # Lu [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
-005B;Na # Ps LEFT SQUARE BRACKET
-005C;Na # Po REVERSE SOLIDUS
-005D;Na # Pe RIGHT SQUARE BRACKET
-005E;Na # Sk CIRCUMFLEX ACCENT
-005F;Na # Pc LOW LINE
-0060;Na # Sk GRAVE ACCENT
-0061..007A;Na # Ll [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z
-007B;Na # Ps LEFT CURLY BRACKET
-007C;Na # Sm VERTICAL LINE
-007D;Na # Pe RIGHT CURLY BRACKET
-007E;Na # Sm TILDE
-007F;N # Cc <control-007F>
-0080..009F;N # Cc [32] <control-0080>..<control-009F>
-00A0;N # Zs NO-BREAK SPACE
-00A1;A # Po INVERTED EXCLAMATION MARK
-00A2..00A3;Na # Sc [2] CENT SIGN..POUND SIGN
-00A4;A # Sc CURRENCY SIGN
-00A5;Na # Sc YEN SIGN
-00A6;Na # So BROKEN BAR
-00A7;A # Po SECTION SIGN
-00A8;A # Sk DIAERESIS
-00A9;N # So COPYRIGHT SIGN
-00AA;A # Lo FEMININE ORDINAL INDICATOR
-00AB;N # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-00AC;Na # Sm NOT SIGN
-00AD;A # Cf SOFT HYPHEN
-00AE;A # So REGISTERED SIGN
-00AF;Na # Sk MACRON
-00B0;A # So DEGREE SIGN
-00B1;A # Sm PLUS-MINUS SIGN
-00B2..00B3;A # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE
-00B4;A # Sk ACUTE ACCENT
-00B5;N # Ll MICRO SIGN
-00B6..00B7;A # Po [2] PILCROW SIGN..MIDDLE DOT
-00B8;A # Sk CEDILLA
-00B9;A # No SUPERSCRIPT ONE
-00BA;A # Lo MASCULINE ORDINAL INDICATOR
-00BB;N # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-00BC..00BE;A # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
-00BF;A # Po INVERTED QUESTION MARK
-00C0..00C5;N # Lu [6] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER A WITH RING ABOVE
-00C6;A # Lu LATIN CAPITAL LETTER AE
-00C7..00CF;N # Lu [9] LATIN CAPITAL LETTER C WITH CEDILLA..LATIN CAPITAL LETTER I WITH DIAERESIS
-00D0;A # Lu LATIN CAPITAL LETTER ETH
-00D1..00D6;N # Lu [6] LATIN CAPITAL LETTER N WITH TILDE..LATIN CAPITAL LETTER O WITH DIAERESIS
-00D7;A # Sm MULTIPLICATION SIGN
-00D8;A # Lu LATIN CAPITAL LETTER O WITH STROKE
-00D9..00DD;N # Lu [5] LATIN CAPITAL LETTER U WITH GRAVE..LATIN CAPITAL LETTER Y WITH ACUTE
-00DE..00E1;A # L& [4] LATIN CAPITAL LETTER THORN..LATIN SMALL LETTER A WITH ACUTE
-00E2..00E5;N # Ll [4] LATIN SMALL LETTER A WITH CIRCUMFLEX..LATIN SMALL LETTER A WITH RING ABOVE
-00E6;A # Ll LATIN SMALL LETTER AE
-00E7;N # Ll LATIN SMALL LETTER C WITH CEDILLA
-00E8..00EA;A # Ll [3] LATIN SMALL LETTER E WITH GRAVE..LATIN SMALL LETTER E WITH CIRCUMFLEX
-00EB;N # Ll LATIN SMALL LETTER E WITH DIAERESIS
-00EC..00ED;A # Ll [2] LATIN SMALL LETTER I WITH GRAVE..LATIN SMALL LETTER I WITH ACUTE
-00EE..00EF;N # Ll [2] LATIN SMALL LETTER I WITH CIRCUMFLEX..LATIN SMALL LETTER I WITH DIAERESIS
-00F0;A # Ll LATIN SMALL LETTER ETH
-00F1;N # Ll LATIN SMALL LETTER N WITH TILDE
-00F2..00F3;A # Ll [2] LATIN SMALL LETTER O WITH GRAVE..LATIN SMALL LETTER O WITH ACUTE
-00F4..00F6;N # Ll [3] LATIN SMALL LETTER O WITH CIRCUMFLEX..LATIN SMALL LETTER O WITH DIAERESIS
-00F7;A # Sm DIVISION SIGN
-00F8..00FA;A # Ll [3] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER U WITH ACUTE
-00FB;N # Ll LATIN SMALL LETTER U WITH CIRCUMFLEX
-00FC;A # Ll LATIN SMALL LETTER U WITH DIAERESIS
-00FD;N # Ll LATIN SMALL LETTER Y WITH ACUTE
-00FE;A # Ll LATIN SMALL LETTER THORN
-00FF;N # Ll LATIN SMALL LETTER Y WITH DIAERESIS
-0100;N # Lu LATIN CAPITAL LETTER A WITH MACRON
-0101;A # Ll LATIN SMALL LETTER A WITH MACRON
-0102..0110;N # L& [15] LATIN CAPITAL LETTER A WITH BREVE..LATIN CAPITAL LETTER D WITH STROKE
-0111;A # Ll LATIN SMALL LETTER D WITH STROKE
-0112;N # Lu LATIN CAPITAL LETTER E WITH MACRON
-0113;A # Ll LATIN SMALL LETTER E WITH MACRON
-0114..011A;N # L& [7] LATIN CAPITAL LETTER E WITH BREVE..LATIN CAPITAL LETTER E WITH CARON
-011B;A # Ll LATIN SMALL LETTER E WITH CARON
-011C..0125;N # L& [10] LATIN CAPITAL LETTER G WITH CIRCUMFLEX..LATIN SMALL LETTER H WITH CIRCUMFLEX
-0126..0127;A # L& [2] LATIN CAPITAL LETTER H WITH STROKE..LATIN SMALL LETTER H WITH STROKE
-0128..012A;N # L& [3] LATIN CAPITAL LETTER I WITH TILDE..LATIN CAPITAL LETTER I WITH MACRON
-012B;A # Ll LATIN SMALL LETTER I WITH MACRON
-012C..0130;N # L& [5] LATIN CAPITAL LETTER I WITH BREVE..LATIN CAPITAL LETTER I WITH DOT ABOVE
-0131..0133;A # L& [3] LATIN SMALL LETTER DOTLESS I..LATIN SMALL LIGATURE IJ
-0134..0137;N # L& [4] LATIN CAPITAL LETTER J WITH CIRCUMFLEX..LATIN SMALL LETTER K WITH CEDILLA
-0138;A # Ll LATIN SMALL LETTER KRA
-0139..013E;N # L& [6] LATIN CAPITAL LETTER L WITH ACUTE..LATIN SMALL LETTER L WITH CARON
-013F..0142;A # L& [4] LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH STROKE
-0143;N # Lu LATIN CAPITAL LETTER N WITH ACUTE
-0144;A # Ll LATIN SMALL LETTER N WITH ACUTE
-0145..0147;N # L& [3] LATIN CAPITAL LETTER N WITH CEDILLA..LATIN CAPITAL LETTER N WITH CARON
-0148..014B;A # L& [4] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER ENG
-014C;N # Lu LATIN CAPITAL LETTER O WITH MACRON
-014D;A # Ll LATIN SMALL LETTER O WITH MACRON
-014E..0151;N # L& [4] LATIN CAPITAL LETTER O WITH BREVE..LATIN SMALL LETTER O WITH DOUBLE ACUTE
-0152..0153;A # L& [2] LATIN CAPITAL LIGATURE OE..LATIN SMALL LIGATURE OE
-0154..0165;N # L& [18] LATIN CAPITAL LETTER R WITH ACUTE..LATIN SMALL LETTER T WITH CARON
-0166..0167;A # L& [2] LATIN CAPITAL LETTER T WITH STROKE..LATIN SMALL LETTER T WITH STROKE
-0168..016A;N # L& [3] LATIN CAPITAL LETTER U WITH TILDE..LATIN CAPITAL LETTER U WITH MACRON
-016B;A # Ll LATIN SMALL LETTER U WITH MACRON
-016C..017F;N # L& [20] LATIN CAPITAL LETTER U WITH BREVE..LATIN SMALL LETTER LONG S
-0180..01BA;N # L& [59] LATIN SMALL LETTER B WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
-01BB;N # Lo LATIN LETTER TWO WITH STROKE
-01BC..01BF;N # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
-01C0..01C3;N # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
-01C4..01CD;N # L& [10] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER A WITH CARON
-01CE;A # Ll LATIN SMALL LETTER A WITH CARON
-01CF;N # Lu LATIN CAPITAL LETTER I WITH CARON
-01D0;A # Ll LATIN SMALL LETTER I WITH CARON
-01D1;N # Lu LATIN CAPITAL LETTER O WITH CARON
-01D2;A # Ll LATIN SMALL LETTER O WITH CARON
-01D3;N # Lu LATIN CAPITAL LETTER U WITH CARON
-01D4;A # Ll LATIN SMALL LETTER U WITH CARON
-01D5;N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
-01D6;A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
-01D7;N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
-01D8;A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
-01D9;N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
-01DA;A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND CARON
-01DB;N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
-01DC;A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
-01DD..024F;N # L& [115] LATIN SMALL LETTER TURNED E..LATIN SMALL LETTER Y WITH STROKE
-0250;N # Ll LATIN SMALL LETTER TURNED A
-0251;A # Ll LATIN SMALL LETTER ALPHA
-0252..0260;N # Ll [15] LATIN SMALL LETTER TURNED ALPHA..LATIN SMALL LETTER G WITH HOOK
-0261;A # Ll LATIN SMALL LETTER SCRIPT G
-0262..0293;N # Ll [50] LATIN LETTER SMALL CAPITAL G..LATIN SMALL LETTER EZH WITH CURL
-0294;N # Lo LATIN LETTER GLOTTAL STOP
-0295..02AF;N # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
-02B0..02C1;N # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
-02C2..02C3;N # Sk [2] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER RIGHT ARROWHEAD
-02C4;A # Sk MODIFIER LETTER UP ARROWHEAD
-02C5;N # Sk MODIFIER LETTER DOWN ARROWHEAD
-02C6;N # Lm MODIFIER LETTER CIRCUMFLEX ACCENT
-02C7;A # Lm CARON
-02C8;N # Lm MODIFIER LETTER VERTICAL LINE
-02C9..02CB;A # Lm [3] MODIFIER LETTER MACRON..MODIFIER LETTER GRAVE ACCENT
-02CC;N # Lm MODIFIER LETTER LOW VERTICAL LINE
-02CD;A # Lm MODIFIER LETTER LOW MACRON
-02CE..02CF;N # Lm [2] MODIFIER LETTER LOW GRAVE ACCENT..MODIFIER LETTER LOW ACUTE ACCENT
-02D0;A # Lm MODIFIER LETTER TRIANGULAR COLON
-02D1;N # Lm MODIFIER LETTER HALF TRIANGULAR COLON
-02D2..02D7;N # Sk [6] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER MINUS SIGN
-02D8..02DB;A # Sk [4] BREVE..OGONEK
-02DC;N # Sk SMALL TILDE
-02DD;A # Sk DOUBLE ACUTE ACCENT
-02DE;N # Sk MODIFIER LETTER RHOTIC HOOK
-02DF;A # Sk MODIFIER LETTER CROSS ACCENT
-02E0..02E4;N # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
-02E5..02EB;N # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
-02EC;N # Lm MODIFIER LETTER VOICING
-02ED;N # Sk MODIFIER LETTER UNASPIRATED
-02EE;N # Lm MODIFIER LETTER DOUBLE APOSTROPHE
-02EF..02FF;N # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
-0300..036F;A # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
-0370..0373;N # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
-0374;N # Lm GREEK NUMERAL SIGN
-0375;N # Sk GREEK LOWER NUMERAL SIGN
-0376..0377;N # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
-037A;N # Lm GREEK YPOGEGRAMMENI
-037B..037D;N # Ll [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
-037E;N # Po GREEK QUESTION MARK
-037F;N # Lu GREEK CAPITAL LETTER YOT
-0384..0385;N # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS
-0386;N # Lu GREEK CAPITAL LETTER ALPHA WITH TONOS
-0387;N # Po GREEK ANO TELEIA
-0388..038A;N # Lu [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
-038C;N # Lu GREEK CAPITAL LETTER OMICRON WITH TONOS
-038E..0390;N # L& [3] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
-0391..03A1;A # Lu [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO
-03A3..03A9;A # Lu [7] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER OMEGA
-03AA..03B0;N # L& [7] GREEK CAPITAL LETTER IOTA WITH DIALYTIKA..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
-03B1..03C1;A # Ll [17] GREEK SMALL LETTER ALPHA..GREEK SMALL LETTER RHO
-03C2;N # Ll GREEK SMALL LETTER FINAL SIGMA
-03C3..03C9;A # Ll [7] GREEK SMALL LETTER SIGMA..GREEK SMALL LETTER OMEGA
-03CA..03F5;N # L& [44] GREEK SMALL LETTER IOTA WITH DIALYTIKA..GREEK LUNATE EPSILON SYMBOL
-03F6;N # Sm GREEK REVERSED LUNATE EPSILON SYMBOL
-03F7..03FF;N # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
-0400;N # Lu CYRILLIC CAPITAL LETTER IE WITH GRAVE
-0401;A # Lu CYRILLIC CAPITAL LETTER IO
-0402..040F;N # Lu [14] CYRILLIC CAPITAL LETTER DJE..CYRILLIC CAPITAL LETTER DZHE
-0410..044F;A # L& [64] CYRILLIC CAPITAL LETTER A..CYRILLIC SMALL LETTER YA
-0450;N # Ll CYRILLIC SMALL LETTER IE WITH GRAVE
-0451;A # Ll CYRILLIC SMALL LETTER IO
-0452..0481;N # L& [48] CYRILLIC SMALL LETTER DJE..CYRILLIC SMALL LETTER KOPPA
-0482;N # So CYRILLIC THOUSANDS SIGN
-0483..0487;N # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
-0488..0489;N # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
-048A..04FF;N # L& [118] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER HA WITH STROKE
-0500..052F;N # L& [48] CYRILLIC CAPITAL LETTER KOMI DE..CYRILLIC SMALL LETTER EL WITH DESCENDER
-0531..0556;N # Lu [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
-0559;N # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
-055A..055F;N # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
-0560..0588;N # Ll [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE
-0589;N # Po ARMENIAN FULL STOP
-058A;N # Pd ARMENIAN HYPHEN
-058D..058E;N # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN
-058F;N # Sc ARMENIAN DRAM SIGN
-0591..05BD;N # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
-05BE;N # Pd HEBREW PUNCTUATION MAQAF
-05BF;N # Mn HEBREW POINT RAFE
-05C0;N # Po HEBREW PUNCTUATION PASEQ
-05C1..05C2;N # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
-05C3;N # Po HEBREW PUNCTUATION SOF PASUQ
-05C4..05C5;N # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
-05C6;N # Po HEBREW PUNCTUATION NUN HAFUKHA
-05C7;N # Mn HEBREW POINT QAMATS QATAN
-05D0..05EA;N # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
-05EF..05F2;N # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD
-05F3..05F4;N # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
-0600..0605;N # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
-0606..0608;N # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
-0609..060A;N # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
-060B;N # Sc AFGHANI SIGN
-060C..060D;N # Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR
-060E..060F;N # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
-0610..061A;N # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
-061B;N # Po ARABIC SEMICOLON
-061C;N # Cf ARABIC LETTER MARK
-061D..061F;N # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK
-0620..063F;N # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
-0640;N # Lm ARABIC TATWEEL
-0641..064A;N # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
-064B..065F;N # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
-0660..0669;N # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
-066A..066D;N # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
-066E..066F;N # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
-0670;N # Mn ARABIC LETTER SUPERSCRIPT ALEF
-0671..06D3;N # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
-06D4;N # Po ARABIC FULL STOP
-06D5;N # Lo ARABIC LETTER AE
-06D6..06DC;N # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
-06DD;N # Cf ARABIC END OF AYAH
-06DE;N # So ARABIC START OF RUB EL HIZB
-06DF..06E4;N # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
-06E5..06E6;N # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
-06E7..06E8;N # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
-06E9;N # So ARABIC PLACE OF SAJDAH
-06EA..06ED;N # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
-06EE..06EF;N # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
-06F0..06F9;N # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
-06FA..06FC;N # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
-06FD..06FE;N # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
-06FF;N # Lo ARABIC LETTER HEH WITH INVERTED V
-0700..070D;N # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
-070F;N # Cf SYRIAC ABBREVIATION MARK
-0710;N # Lo SYRIAC LETTER ALAPH
-0711;N # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
-0712..072F;N # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
-0730..074A;N # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
-074D..074F;N # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE
-0750..077F;N # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
-0780..07A5;N # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU
-07A6..07B0;N # Mn [11] THAANA ABAFILI..THAANA SUKUN
-07B1;N # Lo THAANA LETTER NAA
-07C0..07C9;N # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
-07CA..07EA;N # Lo [33] NKO LETTER A..NKO LETTER JONA RA
-07EB..07F3;N # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
-07F4..07F5;N # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
-07F6;N # So NKO SYMBOL OO DENNEN
-07F7..07F9;N # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK
-07FA;N # Lm NKO LAJANYALAN
-07FD;N # Mn NKO DANTAYALAN
-07FE..07FF;N # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN
-0800..0815;N # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
-0816..0819;N # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
-081A;N # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT
-081B..0823;N # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
-0824;N # Lm SAMARITAN MODIFIER LETTER SHORT A
-0825..0827;N # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
-0828;N # Lm SAMARITAN MODIFIER LETTER I
-0829..082D;N # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
-0830..083E;N # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
-0840..0858;N # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
-0859..085B;N # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-085E;N # Po MANDAIC PUNCTUATION
-0860..086A;N # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
-0870..0887;N # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
-0888;N # Sk ARABIC RAISED ROUND DOT
-0889..088E;N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
-0890..0891;N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
-0898..089F;N # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
-08A0..08C8;N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
-08C9;N # Lm ARABIC SMALL FARSI YEH
-08CA..08E1;N # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
-08E2;N # Cf ARABIC DISPUTED END OF AYAH
-08E3..08FF;N # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
-0900..0902;N # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
-0903;N # Mc DEVANAGARI SIGN VISARGA
-0904..0939;N # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
-093A;N # Mn DEVANAGARI VOWEL SIGN OE
-093B;N # Mc DEVANAGARI VOWEL SIGN OOE
-093C;N # Mn DEVANAGARI SIGN NUKTA
-093D;N # Lo DEVANAGARI SIGN AVAGRAHA
-093E..0940;N # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
-0941..0948;N # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
-0949..094C;N # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
-094D;N # Mn DEVANAGARI SIGN VIRAMA
-094E..094F;N # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
-0950;N # Lo DEVANAGARI OM
-0951..0957;N # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE
-0958..0961;N # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
-0962..0963;N # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
-0964..0965;N # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
-0966..096F;N # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
-0970;N # Po DEVANAGARI ABBREVIATION SIGN
-0971;N # Lm DEVANAGARI SIGN HIGH SPACING DOT
-0972..097F;N # Lo [14] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER BBA
-0980;N # Lo BENGALI ANJI
-0981;N # Mn BENGALI SIGN CANDRABINDU
-0982..0983;N # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
-0985..098C;N # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
-098F..0990;N # Lo [2] BENGALI LETTER E..BENGALI LETTER AI
-0993..09A8;N # Lo [22] BENGALI LETTER O..BENGALI LETTER NA
-09AA..09B0;N # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA
-09B2;N # Lo BENGALI LETTER LA
-09B6..09B9;N # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA
-09BC;N # Mn BENGALI SIGN NUKTA
-09BD;N # Lo BENGALI SIGN AVAGRAHA
-09BE..09C0;N # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II
-09C1..09C4;N # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
-09C7..09C8;N # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
-09CB..09CC;N # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
-09CD;N # Mn BENGALI SIGN VIRAMA
-09CE;N # Lo BENGALI LETTER KHANDA TA
-09D7;N # Mc BENGALI AU LENGTH MARK
-09DC..09DD;N # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
-09DF..09E1;N # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
-09E2..09E3;N # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
-09E6..09EF;N # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
-09F0..09F1;N # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
-09F2..09F3;N # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN
-09F4..09F9;N # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
-09FA;N # So BENGALI ISSHAR
-09FB;N # Sc BENGALI GANDA MARK
-09FC;N # Lo BENGALI LETTER VEDIC ANUSVARA
-09FD;N # Po BENGALI ABBREVIATION SIGN
-09FE;N # Mn BENGALI SANDHI MARK
-0A01..0A02;N # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
-0A03;N # Mc GURMUKHI SIGN VISARGA
-0A05..0A0A;N # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
-0A0F..0A10;N # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
-0A13..0A28;N # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
-0A2A..0A30;N # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
-0A32..0A33;N # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
-0A35..0A36;N # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
-0A38..0A39;N # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
-0A3C;N # Mn GURMUKHI SIGN NUKTA
-0A3E..0A40;N # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
-0A41..0A42;N # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
-0A47..0A48;N # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
-0A4B..0A4D;N # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
-0A51;N # Mn GURMUKHI SIGN UDAAT
-0A59..0A5C;N # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
-0A5E;N # Lo GURMUKHI LETTER FA
-0A66..0A6F;N # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
-0A70..0A71;N # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
-0A72..0A74;N # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR
-0A75;N # Mn GURMUKHI SIGN YAKASH
-0A76;N # Po GURMUKHI ABBREVIATION SIGN
-0A81..0A82;N # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
-0A83;N # Mc GUJARATI SIGN VISARGA
-0A85..0A8D;N # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
-0A8F..0A91;N # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
-0A93..0AA8;N # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA
-0AAA..0AB0;N # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA
-0AB2..0AB3;N # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
-0AB5..0AB9;N # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA
-0ABC;N # Mn GUJARATI SIGN NUKTA
-0ABD;N # Lo GUJARATI SIGN AVAGRAHA
-0ABE..0AC0;N # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
-0AC1..0AC5;N # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
-0AC7..0AC8;N # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
-0AC9;N # Mc GUJARATI VOWEL SIGN CANDRA O
-0ACB..0ACC;N # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
-0ACD;N # Mn GUJARATI SIGN VIRAMA
-0AD0;N # Lo GUJARATI OM
-0AE0..0AE1;N # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
-0AE2..0AE3;N # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
-0AE6..0AEF;N # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
-0AF0;N # Po GUJARATI ABBREVIATION SIGN
-0AF1;N # Sc GUJARATI RUPEE SIGN
-0AF9;N # Lo GUJARATI LETTER ZHA
-0AFA..0AFF;N # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
-0B01;N # Mn ORIYA SIGN CANDRABINDU
-0B02..0B03;N # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
-0B05..0B0C;N # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
-0B0F..0B10;N # Lo [2] ORIYA LETTER E..ORIYA LETTER AI
-0B13..0B28;N # Lo [22] ORIYA LETTER O..ORIYA LETTER NA
-0B2A..0B30;N # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA
-0B32..0B33;N # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA
-0B35..0B39;N # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA
-0B3C;N # Mn ORIYA SIGN NUKTA
-0B3D;N # Lo ORIYA SIGN AVAGRAHA
-0B3E;N # Mc ORIYA VOWEL SIGN AA
-0B3F;N # Mn ORIYA VOWEL SIGN I
-0B40;N # Mc ORIYA VOWEL SIGN II
-0B41..0B44;N # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
-0B47..0B48;N # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
-0B4B..0B4C;N # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
-0B4D;N # Mn ORIYA SIGN VIRAMA
-0B55..0B56;N # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
-0B57;N # Mc ORIYA AU LENGTH MARK
-0B5C..0B5D;N # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
-0B5F..0B61;N # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
-0B62..0B63;N # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
-0B66..0B6F;N # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
-0B70;N # So ORIYA ISSHAR
-0B71;N # Lo ORIYA LETTER WA
-0B72..0B77;N # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
-0B82;N # Mn TAMIL SIGN ANUSVARA
-0B83;N # Lo TAMIL SIGN VISARGA
-0B85..0B8A;N # Lo [6] TAMIL LETTER A..TAMIL LETTER UU
-0B8E..0B90;N # Lo [3] TAMIL LETTER E..TAMIL LETTER AI
-0B92..0B95;N # Lo [4] TAMIL LETTER O..TAMIL LETTER KA
-0B99..0B9A;N # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA
-0B9C;N # Lo TAMIL LETTER JA
-0B9E..0B9F;N # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA
-0BA3..0BA4;N # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA
-0BA8..0BAA;N # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA
-0BAE..0BB9;N # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA
-0BBE..0BBF;N # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I
-0BC0;N # Mn TAMIL VOWEL SIGN II
-0BC1..0BC2;N # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
-0BC6..0BC8;N # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
-0BCA..0BCC;N # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
-0BCD;N # Mn TAMIL SIGN VIRAMA
-0BD0;N # Lo TAMIL OM
-0BD7;N # Mc TAMIL AU LENGTH MARK
-0BE6..0BEF;N # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
-0BF0..0BF2;N # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
-0BF3..0BF8;N # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN
-0BF9;N # Sc TAMIL RUPEE SIGN
-0BFA;N # So TAMIL NUMBER SIGN
-0C00;N # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
-0C01..0C03;N # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
-0C04;N # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE
-0C05..0C0C;N # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
-0C0E..0C10;N # Lo [3] TELUGU LETTER E..TELUGU LETTER AI
-0C12..0C28;N # Lo [23] TELUGU LETTER O..TELUGU LETTER NA
-0C2A..0C39;N # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
-0C3C;N # Mn TELUGU SIGN NUKTA
-0C3D;N # Lo TELUGU SIGN AVAGRAHA
-0C3E..0C40;N # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
-0C41..0C44;N # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
-0C46..0C48;N # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
-0C4A..0C4D;N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
-0C55..0C56;N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
-0C58..0C5A;N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D;N # Lo TELUGU LETTER NAKAARA POLLU
-0C60..0C61;N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
-0C62..0C63;N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
-0C66..0C6F;N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
-0C77;N # Po TELUGU SIGN SIDDHAM
-0C78..0C7E;N # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
-0C7F;N # So TELUGU SIGN TUUMU
-0C80;N # Lo KANNADA SIGN SPACING CANDRABINDU
-0C81;N # Mn KANNADA SIGN CANDRABINDU
-0C82..0C83;N # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
-0C84;N # Po KANNADA SIGN SIDDHAM
-0C85..0C8C;N # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
-0C8E..0C90;N # Lo [3] KANNADA LETTER E..KANNADA LETTER AI
-0C92..0CA8;N # Lo [23] KANNADA LETTER O..KANNADA LETTER NA
-0CAA..0CB3;N # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
-0CB5..0CB9;N # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
-0CBC;N # Mn KANNADA SIGN NUKTA
-0CBD;N # Lo KANNADA SIGN AVAGRAHA
-0CBE;N # Mc KANNADA VOWEL SIGN AA
-0CBF;N # Mn KANNADA VOWEL SIGN I
-0CC0..0CC4;N # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR
-0CC6;N # Mn KANNADA VOWEL SIGN E
-0CC7..0CC8;N # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
-0CCA..0CCB;N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
-0CCC..0CCD;N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
-0CD5..0CD6;N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CDD..0CDE;N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
-0CE0..0CE1;N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
-0CE2..0CE3;N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
-0CE6..0CEF;N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
-0CF1..0CF2;N # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
-0CF3;N # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
-0D00..0D01;N # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
-0D02..0D03;N # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
-0D04..0D0C;N # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
-0D0E..0D10;N # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
-0D12..0D3A;N # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
-0D3B..0D3C;N # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
-0D3D;N # Lo MALAYALAM SIGN AVAGRAHA
-0D3E..0D40;N # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
-0D41..0D44;N # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
-0D46..0D48;N # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
-0D4A..0D4C;N # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
-0D4D;N # Mn MALAYALAM SIGN VIRAMA
-0D4E;N # Lo MALAYALAM LETTER DOT REPH
-0D4F;N # So MALAYALAM SIGN PARA
-0D54..0D56;N # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL
-0D57;N # Mc MALAYALAM AU LENGTH MARK
-0D58..0D5E;N # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH
-0D5F..0D61;N # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL
-0D62..0D63;N # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
-0D66..0D6F;N # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
-0D70..0D78;N # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS
-0D79;N # So MALAYALAM DATE MARK
-0D7A..0D7F;N # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
-0D81;N # Mn SINHALA SIGN CANDRABINDU
-0D82..0D83;N # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
-0D85..0D96;N # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
-0D9A..0DB1;N # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
-0DB3..0DBB;N # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
-0DBD;N # Lo SINHALA LETTER DANTAJA LAYANNA
-0DC0..0DC6;N # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
-0DCA;N # Mn SINHALA SIGN AL-LAKUNA
-0DCF..0DD1;N # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
-0DD2..0DD4;N # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
-0DD6;N # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
-0DD8..0DDF;N # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
-0DE6..0DEF;N # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE
-0DF2..0DF3;N # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
-0DF4;N # Po SINHALA PUNCTUATION KUNDDALIYA
-0E01..0E30;N # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A
-0E31;N # Mn THAI CHARACTER MAI HAN-AKAT
-0E32..0E33;N # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM
-0E34..0E3A;N # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
-0E3F;N # Sc THAI CURRENCY SYMBOL BAHT
-0E40..0E45;N # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO
-0E46;N # Lm THAI CHARACTER MAIYAMOK
-0E47..0E4E;N # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
-0E4F;N # Po THAI CHARACTER FONGMAN
-0E50..0E59;N # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE -0E5A..0E5B;N # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT -0E81..0E82;N # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG -0E84;N # Lo LAO LETTER KHO TAM -0E86..0E8A;N # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM -0E8C..0EA3;N # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING -0EA5;N # Lo LAO LETTER LO LOOT -0EA7..0EB0;N # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A -0EB1;N # Mn LAO VOWEL SIGN MAI KAN -0EB2..0EB3;N # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM -0EB4..0EBC;N # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO -0EBD;N # Lo LAO SEMIVOWEL SIGN NYO -0EC0..0EC4;N # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI -0EC6;N # Lm LAO KO LA -0EC8..0ECE;N # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN -0ED0..0ED9;N # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDF;N # Lo [4] LAO HO NO..LAO LETTER KHMU NYO -0F00;N # Lo TIBETAN SYLLABLE OM -0F01..0F03;N # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA -0F04..0F12;N # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13;N # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN -0F14;N # Po TIBETAN MARK GTER TSHEG -0F15..0F17;N # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS -0F18..0F19;N # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS -0F1A..0F1F;N # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG -0F20..0F29;N # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE -0F2A..0F33;N # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO -0F34;N # So TIBETAN MARK BSDUS RTAGS -0F35;N # Mn TIBETAN MARK NGAS BZUNG NYI ZLA -0F36;N # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN -0F37;N # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS -0F38;N # So TIBETAN MARK CHE MGO -0F39;N # Mn TIBETAN MARK TSA -PHRU -0F3A;N # Ps TIBETAN MARK GUG RTAGS GYON -0F3B;N # Pe TIBETAN MARK GUG RTAGS GYAS -0F3C;N # Ps TIBETAN MARK ANG KHANG GYON -0F3D;N # Pe TIBETAN MARK ANG KHANG GYAS -0F3E..0F3F;N # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES -0F40..0F47;N # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA -0F49..0F6C;N # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA -0F71..0F7E;N # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO -0F7F;N # Mc TIBETAN SIGN RNAM BCAD -0F80..0F84;N # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA -0F85;N # Po TIBETAN MARK PALUTA -0F86..0F87;N # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS -0F88..0F8C;N # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN -0F8D..0F97;N # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA -0F99..0FBC;N # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA -0FBE..0FC5;N # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE -0FC6;N # Mn TIBETAN SYMBOL PADMA GDAN -0FC7..0FCC;N # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL -0FCE..0FCF;N # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM -0FD0..0FD4;N # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA -0FD5..0FD8;N # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS -0FD9..0FDA;N # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS -1000..102A;N # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU -102B..102C;N # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA -102D..1030;N # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU -1031;N 
# Mc MYANMAR VOWEL SIGN E -1032..1037;N # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW -1038;N # Mc MYANMAR SIGN VISARGA -1039..103A;N # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT -103B..103C;N # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA -103D..103E;N # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA -103F;N # Lo MYANMAR LETTER GREAT SA -1040..1049;N # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE -104A..104F;N # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE -1050..1055;N # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL -1056..1057;N # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR -1058..1059;N # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL -105A..105D;N # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE -105E..1060;N # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA -1061;N # Lo MYANMAR LETTER SGAW KAREN SHA -1062..1064;N # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO -1065..1066;N # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA -1067..106D;N # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 -106E..1070;N # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA -1071..1074;N # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE -1075..1081;N # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA -1082;N # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA -1083..1084;N # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E -1085..1086;N # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y -1087..108C;N # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 -108D;N # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE -108E;N # Lo MYANMAR LETTER RUMAI PALAUNG FA -108F;N # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 -1090..1099;N # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE -109A..109C;N # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A -109D;N # Mn MYANMAR VOWEL SIGN AITON AI -109E..109F;N # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION -10A0..10C5;N # Lu [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE -10C7;N # Lu GEORGIAN CAPITAL LETTER YN -10CD;N # Lu GEORGIAN CAPITAL LETTER AEN -10D0..10FA;N # Ll [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN -10FB;N # Po GEORGIAN PARAGRAPH SEPARATOR -10FC;N # Lm MODIFIER LETTER GEORGIAN NAR -10FD..10FF;N # Ll [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN -1100..115F;W # Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER -1160..11FF;N # Lo [160] HANGUL JUNGSEONG FILLER..HANGUL JONGSEONG SSANGNIEUN -1200..1248;N # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA -124A..124D;N # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE -1250..1256;N # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO -1258;N # Lo ETHIOPIC SYLLABLE QHWA -125A..125D;N # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE -1260..1288;N # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA -128A..128D;N # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE -1290..12B0;N # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA -12B2..12B5;N # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE -12B8..12BE;N # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO -12C0;N # Lo ETHIOPIC SYLLABLE KXWA -12C2..12C5;N # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE -12C8..12D6;N 
# Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O -12D8..1310;N # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA -1312..1315;N # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE -1318..135A;N # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA -135D..135F;N # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK -1360..1368;N # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR -1369..137C;N # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND -1380..138F;N # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE -1390..1399;N # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT -13A0..13F5;N # Lu [86] CHEROKEE LETTER A..CHEROKEE LETTER MV -13F8..13FD;N # Ll [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV -1400;N # Pd CANADIAN SYLLABICS HYPHEN -1401..166C;N # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA -166D;N # So CANADIAN SYLLABICS CHI SIGN -166E;N # Po CANADIAN SYLLABICS FULL STOP -166F..167F;N # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W -1680;N # Zs OGHAM SPACE MARK -1681..169A;N # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH -169B;N # Ps OGHAM FEATHER MARK -169C;N # Pe OGHAM REVERSED FEATHER MARK -16A0..16EA;N # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X -16EB..16ED;N # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION -16EE..16F0;N # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL -16F1..16F8;N # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC -1700..1711;N # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA -1712..1714;N # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA -1715;N # Mc TAGALOG SIGN PAMUDPOD -171F;N # Lo TAGALOG LETTER ARCHAIC RA -1720..1731;N # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA -1732..1733;N # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U -1734;N # Mc HANUNOO SIGN PAMUDPOD -1735..1736;N # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION -1740..1751;N # Lo [18] BUHID LETTER A..BUHID LETTER HA -1752..1753;N # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U -1760..176C;N # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA -176E..1770;N # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA -1772..1773;N # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U -1780..17B3;N # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5;N # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA -17B6;N # Mc KHMER VOWEL SIGN AA -17B7..17BD;N # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA -17BE..17C5;N # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU -17C6;N # Mn KHMER SIGN NIKAHIT -17C7..17C8;N # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU -17C9..17D3;N # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT -17D4..17D6;N # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH -17D7;N # Lm KHMER SIGN LEK TOO -17D8..17DA;N # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT -17DB;N # Sc KHMER CURRENCY SYMBOL RIEL -17DC;N # Lo KHMER SIGN AVAKRAHASANYA -17DD;N # Mn KHMER SIGN ATTHACAN -17E0..17E9;N # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE -17F0..17F9;N # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON -1800..1805;N # Po [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS -1806;N # Pd MONGOLIAN TODO SOFT HYPHEN -1807..180A;N # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU -180B..180D;N # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE -180E;N # Cf MONGOLIAN VOWEL SEPARATOR -180F;N # Mn MONGOLIAN FREE VARIATION SELECTOR 
FOUR -1810..1819;N # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE -1820..1842;N # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI -1843;N # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN -1844..1878;N # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS -1880..1884;N # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA -1885..1886;N # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA -1887..18A8;N # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA -18A9;N # Mn MONGOLIAN LETTER ALI GALI DAGALGA -18AA;N # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA -18B0..18F5;N # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S -1900..191E;N # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA -1920..1922;N # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U -1923..1926;N # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU -1927..1928;N # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O -1929..192B;N # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA -1930..1931;N # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA -1932;N # Mn LIMBU SMALL LETTER ANUSVARA -1933..1938;N # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA -1939..193B;N # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I -1940;N # So LIMBU SIGN LOO -1944..1945;N # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK -1946..194F;N # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE -1950..196D;N # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI -1970..1974;N # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 -1980..19AB;N # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA -19B0..19C9;N # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 -19D0..19D9;N # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE -19DA;N # No NEW TAI LUE THAM DIGIT ONE -19DE..19DF;N # So [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV -19E0..19FF;N # So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC -1A00..1A16;N # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA -1A17..1A18;N # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U -1A19..1A1A;N # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O -1A1B;N # Mn BUGINESE VOWEL SIGN AE -1A1E..1A1F;N # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION -1A20..1A54;N # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA -1A55;N # Mc TAI THAM CONSONANT SIGN MEDIAL RA -1A56;N # Mn TAI THAM CONSONANT SIGN MEDIAL LA -1A57;N # Mc TAI THAM CONSONANT SIGN LA TANG LAI -1A58..1A5E;N # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA -1A60;N # Mn TAI THAM SIGN SAKOT -1A61;N # Mc TAI THAM VOWEL SIGN A -1A62;N # Mn TAI THAM VOWEL SIGN MAI SAT -1A63..1A64;N # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA -1A65..1A6C;N # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW -1A6D..1A72;N # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI -1A73..1A7C;N # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN -1A7F;N # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT -1A80..1A89;N # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE -1A90..1A99;N # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE -1AA0..1AA6;N # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA -1AA7;N # Lm TAI THAM SIGN MAI YAMOK -1AA8..1AAD;N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG -1AB0..1ABD;N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABE;N # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE;N 
# Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T -1B00..1B03;N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG -1B04;N # Mc BALINESE SIGN BISAH -1B05..1B33;N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA -1B34;N # Mn BALINESE SIGN REREKAN -1B35;N # Mc BALINESE VOWEL SIGN TEDUNG -1B36..1B3A;N # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA -1B3B;N # Mc BALINESE VOWEL SIGN RA REPA TEDUNG -1B3C;N # Mn BALINESE VOWEL SIGN LA LENGA -1B3D..1B41;N # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG -1B42;N # Mn BALINESE VOWEL SIGN PEPET -1B43..1B44;N # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG -1B45..1B4C;N # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA -1B50..1B59;N # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE -1B5A..1B60;N # Po [7] BALINESE PANTI..BALINESE PAMENENG -1B61..1B6A;N # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE -1B6B..1B73;N # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG -1B74..1B7C;N # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING -1B7D..1B7E;N # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG -1B80..1B81;N # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR -1B82;N # Mc SUNDANESE SIGN PANGWISAD -1B83..1BA0;N # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA -1BA1;N # Mc SUNDANESE CONSONANT SIGN PAMINGKAL -1BA2..1BA5;N # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU -1BA6..1BA7;N # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG -1BA8..1BA9;N # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG -1BAA;N # Mc SUNDANESE SIGN PAMAAEH -1BAB..1BAD;N # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA -1BAE..1BAF;N # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BB0..1BB9;N # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BBA..1BBF;N # Lo [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M -1BC0..1BE5;N # Lo [38] BATAK LETTER A..BATAK LETTER U -1BE6;N # Mn BATAK SIGN TOMPI -1BE7;N # Mc BATAK VOWEL SIGN E -1BE8..1BE9;N # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE -1BEA..1BEC;N # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O -1BED;N # Mn BATAK VOWEL SIGN KARO O -1BEE;N # Mc BATAK VOWEL SIGN U -1BEF..1BF1;N # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H -1BF2..1BF3;N # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN -1BFC..1BFF;N # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT -1C00..1C23;N # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A -1C24..1C2B;N # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU -1C2C..1C33;N # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T -1C34..1C35;N # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG -1C36..1C37;N # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA -1C3B..1C3F;N # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK -1C40..1C49;N # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE -1C4D..1C4F;N # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA -1C50..1C59;N # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE -1C5A..1C77;N # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH -1C78..1C7D;N # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C7E..1C7F;N # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD -1C80..1C88;N # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK -1C90..1CBA;N # Lu [43] 
-1CBD..1CBF;N # Lu [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
-1CC0..1CC7;N # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
-1CD0..1CD2;N # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
-1CD3;N # Po VEDIC SIGN NIHSHVASA
-1CD4..1CE0;N # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
-1CE1;N # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
-1CE2..1CE8;N # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
-1CE9..1CEC;N # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
-1CED;N # Mn VEDIC SIGN TIRYAK
-1CEE..1CF3;N # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA
-1CF4;N # Mn VEDIC TONE CANDRA ABOVE
-1CF5..1CF6;N # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
-1CF7;N # Mc VEDIC SIGN ATIKRAMA
-1CF8..1CF9;N # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
-1CFA;N # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA
-1D00..1D2B;N # Ll [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
-1D2C..1D6A;N # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
-1D6B..1D77;N # Ll [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
-1D78;N # Lm MODIFIER LETTER CYRILLIC EN
-1D79..1D7F;N # Ll [7] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER UPSILON WITH STROKE
-1D80..1D9A;N # Ll [27] LATIN SMALL LETTER B WITH PALATAL HOOK..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
-1D9B..1DBF;N # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
-1DC0..1DFF;N # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
-1E00..1EFF;N # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
-1F00..1F15;N # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
-1F18..1F1D;N # Lu [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
-1F20..1F45;N # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
-1F48..1F4D;N # Lu [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
-1F50..1F57;N # Ll [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
-1F59;N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA
-1F5B;N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
-1F5D;N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
-1F5F..1F7D;N # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
-1F80..1FB4;N # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
-1FB6..1FBC;N # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
-1FBD;N # Sk GREEK KORONIS
-1FBE;N # Ll GREEK PROSGEGRAMMENI
-1FBF..1FC1;N # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
-1FC2..1FC4;N # Ll [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
-1FC6..1FCC;N # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
-1FCD..1FCF;N # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
-1FD0..1FD3;N # Ll [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
-1FD6..1FDB;N # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
-1FDD..1FDF;N # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
-1FE0..1FEC;N # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
-1FED..1FEF;N # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
-1FF2..1FF4;N # Ll [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
-1FF6..1FFC;N # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
-1FFD..1FFE;N # Sk [2] GREEK OXIA..GREEK DASIA
-2000..200A;N # Zs [11] EN QUAD..HAIR SPACE
-200B..200F;N # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
-2010;A # Pd HYPHEN
-2011..2012;N # Pd [2] NON-BREAKING HYPHEN..FIGURE DASH
-2013..2015;A # Pd [3] EN DASH..HORIZONTAL BAR
-2016;A # Po DOUBLE VERTICAL LINE
-2017;N # Po DOUBLE LOW LINE
-2018;A # Pi LEFT SINGLE QUOTATION MARK
-2019;A # Pf RIGHT SINGLE QUOTATION MARK
-201A;N # Ps SINGLE LOW-9 QUOTATION MARK
-201B;N # Pi SINGLE HIGH-REVERSED-9 QUOTATION MARK
-201C;A # Pi LEFT DOUBLE QUOTATION MARK
-201D;A # Pf RIGHT DOUBLE QUOTATION MARK
-201E;N # Ps DOUBLE LOW-9 QUOTATION MARK
-201F;N # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK
-2020..2022;A # Po [3] DAGGER..BULLET
-2023;N # Po TRIANGULAR BULLET
-2024..2027;A # Po [4] ONE DOT LEADER..HYPHENATION POINT
-2028;N # Zl LINE SEPARATOR
-2029;N # Zp PARAGRAPH SEPARATOR
-202A..202E;N # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
-202F;N # Zs NARROW NO-BREAK SPACE
-2030;A # Po PER MILLE SIGN
-2031;N # Po PER TEN THOUSAND SIGN
-2032..2033;A # Po [2] PRIME..DOUBLE PRIME
-2034;N # Po TRIPLE PRIME
-2035;A # Po REVERSED PRIME
-2036..2038;N # Po [3] REVERSED DOUBLE PRIME..CARET
-2039;N # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-203A;N # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-203B;A # Po REFERENCE MARK
-203C..203D;N # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
-203E;A # Po OVERLINE
-203F..2040;N # Pc [2] UNDERTIE..CHARACTER TIE
-2041..2043;N # Po [3] CARET INSERTION POINT..HYPHEN BULLET
-2044;N # Sm FRACTION SLASH
-2045;N # Ps LEFT SQUARE BRACKET WITH QUILL
-2046;N # Pe RIGHT SQUARE BRACKET WITH QUILL
-2047..2051;N # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY
-2052;N # Sm COMMERCIAL MINUS SIGN
-2053;N # Po SWUNG DASH
-2054;N # Pc INVERTED UNDERTIE
-2055..205E;N # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS
-205F;N # Zs MEDIUM MATHEMATICAL SPACE
-2060..2064;N # Cf [5] WORD JOINER..INVISIBLE PLUS
-2066..206F;N # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
-2070;N # No SUPERSCRIPT ZERO
-2071;N # Lm SUPERSCRIPT LATIN SMALL LETTER I
-2074;A # No SUPERSCRIPT FOUR
-2075..2079;N # No [5] SUPERSCRIPT FIVE..SUPERSCRIPT NINE
-207A..207C;N # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
-207D;N # Ps SUPERSCRIPT LEFT PARENTHESIS
-207E;N # Pe SUPERSCRIPT RIGHT PARENTHESIS
-207F;A # Lm SUPERSCRIPT LATIN SMALL LETTER N
-2080;N # No SUBSCRIPT ZERO
-2081..2084;A # No [4] SUBSCRIPT ONE..SUBSCRIPT FOUR
-2085..2089;N # No [5] SUBSCRIPT FIVE..SUBSCRIPT NINE
-208A..208C;N # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
-208D;N # Ps SUBSCRIPT LEFT PARENTHESIS
-208E;N # Pe SUBSCRIPT RIGHT PARENTHESIS
-2090..209C;N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
-20A0..20A8;N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN
-20A9;H # Sc WON SIGN
-20AA..20AB;N # Sc [2] NEW SHEQEL SIGN..DONG SIGN
-20AC;A # Sc EURO SIGN
-20AD..20C0;N # Sc [20] KIP SIGN..SOM SIGN
-20D0..20DC;N # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
-20DD..20E0;N # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
-20E1;N # Mn COMBINING LEFT RIGHT ARROW ABOVE
-20E2..20E4;N # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
-20E5..20F0;N # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
-2100..2101;N # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
-2102;N # Lu DOUBLE-STRUCK CAPITAL C
-2103;A # So DEGREE CELSIUS
-2104;N # So CENTRE LINE SYMBOL
-2105;A # So CARE OF
-2106;N # So CADA UNA
-2107;N # Lu EULER CONSTANT
-2108;N # So SCRUPLE
-2109;A # So DEGREE FAHRENHEIT
-210A..2112;N # L& [9] SCRIPT SMALL G..SCRIPT CAPITAL L
-2113;A # Ll SCRIPT SMALL L
-2114;N # So L B BAR SYMBOL
-2115;N # Lu DOUBLE-STRUCK CAPITAL N
-2116;A # So NUMERO SIGN
-2117;N # So SOUND RECORDING COPYRIGHT
-2118;N # Sm SCRIPT CAPITAL P
-2119..211D;N # Lu [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
-211E..2120;N # So [3] PRESCRIPTION TAKE..SERVICE MARK
-2121..2122;A # So [2] TELEPHONE SIGN..TRADE MARK SIGN
-2123;N # So VERSICLE
-2124;N # Lu DOUBLE-STRUCK CAPITAL Z
-2125;N # So OUNCE SIGN
-2126;A # Lu OHM SIGN
-2127;N # So INVERTED OHM SIGN
-2128;N # Lu BLACK-LETTER CAPITAL Z
-2129;N # So TURNED GREEK SMALL LETTER IOTA
-212A;N # Lu KELVIN SIGN
-212B;A # Lu ANGSTROM SIGN
-212C..212D;N # Lu [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C
-212E;N # So ESTIMATED SYMBOL
-212F..2134;N # L& [6] SCRIPT SMALL E..SCRIPT SMALL O
-2135..2138;N # Lo [4] ALEF SYMBOL..DALET SYMBOL
-2139;N # Ll INFORMATION SOURCE
-213A..213B;N # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN
-213C..213F;N # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
-2140..2144;N # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y
-2145..2149;N # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
-214A;N # So PROPERTY LINE
-214B;N # Sm TURNED AMPERSAND
-214C..214D;N # So [2] PER SIGN..AKTIESELSKAB
-214E;N # Ll TURNED SMALL F
-214F;N # So SYMBOL FOR SAMARITAN SOURCE
-2150..2152;N # No [3] VULGAR FRACTION ONE SEVENTH..VULGAR FRACTION ONE TENTH
-2153..2154;A # No [2] VULGAR FRACTION ONE THIRD..VULGAR FRACTION TWO THIRDS
-2155..215A;N # No [6] VULGAR FRACTION ONE FIFTH..VULGAR FRACTION FIVE SIXTHS
-215B..215E;A # No [4] VULGAR FRACTION ONE EIGHTH..VULGAR FRACTION SEVEN EIGHTHS
-215F;N # No FRACTION NUMERATOR ONE
-2160..216B;A # Nl [12] ROMAN NUMERAL ONE..ROMAN NUMERAL TWELVE
-216C..216F;N # Nl [4] ROMAN NUMERAL FIFTY..ROMAN NUMERAL ONE THOUSAND
-2170..2179;A # Nl [10] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL TEN
-217A..2182;N # Nl [9] SMALL ROMAN NUMERAL ELEVEN..ROMAN NUMERAL TEN THOUSAND
-2183..2184;N # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
-2185..2188;N # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
-2189;A # No VULGAR FRACTION ZERO THIRDS
-218A..218B;N # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE
-2190..2194;A # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
-2195..2199;A # So [5] UP DOWN ARROW..SOUTH WEST ARROW
-219A..219B;N # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE
-219C..219F;N # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW
-21A0;N # Sm RIGHTWARDS TWO HEADED ARROW
-21A1..21A2;N # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL
-21A3;N # Sm RIGHTWARDS ARROW WITH TAIL
-21A4..21A5;N # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR
-21A6;N # Sm RIGHTWARDS ARROW FROM BAR
-21A7..21AD;N # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW
-21AE;N # Sm LEFT RIGHT ARROW WITH STROKE
-21AF..21B7;N # So [9] DOWNWARDS ZIGZAG ARROW..CLOCKWISE TOP SEMICIRCLE ARROW
-21B8..21B9;A # So [2] NORTH WEST ARROW TO LONG BAR..LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR
-21BA..21CD;N # So [20] ANTICLOCKWISE OPEN CIRCLE ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE
-21CE..21CF;N # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE
-21D0..21D1;N # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW
-21D2;A # Sm RIGHTWARDS DOUBLE ARROW
-21D3;N # So DOWNWARDS DOUBLE ARROW
-21D4;A # Sm LEFT RIGHT DOUBLE ARROW
-21D5..21E6;N # So [18] UP DOWN DOUBLE ARROW..LEFTWARDS WHITE ARROW
-21E7;A # So UPWARDS WHITE ARROW
-21E8..21F3;N # So [12] RIGHTWARDS WHITE ARROW..UP DOWN WHITE ARROW
-21F4..21FF;N # Sm [12] RIGHT ARROW WITH SMALL CIRCLE..LEFT RIGHT OPEN-HEADED ARROW
-2200;A # Sm FOR ALL
-2201;N # Sm COMPLEMENT
-2202..2203;A # Sm [2] PARTIAL DIFFERENTIAL..THERE EXISTS
-2204..2206;N # Sm [3] THERE DOES NOT EXIST..INCREMENT
-2207..2208;A # Sm [2] NABLA..ELEMENT OF
-2209..220A;N # Sm [2] NOT AN ELEMENT OF..SMALL ELEMENT OF
-220B;A # Sm CONTAINS AS MEMBER
-220C..220E;N # Sm [3] DOES NOT CONTAIN AS MEMBER..END OF PROOF
-220F;A # Sm N-ARY PRODUCT
-2210;N # Sm N-ARY COPRODUCT
-2211;A # Sm N-ARY SUMMATION
-2212..2214;N # Sm [3] MINUS SIGN..DOT PLUS
-2215;A # Sm DIVISION SLASH
-2216..2219;N # Sm [4] SET MINUS..BULLET OPERATOR
-221A;A # Sm SQUARE ROOT
-221B..221C;N # Sm [2] CUBE ROOT..FOURTH ROOT
-221D..2220;A # Sm [4] PROPORTIONAL TO..ANGLE
-2221..2222;N # Sm [2] MEASURED ANGLE..SPHERICAL ANGLE
-2223;A # Sm DIVIDES
-2224;N # Sm DOES NOT DIVIDE
-2225;A # Sm PARALLEL TO
-2226;N # Sm NOT PARALLEL TO
-2227..222C;A # Sm [6] LOGICAL AND..DOUBLE INTEGRAL
-222D;N # Sm TRIPLE INTEGRAL
-222E;A # Sm CONTOUR INTEGRAL
-222F..2233;N # Sm [5] SURFACE INTEGRAL..ANTICLOCKWISE CONTOUR INTEGRAL
-2234..2237;A # Sm [4] THEREFORE..PROPORTION
-2238..223B;N # Sm [4] DOT MINUS..HOMOTHETIC
-223C..223D;A # Sm [2] TILDE OPERATOR..REVERSED TILDE
-223E..2247;N # Sm [10] INVERTED LAZY S..NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
-2248;A # Sm ALMOST EQUAL TO
-2249..224B;N # Sm [3] NOT ALMOST EQUAL TO..TRIPLE TILDE
-224C;A # Sm ALL EQUAL TO
-224D..2251;N # Sm [5] EQUIVALENT TO..GEOMETRICALLY EQUAL TO
-2252;A # Sm APPROXIMATELY EQUAL TO OR THE IMAGE OF
-2253..225F;N # Sm [13] IMAGE OF OR APPROXIMATELY EQUAL TO..QUESTIONED EQUAL TO
-2260..2261;A # Sm [2] NOT EQUAL TO..IDENTICAL TO
-2262..2263;N # Sm [2] NOT IDENTICAL TO..STRICTLY EQUIVALENT TO
-2264..2267;A # Sm [4] LESS-THAN OR EQUAL TO..GREATER-THAN OVER EQUAL TO
-2268..2269;N # Sm [2] LESS-THAN BUT NOT EQUAL TO..GREATER-THAN BUT NOT EQUAL TO
-226A..226B;A # Sm [2] MUCH LESS-THAN..MUCH GREATER-THAN
-226C..226D;N # Sm [2] BETWEEN..NOT EQUIVALENT TO
-226E..226F;A # Sm [2] NOT LESS-THAN..NOT GREATER-THAN
-2270..2281;N # Sm [18] NEITHER LESS-THAN NOR EQUAL TO..DOES NOT SUCCEED
-2282..2283;A # Sm [2] SUBSET OF..SUPERSET OF
-2284..2285;N # Sm [2] NOT A SUBSET OF..NOT A SUPERSET OF
-2286..2287;A # Sm [2] SUBSET OF OR EQUAL TO..SUPERSET OF OR EQUAL TO
-2288..2294;N # Sm [13] NEITHER A SUBSET OF NOR EQUAL TO..SQUARE CUP
-2295;A # Sm CIRCLED PLUS
-2296..2298;N # Sm [3] CIRCLED MINUS..CIRCLED DIVISION SLASH
-2299;A # Sm CIRCLED DOT OPERATOR
-229A..22A4;N # Sm [11] CIRCLED RING OPERATOR..DOWN TACK
-22A5;A # Sm UP TACK
-22A6..22BE;N # Sm [25] ASSERTION..RIGHT ANGLE WITH ARC
-22BF;A # Sm RIGHT TRIANGLE
-22C0..22FF;N # Sm [64] N-ARY LOGICAL AND..Z NOTATION BAG MEMBERSHIP
-2300..2307;N # So [8] DIAMETER SIGN..WAVY LINE
-2308;N # Ps LEFT CEILING
-2309;N # Pe RIGHT CEILING
-230A;N # Ps LEFT FLOOR
-230B;N # Pe RIGHT FLOOR
-230C..2311;N # So [6] BOTTOM RIGHT CROP..SQUARE LOZENGE
-2312;A # So ARC
-2313..2319;N # So [7] SEGMENT..TURNED NOT SIGN
-231A..231B;W # So [2] WATCH..HOURGLASS
-231C..231F;N # So [4] TOP LEFT CORNER..BOTTOM RIGHT CORNER
-2320..2321;N # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL
-2322..2328;N # So [7] FROWN..KEYBOARD
-2329;W # Ps LEFT-POINTING ANGLE BRACKET
-232A;W # Pe RIGHT-POINTING ANGLE BRACKET
-232B..237B;N # So [81] ERASE TO THE LEFT..NOT CHECK MARK
-237C;N # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW
-237D..239A;N # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL
-239B..23B3;N # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
-23B4..23DB;N # So [40] TOP SQUARE BRACKET..FUSE
-23DC..23E1;N # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
-23E2..23E8;N # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
-23E9..23EC;W # So [4] BLACK RIGHT-POINTING DOUBLE TRIANGLE..BLACK DOWN-POINTING DOUBLE TRIANGLE
-23ED..23EF;N # So [3] BLACK RIGHT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR..BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR
-23F0;W # So ALARM CLOCK
-23F1..23F2;N # So [2] STOPWATCH..TIMER CLOCK
-23F3;W # So HOURGLASS WITH FLOWING SAND
-23F4..23FF;N # So [12] BLACK MEDIUM LEFT-POINTING TRIANGLE..OBSERVER EYE SYMBOL
-2400..2426;N # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
-2440..244A;N # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
-2460..249B;A # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
-249C..24E9;A # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
-24EA;N # No CIRCLED DIGIT ZERO
-24EB..24FF;A # No [21] NEGATIVE CIRCLED NUMBER ELEVEN..NEGATIVE CIRCLED DIGIT ZERO
-2500..254B;A # So [76] BOX DRAWINGS LIGHT HORIZONTAL..BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL
-254C..254F;N # So [4] BOX DRAWINGS LIGHT DOUBLE DASH HORIZONTAL..BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL
-2550..2573;A # So [36] BOX DRAWINGS DOUBLE HORIZONTAL..BOX DRAWINGS LIGHT DIAGONAL CROSS
-2574..257F;N # So [12] BOX DRAWINGS LIGHT LEFT..BOX DRAWINGS HEAVY UP AND LIGHT DOWN
-2580..258F;A # So [16] UPPER HALF BLOCK..LEFT ONE EIGHTH BLOCK
-2590..2591;N # So [2] RIGHT HALF BLOCK..LIGHT SHADE
-2592..2595;A # So [4] MEDIUM SHADE..RIGHT ONE EIGHTH BLOCK
-2596..259F;N # So [10] QUADRANT LOWER LEFT..QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT
-25A0..25A1;A # So [2] BLACK SQUARE..WHITE SQUARE
-25A2;N # So WHITE SQUARE WITH ROUNDED CORNERS
-25A3..25A9;A # So [7] WHITE SQUARE CONTAINING BLACK SMALL SQUARE..SQUARE WITH DIAGONAL CROSSHATCH FILL
-25AA..25B1;N # So [8] BLACK SMALL SQUARE..WHITE PARALLELOGRAM
-25B2..25B3;A # So [2] BLACK UP-POINTING TRIANGLE..WHITE UP-POINTING TRIANGLE
-25B4..25B5;N # So [2] BLACK UP-POINTING SMALL TRIANGLE..WHITE UP-POINTING SMALL TRIANGLE
-25B6;A # So BLACK RIGHT-POINTING TRIANGLE
-25B7;A # Sm WHITE RIGHT-POINTING TRIANGLE
-25B8..25BB;N # So [4] BLACK RIGHT-POINTING SMALL TRIANGLE..WHITE RIGHT-POINTING POINTER
-25BC..25BD;A # So [2] BLACK DOWN-POINTING TRIANGLE..WHITE DOWN-POINTING TRIANGLE
-25BE..25BF;N # So [2] BLACK DOWN-POINTING SMALL TRIANGLE..WHITE DOWN-POINTING SMALL TRIANGLE
-25C0;A # So BLACK LEFT-POINTING TRIANGLE
-25C1;A # Sm WHITE LEFT-POINTING TRIANGLE
-25C2..25C5;N # So [4] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE LEFT-POINTING POINTER
-25C6..25C8;A # So [3] BLACK DIAMOND..WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND
-25C9..25CA;N # So [2] FISHEYE..LOZENGE
-25CB;A # So WHITE CIRCLE
-25CC..25CD;N # So [2] DOTTED CIRCLE..CIRCLE WITH VERTICAL FILL
-25CE..25D1;A # So [4] BULLSEYE..CIRCLE WITH RIGHT HALF BLACK
-25D2..25E1;N # So [16] CIRCLE WITH LOWER HALF BLACK..LOWER HALF CIRCLE
-25E2..25E5;A # So [4] BLACK LOWER RIGHT TRIANGLE..BLACK UPPER RIGHT TRIANGLE
-25E6..25EE;N # So [9] WHITE BULLET..UP-POINTING TRIANGLE WITH RIGHT HALF BLACK
-25EF;A # So LARGE CIRCLE
-25F0..25F7;N # So [8] WHITE SQUARE WITH UPPER LEFT QUADRANT..WHITE CIRCLE WITH UPPER RIGHT QUADRANT
-25F8..25FC;N # Sm [5] UPPER LEFT TRIANGLE..BLACK MEDIUM SQUARE
-25FD..25FE;W # Sm [2] WHITE MEDIUM SMALL SQUARE..BLACK MEDIUM SMALL SQUARE
-25FF;N # Sm LOWER RIGHT TRIANGLE
-2600..2604;N # So [5] BLACK SUN WITH RAYS..COMET
-2605..2606;A # So [2] BLACK STAR..WHITE STAR
-2607..2608;N # So [2] LIGHTNING..THUNDERSTORM
-2609;A # So SUN
-260A..260D;N # So [4] ASCENDING NODE..OPPOSITION
-260E..260F;A # So [2] BLACK TELEPHONE..WHITE TELEPHONE
-2610..2613;N # So [4] BALLOT BOX..SALTIRE
-2614..2615;W # So [2] UMBRELLA WITH RAIN DROPS..HOT BEVERAGE
-2616..261B;N # So [6] WHITE SHOGI PIECE..BLACK RIGHT POINTING INDEX
-261C;A # So WHITE LEFT POINTING INDEX
-261D;N # So WHITE UP POINTING INDEX
-261E;A # So WHITE RIGHT POINTING INDEX
-261F..263F;N # So [33] WHITE DOWN POINTING INDEX..MERCURY
-2640;A # So FEMALE SIGN
-2641;N # So EARTH
-2642;A # So MALE SIGN
-2643..2647;N # So [5] JUPITER..PLUTO
-2648..2653;W # So [12] ARIES..PISCES
-2654..265F;N # So [12] WHITE CHESS KING..BLACK CHESS PAWN
-2660..2661;A # So [2] BLACK SPADE SUIT..WHITE HEART SUIT
-2662;N # So WHITE DIAMOND SUIT
-2663..2665;A # So [3] BLACK CLUB SUIT..BLACK HEART SUIT
-2666;N # So BLACK DIAMOND SUIT
-2667..266A;A # So [4] WHITE CLUB SUIT..EIGHTH NOTE
-266B;N # So BEAMED EIGHTH NOTES
-266C..266D;A # So [2] BEAMED SIXTEENTH NOTES..MUSIC FLAT SIGN
-266E;N # So MUSIC NATURAL SIGN
-266F;A # Sm MUSIC SHARP SIGN
-2670..267E;N # So [15] WEST SYRIAC CROSS..PERMANENT PAPER SIGN
-267F;W # So WHEELCHAIR SYMBOL
-2680..2692;N # So [19] DIE FACE-1..HAMMER AND PICK
-2693;W # So ANCHOR
-2694..269D;N # So [10] CROSSED SWORDS..OUTLINED WHITE STAR
-269E..269F;A # So [2] THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT
-26A0;N # So WARNING SIGN
-26A1;W # So HIGH VOLTAGE SIGN
-26A2..26A9;N # So [8] DOUBLED FEMALE SIGN..HORIZONTAL MALE WITH STROKE SIGN
-26AA..26AB;W # So [2] MEDIUM WHITE CIRCLE..MEDIUM BLACK CIRCLE
-26AC..26BC;N # So [17] MEDIUM SMALL WHITE CIRCLE..SESQUIQUADRATE
-26BD..26BE;W # So [2] SOCCER BALL..BASEBALL
-26BF;A # So SQUARED KEY
-26C0..26C3;N # So [4] WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING
-26C4..26C5;W # So [2] SNOWMAN WITHOUT SNOW..SUN BEHIND CLOUD
-26C6..26CD;A # So [8] RAIN..DISABLED CAR
-26CE;W # So OPHIUCHUS
-26CF..26D3;A # So [5] PICK..CHAINS
-26D4;W # So NO ENTRY
-26D5..26E1;A # So [13] ALTERNATE ONE-WAY LEFT WAY TRAFFIC..RESTRICTED LEFT ENTRY-2
-26E2;N # So ASTRONOMICAL SYMBOL FOR URANUS
-26E3;A # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
-26E4..26E7;N # So [4] PENTAGRAM..INVERTED PENTAGRAM
-26E8..26E9;A # So [2] BLACK CROSS ON SHIELD..SHINTO SHRINE
-26EA;W # So CHURCH
-26EB..26F1;A # So [7] CASTLE..UMBRELLA ON GROUND
-26F2..26F3;W # So [2] FOUNTAIN..FLAG IN HOLE
-26F4;A # So FERRY
-26F5;W # So SAILBOAT
-26F6..26F9;A # So [4] SQUARE FOUR CORNERS..PERSON WITH BALL
-26FA;W # So TENT
-26FB..26FC;A # So [2] JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL
-26FD;W # So FUEL PUMP
-26FE..26FF;A # So [2] CUP ON BLACK SQUARE..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
-2700..2704;N # So [5] BLACK SAFETY SCISSORS..WHITE SCISSORS
-2705;W # So WHITE HEAVY CHECK MARK
-2706..2709;N # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
-270A..270B;W # So [2] RAISED FIST..RAISED HAND
-270C..2727;N # So [28] VICTORY HAND..WHITE FOUR POINTED STAR
-2728;W # So SPARKLES
-2729..273C;N # So [20] STRESS OUTLINED WHITE STAR..OPEN CENTRE TEARDROP-SPOKED ASTERISK
-273D;A # So HEAVY TEARDROP-SPOKED ASTERISK
-273E..274B;N # So [14] SIX PETALLED BLACK AND WHITE FLORETTE..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
-274C;W # So CROSS MARK
-274D;N # So SHADOWED WHITE CIRCLE
-274E;W # So NEGATIVE SQUARED CROSS MARK
-274F..2752;N # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
-2753..2755;W # So [3] BLACK QUESTION MARK ORNAMENT..WHITE EXCLAMATION MARK ORNAMENT
-2756;N # So BLACK DIAMOND MINUS WHITE X
-2757;W # So HEAVY EXCLAMATION MARK SYMBOL
-2758..2767;N # So [16] LIGHT VERTICAL BAR..ROTATED FLORAL HEART BULLET
-2768;N # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
-2769;N # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
-276A;N # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
-276B;N # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
-276C;N # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
-276D;N # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
-276E;N # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
-276F;N # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
-2770;N # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
-2771;N # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
-2772;N # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
-2773;N # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
-2774;N # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
-2775;N # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
-2776..277F;A # No [10] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED NUMBER TEN
-2780..2793;N # No [20] DINGBAT CIRCLED SANS-SERIF DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
-2794;N # So HEAVY WIDE-HEADED RIGHTWARDS ARROW
-2795..2797;W # So [3] HEAVY PLUS SIGN..HEAVY DIVISION SIGN
-2798..27AF;N # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
-27B0;W # So CURLY LOOP
-27B1..27BE;N # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
-27BF;W # So DOUBLE CURLY LOOP
-27C0..27C4;N # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
-27C5;N # Ps LEFT S-SHAPED BAG DELIMITER
-27C6;N # Pe RIGHT S-SHAPED BAG DELIMITER
-27C7..27E5;N # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK
-27E6;Na # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
-27E7;Na # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
-27E8;Na # Ps MATHEMATICAL LEFT ANGLE BRACKET
-27E9;Na # Pe MATHEMATICAL RIGHT ANGLE BRACKET
-27EA;Na # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
-27EB;Na # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
-27EC;Na # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
-27ED;Na # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
-27EE;N # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS
-27EF;N # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS
-27F0..27FF;N # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW
-2800..28FF;N # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678
-2900..297F;N # Sm [128] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..DOWN FISH TAIL
-2980..2982;N # Sm [3] TRIPLE VERTICAL BAR DELIMITER..Z NOTATION TYPE COLON
-2983;N # Ps LEFT WHITE CURLY BRACKET
-2984;N # Pe RIGHT WHITE CURLY BRACKET
-2985;Na # Ps LEFT WHITE PARENTHESIS
-2986;Na # Pe RIGHT WHITE PARENTHESIS
-2987;N # Ps Z NOTATION LEFT IMAGE BRACKET
-2988;N # Pe Z NOTATION RIGHT IMAGE BRACKET
-2989;N # Ps Z NOTATION LEFT BINDING BRACKET
-298A;N # Pe Z NOTATION RIGHT BINDING BRACKET
-298B;N # Ps LEFT SQUARE BRACKET WITH UNDERBAR
-298C;N # Pe RIGHT SQUARE BRACKET WITH UNDERBAR
-298D;N # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
-298E;N # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
-298F;N # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
-2990;N # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
-2991;N # Ps LEFT ANGLE BRACKET WITH DOT
-2992;N # Pe RIGHT ANGLE BRACKET WITH DOT
-2993;N # Ps LEFT ARC LESS-THAN BRACKET
-2994;N # Pe RIGHT ARC GREATER-THAN BRACKET
-2995;N # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET
-2996;N # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET
-2997;N # Ps LEFT BLACK TORTOISE SHELL BRACKET
-2998;N # Pe RIGHT BLACK TORTOISE SHELL BRACKET
-2999..29D7;N # Sm [63] DOTTED FENCE..BLACK HOURGLASS
-29D8;N # Ps LEFT WIGGLY FENCE
-29D9;N # Pe RIGHT WIGGLY FENCE
-29DA;N # Ps LEFT DOUBLE WIGGLY FENCE
-29DB;N # Pe RIGHT DOUBLE WIGGLY FENCE
-29DC..29FB;N # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS
-29FC;N # Ps LEFT-POINTING CURVED ANGLE BRACKET
-29FD;N # Pe RIGHT-POINTING CURVED ANGLE BRACKET
-29FE..29FF;N # Sm [2] TINY..MINY
-2A00..2AFF;N # Sm [256] N-ARY CIRCLED DOT OPERATOR..N-ARY WHITE VERTICAL BAR
-2B00..2B1A;N # So [27] NORTH EAST WHITE ARROW..DOTTED SQUARE
-2B1B..2B1C;W # So [2] BLACK LARGE SQUARE..WHITE LARGE SQUARE
-2B1D..2B2F;N # So [19] BLACK VERY SMALL SQUARE..WHITE VERTICAL ELLIPSE
-2B30..2B44;N # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET
-2B45..2B46;N # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
-2B47..2B4C;N # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
-2B4D..2B4F;N # So [3] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..SHORT BACKSLANTED SOUTH ARROW
-2B50;W # So WHITE MEDIUM STAR
-2B51..2B54;N # So [4] BLACK SMALL STAR..WHITE RIGHT-POINTING PENTAGON
-2B55;W # So HEAVY LARGE CIRCLE
-2B56..2B59;A # So [4] HEAVY OVAL WITH OVAL INSIDE..HEAVY CIRCLED SALTIRE
-2B5A..2B73;N # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
-2B76..2B95;N # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
-2B97..2BFF;N # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL
-2C00..2C5F;N # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI
-2C60..2C7B;N # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E
-2C7C..2C7D;N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V
-2C7E..2C7F;N # Lu [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL
-2C80..2CE4;N # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
-2CE5..2CEA;N # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA
-2CEB..2CEE;N # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA
-2CEF..2CF1;N # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
-2CF2..2CF3;N # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI
-2CF9..2CFC;N # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER
-2CFD;N # No COPTIC FRACTION ONE HALF
-2CFE..2CFF;N # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER
-2D00..2D25;N # Ll [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
-2D27;N # Ll GEORGIAN SMALL LETTER YN
-2D2D;N # Ll GEORGIAN SMALL LETTER AEN
-2D30..2D67;N # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO
-2D6F;N # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
-2D70;N # Po TIFINAGH SEPARATOR MARK
-2D7F;N # Mn TIFINAGH CONSONANT JOINER
-2D80..2D96;N # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
-2DA0..2DA6;N # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
-2DA8..2DAE;N # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
-2DB0..2DB6;N # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
-2DB8..2DBE;N # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
-2DC0..2DC6;N # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
-2DC8..2DCE;N # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
-2DD0..2DD6;N # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
-2DD8..2DDE;N # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
-2DE0..2DFF;N # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
-2E00..2E01;N # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
-2E02;N # Pi LEFT SUBSTITUTION BRACKET
-2E03;N # Pf RIGHT SUBSTITUTION BRACKET
-2E04;N # Pi LEFT DOTTED SUBSTITUTION BRACKET
-2E05;N # Pf RIGHT DOTTED SUBSTITUTION BRACKET
-2E06..2E08;N # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
-2E09;N # Pi LEFT TRANSPOSITION BRACKET
-2E0A;N # Pf RIGHT TRANSPOSITION BRACKET
-2E0B;N # Po RAISED SQUARE
-2E0C;N # Pi LEFT RAISED OMISSION BRACKET
-2E0D;N # Pf RIGHT RAISED OMISSION BRACKET
-2E0E..2E16;N # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE
-2E17;N # Pd DOUBLE OBLIQUE HYPHEN
-2E18..2E19;N # Po [2] INVERTED INTERROBANG..PALM BRANCH
-2E1A;N # Pd HYPHEN WITH DIAERESIS
-2E1B;N # Po TILDE WITH RING ABOVE
-2E1C;N # Pi LEFT LOW PARAPHRASE BRACKET
-2E1D;N # Pf RIGHT LOW PARAPHRASE BRACKET
-2E1E..2E1F;N # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW
-2E20;N # Pi LEFT VERTICAL BAR WITH QUILL
-2E21;N # Pf RIGHT VERTICAL BAR WITH QUILL
-2E22;N # Ps TOP LEFT HALF BRACKET
-2E23;N # Pe TOP RIGHT HALF BRACKET
-2E24;N # Ps BOTTOM LEFT HALF BRACKET
-2E25;N # Pe BOTTOM RIGHT HALF BRACKET
-2E26;N # Ps LEFT SIDEWAYS U BRACKET
-2E27;N # Pe RIGHT SIDEWAYS U BRACKET
-2E28;N # Ps LEFT DOUBLE PARENTHESIS
-2E29;N # Pe RIGHT DOUBLE PARENTHESIS
-2E2A..2E2E;N # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
-2E2F;N # Lm VERTICAL TILDE
-2E30..2E39;N # Po [10] RING POINT..TOP HALF SECTION SIGN
-2E3A..2E3B;N # Pd [2] TWO-EM DASH..THREE-EM DASH
-2E3C..2E3F;N # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM
-2E40;N # Pd DOUBLE HYPHEN
-2E41;N # Po REVERSED COMMA
-2E42;N # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
-2E43..2E4F;N # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER
-2E50..2E51;N # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR
-2E52..2E54;N # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK
-2E55;N # Ps LEFT SQUARE BRACKET WITH STROKE
-2E56;N # Pe RIGHT SQUARE BRACKET WITH STROKE
-2E57;N # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE
-2E58;N # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE
-2E59;N # Ps TOP HALF LEFT PARENTHESIS
-2E5A;N # Pe TOP HALF RIGHT PARENTHESIS
-2E5B;N # Ps BOTTOM HALF LEFT PARENTHESIS
-2E5C;N # Pe BOTTOM HALF RIGHT PARENTHESIS
-2E5D;N # Pd OBLIQUE HYPHEN
-2E80..2E99;W # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
-2E9B..2EF3;W # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
-2F00..2FD5;W # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
-2FF0..2FFB;W # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
-3000;F # Zs IDEOGRAPHIC SPACE
-3001..3003;W # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
-3004;W # So JAPANESE INDUSTRIAL STANDARD SYMBOL
-3005;W # Lm IDEOGRAPHIC ITERATION MARK
-3006;W # Lo IDEOGRAPHIC CLOSING MARK
-3007;W # Nl IDEOGRAPHIC NUMBER ZERO
-3008;W # Ps LEFT ANGLE BRACKET
-3009;W # Pe RIGHT ANGLE BRACKET
-300A;W # Ps LEFT DOUBLE ANGLE BRACKET
-300B;W # Pe RIGHT DOUBLE ANGLE BRACKET
-300C;W # Ps LEFT CORNER BRACKET
-300D;W # Pe RIGHT CORNER BRACKET
-300E;W # Ps LEFT WHITE CORNER BRACKET
-300F;W # Pe RIGHT WHITE CORNER BRACKET
-3010;W # Ps LEFT BLACK LENTICULAR BRACKET
-3011;W # Pe RIGHT BLACK LENTICULAR BRACKET
-3012..3013;W # So [2] POSTAL MARK..GETA MARK
-3014;W # Ps LEFT TORTOISE SHELL BRACKET
-3015;W # Pe RIGHT TORTOISE SHELL BRACKET
-3016;W # Ps LEFT WHITE LENTICULAR BRACKET
-3017;W # Pe RIGHT WHITE LENTICULAR BRACKET
-3018;W # Ps LEFT WHITE TORTOISE SHELL BRACKET
-3019;W # Pe RIGHT WHITE TORTOISE SHELL BRACKET
-301A;W # Ps LEFT WHITE SQUARE BRACKET
-301B;W # Pe RIGHT WHITE SQUARE BRACKET
-301C;W # Pd WAVE DASH
-301D;W # Ps REVERSED DOUBLE PRIME QUOTATION MARK
-301E..301F;W # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
-3020;W # So POSTAL MARK FACE
-3021..3029;W # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
-302A..302D;W # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
-302E..302F;W # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
-3030;W # Pd WAVY DASH
-3031..3035;W # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
-3036..3037;W # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL
-3038..303A;W # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
-303B;W # Lm VERTICAL IDEOGRAPHIC ITERATION MARK
-303C;W # Lo MASU MARK
-303D;W # Po PART ALTERNATION MARK
-303E;W # So IDEOGRAPHIC VARIATION INDICATOR
-303F;N # So IDEOGRAPHIC HALF FILL SPACE
-3041..3096;W # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
-3099..309A;W # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
-309B..309C;W # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
-309D..309E;W # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
-309F;W # Lo HIRAGANA DIGRAPH YORI
-30A0;W # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
-30A1..30FA;W # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO
-30FB;W # Po KATAKANA MIDDLE DOT
-30FC..30FE;W # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
-30FF;W # Lo KATAKANA DIGRAPH KOTO
-3105..312F;W # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN
-3131..318E;W # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
-3190..3191;W # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK
-3192..3195;W # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
-3196..319F;W # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
-31A0..31BF;W # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH
-31C0..31E3;W # So [36] CJK STROKE T..CJK STROKE Q
-31F0..31FF;W # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
-3200..321E;W # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
-3220..3229;W # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
-322A..3247;W # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
-3248..324F;A # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE
-3250;W # So PARTNERSHIP SIGN
-3251..325F;W # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
-3260..327F;W # So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL
-3280..3289;W # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
-328A..32B0;W # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
-32B1..32BF;W # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
-32C0..32FF;W # So [64] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE ERA NAME REIWA
-3300..33FF;W # So [256] SQUARE APAATO..SQUARE GAL
-3400..4DBF;W # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF
-4DC0..4DFF;N # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
-4E00..9FFF;W # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF
-A000..A014;W # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
-A015;W # Lm YI SYLLABLE WU
-A016..A48C;W # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
-A490..A4C6;W # So [55] YI RADICAL QOT..YI RADICAL KE
-A4D0..A4F7;N # Lo [40] LISU LETTER BA..LISU LETTER OE
-A4F8..A4FD;N # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
-A4FE..A4FF;N # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
-A500..A60B;N # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG
-A60C;N # Lm VAI SYLLABLE LENGTHENER
-A60D..A60F;N # Po [3] VAI COMMA..VAI QUESTION MARK
-A610..A61F;N # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
-A620..A629;N # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
-A62A..A62B;N # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO
-A640..A66D;N # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O
-A66E;N # Lo CYRILLIC LETTER MULTIOCULAR O
-A66F;N # Mn COMBINING CYRILLIC VZMET
-A670..A672;N # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
-A673;N # Po SLAVONIC ASTERISK
-A674..A67D;N # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK
-A67E;N # Po CYRILLIC KAVYKA
-A67F;N # Lm CYRILLIC PAYEROK
-A680..A69B;N # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O
-A69C..A69D;N # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN
-A69E..A69F;N # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
-A6A0..A6E5;N # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
-A6E6..A6EF;N # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
-A6F0..A6F1;N # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
-A6F2..A6F7;N # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK
-A700..A716;N # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
-A717..A71F;N # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
-A720..A721;N # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
-A722..A76F;N # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
-A770;N # Lm MODIFIER LETTER US
-A771..A787;N # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T
-A788;N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
-A789..A78A;N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN
-A78B..A78E;N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT
-A78F;N # Lo LATIN LETTER SINOLOGICAL DOT
-A790..A7CA;N # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY
-A7D0..A7D1;N # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G
-A7D3;N # Ll LATIN SMALL LETTER DOUBLE THORN
-A7D5..A7D9;N # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S
-A7F2..A7F4;N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
-A7F5..A7F6;N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H
-A7F7;N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
-A7F8..A7F9;N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
-A7FA;N # Ll LATIN LETTER SMALL CAPITAL TURNED M
-A7FB..A7FF;N # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M
-A800..A801;N # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I
-A802;N # Mn SYLOTI NAGRI SIGN DVISVARA
-A803..A805;N # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O
-A806;N # Mn SYLOTI NAGRI SIGN HASANTA
-A807..A80A;N # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO
-A80B;N # Mn SYLOTI NAGRI SIGN ANUSVARA
-A80C..A822;N # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
-A823..A824;N # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
-A825..A826;N # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
-A827;N # Mc SYLOTI NAGRI VOWEL SIGN OO
-A828..A82B;N # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
-A82C;N # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA
-A830..A835;N # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS
-A836..A837;N # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK
-A838;N # Sc NORTH INDIC RUPEE MARK
-A839;N # So NORTH INDIC QUANTITY MARK
-A840..A873;N # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
-A874..A877;N # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD
-A880..A881;N # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
-A882..A8B3;N # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
-A8B4..A8C3;N # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
-A8C4..A8C5;N # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU
-A8CE..A8CF;N # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
-A8D0..A8D9;N # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
-A8E0..A8F1;N # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
-A8F2..A8F7;N # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
-A8F8..A8FA;N # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
-A8FB;N # Lo DEVANAGARI HEADSTROKE
-A8FC;N # Po DEVANAGARI SIGN SIDDHAM
-A8FD..A8FE;N # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY
-A8FF;N # Mn DEVANAGARI VOWEL SIGN AY
-A900..A909;N # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
-A90A..A925;N # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
-A926..A92D;N # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
-A92E..A92F;N # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
-A930..A946;N # Lo [23] REJANG LETTER KA..REJANG LETTER A
-A947..A951;N # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
-A952..A953;N # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
-A95F;N # Po REJANG SECTION MARK
-A960..A97C;W # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
-A980..A982;N # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
-A983;N # Mc JAVANESE SIGN WIGNYAN
-A984..A9B2;N # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA
-A9B3;N # Mn JAVANESE SIGN CECAK TELU
-A9B4..A9B5;N # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
-A9B6..A9B9;N # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
-A9BA..A9BB;N # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
-A9BC..A9BD;N # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET
-A9BE..A9C0;N # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON
-A9C1..A9CD;N # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
-A9CF;N # Lm JAVANESE PANGRANGKEP
-A9D0..A9D9;N # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
-A9DE..A9DF;N # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
-A9E0..A9E4;N # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA
-A9E5;N # Mn MYANMAR SIGN SHAN SAW
-A9E6;N # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION
-A9E7..A9EF;N # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA
-A9F0..A9F9;N # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE
-A9FA..A9FE;N # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA
-AA00..AA28;N # Lo [41] CHAM LETTER A..CHAM LETTER HA
-AA29..AA2E;N # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
-AA2F..AA30;N # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
-AA31..AA32;N # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
-AA33..AA34;N # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
-AA35..AA36;N # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
-AA40..AA42;N # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG
-AA43;N # Mn CHAM CONSONANT SIGN FINAL NG
-AA44..AA4B;N # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
-AA4C;N # Mn CHAM CONSONANT SIGN FINAL M
-AA4D;N # Mc CHAM CONSONANT SIGN FINAL H
-AA50..AA59;N # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
-AA5C..AA5F;N # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA
-AA60..AA6F;N # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA
-AA70;N # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
-AA71..AA76;N # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM
-AA77..AA79;N # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
-AA7A;N # Lo MYANMAR LETTER AITON RA
-AA7B;N # Mc MYANMAR SIGN PAO KAREN TONE
-AA7C;N # Mn MYANMAR SIGN TAI LAING TONE-2
-AA7D;N # Mc MYANMAR SIGN TAI LAING TONE-5
-AA7E..AA7F;N # Lo [2] MYANMAR LETTER SHWE PALAUNG CHA..MYANMAR LETTER SHWE PALAUNG SHA
-AA80..AAAF;N # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O
-AAB0;N # Mn TAI VIET MAI KANG
-AAB1;N # Lo TAI VIET VOWEL AA
-AAB2..AAB4;N # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
-AAB5..AAB6;N # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O
-AAB7..AAB8;N # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
-AAB9..AABD;N # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN
-AABE..AABF;N # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
-AAC0;N # Lo TAI VIET TONE MAI NUENG
-AAC1;N # Mn TAI VIET TONE MAI THO
-AAC2;N # Lo TAI VIET TONE MAI SONG
-AADB..AADC;N # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG
-AADD;N # Lm TAI VIET SYMBOL SAM
-AADE..AADF;N # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI
-AAE0..AAEA;N # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA
-AAEB;N # Mc MEETEI MAYEK VOWEL SIGN II
-AAEC..AAED;N # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI
-AAEE..AAEF;N # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU
-AAF0..AAF1;N # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM
-AAF2;N # Lo MEETEI MAYEK ANJI
-AAF3..AAF4;N # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK
-AAF5;N # Mc MEETEI MAYEK VOWEL SIGN VISARGA
-AAF6;N # Mn MEETEI MAYEK VIRAMA
-AB01..AB06;N # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO
-AB09..AB0E;N # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO
-AB11..AB16;N # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO
-AB20..AB26;N # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO
-AB28..AB2E;N # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO
-AB30..AB5A;N # Ll [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
-AB5B;N # Sk MODIFIER BREVE WITH INVERTED BREVE
-AB5C..AB5F;N # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
-AB60..AB68;N # Ll [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
-AB69;N # Lm MODIFIER LETTER SMALL TURNED W
-AB6A..AB6B;N # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK
-AB70..ABBF;N # Ll [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
-ABC0..ABE2;N # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
-ABE3..ABE4;N # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
-ABE5;N # Mn MEETEI MAYEK VOWEL SIGN ANAP
-ABE6..ABE7;N # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
-ABE8;N # Mn MEETEI MAYEK VOWEL SIGN UNAP
-ABE9..ABEA;N # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
-ABEB;N # Po MEETEI MAYEK CHEIKHEI
-ABEC;N # Mc MEETEI MAYEK LUM IYEK
-ABED;N # Mn MEETEI MAYEK APUN IYEK
-ABF0..ABF9;N # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
-AC00..D7A3;W # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
-D7B0..D7C6;N # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
-D7CB..D7FB;N # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
-D800..DB7F;N # Cs [896] ..
-DB80..DBFF;N # Cs [128] ..
-DC00..DFFF;N # Cs [1024] ..
-E000..F8FF;A # Co [6400] ..
-F900..FA6D;W # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
-FA6E..FA6F;W # Cn [2] ..
-FA70..FAD9;W # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
-FADA..FAFF;W # Cn [38] ..
-FB00..FB06;N # Ll [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
-FB13..FB17;N # Ll [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
-FB1D;N # Lo HEBREW LETTER YOD WITH HIRIQ
-FB1E;N # Mn HEBREW POINT JUDEO-SPANISH VARIKA
-FB1F..FB28;N # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
-FB29;N # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN
-FB2A..FB36;N # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
-FB38..FB3C;N # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
-FB3E;N # Lo HEBREW LETTER MEM WITH DAGESH
-FB40..FB41;N # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
-FB43..FB44;N # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
-FB46..FB4F;N # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED
-FB50..FBB1;N # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
-FBB2..FBC2;N # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE
-FBD3..FD3D;N # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
-FD3E;N # Pe ORNATE LEFT PARENTHESIS
-FD3F;N # Ps ORNATE RIGHT PARENTHESIS
-FD40..FD4F;N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH
-FD50..FD8F;N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
-FD92..FDC7;N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
-FDCF;N # So ARABIC LIGATURE SALAAMUHU ALAYNAA
-FDF0..FDFB;N # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
-FDFC;N # Sc RIAL SIGN
-FDFD..FDFF;N # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL
-FE00..FE0F;A # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
-FE10..FE16;W # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
-FE17;W # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
-FE18;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
-FE19;W # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
-FE20..FE2F;N # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
-FE30;W # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
-FE31..FE32;W # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH
-FE33..FE34;W # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
-FE35;W # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
-FE36;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
-FE37;W # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
-FE38;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
-FE39;W # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
-FE3A;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
-FE3B;W # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
-FE3C;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
-FE3D;W # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
-FE3E;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
-FE3F;W # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
-FE40;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
-FE41;W # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
-FE42;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
-FE43;W # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
-FE44;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
-FE45..FE46;W # Po [2] SESAME DOT..WHITE SESAME DOT
-FE47;W # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET
-FE48;W # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET
-FE49..FE4C;W # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE
-FE4D..FE4F;W # Pc [3] DASHED LOW LINE..WAVY LOW LINE
-FE50..FE52;W # Po [3] SMALL COMMA..SMALL FULL STOP
-FE54..FE57;W # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
-FE58;W # Pd SMALL EM DASH
-FE59;W # Ps SMALL LEFT PARENTHESIS
-FE5A;W # Pe SMALL RIGHT PARENTHESIS
-FE5B;W # Ps SMALL LEFT CURLY BRACKET
-FE5C;W # Pe SMALL RIGHT CURLY BRACKET
-FE5D;W # Ps SMALL LEFT TORTOISE SHELL BRACKET
-FE5E;W # Pe SMALL RIGHT TORTOISE SHELL BRACKET
-FE5F..FE61;W # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK
-FE62;W # Sm SMALL PLUS SIGN
-FE63;W # Pd SMALL HYPHEN-MINUS
-FE64..FE66;W # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN
-FE68;W # Po SMALL REVERSE SOLIDUS
-FE69;W # Sc SMALL DOLLAR SIGN
-FE6A..FE6B;W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT
-FE70..FE74;N # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
-FE76..FEFC;N # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
-FEFF;N # Cf ZERO WIDTH NO-BREAK SPACE
-FF01..FF03;F # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
-FF04;F # Sc FULLWIDTH DOLLAR SIGN
-FF05..FF07;F # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE
-FF08;F # Ps FULLWIDTH LEFT PARENTHESIS
-FF09;F # Pe FULLWIDTH RIGHT PARENTHESIS
-FF0A;F # Po FULLWIDTH ASTERISK
-FF0B;F # Sm FULLWIDTH PLUS SIGN
-FF0C;F # Po FULLWIDTH COMMA
-FF0D;F # Pd FULLWIDTH HYPHEN-MINUS
-FF0E..FF0F;F # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS
-FF10..FF19;F # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
-FF1A..FF1B;F # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON
-FF1C..FF1E;F # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN
-FF1F..FF20;F # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT
-FF21..FF3A;F # Lu [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
-FF3B;F # Ps FULLWIDTH LEFT SQUARE BRACKET
-FF3C;F # Po FULLWIDTH REVERSE SOLIDUS
-FF3D;F # Pe FULLWIDTH RIGHT SQUARE BRACKET
-FF3E;F # Sk FULLWIDTH CIRCUMFLEX ACCENT
-FF3F;F # Pc FULLWIDTH LOW LINE
-FF40;F # Sk FULLWIDTH GRAVE ACCENT
-FF41..FF5A;F # Ll [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
-FF5B;F # Ps FULLWIDTH LEFT CURLY BRACKET
-FF5C;F # Sm FULLWIDTH VERTICAL LINE
-FF5D;F # Pe FULLWIDTH RIGHT CURLY BRACKET
-FF5E;F # Sm FULLWIDTH TILDE
-FF5F;F # Ps FULLWIDTH LEFT WHITE PARENTHESIS
-FF60;F # Pe FULLWIDTH RIGHT WHITE PARENTHESIS
-FF61;H # Po HALFWIDTH IDEOGRAPHIC FULL STOP
-FF62;H # Ps HALFWIDTH LEFT CORNER BRACKET
-FF63;H # Pe HALFWIDTH RIGHT CORNER BRACKET
-FF64..FF65;H # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
-FF66..FF6F;H # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
-FF70;H # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
-FF71..FF9D;H # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
-FF9E..FF9F;H # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
-FFA0..FFBE;H # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
-FFC2..FFC7;H # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
-FFCA..FFCF;H # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
-FFD2..FFD7;H # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
-FFDA..FFDC;H # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
-FFE0..FFE1;F # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
-FFE2;F # Sm FULLWIDTH NOT SIGN
-FFE3;F # Sk FULLWIDTH MACRON
-FFE4;F # So FULLWIDTH BROKEN BAR
-FFE5..FFE6;F # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
-FFE8;H # So HALFWIDTH FORMS LIGHT VERTICAL
-FFE9..FFEC;H # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
-FFED..FFEE;H # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
-FFF9..FFFB;N # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
-FFFC;N # So OBJECT REPLACEMENT CHARACTER
-FFFD;A # So REPLACEMENT CHARACTER
-10000..1000B;N # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
-1000D..10026;N # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
-10028..1003A;N # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
-1003C..1003D;N # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
-1003F..1004D;N # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
-10050..1005D;N # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
-10080..100FA;N # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
-10100..10102;N # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK
-10107..10133;N # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
-10137..1013F;N # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT
-10140..10174;N # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS
-10175..10178;N # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN
-10179..10189;N # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN
-1018A..1018B;N # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN
-1018C..1018E;N # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN
-10190..1019C;N # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL
-101A0;N # So GREEK SYMBOL TAU RHO
-101D0..101FC;N # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
-101FD;N # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
-10280..1029C;N # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X
-102A0..102D0;N # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3
-102E0;N # Mn COPTIC EPACT THOUSANDS MARK
-102E1..102FB;N # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED
-10300..1031F;N # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS
-10320..10323;N # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
-1032D..1032F;N # Lo [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE
-10330..10340;N # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
-10341;N # Nl GOTHIC LETTER NINETY
-10342..10349;N # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
-1034A;N # Nl GOTHIC LETTER NINE HUNDRED
-10350..10375;N # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA
-10376..1037A;N # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII
-10380..1039D;N # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU
-1039F;N # Po UGARITIC WORD DIVIDER
-103A0..103C3;N # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
-103C8..103CF;N # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
-103D0;N # Po OLD PERSIAN WORD DIVIDER
-103D1..103D5;N # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED
-10400..1044F;N # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW
-10450..1047F;N # Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW
-10480..1049D;N # Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO
-104A0..104A9;N # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
-104B0..104D3;N # Lu [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA
-104D8..104FB;N # Ll [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA
-10500..10527;N # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE
-10530..10563;N # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW
-1056F;N # Po CAUCASIAN ALBANIAN CITATION MARK
-10570..1057A;N # Lu [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA
-1057C..1058A;N # Lu [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE
-1058C..10592;N # Lu [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE
-10594..10595;N # Lu [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE
-10597..105A1;N # Ll [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA
-105A3..105B1;N # Ll [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
-105B3..105B9;N # Ll [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
-105BB..105BC;N # Ll [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
-10600..10736;N # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
-10740..10755;N # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
-10760..10767;N # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
-10780..10785;N # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK
-10787..107B0;N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK
-107B2..107BA;N # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL
-10800..10805;N # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
-10808;N # Lo CYPRIOT SYLLABLE JO
-1080A..10835;N # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
-10837..10838;N # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
-1083C;N # Lo CYPRIOT SYLLABLE ZA
-1083F;N # Lo CYPRIOT SYLLABLE ZO
-10840..10855;N # Lo [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW
-10857;N # Po IMPERIAL ARAMAIC SECTION SIGN
-10858..1085F;N # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND -10860..10876;N # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW -10877..10878;N # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON -10879..1087F;N # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY -10880..1089E;N # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW -108A7..108AF;N # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED -108E0..108F2;N # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH -108F4..108F5;N # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW -108FB..108FF;N # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED -10900..10915;N # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU -10916..1091B;N # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE -1091F;N # Po PHOENICIAN WORD SEPARATOR -10920..10939;N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C -1093F;N # Po LYDIAN TRIANGULAR MARK -10980..1099F;N # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 -109A0..109B7;N # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA -109BC..109BD;N # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF -109BE..109BF;N # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN -109C0..109CF;N # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY -109D2..109FF;N # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS -10A00;N # Lo KHAROSHTHI LETTER A -10A01..10A03;N # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R -10A05..10A06;N # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O -10A0C..10A0F;N # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA -10A10..10A13;N # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA -10A15..10A17;N # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA -10A19..10A35;N # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA -10A38..10A3A;N # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW -10A3F;N # Mn KHAROSHTHI VIRAMA -10A40..10A48;N # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF -10A50..10A58;N # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES -10A60..10A7C;N # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH -10A7D..10A7E;N # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY -10A7F;N # Po OLD SOUTH ARABIAN NUMERIC INDICATOR -10A80..10A9C;N # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH -10A9D..10A9F;N # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY -10AC0..10AC7;N # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW -10AC8;N # So MANICHAEAN SIGN UD -10AC9..10AE4;N # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW -10AE5..10AE6;N # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW -10AEB..10AEF;N # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED -10AF0..10AF6;N # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER -10B00..10B35;N # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE -10B39..10B3F;N # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION -10B40..10B55;N # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW -10B58..10B5F;N # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND -10B60..10B72;N # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW -10B78..10B7F;N # No [8] INSCRIPTIONAL 
PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND -10B80..10B91;N # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW -10B99..10B9C;N # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT -10BA9..10BAF;N # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED -10C00..10C48;N # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH -10C80..10CB2;N # Lu [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US -10CC0..10CF2;N # Ll [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US -10CFA..10CFF;N # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND -10D00..10D23;N # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA -10D24..10D27;N # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI -10D30..10D39;N # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE -10E60..10E7E;N # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS -10E80..10EA9;N # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET -10EAB..10EAC;N # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EAD;N # Pd YEZIDI HYPHENATION MARK -10EB0..10EB1;N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10EFD..10EFF;N # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA -10F00..10F1C;N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL -10F1D..10F26;N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF -10F27;N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH -10F30..10F45;N # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN -10F46..10F50;N # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW -10F51..10F54;N # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED -10F55..10F59;N # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT -10F70..10F81;N # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH -10F82..10F85;N # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW -10F86..10F89;N # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS -10FB0..10FC4;N # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW -10FC5..10FCB;N # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED -10FE0..10FF6;N # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH -11000;N # Mc BRAHMI SIGN CANDRABINDU -11001;N # Mn BRAHMI SIGN ANUSVARA -11002;N # Mc BRAHMI SIGN VISARGA -11003..11037;N # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA -11038..11046;N # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA -11047..1104D;N # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS -11052..11065;N # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND -11066..1106F;N # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE -11070;N # Mn BRAHMI SIGN OLD TAMIL VIRAMA -11071..11072;N # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O -11073..11074;N # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O -11075;N # Lo BRAHMI LETTER OLD TAMIL LLA -1107F;N # Mn BRAHMI NUMBER JOINER -11080..11081;N # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA -11082;N # Mc KAITHI SIGN VISARGA -11083..110AF;N # Lo [45] KAITHI LETTER A..KAITHI LETTER HA -110B0..110B2;N # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II -110B3..110B6;N # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI -110B7..110B8;N # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU -110B9..110BA;N # Mn [2] KAITHI SIGN 
VIRAMA..KAITHI SIGN NUKTA -110BB..110BC;N # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN -110BD;N # Cf KAITHI NUMBER SIGN -110BE..110C1;N # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA -110C2;N # Mn KAITHI VOWEL SIGN VOCALIC R -110CD;N # Cf KAITHI NUMBER SIGN ABOVE -110D0..110E8;N # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE -110F0..110F9;N # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE -11100..11102;N # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA -11103..11126;N # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA -11127..1112B;N # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU -1112C;N # Mc CHAKMA VOWEL SIGN E -1112D..11134;N # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA -11136..1113F;N # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE -11140..11143;N # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK -11144;N # Lo CHAKMA LETTER LHAA -11145..11146;N # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI -11147;N # Lo CHAKMA LETTER VAA -11150..11172;N # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA -11173;N # Mn MAHAJANI SIGN NUKTA -11174..11175;N # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK -11176;N # Lo MAHAJANI LIGATURE SHRI -11180..11181;N # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA -11182;N # Mc SHARADA SIGN VISARGA -11183..111B2;N # Lo [48] SHARADA LETTER A..SHARADA LETTER HA -111B3..111B5;N # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II -111B6..111BE;N # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O -111BF..111C0;N # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA -111C1..111C4;N # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM -111C5..111C8;N # Po [4] SHARADA DANDA..SHARADA SEPARATOR -111C9..111CC;N # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK -111CD;N # Po SHARADA SUTRA MARK -111CE;N # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E -111CF;N # Mn SHARADA SIGN INVERTED CANDRABINDU -111D0..111D9;N # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE -111DA;N # Lo SHARADA EKAM -111DB;N # Po SHARADA SIGN SIDDHAM -111DC;N # Lo SHARADA HEADSTROKE -111DD..111DF;N # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 -111E1..111F4;N # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND -11200..11211;N # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA -11213..1122B;N # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA -1122C..1122E;N # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II -1122F..11231;N # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI -11232..11233;N # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU -11234;N # Mn KHOJKI SIGN ANUSVARA -11235;N # Mc KHOJKI SIGN VIRAMA -11236..11237;N # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA -11238..1123D;N # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN -1123E;N # Mn KHOJKI SIGN SUKUN -1123F..11240;N # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I -11241;N # Mn KHOJKI VOWEL SIGN VOCALIC R -11280..11286;N # Lo [7] MULTANI LETTER A..MULTANI LETTER GA -11288;N # Lo MULTANI LETTER GHA -1128A..1128D;N # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA -1128F..1129D;N # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA -1129F..112A8;N # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA -112A9;N # Po MULTANI SECTION MARK -112B0..112DE;N # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA -112DF;N # Mn KHUDAWADI SIGN ANUSVARA -112E0..112E2;N # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II -112E3..112EA;N # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA -112F0..112F9;N # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE -11300..11301;N # Mn [2] 
GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU -11302..11303;N # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA -11305..1130C;N # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L -1130F..11310;N # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI -11313..11328;N # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA -1132A..11330;N # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA -11332..11333;N # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA -11335..11339;N # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA -1133B..1133C;N # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA -1133D;N # Lo GRANTHA SIGN AVAGRAHA -1133E..1133F;N # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I -11340;N # Mn GRANTHA VOWEL SIGN II -11341..11344;N # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR -11347..11348;N # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI -1134B..1134D;N # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA -11350;N # Lo GRANTHA OM -11357;N # Mc GRANTHA AU LENGTH MARK -1135D..11361;N # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL -11362..11363;N # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL -11366..1136C;N # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX -11370..11374;N # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA -11400..11434;N # Lo [53] NEWA LETTER A..NEWA LETTER HA -11435..11437;N # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II -11438..1143F;N # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI -11440..11441;N # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU -11442..11444;N # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA -11445;N # Mc NEWA SIGN VISARGA -11446;N # Mn NEWA SIGN NUKTA -11447..1144A;N # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI -1144B..1144F;N # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN -11450..11459;N # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE -1145A..1145B;N # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK -1145D;N # Po NEWA INSERTION SIGN -1145E;N # Mn NEWA SANDHI MARK -1145F..11461;N # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA -11480..114AF;N # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA -114B0..114B2;N # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II -114B3..114B8;N # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL -114B9;N # Mc TIRHUTA VOWEL SIGN E -114BA;N # Mn TIRHUTA VOWEL SIGN SHORT E -114BB..114BE;N # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU -114BF..114C0;N # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA -114C1;N # Mc TIRHUTA SIGN VISARGA -114C2..114C3;N # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA -114C4..114C5;N # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG -114C6;N # Po TIRHUTA ABBREVIATION SIGN -114C7;N # Lo TIRHUTA OM -114D0..114D9;N # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE -11580..115AE;N # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA -115AF..115B1;N # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II -115B2..115B5;N # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR -115B8..115BB;N # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU -115BC..115BD;N # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA -115BE;N # Mc SIDDHAM SIGN VISARGA -115BF..115C0;N # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA -115C1..115D7;N # Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES -115D8..115DB;N # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U -115DC..115DD;N # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU -11600..1162F;N # Lo [48] MODI 
LETTER A..MODI LETTER LLA -11630..11632;N # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II -11633..1163A;N # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI -1163B..1163C;N # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU -1163D;N # Mn MODI SIGN ANUSVARA -1163E;N # Mc MODI SIGN VISARGA -1163F..11640;N # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA -11641..11643;N # Po [3] MODI DANDA..MODI ABBREVIATION SIGN -11644;N # Lo MODI SIGN HUVA -11650..11659;N # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE -11660..1166C;N # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT -11680..116AA;N # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA -116AB;N # Mn TAKRI SIGN ANUSVARA -116AC;N # Mc TAKRI SIGN VISARGA -116AD;N # Mn TAKRI VOWEL SIGN AA -116AE..116AF;N # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II -116B0..116B5;N # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU -116B6;N # Mc TAKRI SIGN VIRAMA -116B7;N # Mn TAKRI SIGN NUKTA -116B8;N # Lo TAKRI LETTER ARCHAIC KHA -116B9;N # Po TAKRI ABBREVIATION SIGN -116C0..116C9;N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE -11700..1171A;N # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA -1171D..1171F;N # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA -11720..11721;N # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA -11722..11725;N # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU -11726;N # Mc AHOM VOWEL SIGN E -11727..1172B;N # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER -11730..11739;N # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE -1173A..1173B;N # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY -1173C..1173E;N # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI -1173F;N # So AHOM SYMBOL VI -11740..11746;N # Lo [7] AHOM LETTER CA..AHOM LETTER LLA -11800..1182B;N # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA -1182C..1182E;N # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II -1182F..11837;N # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA -11838;N # Mc DOGRA SIGN VISARGA -11839..1183A;N # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA -1183B;N # Po DOGRA ABBREVIATION SIGN -118A0..118DF;N # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO -118E0..118E9;N # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE -118EA..118F2;N # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY -118FF;N # Lo WARANG CITI OM -11900..11906;N # Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E -11909;N # Lo DIVES AKURU LETTER O -1190C..11913;N # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA -11915..11916;N # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA -11918..1192F;N # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA -11930..11935;N # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E -11937..11938;N # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O -1193B..1193C;N # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU -1193D;N # Mc DIVES AKURU SIGN HALANTA -1193E;N # Mn DIVES AKURU VIRAMA -1193F;N # Lo DIVES AKURU PREFIXED NASAL SIGN -11940;N # Mc DIVES AKURU MEDIAL YA -11941;N # Lo DIVES AKURU INITIAL RA -11942;N # Mc DIVES AKURU MEDIAL RA -11943;N # Mn DIVES AKURU SIGN NUKTA -11944..11946;N # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK -11950..11959;N # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE -119A0..119A7;N # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR -119AA..119D0;N # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA -119D1..119D3;N # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II -119D4..119D7;N # Mn [4] 
NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR -119DA..119DB;N # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI -119DC..119DF;N # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA -119E0;N # Mn NANDINAGARI SIGN VIRAMA -119E1;N # Lo NANDINAGARI SIGN AVAGRAHA -119E2;N # Po NANDINAGARI SIGN SIDDHAM -119E3;N # Lo NANDINAGARI HEADSTROKE -119E4;N # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E -11A00;N # Lo ZANABAZAR SQUARE LETTER A -11A01..11A0A;N # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK -11A0B..11A32;N # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA -11A33..11A38;N # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA -11A39;N # Mc ZANABAZAR SQUARE SIGN VISARGA -11A3A;N # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA -11A3B..11A3E;N # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA -11A3F..11A46;N # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK -11A47;N # Mn ZANABAZAR SQUARE SUBJOINER -11A50;N # Lo SOYOMBO LETTER A -11A51..11A56;N # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE -11A57..11A58;N # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU -11A59..11A5B;N # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK -11A5C..11A89;N # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA -11A8A..11A96;N # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA -11A97;N # Mc SOYOMBO SIGN VISARGA -11A98..11A99;N # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER -11A9A..11A9C;N # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD -11A9D;N # Lo SOYOMBO MARK PLUTA -11A9E..11AA2;N # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 -11AB0..11ABF;N # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA -11AC0..11AF8;N # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL -11B00..11B09;N # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU -11C00..11C08;N # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L -11C0A..11C2E;N # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA -11C2F;N # Mc BHAIKSUKI VOWEL SIGN AA -11C30..11C36;N # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L -11C38..11C3D;N # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA -11C3E;N # Mc BHAIKSUKI SIGN VISARGA -11C3F;N # Mn BHAIKSUKI SIGN VIRAMA -11C40;N # Lo BHAIKSUKI SIGN AVAGRAHA -11C41..11C45;N # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 -11C50..11C59;N # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE -11C5A..11C6C;N # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK -11C70..11C71;N # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD -11C72..11C8F;N # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A -11C92..11CA7;N # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA -11CA9;N # Mc MARCHEN SUBJOINED LETTER YA -11CAA..11CB0;N # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA -11CB1;N # Mc MARCHEN VOWEL SIGN I -11CB2..11CB3;N # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E -11CB4;N # Mc MARCHEN VOWEL SIGN O -11CB5..11CB6;N # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU -11D00..11D06;N # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E -11D08..11D09;N # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O -11D0B..11D30;N # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA -11D31..11D36;N # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R -11D3A;N # Mn 
MASARAM GONDI VOWEL SIGN E -11D3C..11D3D;N # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O -11D3F..11D45;N # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA -11D46;N # Lo MASARAM GONDI REPHA -11D47;N # Mn MASARAM GONDI RA-KARA -11D50..11D59;N # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE -11D60..11D65;N # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU -11D67..11D68;N # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI -11D6A..11D89;N # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA -11D8A..11D8E;N # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU -11D90..11D91;N # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI -11D93..11D94;N # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU -11D95;N # Mn GUNJALA GONDI SIGN ANUSVARA -11D96;N # Mc GUNJALA GONDI SIGN VISARGA -11D97;N # Mn GUNJALA GONDI VIRAMA -11D98;N # Lo GUNJALA GONDI OM -11DA0..11DA9;N # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE -11EE0..11EF2;N # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA -11EF3..11EF4;N # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U -11EF5..11EF6;N # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O -11EF7..11EF8;N # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION -11F00..11F01;N # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA -11F02;N # Lo KAWI SIGN REPHA -11F03;N # Mc KAWI SIGN VISARGA -11F04..11F10;N # Lo [13] KAWI LETTER A..KAWI LETTER O -11F12..11F33;N # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA -11F34..11F35;N # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA -11F36..11F3A;N # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R -11F3E..11F3F;N # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI -11F40;N # Mn KAWI VOWEL SIGN EU -11F41;N # Mc KAWI SIGN KILLER -11F42;N # Mn KAWI CONJOINER -11F43..11F4F;N # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL -11F50..11F59;N # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE -11FB0;N # Lo LISU LETTER YHA -11FC0..11FD4;N # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH -11FD5..11FDC;N # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI -11FDD..11FE0;N # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN -11FE1..11FF1;N # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA -11FFF;N # Po TAMIL PUNCTUATION END OF TEXT -12000..12399;N # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U -12400..1246E;N # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM -12470..12474;N # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON -12480..12543;N # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU -12F90..12FF0;N # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 -12FF1..12FF2;N # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 -13000..1342F;N # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D -13430..1343F;N # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE -13440;N # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY -13441..13446;N # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN -13447..13455;N # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED -14400..14646;N # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 -16800..16A38;N # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ -16A40..16A5E;N # Lo [31] MRO LETTER TA..MRO LETTER TEK 
-16A60..16A69;N # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE -16A6E..16A6F;N # Po [2] MRO DANDA..MRO DOUBLE DANDA -16A70..16ABE;N # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA -16AC0..16AC9;N # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE -16AD0..16AED;N # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I -16AF0..16AF4;N # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE -16AF5;N # Po BASSA VAH FULL STOP -16B00..16B2F;N # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU -16B30..16B36;N # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM -16B37..16B3B;N # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM -16B3C..16B3F;N # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB -16B40..16B43;N # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM -16B44;N # Po PAHAWH HMONG SIGN XAUS -16B45;N # So PAHAWH HMONG SIGN CIM TSOV ROG -16B50..16B59;N # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE -16B5B..16B61;N # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS -16B63..16B77;N # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS -16B7D..16B8F;N # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ -16E40..16E7F;N # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y -16E80..16E96;N # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM -16E97..16E9A;N # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH -16F00..16F4A;N # Lo [75] MIAO LETTER PA..MIAO LETTER RTE -16F4F;N # Mn MIAO SIGN CONSONANT MODIFIER BAR -16F50;N # Lo MIAO LETTER NASALIZATION -16F51..16F87;N # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI -16F8F..16F92;N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW -16F93..16F9F;N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 -16FE0..16FE1;W # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK -16FE2;W # Po OLD CHINESE HOOK MARK -16FE3;W # Lm OLD CHINESE ITERATION MARK -16FE4;W # Mn KHITAN SMALL SCRIPT FILLER -16FF0..16FF1;W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY -17000..187F7;W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 -18800..18AFF;W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 -18B00..18CD5;W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 -18D00..18D08;W # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 -1AFF0..1AFF3;W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 -1AFF5..1AFFB;W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 -1AFFD..1AFFE;W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 -1B000..1B0FF;W # Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2 -1B100..1B122;W # Lo [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU -1B132;W # Lo HIRAGANA LETTER SMALL KO -1B150..1B152;W # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO -1B155;W # Lo KATAKANA LETTER SMALL KO -1B164..1B167;W # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N -1B170..1B2FB;W # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB -1BC00..1BC6A;N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M -1BC70..1BC7C;N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK -1BC80..1BC88;N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL -1BC90..1BC99;N # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW -1BC9C;N # So DUPLOYAN SIGN O WITH CROSS 
-1BC9D..1BC9E;N # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK -1BC9F;N # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP -1BCA0..1BCA3;N # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP -1CF00..1CF2D;N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT -1CF30..1CF46;N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG -1CF50..1CFC3;N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK -1D000..1D0F5;N # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO -1D100..1D126;N # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 -1D129..1D164;N # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE -1D165..1D166;N # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM -1D167..1D169;N # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 -1D16A..1D16C;N # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 -1D16D..1D172;N # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 -1D173..1D17A;N # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE -1D17B..1D182;N # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE -1D183..1D184;N # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN -1D185..1D18B;N # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE -1D18C..1D1A9;N # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH -1D1AA..1D1AD;N # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO -1D1AE..1D1EA;N # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON -1D200..1D241;N # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 -1D242..1D244;N # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME -1D245;N # So GREEK MUSICAL LEIMMA -1D2C0..1D2D3;N # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN -1D2E0..1D2F3;N # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN -1D300..1D356;N # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING -1D360..1D378;N # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE -1D400..1D454;N # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G -1D456..1D49C;N # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A -1D49E..1D49F;N # Lu [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D -1D4A2;N # Lu MATHEMATICAL SCRIPT CAPITAL G -1D4A5..1D4A6;N # Lu [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K -1D4A9..1D4AC;N # Lu [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q -1D4AE..1D4B9;N # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D -1D4BB;N # Ll MATHEMATICAL SCRIPT SMALL F -1D4BD..1D4C3;N # Ll [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N -1D4C5..1D505;N # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B -1D507..1D50A;N # Lu [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G -1D50D..1D514;N # Lu [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q -1D516..1D51C;N # Lu [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y -1D51E..1D539;N # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B -1D53B..1D53E;N # Lu [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G -1D540..1D544;N # Lu [5] 
MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M -1D546;N # Lu MATHEMATICAL DOUBLE-STRUCK CAPITAL O -1D54A..1D550;N # Lu [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y -1D552..1D6A5;N # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J -1D6A8..1D6C0;N # Lu [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA -1D6C1;N # Sm MATHEMATICAL BOLD NABLA -1D6C2..1D6DA;N # Ll [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA -1D6DB;N # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL -1D6DC..1D6FA;N # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA -1D6FB;N # Sm MATHEMATICAL ITALIC NABLA -1D6FC..1D714;N # Ll [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA -1D715;N # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL -1D716..1D734;N # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA -1D735;N # Sm MATHEMATICAL BOLD ITALIC NABLA -1D736..1D74E;N # Ll [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA -1D74F;N # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL -1D750..1D76E;N # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA -1D76F;N # Sm MATHEMATICAL SANS-SERIF BOLD NABLA -1D770..1D788;N # Ll [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA -1D789;N # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL -1D78A..1D7A8;N # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA -1D7A9;N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA -1D7AA..1D7C2;N # Ll [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA -1D7C3;N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL -1D7C4..1D7CB;N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA -1D7CE..1D7FF;N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -1D800..1D9FF;N # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD -1DA00..1DA36;N # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN -1DA37..1DA3A;N # So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE -1DA3B..1DA6C;N # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT -1DA6D..1DA74;N # So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING -1DA75;N # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS -1DA76..1DA83;N # So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH -1DA84;N # Mn SIGNWRITING LOCATION HEAD NECK -1DA85..1DA86;N # So [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS -1DA87..1DA8B;N # Po [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS -1DA9B..1DA9F;N # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 -1DAA1..1DAAF;N # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 -1DF00..1DF09;N # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK -1DF0A;N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK -1DF0B..1DF1E;N # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A;N # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1E000..1E006;N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE -1E008..1E018;N # Mn [17] COMBINING 
GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU -1E01B..1E021;N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI -1E023..1E024;N # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS -1E026..1E02A;N # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA -1E030..1E06D;N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE -1E08F;N # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -1E100..1E12C;N # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W -1E130..1E136;N # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D -1E137..1E13D;N # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER -1E140..1E149;N # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE -1E14E;N # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ -1E14F;N # So NYIAKENG PUACHUE HMONG CIRCLED CA -1E290..1E2AD;N # Lo [30] TOTO LETTER PA..TOTO LETTER A -1E2AE;N # Mn TOTO SIGN RISING TONE -1E2C0..1E2EB;N # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH -1E2EC..1E2EF;N # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI -1E2F0..1E2F9;N # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE -1E2FF;N # Sc WANCHO NGUN SIGN -1E4D0..1E4EA;N # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL -1E4EB;N # Lm NAG MUNDARI SIGN OJOD -1E4EC..1E4EF;N # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH -1E4F0..1E4F9;N # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE -1E7E0..1E7E6;N # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO -1E7E8..1E7EB;N # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE -1E7ED..1E7EE;N # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE -1E7F0..1E7FE;N # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE -1E800..1E8C4;N # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON -1E8C7..1E8CF;N # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE -1E8D0..1E8D6;N # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -1E900..1E943;N # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -1E944..1E94A;N # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA -1E94B;N # Lm ADLAM NASALIZATION MARK -1E950..1E959;N # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE -1E95E..1E95F;N # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -1EC71..1ECAB;N # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE -1ECAC;N # So INDIC SIYAQ PLACEHOLDER -1ECAD..1ECAF;N # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS -1ECB0;N # Sc INDIC SIYAQ RUPEE MARK -1ECB1..1ECB4;N # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK -1ED01..1ED2D;N # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND -1ED2E;N # So OTTOMAN SIYAQ MARRATAN -1ED2F..1ED3D;N # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH -1EE00..1EE03;N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL -1EE05..1EE1F;N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF -1EE21..1EE22;N # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM -1EE24;N # Lo ARABIC MATHEMATICAL INITIAL HEH -1EE27;N # Lo ARABIC MATHEMATICAL INITIAL HAH -1EE29..1EE32;N # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF -1EE34..1EE37;N # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL 
INITIAL KHAH -1EE39;N # Lo ARABIC MATHEMATICAL INITIAL DAD -1EE3B;N # Lo ARABIC MATHEMATICAL INITIAL GHAIN -1EE42;N # Lo ARABIC MATHEMATICAL TAILED JEEM -1EE47;N # Lo ARABIC MATHEMATICAL TAILED HAH -1EE49;N # Lo ARABIC MATHEMATICAL TAILED YEH -1EE4B;N # Lo ARABIC MATHEMATICAL TAILED LAM -1EE4D..1EE4F;N # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN -1EE51..1EE52;N # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF -1EE54;N # Lo ARABIC MATHEMATICAL TAILED SHEEN -1EE57;N # Lo ARABIC MATHEMATICAL TAILED KHAH -1EE59;N # Lo ARABIC MATHEMATICAL TAILED DAD -1EE5B;N # Lo ARABIC MATHEMATICAL TAILED GHAIN -1EE5D;N # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON -1EE5F;N # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF -1EE61..1EE62;N # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM -1EE64;N # Lo ARABIC MATHEMATICAL STRETCHED HEH -1EE67..1EE6A;N # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF -1EE6C..1EE72;N # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF -1EE74..1EE77;N # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH -1EE79..1EE7C;N # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH -1EE7E;N # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH -1EE80..1EE89;N # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH -1EE8B..1EE9B;N # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN -1EEA1..1EEA3;N # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL -1EEA5..1EEA9;N # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH -1EEAB..1EEBB;N # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -1EEF0..1EEF1;N # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -1F000..1F003;N # So [4] MAHJONG TILE EAST WIND..MAHJONG TILE NORTH WIND -1F004;W # So MAHJONG TILE RED DRAGON -1F005..1F02B;N # So [39] MAHJONG TILE GREEN DRAGON..MAHJONG TILE BACK -1F030..1F093;N # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 -1F0A0..1F0AE;N # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES -1F0B1..1F0BF;N # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER -1F0C1..1F0CE;N # So [14] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD KING OF DIAMONDS -1F0CF;W # So PLAYING CARD BLACK JOKER -1F0D1..1F0F5;N # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 -1F100..1F10A;A # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA -1F10B..1F10C;N # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO -1F10D..1F10F;N # So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH -1F110..1F12D;A # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD -1F12E..1F12F;N # So [2] CIRCLED WZ..COPYLEFT SYMBOL -1F130..1F169;A # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z -1F16A..1F16F;N # So [6] RAISED MC SIGN..CIRCLED HUMAN FIGURE -1F170..1F18D;A # So [30] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED SA -1F18E;W # So NEGATIVE SQUARED AB -1F18F..1F190;A # So [2] NEGATIVE SQUARED WC..SQUARE DJ -1F191..1F19A;W # So [10] SQUARED CL..SQUARED VS -1F19B..1F1AC;A # So [18] SQUARED THREE D..SQUARED VOD -1F1AD;N # So MASK WORK SYMBOL -1F1E6..1F1FF;N # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z 
-1F200..1F202;W # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA -1F210..1F23B;W # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D -1F240..1F248;W # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 -1F250..1F251;W # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT -1F260..1F265;W # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI -1F300..1F320;W # So [33] CYCLONE..SHOOTING STAR -1F321..1F32C;N # So [12] THERMOMETER..WIND BLOWING FACE -1F32D..1F335;W # So [9] HOT DOG..CACTUS -1F336;N # So HOT PEPPER -1F337..1F37C;W # So [70] TULIP..BABY BOTTLE -1F37D;N # So FORK AND KNIFE WITH PLATE -1F37E..1F393;W # So [22] BOTTLE WITH POPPING CORK..GRADUATION CAP -1F394..1F39F;N # So [12] HEART WITH TIP ON THE LEFT..ADMISSION TICKETS -1F3A0..1F3CA;W # So [43] CAROUSEL HORSE..SWIMMER -1F3CB..1F3CE;N # So [4] WEIGHT LIFTER..RACING CAR -1F3CF..1F3D3;W # So [5] CRICKET BAT AND BALL..TABLE TENNIS PADDLE AND BALL -1F3D4..1F3DF;N # So [12] SNOW CAPPED MOUNTAIN..STADIUM -1F3E0..1F3F0;W # So [17] HOUSE BUILDING..EUROPEAN CASTLE -1F3F1..1F3F3;N # So [3] WHITE PENNANT..WAVING WHITE FLAG -1F3F4;W # So WAVING BLACK FLAG -1F3F5..1F3F7;N # So [3] ROSETTE..LABEL -1F3F8..1F3FA;W # So [3] BADMINTON RACQUET AND SHUTTLECOCK..AMPHORA -1F3FB..1F3FF;W # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 -1F400..1F43E;W # So [63] RAT..PAW PRINTS -1F43F;N # So CHIPMUNK -1F440;W # So EYES -1F441;N # So EYE -1F442..1F4FC;W # So [187] EAR..VIDEOCASSETTE -1F4FD..1F4FE;N # So [2] FILM PROJECTOR..PORTABLE STEREO -1F4FF..1F53D;W # So [63] PRAYER BEADS..DOWN-POINTING SMALL RED TRIANGLE -1F53E..1F54A;N # So [13] LOWER RIGHT SHADOWED WHITE CIRCLE..DOVE OF PEACE -1F54B..1F54E;W # So [4] KAABA..MENORAH WITH NINE BRANCHES -1F54F;N # So BOWL OF HYGIEIA -1F550..1F567;W # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F568..1F579;N # So [18] RIGHT SPEAKER..JOYSTICK -1F57A;W # So MAN DANCING -1F57B..1F594;N # So [26] LEFT HAND TELEPHONE RECEIVER..REVERSED VICTORY HAND -1F595..1F596;W # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS -1F597..1F5A3;N # So [13] WHITE DOWN POINTING LEFT HAND INDEX..BLACK DOWN POINTING BACKHAND INDEX -1F5A4;W # So BLACK HEART -1F5A5..1F5FA;N # So [86] DESKTOP COMPUTER..WORLD MAP -1F5FB..1F5FF;W # So [5] MOUNT FUJI..MOYAI -1F600..1F64F;W # So [80] GRINNING FACE..PERSON WITH FOLDED HANDS -1F650..1F67F;N # So [48] NORTH WEST POINTING LEAF..REVERSE CHECKER BOARD -1F680..1F6C5;W # So [70] ROCKET..LEFT LUGGAGE -1F6C6..1F6CB;N # So [6] TRIANGLE WITH ROUNDED CORNERS..COUCH AND LAMP -1F6CC;W # So SLEEPING ACCOMMODATION -1F6CD..1F6CF;N # So [3] SHOPPING BAGS..BED -1F6D0..1F6D2;W # So [3] PLACE OF WORSHIP..SHOPPING TROLLEY -1F6D3..1F6D4;N # So [2] STUPA..PAGODA -1F6D5..1F6D7;W # So [3] HINDU TEMPLE..ELEVATOR -1F6DC..1F6DF;W # So [4] WIRELESS..RING BUOY -1F6E0..1F6EA;N # So [11] HAMMER AND WRENCH..NORTHEAST-POINTING AIRPLANE -1F6EB..1F6EC;W # So [2] AIRPLANE DEPARTURE..AIRPLANE ARRIVING -1F6F0..1F6F3;N # So [4] SATELLITE..PASSENGER SHIP -1F6F4..1F6FC;W # So [9] SCOOTER..ROLLER SKATE -1F700..1F776;N # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE -1F77B..1F77F;N # So [5] HAUMEA..ORCUS -1F780..1F7D9;N # So [90] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NINE POINTED WHITE STAR -1F7E0..1F7EB;W # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE -1F7F0;W # So HEAVY EQUALS SIGN -1F800..1F80B;N # So [12] LEFTWARDS ARROW 
WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD -1F810..1F847;N # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW -1F850..1F859;N # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW -1F860..1F887;N # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW -1F890..1F8AD;N # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8B0..1F8B1;N # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST -1F900..1F90B;N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT -1F90C..1F93A;W # So [47] PINCHED FINGERS..FENCER -1F93B;N # So MODERN PENTATHLON -1F93C..1F945;W # So [10] WRESTLERS..GOAL NET -1F946;N # So RIFLE -1F947..1F9FF;W # So [185] FIRST PLACE MEDAL..NAZAR AMULET -1FA00..1FA53;N # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP -1FA60..1FA6D;N # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER -1FA70..1FA7C;W # So [13] BALLET SHOES..CRUTCH -1FA80..1FA88;W # So [9] YO-YO..FLUTE -1FA90..1FABD;W # So [46] RINGED PLANET..WING -1FABF..1FAC5;W # So [7] GOOSE..PERSON WITH CROWN -1FACE..1FADB;W # So [14] MOOSE..PEA POD -1FAE0..1FAE8;W # So [9] MELTING FACE..SHAKING FACE -1FAF0..1FAF8;W # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND -1FB00..1FB92;N # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK -1FB94..1FBCA;N # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -1FBF0..1FBF9;N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -20000..2A6DF;W # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF -2A6E0..2A6FF;W # Cn [32] <reserved-2A6E0>..<reserved-2A6FF> -2A700..2B739;W # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 -2B73A..2B73F;W # Cn [6] <reserved-2B73A>..<reserved-2B73F> -2B740..2B81D;W # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D -2B81E..2B81F;W # Cn [2] <reserved-2B81E>..<reserved-2B81F> -2B820..2CEA1;W # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 -2CEA2..2CEAF;W # Cn [14] <reserved-2CEA2>..<reserved-2CEAF> -2CEB0..2EBE0;W # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 -2EBE1..2F7FF;W # Cn [3103] <reserved-2EBE1>..<reserved-2F7FF> -2F800..2FA1D;W # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -2FA1E..2FA1F;W # Cn [2] <reserved-2FA1E>..<reserved-2FA1F> -2FA20..2FFFD;W # Cn [1502] <reserved-2FA20>..<reserved-2FFFD> -30000..3134A;W # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A -3134B..3134F;W # Cn [5] <reserved-3134B>..<reserved-3134F> -31350..323AF;W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -323B0..3FFFD;W # Cn [56398] <reserved-323B0>..<reserved-3FFFD> -E0001;N # Cf LANGUAGE TAG -E0020..E007F;N # Cf [96] TAG SPACE..CANCEL TAG -E0100..E01EF;A # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -F0000..FFFFD;A # Co [65534] <private-use-F0000>..<private-use-FFFFD> -100000..10FFFD;A # Co [65534] <private-use-100000>..<private-use-10FFFD> +0000..001F ; N # Cc [32] <control-0000>..<control-001F>
+0020 ; Na # Zs SPACE +0021..0023 ; Na # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Na # Sc DOLLAR SIGN +0025..0027 ; Na # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Na # Ps LEFT PARENTHESIS +0029 ; Na # Pe RIGHT PARENTHESIS +002A ; Na # Po ASTERISK +002B ; Na # Sm PLUS SIGN +002C ; Na # Po COMMA +002D ; Na # Pd HYPHEN-MINUS +002E..002F ; Na # Po [2] FULL STOP..SOLIDUS +0030..0039 ; Na # Nd [10] DIGIT ZERO..DIGIT NINE +003A..003B ; Na # Po [2] COLON..SEMICOLON +003C..003E ; Na # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Na # Po [2] QUESTION MARK..COMMERCIAL AT +0041..005A ; Na # Lu [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +005B ; Na # Ps LEFT SQUARE BRACKET +005C ; Na # Po REVERSE SOLIDUS +005D ; Na # Pe RIGHT SQUARE BRACKET +005E ; Na # Sk CIRCUMFLEX ACCENT +005F ; Na # Pc LOW LINE +0060 ; Na # Sk GRAVE ACCENT +0061..007A ; Na # Ll [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +007B ; Na # Ps LEFT CURLY BRACKET +007C ; Na # Sm VERTICAL LINE +007D ; Na # Pe RIGHT CURLY BRACKET +007E ; Na # Sm TILDE +007F ; N # Cc <control-007F> +0080..009F ; N # Cc [32] <control-0080>..<control-009F> +00A0 ; N # Zs NO-BREAK SPACE +00A1 ; A # Po INVERTED EXCLAMATION MARK +00A2..00A3 ; Na # Sc [2] CENT SIGN..POUND SIGN +00A4 ; A # Sc CURRENCY SIGN +00A5 ; Na # Sc YEN SIGN +00A6 ; Na # So BROKEN BAR +00A7 ; A # Po SECTION SIGN +00A8 ; A # Sk DIAERESIS +00A9 ; N # So COPYRIGHT SIGN +00AA ; A # Lo FEMININE ORDINAL INDICATOR +00AB ; N # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Na # Sm NOT SIGN +00AD ; A # Cf SOFT HYPHEN +00AE ; A # So REGISTERED SIGN +00AF ; Na # Sk MACRON +00B0 ; A # So DEGREE SIGN +00B1 ; A # Sm PLUS-MINUS SIGN +00B2..00B3 ; A # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B4 ; A # Sk ACUTE ACCENT +00B5 ; N # Ll MICRO SIGN +00B6..00B7 ; A # Po [2] PILCROW SIGN..MIDDLE DOT +00B8 ; A # Sk CEDILLA +00B9 ; A # No SUPERSCRIPT ONE +00BA ; A # Lo MASCULINE ORDINAL INDICATOR +00BB ; N # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BC..00BE ; A # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS +00BF ; A # Po INVERTED QUESTION MARK +00C0..00C5 ; N # Lu [6] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER A WITH RING ABOVE +00C6 ; A # Lu LATIN CAPITAL LETTER AE +00C7..00CF ; N # Lu [9] LATIN CAPITAL LETTER C WITH CEDILLA..LATIN CAPITAL LETTER I WITH DIAERESIS +00D0 ; A # Lu LATIN CAPITAL LETTER ETH +00D1..00D6 ; N # Lu [6] LATIN CAPITAL LETTER N WITH TILDE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D7 ; A # Sm MULTIPLICATION SIGN +00D8 ; A # Lu LATIN CAPITAL LETTER O WITH STROKE +00D9..00DD ; N # Lu [5] LATIN CAPITAL LETTER U WITH GRAVE..LATIN CAPITAL LETTER Y WITH ACUTE +00DE..00E1 ; A # L& [4] LATIN CAPITAL LETTER THORN..LATIN SMALL LETTER A WITH ACUTE +00E2..00E5 ; N # Ll [4] LATIN SMALL LETTER A WITH CIRCUMFLEX..LATIN SMALL LETTER A WITH RING ABOVE +00E6 ; A # Ll LATIN SMALL LETTER AE +00E7 ; N # Ll LATIN SMALL LETTER C WITH CEDILLA +00E8..00EA ; A # Ll [3] LATIN SMALL LETTER E WITH GRAVE..LATIN SMALL LETTER E WITH CIRCUMFLEX +00EB ; N # Ll LATIN SMALL LETTER E WITH DIAERESIS +00EC..00ED ; A # Ll [2] LATIN SMALL LETTER I WITH GRAVE..LATIN SMALL LETTER I WITH ACUTE +00EE..00EF ; N # Ll [2] LATIN SMALL LETTER I WITH CIRCUMFLEX..LATIN SMALL LETTER I WITH DIAERESIS +00F0 ; A # Ll LATIN SMALL LETTER ETH +00F1 ; N # Ll LATIN SMALL LETTER N WITH TILDE +00F2..00F3 ; A # Ll [2] LATIN SMALL LETTER O WITH GRAVE..LATIN SMALL LETTER O WITH ACUTE +00F4..00F6 ; N # Ll [3] LATIN SMALL LETTER O WITH CIRCUMFLEX..LATIN SMALL LETTER O WITH DIAERESIS +00F7 ; A # Sm DIVISION SIGN +00F8..00FA ; A #
Ll [3] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER U WITH ACUTE
+00FB ; N # Ll LATIN SMALL LETTER U WITH CIRCUMFLEX
+00FC ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS
+00FD ; N # Ll LATIN SMALL LETTER Y WITH ACUTE
+00FE ; A # Ll LATIN SMALL LETTER THORN
+00FF ; N # Ll LATIN SMALL LETTER Y WITH DIAERESIS
+0100 ; N # Lu LATIN CAPITAL LETTER A WITH MACRON
+0101 ; A # Ll LATIN SMALL LETTER A WITH MACRON
+0102..0110 ; N # L& [15] LATIN CAPITAL LETTER A WITH BREVE..LATIN CAPITAL LETTER D WITH STROKE
+0111 ; A # Ll LATIN SMALL LETTER D WITH STROKE
+0112 ; N # Lu LATIN CAPITAL LETTER E WITH MACRON
+0113 ; A # Ll LATIN SMALL LETTER E WITH MACRON
+0114..011A ; N # L& [7] LATIN CAPITAL LETTER E WITH BREVE..LATIN CAPITAL LETTER E WITH CARON
+011B ; A # Ll LATIN SMALL LETTER E WITH CARON
+011C..0125 ; N # L& [10] LATIN CAPITAL LETTER G WITH CIRCUMFLEX..LATIN SMALL LETTER H WITH CIRCUMFLEX
+0126..0127 ; A # L& [2] LATIN CAPITAL LETTER H WITH STROKE..LATIN SMALL LETTER H WITH STROKE
+0128..012A ; N # L& [3] LATIN CAPITAL LETTER I WITH TILDE..LATIN CAPITAL LETTER I WITH MACRON
+012B ; A # Ll LATIN SMALL LETTER I WITH MACRON
+012C..0130 ; N # L& [5] LATIN CAPITAL LETTER I WITH BREVE..LATIN CAPITAL LETTER I WITH DOT ABOVE
+0131..0133 ; A # L& [3] LATIN SMALL LETTER DOTLESS I..LATIN SMALL LIGATURE IJ
+0134..0137 ; N # L& [4] LATIN CAPITAL LETTER J WITH CIRCUMFLEX..LATIN SMALL LETTER K WITH CEDILLA
+0138 ; A # Ll LATIN SMALL LETTER KRA
+0139..013E ; N # L& [6] LATIN CAPITAL LETTER L WITH ACUTE..LATIN SMALL LETTER L WITH CARON
+013F..0142 ; A # L& [4] LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH STROKE
+0143 ; N # Lu LATIN CAPITAL LETTER N WITH ACUTE
+0144 ; A # Ll LATIN SMALL LETTER N WITH ACUTE
+0145..0147 ; N # L& [3] LATIN CAPITAL LETTER N WITH CEDILLA..LATIN CAPITAL LETTER N WITH CARON
+0148..014B ; A # L& [4] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER ENG
+014C ; N # Lu LATIN CAPITAL LETTER O WITH MACRON
+014D ; A # Ll LATIN SMALL LETTER O WITH MACRON
+014E..0151 ; N # L& [4] LATIN CAPITAL LETTER O WITH BREVE..LATIN SMALL LETTER O WITH DOUBLE ACUTE
+0152..0153 ; A # L& [2] LATIN CAPITAL LIGATURE OE..LATIN SMALL LIGATURE OE
+0154..0165 ; N # L& [18] LATIN CAPITAL LETTER R WITH ACUTE..LATIN SMALL LETTER T WITH CARON
+0166..0167 ; A # L& [2] LATIN CAPITAL LETTER T WITH STROKE..LATIN SMALL LETTER T WITH STROKE
+0168..016A ; N # L& [3] LATIN CAPITAL LETTER U WITH TILDE..LATIN CAPITAL LETTER U WITH MACRON
+016B ; A # Ll LATIN SMALL LETTER U WITH MACRON
+016C..017F ; N # L& [20] LATIN CAPITAL LETTER U WITH BREVE..LATIN SMALL LETTER LONG S
+0180..01BA ; N # L& [59] LATIN SMALL LETTER B WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL
+01BB ; N # Lo LATIN LETTER TWO WITH STROKE
+01BC..01BF ; N # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN
+01C0..01C3 ; N # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK
+01C4..01CD ; N # L& [10] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER A WITH CARON
+01CE ; A # Ll LATIN SMALL LETTER A WITH CARON
+01CF ; N # Lu LATIN CAPITAL LETTER I WITH CARON
+01D0 ; A # Ll LATIN SMALL LETTER I WITH CARON
+01D1 ; N # Lu LATIN CAPITAL LETTER O WITH CARON
+01D2 ; A # Ll LATIN SMALL LETTER O WITH CARON
+01D3 ; N # Lu LATIN CAPITAL LETTER U WITH CARON
+01D4 ; A # Ll LATIN SMALL LETTER U WITH CARON
+01D5 ; N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+01D6 ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
+01D7 ; N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+01D8 ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
+01D9 ; N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+01DA ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND CARON
+01DB ; N # Lu LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+01DC ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
+01DD..024F ; N # L& [115] LATIN SMALL LETTER TURNED E..LATIN SMALL LETTER Y WITH STROKE
+0250 ; N # Ll LATIN SMALL LETTER TURNED A
+0251 ; A # Ll LATIN SMALL LETTER ALPHA
+0252..0260 ; N # Ll [15] LATIN SMALL LETTER TURNED ALPHA..LATIN SMALL LETTER G WITH HOOK
+0261 ; A # Ll LATIN SMALL LETTER SCRIPT G
+0262..0293 ; N # Ll [50] LATIN LETTER SMALL CAPITAL G..LATIN SMALL LETTER EZH WITH CURL
+0294 ; N # Lo LATIN LETTER GLOTTAL STOP
+0295..02AF ; N # Ll [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
+02B0..02C1 ; N # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
+02C2..02C3 ; N # Sk [2] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER RIGHT ARROWHEAD
+02C4 ; A # Sk MODIFIER LETTER UP ARROWHEAD
+02C5 ; N # Sk MODIFIER LETTER DOWN ARROWHEAD
+02C6 ; N # Lm MODIFIER LETTER CIRCUMFLEX ACCENT
+02C7 ; A # Lm CARON
+02C8 ; N # Lm MODIFIER LETTER VERTICAL LINE
+02C9..02CB ; A # Lm [3] MODIFIER LETTER MACRON..MODIFIER LETTER GRAVE ACCENT
+02CC ; N # Lm MODIFIER LETTER LOW VERTICAL LINE
+02CD ; A # Lm MODIFIER LETTER LOW MACRON
+02CE..02CF ; N # Lm [2] MODIFIER LETTER LOW GRAVE ACCENT..MODIFIER LETTER LOW ACUTE ACCENT
+02D0 ; A # Lm MODIFIER LETTER TRIANGULAR COLON
+02D1 ; N # Lm MODIFIER LETTER HALF TRIANGULAR COLON
+02D2..02D7 ; N # Sk [6] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER MINUS SIGN
+02D8..02DB ; A # Sk [4] BREVE..OGONEK
+02DC ; N # Sk SMALL TILDE
+02DD ; A # Sk DOUBLE ACUTE ACCENT
+02DE ; N # Sk MODIFIER LETTER RHOTIC HOOK
+02DF ; A # Sk MODIFIER LETTER CROSS ACCENT
+02E0..02E4 ; N # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
+02E5..02EB ; N # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
+02EC ; N # Lm MODIFIER LETTER VOICING
+02ED ; N # Sk MODIFIER LETTER UNASPIRATED
+02EE ; N # Lm MODIFIER LETTER DOUBLE APOSTROPHE
+02EF..02FF ; N # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
+0300..036F ; A # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X
+0370..0373 ; N # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI
+0374 ; N # Lm GREEK NUMERAL SIGN
+0375 ; N # Sk GREEK LOWER NUMERAL SIGN
+0376..0377 ; N # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
+037A ; N # Lm GREEK YPOGEGRAMMENI
+037B..037D ; N # Ll [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
+037E ; N # Po GREEK QUESTION MARK
+037F ; N # Lu GREEK CAPITAL LETTER YOT
+0384..0385 ; N # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS
+0386 ; N # Lu GREEK CAPITAL LETTER ALPHA WITH TONOS
+0387 ; N # Po GREEK ANO TELEIA
+0388..038A ; N # Lu [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
+038C ; N # Lu GREEK CAPITAL LETTER OMICRON WITH TONOS
+038E..0390 ; N # L& [3] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0391..03A1 ; A # Lu [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO
+03A3..03A9 ; A # Lu [7] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER OMEGA
+03AA..03B0 ; N # L& [7] GREEK CAPITAL LETTER IOTA WITH DIALYTIKA..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+03B1..03C1 ; A # Ll [17] GREEK SMALL LETTER ALPHA..GREEK SMALL LETTER RHO
+03C2 ; N # Ll GREEK SMALL LETTER FINAL SIGMA
+03C3..03C9 ; A # Ll [7] GREEK SMALL LETTER SIGMA..GREEK SMALL LETTER OMEGA
+03CA..03F5 ; N # L& [44] GREEK SMALL LETTER IOTA WITH DIALYTIKA..GREEK LUNATE EPSILON SYMBOL
+03F6 ; N # Sm GREEK REVERSED LUNATE EPSILON SYMBOL
+03F7..03FF ; N # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
+0400 ; N # Lu CYRILLIC CAPITAL LETTER IE WITH GRAVE
+0401 ; A # Lu CYRILLIC CAPITAL LETTER IO
+0402..040F ; N # Lu [14] CYRILLIC CAPITAL LETTER DJE..CYRILLIC CAPITAL LETTER DZHE
+0410..044F ; A # L& [64] CYRILLIC CAPITAL LETTER A..CYRILLIC SMALL LETTER YA
+0450 ; N # Ll CYRILLIC SMALL LETTER IE WITH GRAVE
+0451 ; A # Ll CYRILLIC SMALL LETTER IO
+0452..0481 ; N # L& [48] CYRILLIC SMALL LETTER DJE..CYRILLIC SMALL LETTER KOPPA
+0482 ; N # So CYRILLIC THOUSANDS SIGN
+0483..0487 ; N # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
+0488..0489 ; N # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
+048A..04FF ; N # L& [118] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER HA WITH STROKE
+0500..052F ; N # L& [48] CYRILLIC CAPITAL LETTER KOMI DE..CYRILLIC SMALL LETTER EL WITH DESCENDER
+0531..0556 ; N # Lu [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
+0559 ; N # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
+055A..055F ; N # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
+0560..0588 ; N # Ll [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE
+0589 ; N # Po ARMENIAN FULL STOP
+058A ; N # Pd ARMENIAN HYPHEN
+058D..058E ; N # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN
+058F ; N # Sc ARMENIAN DRAM SIGN
+0591..05BD ; N # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
+05BE ; N # Pd HEBREW PUNCTUATION MAQAF
+05BF ; N # Mn HEBREW POINT RAFE
+05C0 ; N # Po HEBREW PUNCTUATION PASEQ
+05C1..05C2 ; N # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
+05C3 ; N # Po HEBREW PUNCTUATION SOF PASUQ
+05C4..05C5 ; N # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
+05C6 ; N # Po HEBREW PUNCTUATION NUN HAFUKHA
+05C7 ; N # Mn HEBREW POINT QAMATS QATAN
+05D0..05EA ; N # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV
+05EF..05F2 ; N # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD
+05F3..05F4 ; N # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
+0600..0605 ; N # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE
+0606..0608 ; N # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY
+0609..060A ; N # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN
+060B ; N # Sc AFGHANI SIGN
+060C..060D ; N # Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR
+060E..060F ; N # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA
+0610..061A ; N # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
+061B ; N # Po ARABIC SEMICOLON
+061C ; N # Cf ARABIC LETTER MARK
+061D..061F ; N # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK
+0620..063F ; N # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
+0640 ; N # Lm ARABIC TATWEEL
+0641..064A ; N # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH
+064B..065F ; N # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW
+0660..0669 ; N # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
+066A..066D ; N # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
+066E..066F ; N # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF
+0670 ; N # Mn ARABIC LETTER SUPERSCRIPT ALEF
+0671..06D3 ; N # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
+06D4 ; N # Po ARABIC FULL STOP
+06D5 ; N # Lo ARABIC LETTER AE
+06D6..06DC ; N # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
+06DD ; N # Cf ARABIC END OF AYAH
+06DE ; N # So ARABIC START OF RUB EL HIZB
+06DF..06E4 ; N # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA
+06E5..06E6 ; N # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
+06E7..06E8 ; N # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
+06E9 ; N # So ARABIC PLACE OF SAJDAH
+06EA..06ED ; N # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM
+06EE..06EF ; N # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V
+06F0..06F9 ; N # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
+06FA..06FC ; N # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW
+06FD..06FE ; N # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN
+06FF ; N # Lo ARABIC LETTER HEH WITH INVERTED V
+0700..070D ; N # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
+070F ; N # Cf SYRIAC ABBREVIATION MARK
+0710 ; N # Lo SYRIAC LETTER ALAPH
+0711 ; N # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
+0712..072F ; N # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH
+0730..074A ; N # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
+074D..074F ; N # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE
+0750..077F ; N # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE
+0780..07A5 ; N # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU
+07A6..07B0 ; N # Mn [11] THAANA ABAFILI..THAANA SUKUN
+07B1 ; N # Lo THAANA LETTER NAA
+07C0..07C9 ; N # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE
+07CA..07EA ; N # Lo [33] NKO LETTER A..NKO LETTER JONA RA
+07EB..07F3 ; N # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
+07F4..07F5 ; N # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
+07F6 ; N # So NKO SYMBOL OO DENNEN
+07F7..07F9 ; N # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK
+07FA ; N # Lm NKO LAJANYALAN
+07FD ; N # Mn NKO DANTAYALAN
+07FE..07FF ; N # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN
+0800..0815 ; N # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
+0816..0819 ; N # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
+081A ; N # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT
+081B..0823 ; N # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
+0824 ; N # Lm SAMARITAN MODIFIER LETTER SHORT A
+0825..0827 ; N # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
+0828 ; N # Lm SAMARITAN MODIFIER LETTER I
+0829..082D ; N # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
+0830..083E ; N # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
+0840..0858 ; N # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN
+0859..085B ; N # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
+085E ; N # Po MANDAIC PUNCTUATION
+0860..086A ; N # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
+0870..0887 ; N # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
+0888 ; N # Sk ARABIC RAISED ROUND DOT
+0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
+0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
+0898..089F ; N # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
+08C9 ; N # Lm ARABIC SMALL FARSI YEH
+08CA..08E1 ; N # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
+08E2 ; N # Cf ARABIC DISPUTED END OF AYAH
+08E3..08FF ; N # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA
+0900..0902 ; N # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
+0903 ; N # Mc DEVANAGARI SIGN VISARGA
+0904..0939 ; N # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
+093A ; N # Mn DEVANAGARI VOWEL SIGN OE
+093B ; N # Mc DEVANAGARI VOWEL SIGN OOE
+093C ; N # Mn DEVANAGARI SIGN NUKTA
+093D ; N # Lo DEVANAGARI SIGN AVAGRAHA
+093E..0940 ; N # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
+0941..0948 ; N # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
+0949..094C ; N # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
+094D ; N # Mn DEVANAGARI SIGN VIRAMA
+094E..094F ; N # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
+0950 ; N # Lo DEVANAGARI OM
+0951..0957 ; N # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE
+0958..0961 ; N # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
+0962..0963 ; N # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
+0964..0965 ; N # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
+0966..096F ; N # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
+0970 ; N # Po DEVANAGARI ABBREVIATION SIGN
+0971 ; N # Lm DEVANAGARI SIGN HIGH SPACING DOT
+0972..097F ; N # Lo [14] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER BBA
+0980 ; N # Lo BENGALI ANJI
+0981 ; N # Mn BENGALI SIGN CANDRABINDU
+0982..0983 ; N # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
+0985..098C ; N # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
+098F..0990 ; N # Lo [2] BENGALI LETTER E..BENGALI LETTER AI
+0993..09A8 ; N # Lo [22] BENGALI LETTER O..BENGALI LETTER NA
+09AA..09B0 ; N # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA
+09B2 ; N # Lo BENGALI LETTER LA
+09B6..09B9 ; N # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA
+09BC ; N # Mn BENGALI SIGN NUKTA
+09BD ; N # Lo BENGALI SIGN AVAGRAHA
+09BE..09C0 ; N # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II
+09C1..09C4 ; N # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
+09C7..09C8 ; N # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
+09CB..09CC ; N # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
+09CD ; N # Mn BENGALI SIGN VIRAMA
+09CE ; N # Lo BENGALI LETTER KHANDA TA
+09D7 ; N # Mc BENGALI AU LENGTH MARK
+09DC..09DD ; N # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA
+09DF..09E1 ; N # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL
+09E2..09E3 ; N # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
+09E6..09EF ; N # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE
+09F0..09F1 ; N # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL
+09F2..09F3 ; N # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN
+09F4..09F9 ; N # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN
+09FA ; N # So BENGALI ISSHAR
+09FB ; N # Sc BENGALI GANDA MARK
+09FC ; N # Lo BENGALI LETTER VEDIC ANUSVARA
+09FD ; N # Po BENGALI ABBREVIATION SIGN
+09FE ; N # Mn BENGALI SANDHI MARK
+0A01..0A02 ; N # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
+0A03 ; N # Mc GURMUKHI SIGN VISARGA
+0A05..0A0A ; N # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU
+0A0F..0A10 ; N # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI
+0A13..0A28 ; N # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA
+0A2A..0A30 ; N # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA
+0A32..0A33 ; N # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA
+0A35..0A36 ; N # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA
+0A38..0A39 ; N # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA
+0A3C ; N # Mn GURMUKHI SIGN NUKTA
+0A3E..0A40 ; N # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
+0A41..0A42 ; N # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
+0A47..0A48 ; N # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
+0A4B..0A4D ; N # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
+0A51 ; N # Mn GURMUKHI SIGN UDAAT
+0A59..0A5C ; N # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA
+0A5E ; N # Lo GURMUKHI LETTER FA
+0A66..0A6F ; N # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE
+0A70..0A71 ; N # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
+0A72..0A74 ; N # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR
+0A75 ; N # Mn GURMUKHI SIGN YAKASH
+0A76 ; N # Po GURMUKHI ABBREVIATION SIGN
+0A81..0A82 ; N # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
+0A83 ; N # Mc GUJARATI SIGN VISARGA
+0A85..0A8D ; N # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
+0A8F..0A91 ; N # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
+0A93..0AA8 ; N # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA
+0AAA..0AB0 ; N # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA
+0AB2..0AB3 ; N # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA
+0AB5..0AB9 ; N # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA
+0ABC ; N # Mn GUJARATI SIGN NUKTA
+0ABD ; N # Lo GUJARATI SIGN AVAGRAHA
+0ABE..0AC0 ; N # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
+0AC1..0AC5 ; N # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
+0AC7..0AC8 ; N # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
+0AC9 ; N # Mc GUJARATI VOWEL SIGN CANDRA O
+0ACB..0ACC ; N # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
+0ACD ; N # Mn GUJARATI SIGN VIRAMA
+0AD0 ; N # Lo GUJARATI OM
+0AE0..0AE1 ; N # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL
+0AE2..0AE3 ; N # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0AE6..0AEF ; N # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
+0AF0 ; N # Po GUJARATI ABBREVIATION SIGN
+0AF1 ; N # Sc GUJARATI RUPEE SIGN
+0AF9 ; N # Lo GUJARATI LETTER ZHA
+0AFA..0AFF ; N # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE
+0B01 ; N # Mn ORIYA SIGN CANDRABINDU
+0B02..0B03 ; N # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
+0B05..0B0C ; N # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L
+0B0F..0B10 ; N # Lo [2] ORIYA LETTER E..ORIYA LETTER AI
+0B13..0B28 ; N # Lo [22] ORIYA LETTER O..ORIYA LETTER NA
+0B2A..0B30 ; N # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA
+0B32..0B33 ; N # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA
+0B35..0B39 ; N # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA
+0B3C ; N # Mn ORIYA SIGN NUKTA
+0B3D ; N # Lo ORIYA SIGN AVAGRAHA
+0B3E ; N # Mc ORIYA VOWEL SIGN AA
+0B3F ; N # Mn ORIYA VOWEL SIGN I
+0B40 ; N # Mc ORIYA VOWEL SIGN II
+0B41..0B44 ; N # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
+0B47..0B48 ; N # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
+0B4B..0B4C ; N # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
+0B4D ; N # Mn ORIYA SIGN VIRAMA
+0B55..0B56 ; N # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK
+0B57 ; N # Mc ORIYA AU LENGTH MARK
+0B5C..0B5D ; N # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA
+0B5F..0B61 ; N # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL
+0B62..0B63 ; N # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
+0B66..0B6F ; N # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE
+0B70 ; N # So ORIYA ISSHAR
+0B71 ; N # Lo ORIYA LETTER WA
+0B72..0B77 ; N # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS
+0B82 ; N # Mn TAMIL SIGN ANUSVARA
+0B83 ; N # Lo TAMIL SIGN VISARGA
+0B85..0B8A ; N # Lo [6] TAMIL LETTER A..TAMIL LETTER UU
+0B8E..0B90 ; N # Lo [3] TAMIL LETTER E..TAMIL LETTER AI
+0B92..0B95 ; N # Lo [4] TAMIL LETTER O..TAMIL LETTER KA
+0B99..0B9A ; N # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA
+0B9C ; N # Lo TAMIL LETTER JA
+0B9E..0B9F ; N # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA
+0BA3..0BA4 ; N # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA
+0BA8..0BAA ; N # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA
+0BAE..0BB9 ; N # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA
+0BBE..0BBF ; N # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I
+0BC0 ; N # Mn TAMIL VOWEL SIGN II
+0BC1..0BC2 ; N # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
+0BC6..0BC8 ; N # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
+0BCA..0BCC ; N # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
+0BCD ; N # Mn TAMIL SIGN VIRAMA
+0BD0 ; N # Lo TAMIL OM
+0BD7 ; N # Mc TAMIL AU LENGTH MARK
+0BE6..0BEF ; N # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE
+0BF0..0BF2 ; N # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND
+0BF3..0BF8 ; N # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN
+0BF9 ; N # Sc TAMIL RUPEE SIGN
+0BFA ; N # So TAMIL NUMBER SIGN
+0C00 ; N # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
+0C01..0C03 ; N # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
+0C04 ; N # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE
+0C05..0C0C ; N # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L
+0C0E..0C10 ; N # Lo [3] TELUGU LETTER E..TELUGU LETTER AI
+0C12..0C28 ; N # Lo [23] TELUGU LETTER O..TELUGU LETTER NA
+0C2A..0C39 ; N # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
+0C3C ; N # Mn TELUGU SIGN NUKTA
+0C3D ; N # Lo TELUGU SIGN AVAGRAHA
+0C3E..0C40 ; N # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
+0C41..0C44 ; N # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
+0C46..0C48 ; N # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
+0C4A..0C4D ; N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
+0C55..0C56 ; N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
+0C58..0C5A ; N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
+0C5D ; N # Lo TELUGU LETTER NAKAARA POLLU
+0C60..0C61 ; N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
+0C62..0C63 ; N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
+0C66..0C6F ; N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
+0C77 ; N # Po TELUGU SIGN SIDDHAM
+0C78..0C7E ; N # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
+0C7F ; N # So TELUGU SIGN TUUMU
+0C80 ; N # Lo KANNADA SIGN SPACING CANDRABINDU
+0C81 ; N # Mn KANNADA SIGN CANDRABINDU
+0C82..0C83 ; N # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
+0C84 ; N # Po KANNADA SIGN SIDDHAM
+0C85..0C8C ; N # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
+0C8E..0C90 ; N # Lo [3] KANNADA LETTER E..KANNADA LETTER AI
+0C92..0CA8 ; N # Lo [23] KANNADA LETTER O..KANNADA LETTER NA
+0CAA..0CB3 ; N # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
+0CB5..0CB9 ; N # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
+0CBC ; N # Mn KANNADA SIGN NUKTA
+0CBD ; N # Lo KANNADA SIGN AVAGRAHA
+0CBE ; N # Mc KANNADA VOWEL SIGN AA
+0CBF ; N # Mn KANNADA VOWEL SIGN I
+0CC0..0CC4 ; N # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR
+0CC6 ; N # Mn KANNADA VOWEL SIGN E
+0CC7..0CC8 ; N # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
+0CCA..0CCB ; N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
+0CCC..0CCD ; N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
+0CD5..0CD6 ; N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
+0CDD..0CDE ; N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CE0..0CE1 ; N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
+0CE2..0CE3 ; N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
+0CE6..0CEF ; N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
+0CF1..0CF2 ; N # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
+0CF3 ; N # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
+0D00..0D01 ; N # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
+0D02..0D03 ; N # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
+0D04..0D0C ; N # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
+0D0E..0D10 ; N # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI
+0D12..0D3A ; N # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA
+0D3B..0D3C ; N # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
+0D3D ; N # Lo MALAYALAM SIGN AVAGRAHA
+0D3E..0D40 ; N # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
+0D41..0D44 ; N # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
+0D46..0D48 ; N # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
+0D4A..0D4C ; N # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
+0D4D ; N # Mn MALAYALAM SIGN VIRAMA
+0D4E ; N # Lo MALAYALAM LETTER DOT REPH
+0D4F ; N # So MALAYALAM SIGN PARA
+0D54..0D56 ; N # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL
+0D57 ; N # Mc MALAYALAM AU LENGTH MARK
+0D58..0D5E ; N # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH
+0D5F..0D61 ; N # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL
+0D62..0D63 ; N # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
+0D66..0D6F ; N # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
+0D70..0D78 ; N # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS
+0D79 ; N # So MALAYALAM DATE MARK
+0D7A..0D7F ; N # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
+0D81 ; N # Mn SINHALA SIGN CANDRABINDU
+0D82..0D83 ; N # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
+0D85..0D96 ; N # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
+0D9A..0DB1 ; N # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
+0DB3..0DBB ; N # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
+0DBD ; N # Lo SINHALA LETTER DANTAJA LAYANNA
+0DC0..0DC6 ; N # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
+0DCA ; N # Mn SINHALA SIGN AL-LAKUNA
+0DCF..0DD1 ; N # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
+0DD2..0DD4 ; N # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
+0DD6 ; N # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
+0DD8..0DDF ; N # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
+0DE6..0DEF ; N # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE
+0DF2..0DF3 ; N # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
+0DF4 ; N # Po SINHALA PUNCTUATION KUNDDALIYA
+0E01..0E30 ; N # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A
+0E31 ; N # Mn THAI CHARACTER MAI HAN-AKAT
+0E32..0E33 ; N # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM
+0E34..0E3A ; N # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
+0E3F ; N # Sc THAI CURRENCY SYMBOL BAHT
+0E40..0E45 ; N # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO
+0E46 ; N # Lm THAI CHARACTER MAIYAMOK
+0E47..0E4E ; N # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN
+0E4F ; N # Po THAI CHARACTER FONGMAN
+0E50..0E59 ; N # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
+0E5A..0E5B ; N # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
+0E81..0E82 ; N # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG
+0E84 ; N # Lo LAO LETTER KHO TAM
+0E86..0E8A ; N # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM
+0E8C..0EA3 ; N # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING
+0EA5 ; N # Lo LAO LETTER LO LOOT
+0EA7..0EB0 ; N # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A
+0EB1 ; N # Mn LAO VOWEL SIGN MAI KAN
+0EB2..0EB3 ; N # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM
+0EB4..0EBC ; N # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO
+0EBD ; N # Lo LAO SEMIVOWEL SIGN NYO
+0EC0..0EC4 ; N # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
+0EC6 ; N # Lm LAO KO LA
+0EC8..0ECE ; N # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN
+0ED0..0ED9 ; N # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
+0EDC..0EDF ; N # Lo [4] LAO HO NO..LAO LETTER KHMU NYO
+0F00 ; N # Lo TIBETAN SYLLABLE OM
+0F01..0F03 ; N # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA
+0F04..0F12 ; N # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD
+0F13 ; N # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN
+0F14 ; N # Po TIBETAN MARK GTER TSHEG
+0F15..0F17 ; N # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS
+0F18..0F19 ; N # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
+0F1A..0F1F ; N # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG
+0F20..0F29 ; N # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
+0F2A..0F33 ; N # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO
+0F34 ; N # So TIBETAN MARK BSDUS RTAGS
+0F35 ; N # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
+0F36 ; N # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN
+0F37 ; N # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
+0F38 ; N # So TIBETAN MARK CHE MGO
+0F39 ; N # Mn TIBETAN MARK TSA -PHRU
+0F3A ; N # Ps TIBETAN MARK GUG RTAGS GYON
+0F3B ; N # Pe TIBETAN MARK GUG RTAGS GYAS
+0F3C ; N # Ps TIBETAN MARK ANG KHANG GYON
+0F3D ; N # Pe TIBETAN MARK ANG KHANG GYAS
+0F3E..0F3F ; N # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES
+0F40..0F47 ; N # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA
+0F49..0F6C ; N # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA
+0F71..0F7E ; N # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
+0F7F ; N # Mc TIBETAN SIGN RNAM BCAD
+0F80..0F84 ; N # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA
+0F85 ; N # Po TIBETAN MARK PALUTA
+0F86..0F87 ; N # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
+0F88..0F8C ; N # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN
+0F8D..0F97 ; N # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA
+0F99..0FBC ; N # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
+0FBE..0FC5 ; N # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
+0FC6 ; N # Mn TIBETAN SYMBOL PADMA GDAN
+0FC7..0FCC ; N # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
+0FCE..0FCF ; N # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM
+0FD0..0FD4 ; N # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA
+0FD5..0FD8 ; N # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS
+0FD9..0FDA ; N # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS
+1000..102A ; N # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU
+102B..102C ; N # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA
+102D..1030 ; N # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
+1031 ; N # Mc MYANMAR VOWEL SIGN E
+1032..1037 ; N # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW
+1038 ; N # Mc MYANMAR SIGN VISARGA
+1039..103A ; N # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
+103B..103C ; N # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
+103D..103E ; N # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
+103F ; N # Lo MYANMAR LETTER GREAT SA
+1040..1049 ; N # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE
+104A..104F ; N # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE
+1050..1055 ; N # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL
+1056..1057 ; N # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
+1058..1059 ; N # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
+105A..105D ; N # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE
+105E..1060 ; N # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
+1061 ; N # Lo MYANMAR LETTER SGAW KAREN SHA
+1062..1064 ; N # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO
+1065..1066 ; N # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA
+1067..106D ; N # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5
+106E..1070 ; N # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA
+1071..1074 ; N # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
+1075..1081 ; N # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA
+1082 ; N # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
+1083..1084 ; N # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
+1085..1086 ; N # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
+1087..108C ; N # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
+108D ; N # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
+108E ; N # Lo MYANMAR LETTER RUMAI PALAUNG FA
+108F ; N # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
+1090..1099 ; N # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE
+109A..109C ; N # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
+109D ; N # Mn MYANMAR VOWEL SIGN AITON AI
+109E..109F ; N # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION
+10A0..10C5 ; N # Lu [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
+10C7 ; N # Lu GEORGIAN CAPITAL LETTER YN
+10CD ; N # Lu GEORGIAN CAPITAL LETTER AEN
+10D0..10FA ; N # Ll [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
+10FB ; N # Po GEORGIAN PARAGRAPH SEPARATOR
+10FC ; N # Lm MODIFIER LETTER GEORGIAN NAR
+10FD..10FF ; N # Ll [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
+1100..115F ; W # Lo [96] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG FILLER
+1160..11FF ; N # Lo [160] HANGUL JUNGSEONG FILLER..HANGUL JONGSEONG SSANGNIEUN
+1200..1248 ; N # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
+124A..124D ; N # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
+1250..1256 ; N # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
+1258 ; N # Lo ETHIOPIC SYLLABLE QHWA
+125A..125D ; N # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
+1260..1288 ; N # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
+128A..128D ; N # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
+1290..12B0 ; N # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
+12B2..12B5 ; N # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
+12B8..12BE ; N # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
+12C0 ; N # Lo ETHIOPIC SYLLABLE KXWA
+12C2..12C5 ; N # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
+12C8..12D6 ; N # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
+12D8..1310 ; N # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
+1312..1315 ; N # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
+1318..135A ; N # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
+135D..135F ; N # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK
+1360..1368 ; N # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR
+1369..137C ; N # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND
+1380..138F ; N # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
+1390..1399 ; N # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT
+13A0..13F5 ; N # Lu [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
+13F8..13FD ; N # Ll [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
+1400 ; N # Pd CANADIAN SYLLABICS HYPHEN
+1401..166C ; N # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
+166D ; N # So CANADIAN SYLLABICS CHI SIGN
+166E ; N # Po CANADIAN SYLLABICS FULL STOP
+166F..167F ; N # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
+1680 ; N # Zs OGHAM SPACE MARK
+1681..169A ; N # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
+169B ; N # Ps OGHAM FEATHER MARK
+169C ; N # Pe OGHAM REVERSED FEATHER MARK
+16A0..16EA ; N # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
+16EB..16ED ; N # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
+16EE..16F0 ; N # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
+16F1..16F8 ; N # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC
+1700..1711 ; N # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA
+1712..1714 ; N # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
+1715 ; N # Mc TAGALOG SIGN PAMUDPOD
+171F ; N # Lo TAGALOG LETTER ARCHAIC RA
+1720..1731 ; N # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA
+1732..1733 ; N # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
+1734 ; N # Mc HANUNOO SIGN PAMUDPOD
+1735..1736 ; N # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
+1740..1751 ; N # Lo [18] BUHID LETTER A..BUHID LETTER HA
+1752..1753 ; N # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
+1760..176C ; N # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA
+176E..1770 ; N # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA
+1772..1773 ; N # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
+1780..17B3 ; N # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU
+17B4..17B5 ; N # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
+17B6 ; N # Mc KHMER VOWEL SIGN AA
+17B7..17BD ; N # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
+17BE..17C5 ; N # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
+17C6 ; N # Mn KHMER SIGN NIKAHIT
+17C7..17C8 ; N # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
+17C9..17D3 ; N # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
+17D4..17D6 ; N # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH
+17D7 ; N # Lm KHMER SIGN LEK TOO
+17D8..17DA ; N # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT
+17DB ; N # Sc KHMER CURRENCY SYMBOL RIEL
+17DC ; N # Lo KHMER SIGN AVAKRAHASANYA
+17DD ; N # Mn KHMER SIGN ATTHACAN
+17E0..17E9 ; N # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
+17F0..17F9 ; N # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
+1800..1805 ; N # Po [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS
+1806 ; N # Pd MONGOLIAN TODO SOFT HYPHEN
+1807..180A ; N # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU
+180B..180D ; N # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+180E ; N # Cf MONGOLIAN VOWEL SEPARATOR
+180F ; N # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR
+1810..1819 ; N # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
+1820..1842 ; N # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI
+1843 ; N # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
+1844..1878 ; N # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS
+1880..1884 ; N # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA
+1885..1886 ; N # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
+1887..18A8 ; N # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA
+18A9 ; N # Mn MONGOLIAN LETTER ALI GALI DAGALGA
+18AA ; N # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
+18B0..18F5 ; N # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
+1900..191E ; N # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA
+1920..1922 ; N # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
+1923..1926 ; N # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
+1927..1928 ; N # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
+1929..192B ; N # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
+1930..1931 ; N # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
+1932 ; N # Mn LIMBU SMALL LETTER ANUSVARA
+1933..1938 ; N # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
+1939..193B ; N # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
+1940 ; N # So LIMBU SIGN LOO
+1944..1945 ; N # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
+1946..194F ; N # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
+1950..196D ; N # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI
+1970..1974 ; N # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6
+1980..19AB ; N # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA
+19B0..19C9 ; N # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2
+19D0..19D9 ; N # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
+19DA ; N # No NEW TAI LUE THAM DIGIT ONE
+19DE..19DF ; N # So [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV
+19E0..19FF ; N # So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC
+1A00..1A16 ; N # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
+1A17..1A18 ; N # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
+1A19..1A1A ; N # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O
+1A1B ; N # Mn BUGINESE VOWEL SIGN AE
+1A1E..1A1F ; N # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION
+1A20..1A54 ; N # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA
+1A55 ; N # Mc TAI THAM CONSONANT SIGN MEDIAL RA
+1A56 ; N # Mn TAI THAM CONSONANT SIGN MEDIAL LA
+1A57 ; N # Mc TAI THAM CONSONANT SIGN LA TANG LAI
+1A58..1A5E ; N # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
+1A60 ; N # Mn TAI THAM SIGN SAKOT
+1A61 ; N # Mc TAI THAM VOWEL SIGN A
+1A62 ; N # Mn TAI THAM VOWEL SIGN MAI SAT
+1A63..1A64 ; N # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
+1A65..1A6C ; N # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
+1A6D..1A72 ; N # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
+1A73..1A7C ; N # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
+1A7F ; N # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
+1A80..1A89 ; N # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
+1A90..1A99 ; N # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
+1AA0..1AA6 ; N # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
+1AA7 ; N # Lm TAI THAM SIGN MAI YAMOK
+1AA8..1AAD ; N # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
+1AB0..1ABD ; N # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
+1ABE ; N # Me COMBINING PARENTHESES OVERLAY
+1ABF..1ACE ; N # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T
+1B00..1B03 ; N # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
+1B04 ; N # Mc BALINESE SIGN BISAH
+1B05..1B33 ; N # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
+1B34 ; N # Mn BALINESE SIGN REREKAN
+1B35 ; N # Mc BALINESE VOWEL SIGN TEDUNG
+1B36..1B3A ; N # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
+1B3B ; N # Mc BALINESE VOWEL SIGN RA REPA TEDUNG
+1B3C ; N # Mn BALINESE VOWEL SIGN LA LENGA
+1B3D..1B41 ; N # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
+1B42 ; N # Mn BALINESE VOWEL SIGN PEPET
+1B43..1B44 ; N # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG
+1B45..1B4C ; N # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA
+1B50..1B59 ; N # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
+1B5A..1B60 ; N # Po [7] BALINESE PANTI..BALINESE PAMENENG
+1B61..1B6A ; N # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE
+1B6B..1B73 ; N # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
+1B74..1B7C ; N # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
+1B7D..1B7E ; N # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG
+1B80..1B81 ; N # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
+1B82 ; N # Mc SUNDANESE SIGN PANGWISAD
+1B83..1BA0 ; N # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA
+1BA1 ; N # Mc SUNDANESE CONSONANT SIGN PAMINGKAL
+1BA2..1BA5 ; N # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
+1BA6..1BA7 ; N # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
+1BA8..1BA9 ; N # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
+1BAA ; N # Mc SUNDANESE SIGN PAMAAEH
+1BAB..1BAD ; N # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA
+1BAE..1BAF ; N # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA
+1BB0..1BB9 ; N # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
+1BBA..1BBF ; N # Lo [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M
+1BC0..1BE5 ; N # Lo [38] BATAK LETTER A..BATAK LETTER U
+1BE6 ; N # Mn BATAK SIGN TOMPI
+1BE7 ; N # Mc BATAK VOWEL SIGN E
+1BE8..1BE9 ; N # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
+1BEA..1BEC ; N # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
+1BED ; N # Mn BATAK VOWEL SIGN KARO O
+1BEE ; N # Mc BATAK VOWEL SIGN U
+1BEF..1BF1 ; N # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
+1BF2..1BF3 ; N # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN
+1BFC..1BFF ; N # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT
+1C00..1C23 ; N # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A
+1C24..1C2B ; N # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
+1C2C..1C33 ; N # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
+1C34..1C35 ; N # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
+1C36..1C37 ; N # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
+1C3B..1C3F ; N # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
+1C40..1C49 ; N # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
+1C4D..1C4F ; N # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
+1C50..1C59 ; N # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
+1C5A..1C77 ; N # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
+1C78..1C7D ; N # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
+1C7E..1C7F ; N # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
+1C80..1C88 ; N # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C90..1CBA ; N # Lu [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
+1CBD..1CBF ; N # Lu [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
+1CC0..1CC7 ; N # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
+1CD0..1CD2 ; N # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
+1CD3 ; N # Po VEDIC SIGN NIHSHVASA
+1CD4..1CE0 ; N # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
+1CE1 ; N # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
+1CE2..1CE8 ; N # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
+1CE9..1CEC ; N # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
+1CED ; N # Mn VEDIC SIGN TIRYAK
+1CEE..1CF3 ; N # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA
+1CF4 ; N # Mn VEDIC TONE CANDRA ABOVE
+1CF5..1CF6 ; N # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA
+1CF7 ; N # Mc VEDIC SIGN ATIKRAMA
+1CF8..1CF9 ; N # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
+1CFA ; N # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA
+1D00..1D2B ; N # Ll [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
+1D2C..1D6A ; N # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
+1D6B..1D77 ; N # Ll [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
+1D78 ; N # Lm MODIFIER LETTER CYRILLIC EN
+1D79..1D7F ; N # Ll [7] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER UPSILON WITH STROKE
+1D80..1D9A ; N # Ll [27] LATIN SMALL LETTER B WITH PALATAL HOOK..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
+1D9B..1DBF ; N # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
+1DC0..1DFF ; N # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+1E00..1EFF ; N # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
+1F00..1F15 ; N # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
+1F18..1F1D ; N # Lu [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F20..1F45 ; N # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
+1F48..1F4D ; N # Lu [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F50..1F57 ; N # Ll [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F59 ; N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5B ; N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5D ; N # Lu GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5F..1F7D ; N # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA
+1F80..1FB4 ; N # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB6..1FBC ; N # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBD ; N # Sk GREEK KORONIS
+1FBE ; N # Ll GREEK PROSGEGRAMMENI
+1FBF..1FC1 ; N # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
+1FC2..1FC4 ; N # Ll [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC6..1FCC ; N # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FCD..1FCF ; N # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
+1FD0..1FD3 ; N # Ll [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6..1FDB ; N # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA
+1FDD..1FDF ; N # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
+1FE0..1FEC ; N # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
+1FED..1FEF ; N # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
+1FF2..1FF4 ; N # Ll [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF6..1FFC ; N # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+1FFD..1FFE ; N # Sk [2] GREEK OXIA..GREEK DASIA
+2000..200A ; N # Zs [11] EN QUAD..HAIR SPACE
+200B..200F ; N # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK
+2010 ; A # Pd HYPHEN
+2011..2012 ; N # Pd [2] NON-BREAKING HYPHEN..FIGURE DASH
+2013..2015 ; A # Pd [3] EN DASH..HORIZONTAL BAR
+2016 ; A # Po DOUBLE VERTICAL LINE
+2017 ; N # Po DOUBLE LOW LINE
+2018 ; A # Pi LEFT SINGLE QUOTATION MARK
+2019 ; A # Pf RIGHT SINGLE QUOTATION MARK
+201A ; N # Ps SINGLE LOW-9 QUOTATION MARK
+201B ; N # Pi SINGLE HIGH-REVERSED-9 QUOTATION MARK
+201C ; A # Pi LEFT DOUBLE QUOTATION MARK
+201D ; A # Pf RIGHT DOUBLE QUOTATION MARK
+201E ; N # Ps DOUBLE LOW-9 QUOTATION MARK
+201F ; N # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK
+2020..2022 ; A # Po [3] DAGGER..BULLET
+2023 ; N # Po TRIANGULAR BULLET
+2024..2027 ; A # Po [4] ONE DOT LEADER..HYPHENATION POINT
+2028 ; N # Zl LINE SEPARATOR
+2029 ; N # Zp PARAGRAPH SEPARATOR
+202A..202E ; N # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
+202F ; N # Zs NARROW NO-BREAK SPACE
+2030 ; A # Po PER MILLE SIGN
+2031 ; N # Po PER TEN THOUSAND SIGN
+2032..2033 ; A # Po [2] PRIME..DOUBLE PRIME
+2034 ; N # Po TRIPLE PRIME
+2035 ; A # Po REVERSED PRIME
+2036..2038 ; N # Po [3] REVERSED DOUBLE PRIME..CARET
+2039 ; N # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+203A ; N # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+203B ; A # Po REFERENCE MARK
+203C..203D ; N # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
+203E ; A # Po OVERLINE
+203F..2040 ; N # Pc [2] UNDERTIE..CHARACTER TIE
+2041..2043 ; N # Po [3] CARET INSERTION POINT..HYPHEN BULLET
+2044 ; N # Sm FRACTION SLASH
+2045 ; N # Ps LEFT SQUARE BRACKET WITH QUILL
+2046 ; N # Pe RIGHT SQUARE BRACKET WITH QUILL
+2047..2051 ; N # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY
+2052 ; N # Sm COMMERCIAL MINUS SIGN
+2053 ; N # Po SWUNG DASH
+2054 ; N # Pc INVERTED UNDERTIE
+2055..205E ; N # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS
+205F ; N # Zs MEDIUM MATHEMATICAL SPACE
+2060..2064 ; N # Cf [5] WORD JOINER..INVISIBLE PLUS
+2066..206F ; N # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES
+2070 ; N # No SUPERSCRIPT ZERO
+2071 ; N # Lm SUPERSCRIPT LATIN SMALL LETTER I
+2074 ; A # No SUPERSCRIPT FOUR
+2075..2079 ; N # No [5] SUPERSCRIPT FIVE..SUPERSCRIPT NINE
+207A..207C ; N # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
+207D ; N # Ps SUPERSCRIPT LEFT PARENTHESIS
+207E ; N # Pe SUPERSCRIPT RIGHT PARENTHESIS
+207F ; A # Lm SUPERSCRIPT LATIN SMALL LETTER N
+2080 ; N # No SUBSCRIPT ZERO
+2081..2084 ; A # No [4] SUBSCRIPT ONE..SUBSCRIPT FOUR
+2085..2089 ; N # No [5] SUBSCRIPT FIVE..SUBSCRIPT NINE
+208A..208C ; N # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
+208D ; N # Ps SUBSCRIPT LEFT PARENTHESIS
+208E ; N # Pe SUBSCRIPT RIGHT PARENTHESIS
+2090..209C ; N # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
+20A0..20A8 ; N # Sc [9] EURO-CURRENCY SIGN..RUPEE SIGN
+20A9 ; H # Sc WON SIGN
+20AA..20AB ; N # Sc [2] NEW SHEQEL SIGN..DONG SIGN
+20AC ; A # Sc EURO SIGN
+20AD..20C0 ; N # Sc [20] KIP SIGN..SOM SIGN
+20D0..20DC ; N # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
+20DD..20E0 ; N # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
+20E1 ; N # Mn COMBINING LEFT RIGHT ARROW ABOVE
+20E2..20E4 ; N # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
+20E5..20F0 ; N # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
+2100..2101 ; N # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT
+2102 ; N # Lu DOUBLE-STRUCK CAPITAL C
+2103 ; A # So DEGREE CELSIUS
+2104 ; N # So CENTRE LINE SYMBOL
+2105 ; A # So CARE OF
+2106 ; N # So CADA UNA
+2107 ; N # Lu EULER CONSTANT
+2108 ; N # So SCRUPLE
+2109 ; A # So DEGREE FAHRENHEIT
+210A..2112 ; N # L& [9] SCRIPT SMALL G..SCRIPT CAPITAL L
+2113 ; A # Ll SCRIPT SMALL L
+2114 ; N # So L B BAR SYMBOL
+2115 ; N # Lu DOUBLE-STRUCK CAPITAL N
+2116 ; A # So NUMERO SIGN
+2117 ; N # So SOUND RECORDING COPYRIGHT
+2118 ; N # Sm SCRIPT CAPITAL P
+2119..211D ; N # Lu [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
+211E..2120 ; N # So [3] PRESCRIPTION TAKE..SERVICE MARK
+2121..2122 ; A # So [2] TELEPHONE SIGN..TRADE MARK SIGN
+2123 ; N # So VERSICLE
+2124 ; N # Lu DOUBLE-STRUCK CAPITAL Z
+2125 ; N # So OUNCE SIGN
+2126 ; A # Lu OHM SIGN
+2127 ; N # So INVERTED OHM SIGN
+2128 ; N # Lu BLACK-LETTER CAPITAL Z
+2129 ; N # So TURNED GREEK SMALL LETTER IOTA
+212A ; N # Lu KELVIN SIGN
+212B ; A # Lu ANGSTROM SIGN
+212C..212D ; N # Lu [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C
+212E ; N # So ESTIMATED SYMBOL
+212F..2134 ; N # L& [6] SCRIPT SMALL E..SCRIPT SMALL O
+2135..2138 ; N # Lo [4] ALEF SYMBOL..DALET SYMBOL
+2139 ; N # Ll INFORMATION SOURCE
+213A..213B ; N # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN
+213C..213F ; N # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
+2140..2144 ; N # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y
+2145..2149 ; N # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
+214A ; N # So PROPERTY LINE
+214B ; N # Sm TURNED AMPERSAND
+214C..214D ; N # So [2] PER SIGN..AKTIESELSKAB
+214E ; N # Ll TURNED SMALL F
+214F ; N # So SYMBOL FOR SAMARITAN SOURCE
+2150..2152 ; N # No [3] VULGAR FRACTION ONE SEVENTH..VULGAR FRACTION ONE TENTH
+2153..2154 ; A # No [2] VULGAR FRACTION ONE THIRD..VULGAR FRACTION TWO THIRDS
+2155..215A ; N # No [6] VULGAR FRACTION ONE FIFTH..VULGAR FRACTION FIVE SIXTHS
+215B..215E ; A # No [4] VULGAR FRACTION ONE EIGHTH..VULGAR FRACTION SEVEN EIGHTHS
+215F ; N # No FRACTION NUMERATOR ONE
+2160..216B ; A # Nl [12] ROMAN NUMERAL ONE..ROMAN NUMERAL TWELVE
+216C..216F ; N # Nl [4] ROMAN NUMERAL FIFTY..ROMAN NUMERAL ONE THOUSAND
+2170..2179 ; A # Nl [10] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL TEN
+217A..2182 ; N # Nl [9] SMALL ROMAN NUMERAL ELEVEN..ROMAN NUMERAL TEN THOUSAND
+2183..2184 ; N # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C
+2185..2188 ; N # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND
+2189 ; A # No VULGAR FRACTION ZERO THIRDS
+218A..218B ; N # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE
+2190..2194 ; A # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
+2195..2199 ; A # So [5] UP DOWN ARROW..SOUTH WEST ARROW
+219A..219B ; N # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE
+219C..219F ; N # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW
+21A0 ; N # Sm RIGHTWARDS TWO HEADED ARROW
+21A1..21A2 ; N # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL
+21A3 ; N # Sm RIGHTWARDS ARROW WITH TAIL
+21A4..21A5 ; N # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR
+21A6 ; N # Sm RIGHTWARDS ARROW FROM BAR
+21A7..21AD ; N # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW
+21AE ; N # Sm LEFT RIGHT ARROW WITH STROKE
+21AF..21B7 ; N # So [9] DOWNWARDS ZIGZAG ARROW..CLOCKWISE TOP SEMICIRCLE ARROW
+21B8..21B9 ; A # So [2] NORTH WEST ARROW TO LONG BAR..LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR
+21BA..21CD ; N # So [20] ANTICLOCKWISE OPEN CIRCLE ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE
+21CE..21CF ; N # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE
+21D0..21D1 ; N # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW
+21D2 ; A # Sm RIGHTWARDS DOUBLE ARROW
+21D3 ; N # So DOWNWARDS DOUBLE ARROW
+21D4 ; A # Sm LEFT RIGHT DOUBLE ARROW
+21D5..21E6 ; N # So [18] UP DOWN DOUBLE ARROW..LEFTWARDS WHITE ARROW
+21E7 ; A # So UPWARDS WHITE ARROW
+21E8..21F3 ; N # So [12] RIGHTWARDS WHITE ARROW..UP DOWN WHITE ARROW
+21F4..21FF ; N # Sm [12] RIGHT ARROW WITH SMALL CIRCLE..LEFT RIGHT OPEN-HEADED ARROW
+2200 ; A # Sm FOR ALL
+2201 ; N # Sm COMPLEMENT
+2202..2203 ; A # Sm [2] PARTIAL DIFFERENTIAL..THERE EXISTS
+2204..2206 ; N # Sm [3] THERE DOES NOT EXIST..INCREMENT
+2207..2208 ; A # Sm [2] NABLA..ELEMENT OF
+2209..220A ; N # Sm [2] NOT AN ELEMENT OF..SMALL ELEMENT OF
+220B ; A # Sm CONTAINS AS MEMBER
+220C..220E ; N # Sm [3] DOES NOT CONTAIN AS MEMBER..END OF PROOF
+220F ; A # Sm N-ARY PRODUCT
+2210 ; N # Sm N-ARY COPRODUCT
+2211 ; A # Sm N-ARY SUMMATION
+2212..2214 ; N # Sm [3] MINUS SIGN..DOT PLUS
+2215 ; A # Sm DIVISION SLASH
+2216..2219 ; N # Sm [4] SET MINUS..BULLET OPERATOR
+221A ; A # Sm SQUARE ROOT
+221B..221C ; N # Sm [2] CUBE ROOT..FOURTH ROOT
+221D..2220 ; A # Sm [4] PROPORTIONAL TO..ANGLE
+2221..2222 ; N # Sm [2] MEASURED ANGLE..SPHERICAL ANGLE
+2223 ; A # Sm DIVIDES
+2224 ; N # Sm DOES NOT DIVIDE
+2225 ; A # Sm PARALLEL TO
+2226 ; N # Sm NOT PARALLEL TO
+2227..222C ; A # Sm [6] LOGICAL AND..DOUBLE INTEGRAL
+222D ; N # Sm TRIPLE INTEGRAL
+222E ; A # Sm CONTOUR INTEGRAL
+222F..2233 ; N # Sm [5] SURFACE INTEGRAL..ANTICLOCKWISE CONTOUR INTEGRAL
+2234..2237 ; A # Sm [4] THEREFORE..PROPORTION
+2238..223B ; N # Sm [4] DOT MINUS..HOMOTHETIC
+223C..223D ; A # Sm [2] TILDE OPERATOR..REVERSED TILDE
+223E..2247 ; N # Sm [10] INVERTED LAZY S..NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+2248 ; A # Sm ALMOST EQUAL TO
+2249..224B ; N # Sm [3] NOT ALMOST EQUAL TO..TRIPLE TILDE
+224C ; A # Sm ALL EQUAL TO
+224D..2251 ; N # Sm [5] EQUIVALENT TO..GEOMETRICALLY EQUAL TO
+2252 ; A # Sm APPROXIMATELY EQUAL TO OR THE IMAGE OF
+2253..225F ; N # Sm [13] IMAGE OF OR APPROXIMATELY EQUAL TO..QUESTIONED EQUAL TO
+2260..2261 ; A # Sm [2] NOT EQUAL TO..IDENTICAL TO
+2262..2263 ; N # Sm [2] NOT IDENTICAL TO..STRICTLY EQUIVALENT TO
+2264..2267 ; A # Sm [4] LESS-THAN OR EQUAL TO..GREATER-THAN OVER EQUAL TO
+2268..2269 ; N # Sm [2] LESS-THAN BUT NOT EQUAL TO..GREATER-THAN BUT NOT EQUAL TO
+226A..226B ; A # Sm [2] MUCH LESS-THAN..MUCH GREATER-THAN
+226C..226D ; N # Sm [2] BETWEEN..NOT EQUIVALENT TO
+226E..226F ; A # Sm [2] NOT LESS-THAN..NOT GREATER-THAN
+2270..2281 ; N # Sm [18] NEITHER LESS-THAN NOR EQUAL TO..DOES NOT SUCCEED
+2282..2283 ; A # Sm [2] SUBSET OF..SUPERSET OF
+2284..2285 ; N # Sm [2] NOT A SUBSET OF..NOT A SUPERSET OF
+2286..2287 ; A # Sm [2] SUBSET OF OR EQUAL TO..SUPERSET OF OR EQUAL TO
+2288..2294 ; N # Sm [13] NEITHER A SUBSET OF NOR EQUAL TO..SQUARE CUP
+2295 ; A # Sm CIRCLED PLUS
+2296..2298 ; N # Sm [3] CIRCLED MINUS..CIRCLED DIVISION SLASH
+2299 ; A # Sm CIRCLED DOT OPERATOR
+229A..22A4 ; N # Sm [11] CIRCLED RING OPERATOR..DOWN TACK
+22A5 ; A # Sm UP TACK
+22A6..22BE ; N # Sm [25] ASSERTION..RIGHT ANGLE WITH ARC
+22BF ; A # Sm RIGHT TRIANGLE
+22C0..22FF ; N # Sm [64] N-ARY LOGICAL AND..Z NOTATION BAG MEMBERSHIP
+2300..2307 ; N # So [8] DIAMETER SIGN..WAVY LINE
+2308 ; N # Ps LEFT CEILING
+2309 ; N # Pe RIGHT CEILING
+230A ; N # Ps LEFT FLOOR
+230B ; N # Pe RIGHT FLOOR
+230C..2311 ; N # So [6] BOTTOM RIGHT CROP..SQUARE LOZENGE
+2312 ; A # So ARC
+2313..2319 ; N # So [7] SEGMENT..TURNED NOT SIGN
+231A..231B ; W # So [2] WATCH..HOURGLASS
+231C..231F ; N # So [4] TOP LEFT CORNER..BOTTOM RIGHT CORNER
+2320..2321 ; N # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL
+2322..2328 ; N # So [7] FROWN..KEYBOARD
+2329 ; W # Ps LEFT-POINTING ANGLE BRACKET
+232A ; W # Pe RIGHT-POINTING ANGLE BRACKET
+232B..237B ; N # So [81] ERASE TO THE LEFT..NOT CHECK MARK
+237C ; N # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW
+237D..239A ; N # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL
+239B..23B3 ; N # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
+23B4..23DB ; N # So [40] TOP SQUARE BRACKET..FUSE
+23DC..23E1 ; N # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
+23E2..23E8 ; N # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
+23E9..23EC ; W # So [4] BLACK RIGHT-POINTING DOUBLE TRIANGLE..BLACK DOWN-POINTING DOUBLE TRIANGLE
+23ED..23EF ; N # So [3] BLACK RIGHT-POINTING DOUBLE TRIANGLE WITH VERTICAL BAR..BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR
+23F0 ; W # So ALARM CLOCK
+23F1..23F2 ; N # So [2] STOPWATCH..TIMER CLOCK
+23F3 ; W # So HOURGLASS WITH FLOWING SAND
+23F4..23FF ; N # So [12] BLACK MEDIUM LEFT-POINTING TRIANGLE..OBSERVER EYE SYMBOL
+2400..2426 ; N # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
+2440..244A ; N # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
+2460..249B ; A # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
+249C..24E9 ; A # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
+24EA ; N # No CIRCLED DIGIT ZERO
+24EB..24FF ; A # No [21] NEGATIVE CIRCLED NUMBER ELEVEN..NEGATIVE CIRCLED DIGIT ZERO
+2500..254B ; A # So [76] BOX DRAWINGS LIGHT HORIZONTAL..BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL
+254C..254F ; N # So [4] BOX DRAWINGS LIGHT DOUBLE DASH HORIZONTAL..BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL
+2550..2573 ; A # So [36] BOX DRAWINGS DOUBLE HORIZONTAL..BOX DRAWINGS LIGHT DIAGONAL CROSS
+2574..257F ; N # So [12] BOX DRAWINGS LIGHT LEFT..BOX DRAWINGS HEAVY UP AND LIGHT DOWN
+2580..258F ; A # So [16] UPPER HALF BLOCK..LEFT ONE EIGHTH BLOCK
+2590..2591 ; N # So [2] RIGHT HALF BLOCK..LIGHT SHADE
+2592..2595 ; A # So [4] MEDIUM SHADE..RIGHT ONE EIGHTH BLOCK
+2596..259F ; N # So [10] QUADRANT LOWER LEFT..QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT
+25A0..25A1 ; A # So [2] BLACK SQUARE..WHITE SQUARE
+25A2 ; N # So WHITE SQUARE WITH ROUNDED CORNERS
+25A3..25A9 ; A # So [7] WHITE SQUARE CONTAINING BLACK SMALL SQUARE..SQUARE WITH DIAGONAL CROSSHATCH FILL
+25AA..25B1 ; N # So [8] BLACK SMALL SQUARE..WHITE PARALLELOGRAM
+25B2..25B3 ; A # So [2] BLACK UP-POINTING TRIANGLE..WHITE UP-POINTING TRIANGLE
+25B4..25B5 ; N # So [2] BLACK UP-POINTING SMALL TRIANGLE..WHITE UP-POINTING SMALL TRIANGLE
+25B6 ; A # So BLACK RIGHT-POINTING TRIANGLE
+25B7 ; A # Sm WHITE RIGHT-POINTING TRIANGLE
+25B8..25BB ; N # So [4] BLACK RIGHT-POINTING SMALL TRIANGLE..WHITE RIGHT-POINTING POINTER
+25BC..25BD ; A # So [2] BLACK DOWN-POINTING TRIANGLE..WHITE DOWN-POINTING TRIANGLE
+25BE..25BF ; N # So [2] BLACK DOWN-POINTING SMALL TRIANGLE..WHITE DOWN-POINTING SMALL TRIANGLE
+25C0 ; A # So BLACK LEFT-POINTING TRIANGLE
+25C1 ; A # Sm WHITE LEFT-POINTING TRIANGLE
+25C2..25C5 ; N # So [4] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE LEFT-POINTING POINTER
+25C6..25C8 ; A # So [3] BLACK DIAMOND..WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND
+25C9..25CA ; N # So [2] FISHEYE..LOZENGE
+25CB ; A # So WHITE CIRCLE
+25CC..25CD ; N # So [2] DOTTED CIRCLE..CIRCLE WITH VERTICAL FILL
+25CE..25D1 ; A # So [4] BULLSEYE..CIRCLE WITH RIGHT HALF BLACK
+25D2..25E1 ; N # So [16] CIRCLE WITH LOWER HALF BLACK..LOWER HALF CIRCLE
+25E2..25E5 ; A # So [4] BLACK LOWER RIGHT TRIANGLE..BLACK UPPER RIGHT TRIANGLE
+25E6..25EE ; N # So [9] WHITE BULLET..UP-POINTING TRIANGLE WITH RIGHT HALF BLACK
+25EF ; A # So LARGE CIRCLE
+25F0..25F7 ; N # So [8] WHITE SQUARE WITH UPPER LEFT QUADRANT..WHITE CIRCLE WITH UPPER RIGHT QUADRANT
+25F8..25FC ; N # Sm [5] UPPER LEFT TRIANGLE..BLACK MEDIUM SQUARE
+25FD..25FE ; W # Sm [2] WHITE MEDIUM SMALL SQUARE..BLACK MEDIUM SMALL SQUARE
+25FF ; N # Sm LOWER RIGHT TRIANGLE
+2600..2604 ; N # So [5] BLACK SUN WITH RAYS..COMET
+2605..2606 ; A # So [2] BLACK STAR..WHITE STAR
+2607..2608 ; N # So [2] LIGHTNING..THUNDERSTORM
+2609 ; A # So SUN
+260A..260D ; N # So [4] ASCENDING NODE..OPPOSITION
+260E..260F ; A # So [2] BLACK TELEPHONE..WHITE TELEPHONE
+2610..2613 ; N # So [4] BALLOT BOX..SALTIRE
+2614..2615 ; W # So [2] UMBRELLA WITH RAIN DROPS..HOT BEVERAGE
+2616..261B ; N # So [6] WHITE SHOGI PIECE..BLACK RIGHT POINTING INDEX
+261C ; A # So WHITE LEFT POINTING INDEX
+261D ; N # So WHITE UP POINTING INDEX
+261E ; A # So WHITE RIGHT POINTING INDEX
+261F..263F ; N # So [33] WHITE DOWN POINTING INDEX..MERCURY
+2640 ; A # So FEMALE SIGN
+2641 ; N # So EARTH
+2642 ; A # So MALE SIGN
+2643..2647 ; N # So [5] JUPITER..PLUTO
+2648..2653 ; W # So [12] ARIES..PISCES
+2654..265F ; N # So [12] WHITE CHESS KING..BLACK CHESS PAWN
+2660..2661 ; A # So [2] BLACK SPADE SUIT..WHITE HEART SUIT
+2662 ; N # So WHITE DIAMOND SUIT
+2663..2665 ; A # So [3] BLACK CLUB SUIT..BLACK HEART SUIT
+2666 ; N # So BLACK DIAMOND SUIT
+2667..266A ; A # So [4] WHITE CLUB SUIT..EIGHTH NOTE
+266B ; N # So BEAMED EIGHTH NOTES
+266C..266D ; A # So [2] BEAMED SIXTEENTH NOTES..MUSIC FLAT SIGN
+266E ; N # So MUSIC NATURAL SIGN
+266F ; A # Sm MUSIC SHARP SIGN
+2670..267E ; N # So [15] WEST SYRIAC CROSS..PERMANENT PAPER SIGN
+267F ; W # So WHEELCHAIR SYMBOL
+2680..2692 ; N # So [19] DIE FACE-1..HAMMER AND PICK
+2693 ; W # So ANCHOR
+2694..269D ; N # So [10] CROSSED SWORDS..OUTLINED WHITE STAR
+269E..269F ; A # So [2] THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT
+26A0 ; N # So WARNING SIGN
+26A1 ; W # So HIGH VOLTAGE SIGN
+26A2..26A9 ; N # So [8] DOUBLED FEMALE SIGN..HORIZONTAL MALE WITH STROKE SIGN
+26AA..26AB ; W # So [2] MEDIUM WHITE CIRCLE..MEDIUM BLACK CIRCLE
+26AC..26BC ; N # So [17] MEDIUM SMALL WHITE CIRCLE..SESQUIQUADRATE
+26BD..26BE ; W # So [2] SOCCER BALL..BASEBALL
+26BF ; A # So SQUARED KEY
+26C0..26C3 ; N # So [4] WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING
+26C4..26C5 ; W # So [2] SNOWMAN WITHOUT SNOW..SUN BEHIND CLOUD
+26C6..26CD ; A # So [8] RAIN..DISABLED CAR
+26CE ; W # So OPHIUCHUS
+26CF..26D3 ; A # So [5] PICK..CHAINS
+26D4 ; W # So NO ENTRY
+26D5..26E1 ; A # So [13] ALTERNATE ONE-WAY LEFT WAY TRAFFIC..RESTRICTED LEFT ENTRY-2
+26E2 ; N # So ASTRONOMICAL SYMBOL FOR URANUS
+26E3 ; A # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
+26E4..26E7 ; N # So [4] PENTAGRAM..INVERTED PENTAGRAM
+26E8..26E9 ; A # So [2] BLACK CROSS ON SHIELD..SHINTO SHRINE
+26EA ; W # So CHURCH
+26EB..26F1 ; A # So [7] CASTLE..UMBRELLA ON GROUND
+26F2..26F3 ; W # So [2] FOUNTAIN..FLAG IN HOLE
+26F4 ; A # So FERRY
+26F5 ; W # So SAILBOAT
+26F6..26F9 ; A # So [4] SQUARE FOUR CORNERS..PERSON WITH BALL
+26FA ; W # So TENT
+26FB..26FC ; A # So [2] JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL
+26FD ; W # So FUEL PUMP
+26FE..26FF ; A # So [2] CUP ON BLACK SQUARE..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
+2700..2704 ; N # So [5] BLACK SAFETY SCISSORS..WHITE SCISSORS
+2705 ; W # So WHITE HEAVY CHECK MARK
+2706..2709 ; N # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
+270A..270B ; W # So [2] RAISED FIST..RAISED HAND
+270C..2727 ; N # So [28] VICTORY HAND..WHITE FOUR POINTED STAR
+2728 ; W # So SPARKLES
+2729..273C ; N # So [20] STRESS OUTLINED WHITE STAR..OPEN CENTRE TEARDROP-SPOKED ASTERISK
+273D ; A # So HEAVY TEARDROP-SPOKED ASTERISK
+273E..274B ; N # So [14] SIX PETALLED BLACK AND WHITE FLORETTE..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
+274C ; W # So CROSS MARK
+274D ; N # So SHADOWED WHITE CIRCLE
+274E ; W # So NEGATIVE SQUARED CROSS MARK
+274F..2752 ; N # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
+2753..2755 ; W # So [3] BLACK QUESTION MARK ORNAMENT..WHITE EXCLAMATION MARK ORNAMENT
+2756 ; N # So BLACK DIAMOND MINUS WHITE X
+2757 ; W # So HEAVY EXCLAMATION MARK SYMBOL
+2758..2767 ; N # So [16] LIGHT VERTICAL BAR..ROTATED FLORAL HEART BULLET
+2768 ; N # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
+2769 ; N # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
+276A ; N # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
+276B ; N # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
+276C ; N # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
+276D ; N # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
+276E ; N # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
+276F ; N # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
+2770 ; N # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
+2771 ; N # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
+2772 ; N # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
+2773 ; N # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
+2774 ; N # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
+2775 ; N # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
+2776..277F ; A # No [10] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED NUMBER TEN
+2780..2793 ; N
# No [20] DINGBAT CIRCLED SANS-SERIF DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN +2794 ; N # So HEAVY WIDE-HEADED RIGHTWARDS ARROW +2795..2797 ; W # So [3] HEAVY PLUS SIGN..HEAVY DIVISION SIGN +2798..27AF ; N # So [24] HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW +27B0 ; W # So CURLY LOOP +27B1..27BE ; N # So [14] NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW +27BF ; W # So DOUBLE CURLY LOOP +27C0..27C4 ; N # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; N # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; N # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; N # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Na # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Na # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Na # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Na # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Na # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Na # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Na # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Na # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; N # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; N # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; N # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2800..28FF ; N # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2900..297F ; N # Sm [128] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..DOWN FISH TAIL +2980..2982 ; N # Sm [3] TRIPLE VERTICAL BAR DELIMITER..Z NOTATION TYPE COLON +2983 ; N # Ps LEFT WHITE CURLY BRACKET +2984 ; N # Pe RIGHT WHITE CURLY BRACKET +2985 ; Na # Ps LEFT WHITE PARENTHESIS +2986 ; Na # Pe RIGHT WHITE PARENTHESIS +2987 ; N # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; N # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; N # Ps Z NOTATION LEFT BINDING BRACKET +298A ; N # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; N # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; N # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; N # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; N # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; N # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; N # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; N # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; N # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; N # Ps LEFT ARC LESS-THAN BRACKET +2994 ; N # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; N # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; N # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; N # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; N # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; N # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; N # Ps LEFT WIGGLY FENCE +29D9 ; N # Pe RIGHT WIGGLY FENCE +29DA ; N # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; N # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; N # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; N # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; N # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..29FF ; N # Sm [2] TINY..MINY +2A00..2AFF ; N # Sm [256] N-ARY CIRCLED DOT OPERATOR..N-ARY WHITE VERTICAL BAR +2B00..2B1A ; N # So [27] NORTH EAST WHITE ARROW..DOTTED SQUARE +2B1B..2B1C ; W # So [2] BLACK LARGE SQUARE..WHITE LARGE SQUARE +2B1D..2B2F ; N # So [19] BLACK VERY SMALL SQUARE..WHITE VERTICAL ELLIPSE +2B30..2B44 ; N # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; N # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; N # Sm [6] REVERSE TILDE 
OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B4F ; N # So [3] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..SHORT BACKSLANTED SOUTH ARROW +2B50 ; W # So WHITE MEDIUM STAR +2B51..2B54 ; N # So [4] BLACK SMALL STAR..WHITE RIGHT-POINTING PENTAGON +2B55 ; W # So HEAVY LARGE CIRCLE +2B56..2B59 ; A # So [4] HEAVY OVAL WITH OVAL INSIDE..HEAVY CIRCLED SALTIRE +2B5A..2B73 ; N # So [26] SLANTED NORTH ARROW WITH HOOKED HEAD..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B76..2B95 ; N # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B97..2BFF ; N # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2C00..2C5F ; N # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI +2C60..2C7B ; N # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2C7F ; N # Lu [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL +2C80..2CE4 ; N # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI +2CE5..2CEA ; N # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA +2CEB..2CEE ; N # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CEF..2CF1 ; N # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; N # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2CF9..2CFC ; N # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER +2CFD ; N # No COPTIC FRACTION ONE HALF +2CFE..2CFF ; N # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER +2D00..2D25 ; N # Ll [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; N # Ll GEORGIAN SMALL LETTER YN +2D2D ; N # Ll GEORGIAN SMALL LETTER AEN +2D30..2D67 ; N # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; N # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D70 ; N # Po TIFINAGH SEPARATOR MARK +2D7F ; N # Mn TIFINAGH CONSONANT JOINER +2D80..2D96 ; N # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; N # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; N # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; N # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; N # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; N # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; N # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; N # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; N # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2DE0..2DFF ; N # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +2E00..2E01 ; N # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; N # Pi LEFT SUBSTITUTION BRACKET +2E03 ; N # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; N # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; N # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; N # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; N # Pi LEFT TRANSPOSITION BRACKET +2E0A ; N # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; N # Po RAISED SQUARE +2E0C ; N # Pi LEFT RAISED OMISSION BRACKET +2E0D ; N # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; N # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; N # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; N # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; N # Pd HYPHEN WITH DIAERESIS +2E1B ; 
N # Po TILDE WITH RING ABOVE +2E1C ; N # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; N # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; N # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; N # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; N # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; N # Ps TOP LEFT HALF BRACKET +2E23 ; N # Pe TOP RIGHT HALF BRACKET +2E24 ; N # Ps BOTTOM LEFT HALF BRACKET +2E25 ; N # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; N # Ps LEFT SIDEWAYS U BRACKET +2E27 ; N # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; N # Ps LEFT DOUBLE PARENTHESIS +2E29 ; N # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; N # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; N # Lm VERTICAL TILDE +2E30..2E39 ; N # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; N # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; N # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; N # Pd DOUBLE HYPHEN +2E41 ; N # Po REVERSED COMMA +2E42 ; N # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E4F ; N # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER +2E50..2E51 ; N # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR +2E52..2E54 ; N # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +2E55 ; N # Ps LEFT SQUARE BRACKET WITH STROKE +2E56 ; N # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57 ; N # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58 ; N # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; N # Ps TOP HALF LEFT PARENTHESIS +2E5A ; N # Pe TOP HALF RIGHT PARENTHESIS +2E5B ; N # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C ; N # Pe BOTTOM HALF RIGHT PARENTHESIS +2E5D ; N # Pd OBLIQUE HYPHEN +2E80..2E99 ; W # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; W # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; W # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +2FF0..2FFF ; W # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION +3000 ; F # Zs IDEOGRAPHIC SPACE +3001..3003 ; W # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3004 ; W # So JAPANESE INDUSTRIAL STANDARD SYMBOL +3005 ; W # Lm IDEOGRAPHIC ITERATION MARK +3006 ; W # Lo IDEOGRAPHIC CLOSING MARK +3007 ; W # Nl IDEOGRAPHIC NUMBER ZERO +3008 ; W # Ps LEFT ANGLE BRACKET +3009 ; W # Pe RIGHT ANGLE BRACKET +300A ; W # Ps LEFT DOUBLE ANGLE BRACKET +300B ; W # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; W # Ps LEFT CORNER BRACKET +300D ; W # Pe RIGHT CORNER BRACKET +300E ; W # Ps LEFT WHITE CORNER BRACKET +300F ; W # Pe RIGHT WHITE CORNER BRACKET +3010 ; W # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; W # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; W # So [2] POSTAL MARK..GETA MARK +3014 ; W # Ps LEFT TORTOISE SHELL BRACKET +3015 ; W # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; W # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; W # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; W # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; W # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; W # Ps LEFT WHITE SQUARE BRACKET +301B ; W # Pe RIGHT WHITE SQUARE BRACKET +301C ; W # Pd WAVE DASH +301D ; W # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; W # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; W # So POSTAL MARK FACE +3021..3029 ; W # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +302A..302D ; W # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; W # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3030 ; W # Pd WAVY DASH +3031..3035 ; W # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER 
HALF +3036..3037 ; W # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +3038..303A ; W # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; W # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; W # Lo MASU MARK +303D ; W # Po PART ALTERNATION MARK +303E ; W # So IDEOGRAPHIC VARIATION INDICATOR +303F ; N # So IDEOGRAPHIC HALF FILL SPACE +3041..3096 ; W # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +3099..309A ; W # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; W # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E ; W # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; W # Lo HIRAGANA DIGRAPH YORI +30A0 ; W # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30A1..30FA ; W # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FB ; W # Po KATAKANA MIDDLE DOT +30FC..30FE ; W # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; W # Lo KATAKANA DIGRAPH KOTO +3105..312F ; W # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; W # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +3190..3191 ; W # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195 ; W # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F ; W # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK +31A0..31BF ; W # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31C0..31E3 ; W # So [36] CJK STROKE T..CJK STROKE Q +31EF ; W # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION +31F0..31FF ; W # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3200..321E ; W # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU +3220..3229 ; W # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3247 ; W # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; A # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3250 ; W # So PARTNERSHIP SIGN +3251..325F ; W # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE +3260..327F ; W # So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL +3280..3289 ; W # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +328A..32B0 ; W # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32B1..32BF ; W # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY +32C0..32FF ; W # So [64] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE ERA NAME REIWA +3300..33FF ; W # So [256] SQUARE APAATO..SQUARE GAL +3400..4DBF ; W # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4DC0..4DFF ; N # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION +4E00..9FFF ; W # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF +A000..A014 ; W # Lo [21] YI SYLLABLE IT..YI SYLLABLE E +A015 ; W # Lm YI SYLLABLE WU +A016..A48C ; W # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A490..A4C6 ; W # So [55] YI RADICAL QOT..YI RADICAL KE +A4D0..A4F7 ; N # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; N # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A4FE..A4FF ; N # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A500..A60B ; N # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; N # Lm VAI SYLLABLE LENGTHENER +A60D..A60F ; N # Po [3] VAI COMMA..VAI QUESTION MARK +A610..A61F ; N # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; N # Nd 
[10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; N # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; N # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; N # Lo CYRILLIC LETTER MULTIOCULAR O +A66F ; N # Mn COMBINING CYRILLIC VZMET +A670..A672 ; N # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A673 ; N # Po SLAVONIC ASTERISK +A674..A67D ; N # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A67E ; N # Po CYRILLIC KAVYKA +A67F ; N # Lm CYRILLIC PAYEROK +A680..A69B ; N # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; N # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A69E..A69F ; N # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6A0..A6E5 ; N # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; N # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F0..A6F1 ; N # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A6F2..A6F7 ; N # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK +A700..A716 ; N # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; N # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; N # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A722..A76F ; N # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; N # Lm MODIFIER LETTER US +A771..A787 ; N # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; N # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA ; N # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; N # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; N # Ll LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; N # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; N # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; N # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; N # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; N # Ll LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A7FF ; N # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M +A800..A801 ; N # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I +A802 ; N # Mn SYLOTI NAGRI SIGN DVISVARA +A803..A805 ; N # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A806 ; N # Mn SYLOTI NAGRI SIGN HASANTA +A807..A80A ; N # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80B ; N # Mn SYLOTI NAGRI SIGN ANUSVARA +A80C..A822 ; N # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; N # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; N # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; N # Mc SYLOTI NAGRI VOWEL SIGN OO +A828..A82B ; N # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 +A82C ; N # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A830..A835 ; N # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC 
FRACTION THREE SIXTEENTHS +A836..A837 ; N # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A838 ; N # Sc NORTH INDIC RUPEE MARK +A839 ; N # So NORTH INDIC QUANTITY MARK +A840..A873 ; N # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A874..A877 ; N # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD +A880..A881 ; N # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; N # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; N # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C4..A8C5 ; N # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8CE..A8CF ; N # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8D0..A8D9 ; N # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8E0..A8F1 ; N # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8F2..A8F7 ; N # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8F8..A8FA ; N # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FB ; N # Lo DEVANAGARI HEADSTROKE +A8FC ; N # Po DEVANAGARI SIGN SIDDHAM +A8FD..A8FE ; N # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A8FF ; N # Mn DEVANAGARI VOWEL SIGN AY +A900..A909 ; N # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; N # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A926..A92D ; N # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A92E..A92F ; N # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA +A930..A946 ; N # Lo [23] REJANG LETTER KA..REJANG LETTER A +A947..A951 ; N # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952..A953 ; N # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A95F ; N # Po REJANG SECTION MARK +A960..A97C ; W # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A980..A982 ; N # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; N # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; N # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B3 ; N # Mn JAVANESE SIGN CECAK TELU +A9B4..A9B5 ; N # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; N # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; N # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; N # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9C0 ; N # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9C1..A9CD ; N # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH +A9CF ; N # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; N # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9DE..A9DF ; N # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN +A9E0..A9E4 ; N # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E5 ; N # Mn MYANMAR SIGN SHAN SAW +A9E6 ; N # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; N # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9 ; N # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +A9FA..A9FE ; N # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; N # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA29..AA2E ; N # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; N # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; N # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; N # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; N # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA40..AA42 ; N # Lo [3] CHAM LETTER FINAL K..CHAM LETTER 
FINAL NG +AA43 ; N # Mn CHAM CONSONANT SIGN FINAL NG +AA44..AA4B ; N # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4C ; N # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; N # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; N # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA5C..AA5F ; N # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA +AA60..AA6F ; N # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; N # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; N # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA77..AA79 ; N # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +AA7A ; N # Lo MYANMAR LETTER AITON RA +AA7B ; N # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; N # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; N # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AA7F ; N # Lo [2] MYANMAR LETTER SHWE PALAUNG CHA..MYANMAR LETTER SHWE PALAUNG SHA +AA80..AAAF ; N # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O +AAB0 ; N # Mn TAI VIET MAI KANG +AAB1 ; N # Lo TAI VIET VOWEL AA +AAB2..AAB4 ; N # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB5..AAB6 ; N # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB7..AAB8 ; N # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AAB9..AABD ; N # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AABE..AABF ; N # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC0 ; N # Lo TAI VIET TONE MAI NUENG +AAC1 ; N # Mn TAI VIET TONE MAI THO +AAC2 ; N # Lo TAI VIET TONE MAI SONG +AADB..AADC ; N # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; N # Lm TAI VIET SYMBOL SAM +AADE..AADF ; N # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; N # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; N # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; N # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; N # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; N # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; N # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; N # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; N # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; N # Mn MEETEI MAYEK VIRAMA +AB01..AB06 ; N # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; N # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; N # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; N # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; N # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; N # Ll [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5B ; N # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; N # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; N # Ll [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; N # Lm MODIFIER LETTER SMALL TURNED W +AB6A..AB6B ; N # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +AB70..ABBF ; N # Ll [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; N # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; N # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; N # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; N # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; N # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; N # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEB ; N # Po 
MEETEI MAYEK CHEIKHEI +ABEC ; N # Mc MEETEI MAYEK LUM IYEK +ABED ; N # Mn MEETEI MAYEK APUN IYEK +ABF0..ABF9 ; N # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +AC00..D7A3 ; W # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; N # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; N # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +D800..DB7F ; N # Cs [896] .. +DB80..DBFF ; N # Cs [128] .. +DC00..DFFF ; N # Cs [1024] .. +E000..F8FF ; A # Co [6400] .. +F900..FA6D ; W # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA6E..FA6F ; W # Cn [2] .. +FA70..FAD9 ; W # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FADA..FAFF ; W # Cn [38] .. +FB00..FB06 ; N # Ll [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; N # Ll [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; N # Lo HEBREW LETTER YOD WITH HIRIQ +FB1E ; N # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB28 ; N # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB29 ; N # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FB2A..FB36 ; N # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; N # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; N # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; N # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; N # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FB4F ; N # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED +FB50..FBB1 ; N # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBB2..FBC2 ; N # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBD3..FD3D ; N # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD3E ; N # Pe ORNATE LEFT PARENTHESIS +FD3F ; N # Ps ORNATE RIGHT PARENTHESIS +FD40..FD4F ; N # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH +FD50..FD8F ; N # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; N # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDCF ; N # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDF0..FDFB ; N # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FDFC ; N # Sc RIAL SIGN +FDFD..FDFF ; N # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL +FE00..FE0F ; A # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE10..FE16 ; W # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE17 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET +FE18 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET +FE19 ; W # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS +FE20..FE2F ; N # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FE30 ; W # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER +FE31..FE32 ; W # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE33..FE34 ; W # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE35 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS +FE36 ; W # Pe PRESENTATION 
FORM FOR VERTICAL RIGHT PARENTHESIS +FE37 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET +FE38 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET +FE39 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET +FE3A ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET +FE3B ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET +FE3C ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET +FE3D ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET +FE3E ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET +FE3F ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET +FE40 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET +FE41 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FE45..FE46 ; W # Po [2] SESAME DOT..WHITE SESAME DOT +FE47 ; W # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET +FE48 ; W # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET +FE49..FE4C ; W # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE +FE4D..FE4F ; W # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE50..FE52 ; W # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; W # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FE58 ; W # Pd SMALL EM DASH +FE59 ; W # Ps SMALL LEFT PARENTHESIS +FE5A ; W # Pe SMALL RIGHT PARENTHESIS +FE5B ; W # Ps SMALL LEFT CURLY BRACKET +FE5C ; W # Pe SMALL RIGHT CURLY BRACKET +FE5D ; W # Ps SMALL LEFT TORTOISE SHELL BRACKET +FE5E ; W # Pe SMALL RIGHT TORTOISE SHELL BRACKET +FE5F..FE61 ; W # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK +FE62 ; W # Sm SMALL PLUS SIGN +FE63 ; W # Pd SMALL HYPHEN-MINUS +FE64..FE66 ; W # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68 ; W # Po SMALL REVERSE SOLIDUS +FE69 ; W # Sc SMALL DOLLAR SIGN +FE6A..FE6B ; W # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT +FE70..FE74 ; N # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; N # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FEFF ; N # Cf ZERO WIDTH NO-BREAK SPACE +FF01..FF03 ; F # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN +FF04 ; F # Sc FULLWIDTH DOLLAR SIGN +FF05..FF07 ; F # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE +FF08 ; F # Ps FULLWIDTH LEFT PARENTHESIS +FF09 ; F # Pe FULLWIDTH RIGHT PARENTHESIS +FF0A ; F # Po FULLWIDTH ASTERISK +FF0B ; F # Sm FULLWIDTH PLUS SIGN +FF0C ; F # Po FULLWIDTH COMMA +FF0D ; F # Pd FULLWIDTH HYPHEN-MINUS +FF0E..FF0F ; F # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS +FF10..FF19 ; F # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF1A..FF1B ; F # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1C..FF1E ; F # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF1F..FF20 ; F # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT +FF21..FF3A ; F # Lu [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF3B ; F # Ps FULLWIDTH LEFT SQUARE BRACKET +FF3C ; F # Po FULLWIDTH REVERSE SOLIDUS +FF3D ; F # Pe FULLWIDTH RIGHT SQUARE BRACKET +FF3E ; F # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF3F ; F # Pc FULLWIDTH LOW LINE +FF40 ; F # Sk FULLWIDTH GRAVE ACCENT +FF41..FF5A ; F # Ll [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF5B ; F # Ps FULLWIDTH LEFT CURLY BRACKET +FF5C ; F # Sm FULLWIDTH VERTICAL LINE +FF5D ; 
F # Pe FULLWIDTH RIGHT CURLY BRACKET +FF5E ; F # Sm FULLWIDTH TILDE +FF5F ; F # Ps FULLWIDTH LEFT WHITE PARENTHESIS +FF60 ; F # Pe FULLWIDTH RIGHT WHITE PARENTHESIS +FF61 ; H # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF62 ; H # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; H # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65 ; H # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT +FF66..FF6F ; H # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; H # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; H # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; H # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; H # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; H # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; H # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; H # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; H # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +FFE0..FFE1 ; F # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN +FFE2 ; F # Sm FULLWIDTH NOT SIGN +FFE3 ; F # Sk FULLWIDTH MACRON +FFE4 ; F # So FULLWIDTH BROKEN BAR +FFE5..FFE6 ; F # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN +FFE8 ; H # So HALFWIDTH FORMS LIGHT VERTICAL +FFE9..FFEC ; H # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +FFED..FFEE ; H # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +FFF9..FFFB ; N # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +FFFC ; N # So OBJECT REPLACEMENT CHARACTER +FFFD ; A # So REPLACEMENT CHARACTER +10000..1000B ; N # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; N # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; N # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; N # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; N # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; N # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; N # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10100..10102 ; N # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK +10107..10133 ; N # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10137..1013F ; N # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT +10140..10174 ; N # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10175..10178 ; N # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN +10179..10189 ; N # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN +1018A..1018B ; N # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN +1018C..1018E ; N # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN +10190..1019C ; N # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL +101A0 ; N # So GREEK SYMBOL TAU RHO +101D0..101FC ; N # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND +101FD ; N # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +10280..1029C ; N # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; N # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +102E0 ; N # Mn COPTIC EPACT THOUSANDS MARK +102E1..102FB ; N # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED +10300..1031F ; N # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +10320..10323 ; N # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY 
+1032D..1032F ; N # Lo [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE +10330..10340 ; N # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA +10341 ; N # Nl GOTHIC LETTER NINETY +10342..10349 ; N # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; N # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; N # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10376..1037A ; N # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10380..1039D ; N # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +1039F ; N # Po UGARITIC WORD DIVIDER +103A0..103C3 ; N # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; N # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D0 ; N # Po OLD PERSIAN WORD DIVIDER +103D1..103D5 ; N # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; N # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1047F ; N # Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW +10480..1049D ; N # Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO +104A0..104A9 ; N # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3 ; N # Lu [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; N # Ll [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; N # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; N # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +1056F ; N # Po CAUCASIAN ALBANIAN CITATION MARK +10570..1057A ; N # Lu [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; N # Lu [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; N # Lu [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; N # Lu [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; N # Ll [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; N # Ll [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; N # Ll [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; N # Ll [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10600..10736 ; N # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; N # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; N # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; N # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; N # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; N # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; N # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; N # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; N # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; N # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; N # Lo CYPRIOT SYLLABLE ZA +1083F ; N # Lo CYPRIOT SYLLABLE ZO +10840..10855 ; N # Lo [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW +10857 ; N # Po IMPERIAL ARAMAIC SECTION SIGN +10858..1085F ; N # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND +10860..10876 ; N # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10877..10878 ; N # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON +10879..1087F ; N # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY +10880..1089E ; N # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108A7..108AF ; N # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED 
+108E0..108F2 ; N # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; N # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +108FB..108FF ; N # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED +10900..10915 ; N # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10916..1091B ; N # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE +1091F ; N # Po PHOENICIAN WORD SEPARATOR +10920..10939 ; N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +1093F ; N # Po LYDIAN TRIANGULAR MARK +10980..1099F ; N # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 +109A0..109B7 ; N # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA +109BC..109BD ; N # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF +109BE..109BF ; N # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +109C0..109CF ; N # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY +109D2..109FF ; N # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS +10A00 ; N # Lo KHAROSHTHI LETTER A +10A01..10A03 ; N # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; N # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; N # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A10..10A13 ; N # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; N # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; N # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A38..10A3A ; N # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; N # Mn KHAROSHTHI VIRAMA +10A40..10A48 ; N # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF +10A50..10A58 ; N # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES +10A60..10A7C ; N # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A7D..10A7E ; N # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY +10A7F ; N # Po OLD SOUTH ARABIAN NUMERIC INDICATOR +10A80..10A9C ; N # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10A9D..10A9F ; N # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY +10AC0..10AC7 ; N # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC8 ; N # So MANICHAEAN SIGN UD +10AC9..10AE4 ; N # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AE5..10AE6 ; N # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10AEB..10AEF ; N # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED +10AF0..10AF6 ; N # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER +10B00..10B35 ; N # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B39..10B3F ; N # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10B40..10B55 ; N # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B58..10B5F ; N # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND +10B60..10B72 ; N # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B78..10B7F ; N # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND +10B80..10B91 ; N # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10B99..10B9C ; N # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10BA9..10BAF ; N # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED +10C00..10C48 ; N # Lo [73] OLD TURKIC 
LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; N # Lu [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; N # Ll [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10CFA..10CFF ; N # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND +10D00..10D23 ; N # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27 ; N # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D30..10D39 ; N # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10E60..10E7E ; N # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS +10E80..10EA9 ; N # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAB..10EAC ; N # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EAD ; N # Pd YEZIDI HYPHENATION MARK +10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EFD..10EFF ; N # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF +10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; N # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F46..10F50 ; N # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F51..10F54 ; N # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +10F55..10F59 ; N # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F70..10F81 ; N # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10F82..10F85 ; N # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +10F86..10F89 ; N # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +10FB0..10FC4 ; N # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FC5..10FCB ; N # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED +10FE0..10FF6 ; N # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11000 ; N # Mc BRAHMI SIGN CANDRABINDU +11001 ; N # Mn BRAHMI SIGN ANUSVARA +11002 ; N # Mc BRAHMI SIGN VISARGA +11003..11037 ; N # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11038..11046 ; N # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11047..1104D ; N # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +11052..11065 ; N # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND +11066..1106F ; N # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11070 ; N # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11071..11072 ; N # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11073..11074 ; N # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11075 ; N # Lo BRAHMI LETTER OLD TAMIL LLA +1107F ; N # Mn BRAHMI NUMBER JOINER +11080..11081 ; N # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA +11082 ; N # Mc KAITHI SIGN VISARGA +11083..110AF ; N # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; N # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; N # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; N # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110B9..110BA ; N # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110BB..110BC ; N # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN +110BD ; N # Cf KAITHI NUMBER SIGN +110BE..110C1 ; N # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110C2 ; N # Mn KAITHI VOWEL SIGN VOCALIC R +110CD ; N # Cf KAITHI 
NUMBER SIGN ABOVE +110D0..110E8 ; N # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; N # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; N # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; N # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; N # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; N # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; N # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; N # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; N # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11144 ; N # Lo CHAKMA LETTER LHAA +11145..11146 ; N # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; N # Lo CHAKMA LETTER VAA +11150..11172 ; N # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11173 ; N # Mn MAHAJANI SIGN NUKTA +11174..11175 ; N # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK +11176 ; N # Lo MAHAJANI LIGATURE SHRI +11180..11181 ; N # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; N # Mc SHARADA SIGN VISARGA +11183..111B2 ; N # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; N # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; N # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; N # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; N # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; N # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111C9..111CC ; N # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CD ; N # Po SHARADA SUTRA MARK +111CE ; N # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF ; N # Mn SHARADA SIGN INVERTED CANDRABINDU +111D0..111D9 ; N # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA ; N # Lo SHARADA EKAM +111DB ; N # Po SHARADA SIGN SIDDHAM +111DC ; N # Lo SHARADA HEADSTROKE +111DD..111DF ; N # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 +111E1..111F4 ; N # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND +11200..11211 ; N # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; N # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; N # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; N # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; N # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; N # Mn KHOJKI SIGN ANUSVARA +11235 ; N # Mc KHOJKI SIGN VIRAMA +11236..11237 ; N # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +11238..1123D ; N # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN +1123E ; N # Mn KHOJKI SIGN SUKUN +1123F..11240 ; N # Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I +11241 ; N # Mn KHOJKI VOWEL SIGN VOCALIC R +11280..11286 ; N # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; N # Lo MULTANI LETTER GHA +1128A..1128D ; N # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; N # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; N # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112A9 ; N # Po MULTANI SECTION MARK +112B0..112DE ; N # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112DF ; N # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; N # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112EA ; N # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +112F0..112F9 ; N # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11300..11301 ; N # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; N # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; N 
# Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; N # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; N # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; N # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; N # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; N # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133B..1133C ; N # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133D ; N # Lo GRANTHA SIGN AVAGRAHA +1133E..1133F ; N # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; N # Mn GRANTHA VOWEL SIGN II +11341..11344 ; N # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; N # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; N # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11350 ; N # Lo GRANTHA OM +11357 ; N # Mc GRANTHA AU LENGTH MARK +1135D..11361 ; N # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; N # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11366..1136C ; N # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; N # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11400..11434 ; N # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; N # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; N # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; N # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444 ; N # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445 ; N # Mc NEWA SIGN VISARGA +11446 ; N # Mn NEWA SIGN NUKTA +11447..1144A ; N # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1144B..1144F ; N # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN +11450..11459 ; N # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145A..1145B ; N # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK +1145D ; N # Po NEWA INSERTION SIGN +1145E ; N # Mn NEWA SANDHI MARK +1145F..11461 ; N # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; N # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2 ; N # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; N # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; N # Mc TIRHUTA VOWEL SIGN E +114BA ; N # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; N # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; N # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; N # Mc TIRHUTA SIGN VISARGA +114C2..114C3 ; N # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +114C4..114C5 ; N # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C6 ; N # Po TIRHUTA ABBREVIATION SIGN +114C7 ; N # Lo TIRHUTA OM +114D0..114D9 ; N # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11580..115AE ; N # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1 ; N # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; N # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; N # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; N # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; N # Mc SIDDHAM SIGN VISARGA +115BF..115C0 ; N # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115C1..115D7 ; N # Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +115D8..115DB ; N # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +115DC..115DD ; N # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11600..1162F ; N # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 
; N # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; N # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; N # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; N # Mn MODI SIGN ANUSVARA +1163E ; N # Mc MODI SIGN VISARGA +1163F..11640 ; N # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +11641..11643 ; N # Po [3] MODI DANDA..MODI ABBREVIATION SIGN +11644 ; N # Lo MODI SIGN HUVA +11650..11659 ; N # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11660..1166C ; N # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT +11680..116AA ; N # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; N # Mn TAKRI SIGN ANUSVARA +116AC ; N # Mc TAKRI SIGN VISARGA +116AD ; N # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; N # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; N # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; N # Mc TAKRI SIGN VIRAMA +116B7 ; N # Mn TAKRI SIGN NUKTA +116B8 ; N # Lo TAKRI LETTER ARCHAIC KHA +116B9 ; N # Po TAKRI ABBREVIATION SIGN +116C0..116C9 ; N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +11700..1171A ; N # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171D..1171F ; N # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; N # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; N # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; N # Mc AHOM VOWEL SIGN E +11727..1172B ; N # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11730..11739 ; N # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +1173A..1173B ; N # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY +1173C..1173E ; N # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +1173F ; N # So AHOM SYMBOL VI +11740..11746 ; N # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; N # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; N # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; N # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838 ; N # Mc DOGRA SIGN VISARGA +11839..1183A ; N # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1183B ; N # Po DOGRA ABBREVIATION SIGN +118A0..118DF ; N # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9 ; N # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118EA..118F2 ; N # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY +118FF ; N # Lo WARANG CITI OM +11900..11906 ; N # Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E +11909 ; N # Lo DIVES AKURU LETTER O +1190C..11913 ; N # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; N # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; N # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935 ; N # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; N # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C ; N # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193D ; N # Mc DIVES AKURU SIGN HALANTA +1193E ; N # Mn DIVES AKURU VIRAMA +1193F ; N # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; N # Mc DIVES AKURU MEDIAL YA +11941 ; N # Lo DIVES AKURU INITIAL RA +11942 ; N # Mc DIVES AKURU MEDIAL RA +11943 ; N # Mn DIVES AKURU SIGN NUKTA +11944..11946 ; N # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK +11950..11959 ; N # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +119A0..119A7 ; N # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; N # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; N # 
Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; N # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; N # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; N # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E0 ; N # Mn NANDINAGARI SIGN VIRAMA +119E1 ; N # Lo NANDINAGARI SIGN AVAGRAHA +119E2 ; N # Po NANDINAGARI SIGN SIDDHAM +119E3 ; N # Lo NANDINAGARI HEADSTROKE +119E4 ; N # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00 ; N # Lo ZANABAZAR SQUARE LETTER A +11A01..11A0A ; N # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A0B..11A32 ; N # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A33..11A38 ; N # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; N # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; N # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3B..11A3E ; N # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A3F..11A46 ; N # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK +11A47 ; N # Mn ZANABAZAR SQUARE SUBJOINER +11A50 ; N # Lo SOYOMBO LETTER A +11A51..11A56 ; N # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; N # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; N # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A5C..11A89 ; N # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A8A..11A96 ; N # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; N # Mc SOYOMBO SIGN VISARGA +11A98..11A99 ; N # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11A9A..11A9C ; N # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD +11A9D ; N # Lo SOYOMBO MARK PLUTA +11A9E..11AA2 ; N # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 +11AB0..11ABF ; N # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA +11AC0..11AF8 ; N # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL +11B00..11B09 ; N # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11C00..11C08 ; N # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; N # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; N # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; N # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; N # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; N # Mc BHAIKSUKI SIGN VISARGA +11C3F ; N # Mn BHAIKSUKI SIGN VIRAMA +11C40 ; N # Lo BHAIKSUKI SIGN AVAGRAHA +11C41..11C45 ; N # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 +11C50..11C59 ; N # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C5A..11C6C ; N # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK +11C70..11C71 ; N # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD +11C72..11C8F ; N # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11C92..11CA7 ; N # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; N # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; N # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; N # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; N # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; N # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; N # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D00..11D06 ; N # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; N # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI 
LETTER O +11D0B..11D30 ; N # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D31..11D36 ; N # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; N # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; N # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; N # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D46 ; N # Lo MASARAM GONDI REPHA +11D47 ; N # Mn MASARAM GONDI RA-KARA +11D50..11D59 ; N # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11D60..11D65 ; N # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; N # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; N # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; N # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; N # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94 ; N # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; N # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; N # Mc GUNJALA GONDI SIGN VISARGA +11D97 ; N # Mn GUNJALA GONDI VIRAMA +11D98 ; N # Lo GUNJALA GONDI OM +11DA0..11DA9 ; N # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11EE0..11EF2 ; N # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF3..11EF4 ; N # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; N # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11EF7..11EF8 ; N # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11F00..11F01 ; N # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA +11F02 ; N # Lo KAWI SIGN REPHA +11F03 ; N # Mc KAWI SIGN VISARGA +11F04..11F10 ; N # Lo [13] KAWI LETTER A..KAWI LETTER O +11F12..11F33 ; N # Lo [34] KAWI LETTER KA..KAWI LETTER JNYA +11F34..11F35 ; N # Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA +11F36..11F3A ; N # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R +11F3E..11F3F ; N # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI +11F40 ; N # Mn KAWI VOWEL SIGN EU +11F41 ; N # Mc KAWI SIGN KILLER +11F42 ; N # Mn KAWI CONJOINER +11F43..11F4F ; N # Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL +11F50..11F59 ; N # Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE +11FB0 ; N # Lo LISU LETTER YHA +11FC0..11FD4 ; N # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH +11FD5..11FDC ; N # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI +11FDD..11FE0 ; N # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN +11FE1..11FF1 ; N # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA +11FFF ; N # Po TAMIL PUNCTUATION END OF TEXT +12000..12399 ; N # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; N # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12470..12474 ; N # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +12480..12543 ; N # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; N # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +12FF1..12FF2 ; N # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 +13000..1342F ; N # Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D +13430..1343F ; N # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE +13440 ; N # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY +13441..13446 ; N # Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN +13447..13455 ; N # Mn [15] EGYPTIAN HIEROGLYPH 
MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED +14400..14646 ; N # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16800..16A38 ; N # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; N # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A60..16A69 ; N # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A6E..16A6F ; N # Po [2] MRO DANDA..MRO DOUBLE DANDA +16A70..16ABE ; N # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AC0..16AC9 ; N # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16AD0..16AED ; N # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF0..16AF4 ; N # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16AF5 ; N # Po BASSA VAH FULL STOP +16B00..16B2F ; N # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B30..16B36 ; N # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16B37..16B3B ; N # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM +16B3C..16B3F ; N # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB +16B40..16B43 ; N # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B44 ; N # Po PAHAWH HMONG SIGN XAUS +16B45 ; N # So PAHAWH HMONG SIGN CIM TSOV ROG +16B50..16B59 ; N # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B5B..16B61 ; N # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS +16B63..16B77 ; N # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; N # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16E40..16E7F ; N # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16E80..16E96 ; N # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +16E97..16E9A ; N # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16F00..16F4A ; N # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F4F ; N # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F50 ; N # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; N # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; W # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE2 ; W # Po OLD CHINESE HOOK MARK +16FE3 ; W # Lm OLD CHINESE ITERATION MARK +16FE4 ; W # Mn KHITAN SMALL SCRIPT FILLER +16FF0..16FF1 ; W # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7 ; W # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18AFF ; W # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 +18B00..18CD5 ; W # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D08 ; W # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; W # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; W # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; W # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B0FF ; W # Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2 +1B100..1B122 ; W # Lo [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU +1B132 ; W # Lo HIRAGANA LETTER SMALL KO +1B150..1B152 ; W # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B155 ; W # Lo KATAKANA LETTER SMALL KO +1B164..1B167 ; W # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; W # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB 
+1BC00..1BC6A ; N # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; N # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; N # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; N # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9C ; N # So DUPLOYAN SIGN O WITH CROSS +1BC9D..1BC9E ; N # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1BC9F ; N # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1BCA0..1BCA3 ; N # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK +1D000..1D0F5 ; N # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO +1D100..1D126 ; N # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 +1D129..1D164 ; N # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE +1D165..1D166 ; N # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D167..1D169 ; N # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16A..1D16C ; N # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 +1D16D..1D172 ; N # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D173..1D17A ; N # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +1D17B..1D182 ; N # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D183..1D184 ; N # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN +1D185..1D18B ; N # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D18C..1D1A9 ; N # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH +1D1AA..1D1AD ; N # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D1AE..1D1EA ; N # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON +1D200..1D241 ; N # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 +1D242..1D244 ; N # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D245 ; N # So GREEK MUSICAL LEIMMA +1D2C0..1D2D3 ; N # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN +1D2E0..1D2F3 ; N # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN +1D300..1D356 ; N # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D360..1D378 ; N # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE +1D400..1D454 ; N # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; N # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; N # Lu [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; N # Lu MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; N # Lu [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; N # Lu [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; N # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; N # Ll MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; N # Ll [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; N # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; N # Lu [4] 
MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; N # Lu [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; N # Lu [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; N # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; N # Lu [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; N # Lu [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; N # Lu MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; N # Lu [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; N # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; N # Lu [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C1 ; N # Sm MATHEMATICAL BOLD NABLA +1D6C2..1D6DA ; N # Ll [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DB ; N # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6DC..1D6FA ; N # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FB ; N # Sm MATHEMATICAL ITALIC NABLA +1D6FC..1D714 ; N # Ll [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D715 ; N # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D716..1D734 ; N # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D735 ; N # Sm MATHEMATICAL BOLD ITALIC NABLA +1D736..1D74E ; N # Ll [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D74F ; N # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D750..1D76E ; N # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D76F ; N # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D770..1D788 ; N # Ll [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D789 ; N # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D78A..1D7A8 ; N # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7A9 ; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7AA..1D7C2 ; N # Ll [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C3 ; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1D7C4..1D7CB ; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1D800..1D9FF ; N # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD +1DA00..1DA36 ; N # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA37..1DA3A ; N # So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE +1DA3B..1DA6C ; N # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA6D..1DA74 ; N # So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING +1DA75 ; N # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA76..1DA83 ; N # So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH +1DA84 ; N # Mn SIGNWRITING LOCATION HEAD NECK +1DA85..1DA86 ; N # So [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS +1DA87..1DA8B ; N # Po [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS +1DA9B..1DA9F ; N # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; N # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION 
MODIFIER-16 +1DF00..1DF09 ; N # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; N # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1DF25..1DF2A ; N # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; N # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; N # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E030..1E06D ; N # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE +1E08F ; N # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +1E100..1E12C ; N # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E130..1E136 ; N # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E137..1E13D ; N # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149 ; N # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E ; N # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E14F ; N # So NYIAKENG PUACHUE HMONG CIRCLED CA +1E290..1E2AD ; N # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2AE ; N # Mn TOTO SIGN RISING TONE +1E2C0..1E2EB ; N # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2EC..1E2EF ; N # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E2F0..1E2F9 ; N # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E2FF ; N # Sc WANCHO NGUN SIGN +1E4D0..1E4EA ; N # Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL +1E4EB ; N # Lm NAG MUNDARI SIGN OJOD +1E4EC..1E4EF ; N # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH +1E4F0..1E4F9 ; N # Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE +1E7E0..1E7E6 ; N # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; N # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; N # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; N # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; N # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8C7..1E8CF ; N # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE +1E8D0..1E8D6 ; N # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E900..1E943 ; N # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E944..1E94A ; N # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1E94B ; N # Lm ADLAM NASALIZATION MARK +1E950..1E959 ; N # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1E95E..1E95F ; N # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK +1EC71..1ECAB ; N # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE +1ECAC ; N # So INDIC SIYAQ PLACEHOLDER +1ECAD..1ECAF ; N # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS +1ECB0 ; N # Sc INDIC SIYAQ RUPEE MARK +1ECB1..1ECB4 ; N # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK +1ED01..1ED2D ; N # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND +1ED2E ; N # So 
OTTOMAN SIYAQ MARRATAN +1ED2F..1ED3D ; N # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH +1EE00..1EE03 ; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; N # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; N # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; N # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; N # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; N # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; N # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; N # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; N # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; N # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; N # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; N # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; N # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; N # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; N # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; N # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; N # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; N # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; N # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; N # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; N # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; N # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; N # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; N # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; N # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; N # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; N # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; N # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; N # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; N # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; N # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F000..1F003 ; N # So [4] MAHJONG TILE EAST WIND..MAHJONG TILE NORTH WIND +1F004 ; W # So MAHJONG TILE RED DRAGON +1F005..1F02B ; N # So [39] MAHJONG TILE GREEN DRAGON..MAHJONG TILE BACK +1F030..1F093 ; N # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F0A0..1F0AE ; N # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES +1F0B1..1F0BF ; N # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER +1F0C1..1F0CE ; N # So [14] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD KING OF DIAMONDS +1F0CF ; W # So PLAYING CARD BLACK JOKER +1F0D1..1F0F5 ; N # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 +1F100..1F10A ; A # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA +1F10B..1F10C ; N # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO +1F10D..1F10F ; N # So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH 
OVERLAID BACKSLASH +1F110..1F12D ; A # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD +1F12E..1F12F ; N # So [2] CIRCLED WZ..COPYLEFT SYMBOL +1F130..1F169 ; A # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F16A..1F16F ; N # So [6] RAISED MC SIGN..CIRCLED HUMAN FIGURE +1F170..1F18D ; A # So [30] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED SA +1F18E ; W # So NEGATIVE SQUARED AB +1F18F..1F190 ; A # So [2] NEGATIVE SQUARED WC..SQUARE DJ +1F191..1F19A ; W # So [10] SQUARED CL..SQUARED VS +1F19B..1F1AC ; A # So [18] SQUARED THREE D..SQUARED VOD +1F1AD ; N # So MASK WORK SYMBOL +1F1E6..1F1FF ; N # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z +1F200..1F202 ; W # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA +1F210..1F23B ; W # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D +1F240..1F248 ; W # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F250..1F251 ; W # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT +1F260..1F265 ; W # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI +1F300..1F320 ; W # So [33] CYCLONE..SHOOTING STAR +1F321..1F32C ; N # So [12] THERMOMETER..WIND BLOWING FACE +1F32D..1F335 ; W # So [9] HOT DOG..CACTUS +1F336 ; N # So HOT PEPPER +1F337..1F37C ; W # So [70] TULIP..BABY BOTTLE +1F37D ; N # So FORK AND KNIFE WITH PLATE +1F37E..1F393 ; W # So [22] BOTTLE WITH POPPING CORK..GRADUATION CAP +1F394..1F39F ; N # So [12] HEART WITH TIP ON THE LEFT..ADMISSION TICKETS +1F3A0..1F3CA ; W # So [43] CAROUSEL HORSE..SWIMMER +1F3CB..1F3CE ; N # So [4] WEIGHT LIFTER..RACING CAR +1F3CF..1F3D3 ; W # So [5] CRICKET BAT AND BALL..TABLE TENNIS PADDLE AND BALL +1F3D4..1F3DF ; N # So [12] SNOW CAPPED MOUNTAIN..STADIUM +1F3E0..1F3F0 ; W # So [17] HOUSE BUILDING..EUROPEAN CASTLE +1F3F1..1F3F3 ; N # So [3] WHITE PENNANT..WAVING WHITE FLAG +1F3F4 ; W # So WAVING BLACK FLAG +1F3F5..1F3F7 ; N # So [3] ROSETTE..LABEL +1F3F8..1F3FA ; W # So [3] BADMINTON RACQUET AND SHUTTLECOCK..AMPHORA +1F3FB..1F3FF ; W # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +1F400..1F43E ; W # So [63] RAT..PAW PRINTS +1F43F ; N # So CHIPMUNK +1F440 ; W # So EYES +1F441 ; N # So EYE +1F442..1F4FC ; W # So [187] EAR..VIDEOCASSETTE +1F4FD..1F4FE ; N # So [2] FILM PROJECTOR..PORTABLE STEREO +1F4FF..1F53D ; W # So [63] PRAYER BEADS..DOWN-POINTING SMALL RED TRIANGLE +1F53E..1F54A ; N # So [13] LOWER RIGHT SHADOWED WHITE CIRCLE..DOVE OF PEACE +1F54B..1F54E ; W # So [4] KAABA..MENORAH WITH NINE BRANCHES +1F54F ; N # So BOWL OF HYGIEIA +1F550..1F567 ; W # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY +1F568..1F579 ; N # So [18] RIGHT SPEAKER..JOYSTICK +1F57A ; W # So MAN DANCING +1F57B..1F594 ; N # So [26] LEFT HAND TELEPHONE RECEIVER..REVERSED VICTORY HAND +1F595..1F596 ; W # So [2] REVERSED HAND WITH MIDDLE FINGER EXTENDED..RAISED HAND WITH PART BETWEEN MIDDLE AND RING FINGERS +1F597..1F5A3 ; N # So [13] WHITE DOWN POINTING LEFT HAND INDEX..BLACK DOWN POINTING BACKHAND INDEX +1F5A4 ; W # So BLACK HEART +1F5A5..1F5FA ; N # So [86] DESKTOP COMPUTER..WORLD MAP +1F5FB..1F5FF ; W # So [5] MOUNT FUJI..MOYAI +1F600..1F64F ; W # So [80] GRINNING FACE..PERSON WITH FOLDED HANDS +1F650..1F67F ; N # So [48] NORTH WEST POINTING LEAF..REVERSE CHECKER BOARD +1F680..1F6C5 ; W # So [70] ROCKET..LEFT LUGGAGE +1F6C6..1F6CB ; N # So [6] TRIANGLE WITH ROUNDED CORNERS..COUCH AND LAMP +1F6CC ; W # So SLEEPING 
ACCOMMODATION +1F6CD..1F6CF ; N # So [3] SHOPPING BAGS..BED +1F6D0..1F6D2 ; W # So [3] PLACE OF WORSHIP..SHOPPING TROLLEY +1F6D3..1F6D4 ; N # So [2] STUPA..PAGODA +1F6D5..1F6D7 ; W # So [3] HINDU TEMPLE..ELEVATOR +1F6DC..1F6DF ; W # So [4] WIRELESS..RING BUOY +1F6E0..1F6EA ; N # So [11] HAMMER AND WRENCH..NORTHEAST-POINTING AIRPLANE +1F6EB..1F6EC ; W # So [2] AIRPLANE DEPARTURE..AIRPLANE ARRIVING +1F6F0..1F6F3 ; N # So [4] SATELLITE..PASSENGER SHIP +1F6F4..1F6FC ; W # So [9] SCOOTER..ROLLER SKATE +1F700..1F776 ; N # So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE +1F77B..1F77F ; N # So [5] HAUMEA..ORCUS +1F780..1F7D9 ; N # So [90] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NINE POINTED WHITE STAR +1F7E0..1F7EB ; W # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE +1F7F0 ; W # So HEAVY EQUALS SIGN +1F800..1F80B ; N # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD +1F810..1F847 ; N # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW +1F850..1F859 ; N # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW +1F860..1F887 ; N # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW +1F890..1F8AD ; N # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS +1F8B0..1F8B1 ; N # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F900..1F90B ; N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT +1F90C..1F93A ; W # So [47] PINCHED FINGERS..FENCER +1F93B ; N # So MODERN PENTATHLON +1F93C..1F945 ; W # So [10] WRESTLERS..GOAL NET +1F946 ; N # So RIFLE +1F947..1F9FF ; W # So [185] FIRST PLACE MEDAL..NAZAR AMULET +1FA00..1FA53 ; N # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP +1FA60..1FA6D ; N # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER +1FA70..1FA7C ; W # So [13] BALLET SHOES..CRUTCH +1FA80..1FA88 ; W # So [9] YO-YO..FLUTE +1FA90..1FABD ; W # So [46] RINGED PLANET..WING +1FABF..1FAC5 ; W # So [7] GOOSE..PERSON WITH CROWN +1FACE..1FADB ; W # So [14] MOOSE..PEA POD +1FAE0..1FAE8 ; W # So [9] MELTING FACE..SHAKING FACE +1FAF0..1FAF8 ; W # So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND +1FB00..1FB92 ; N # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK +1FB94..1FBCA ; N # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON +1FBF0..1FBF9 ; N # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +20000..2A6DF ; W # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A6E0..2A6FF ; W # Cn [32] .. +2A700..2B739 ; W # Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739 +2B73A..2B73F ; W # Cn [6] .. +2B740..2B81D ; W # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B81E..2B81F ; W # Cn [2] .. +2B820..2CEA1 ; W # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEA2..2CEAF ; W # Cn [14] .. +2CEB0..2EBE0 ; W # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBE1..2EBEF ; W # Cn [15] .. +2EBF0..2EE5D ; W # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2EE5E..2F7FF ; W # Cn [2466] .. +2F800..2FA1D ; W # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +2FA1E..2FA1F ; W # Cn [2] .. +2FA20..2FFFD ; W # Cn [1502] .. +30000..3134A ; W # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +3134B..3134F ; W # Cn [5] .. 
+31350..323AF ; W # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF +323B0..3FFFD ; W # Cn [56398] .. +E0001 ; N # Cf LANGUAGE TAG +E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG +E0100..E01EF ; A # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 +F0000..FFFFD ; A # Co [65534] .. +100000..10FFFD ; A # Co [65534] .. # EOF diff --git a/libcxx/utils/data/unicode/GraphemeBreakProperty.txt b/libcxx/utils/data/unicode/GraphemeBreakProperty.txt index a12b5eef1efce..12453cbdb54a1 100644 --- a/libcxx/utils/data/unicode/GraphemeBreakProperty.txt +++ b/libcxx/utils/data/unicode/GraphemeBreakProperty.txt @@ -1,6 +1,6 @@ -# GraphemeBreakProperty-15.0.0.txt -# Date: 2022-04-27, 17:07:38 GMT -# © 2022 Unicode®, Inc. +# GraphemeBreakProperty-15.1.0.txt +# Date: 2023-01-05, 20:34:41 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # diff --git a/libcxx/utils/data/unicode/GraphemeBreakTest.txt b/libcxx/utils/data/unicode/GraphemeBreakTest.txt index 3c73f97b7b82c..4c1ed512e4510 100644 --- a/libcxx/utils/data/unicode/GraphemeBreakTest.txt +++ b/libcxx/utils/data/unicode/GraphemeBreakTest.txt @@ -1,6 +1,6 @@ -# GraphemeBreakTest-15.0.0.txt -# Date: 2022-02-26, 00:38:37 GMT -# © 2022 Unicode®, Inc. +# GraphemeBreakTest-15.1.0.txt +# Date: 2023-08-07, 15:52:55 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # @@ -36,8 +36,8 @@ ÷ 0020 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] ÷ 0020 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] ÷ 0020 × 0308 ÷ 0600 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] -÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0020 × 0A03 ÷ # ÷ [0.2] SPACE (Other) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0020 × 0308 × 0A03 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 0020 × 0308 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 0020 ÷ 1160 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] @@ -48,10 +48,24 @@ ÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0020 × 0900 ÷ # ÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0020 × 0308 × 0900 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE 
(Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 0020 ÷ 0904 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 0020 × 0308 ÷ 0904 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 0020 ÷ 0D4E ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0020 × 0308 ÷ 0D4E ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0020 ÷ 0915 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0020 × 0308 ÷ 0915 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] ÷ 0020 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 0020 × 0308 ÷ 231A ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] ÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0020 × 093C ÷ # ÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 0020 × 0308 × 093C ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 0020 × 094D ÷ # ÷ [0.2] SPACE (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 0020 × 0308 × 094D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] ÷ 0020 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 0020 × 0308 × 200D ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] (Other) ÷ [0.3] @@ -70,8 +84,8 @@ ÷ 000D ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] ÷ 000D ÷ 0600 ÷ # ÷ [0.2] (CR) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] ÷ 000D ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] -÷ 000D ÷ 0903 ÷ # ÷ [0.2] (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000D ÷ 0A03 ÷ # ÷ [0.2] (CR) ÷ [4.0] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000D ÷ 0308 × 0A03 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 000D ÷ 1100 ÷ # ÷ [0.2] (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 000D ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 000D ÷ 1160 ÷ # ÷ [0.2] (CR) ÷ 
[4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] @@ -82,10 +96,24 @@ ÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 000D ÷ AC01 ÷ # ÷ [0.2] (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000D ÷ 0900 ÷ # ÷ [0.2] (CR) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 000D ÷ 0308 × 0900 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 000D ÷ 0903 ÷ # ÷ [0.2] (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 000D ÷ 0904 ÷ # ÷ [0.2] (CR) ÷ [4.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0904 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 000D ÷ 0D4E ÷ # ÷ [0.2] (CR) ÷ [4.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0D4E ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 000D ÷ 0915 ÷ # ÷ [0.2] (CR) ÷ [4.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] ÷ 000D ÷ 231A ÷ # ÷ [0.2] (CR) ÷ [4.0] WATCH (ExtPict) ÷ [0.3] ÷ 000D ÷ 0308 ÷ 231A ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 000D ÷ 0300 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] ÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 000D ÷ 093C ÷ # ÷ [0.2] (CR) ÷ [4.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 000D ÷ 0308 × 093C ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 000D ÷ 094D ÷ # ÷ [0.2] (CR) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 000D ÷ 0308 × 094D ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] ÷ 000D ÷ 200D ÷ # ÷ [0.2] (CR) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 000D ÷ 0308 × 200D ÷ # ÷ [0.2] (CR) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 000D ÷ 0378 ÷ # ÷ [0.2] (CR) ÷ [4.0] (Other) ÷ [0.3] @@ -104,8 +132,8 @@ ÷ 000A ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] ÷ 000A ÷ 0600 ÷ # ÷ [0.2] (LF) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] ÷ 000A ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] -÷ 000A ÷ 0903 ÷ # ÷ [0.2] (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS 
(Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000A ÷ 0A03 ÷ # ÷ [0.2] (LF) ÷ [4.0] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 000A ÷ 0308 × 0A03 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 000A ÷ 1100 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 000A ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 000A ÷ 1160 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] @@ -116,10 +144,24 @@ ÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 000A ÷ AC01 ÷ # ÷ [0.2] (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000A ÷ 0900 ÷ # ÷ [0.2] (LF) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 000A ÷ 0308 × 0900 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 000A ÷ 0903 ÷ # ÷ [0.2] (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 000A ÷ 0904 ÷ # ÷ [0.2] (LF) ÷ [4.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0904 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 000A ÷ 0D4E ÷ # ÷ [0.2] (LF) ÷ [4.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0D4E ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 000A ÷ 0915 ÷ # ÷ [0.2] (LF) ÷ [4.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] ÷ 000A ÷ 231A ÷ # ÷ [0.2] (LF) ÷ [4.0] WATCH (ExtPict) ÷ [0.3] ÷ 000A ÷ 0308 ÷ 231A ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 000A ÷ 0300 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] ÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 000A ÷ 093C ÷ # ÷ [0.2] (LF) ÷ [4.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 000A ÷ 0308 × 093C ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 000A ÷ 094D ÷ # ÷ [0.2] (LF) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 000A ÷ 0308 × 094D ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] ÷ 000A ÷ 200D ÷ # ÷ [0.2] (LF) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 000A ÷ 0308 × 200D ÷ # ÷ [0.2] (LF) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 000A ÷ 0378 ÷ # ÷ [0.2] (LF) ÷ [4.0] (Other) ÷ 
[0.3] @@ -138,8 +180,8 @@ ÷ 0001 ÷ 0308 ÷ 1F1E6 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] ÷ 0001 ÷ 0600 ÷ # ÷ [0.2] (Control) ÷ [4.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] ÷ 0001 ÷ 0308 ÷ 0600 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] -÷ 0001 ÷ 0903 ÷ # ÷ [0.2] (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0001 ÷ 0A03 ÷ # ÷ [0.2] (Control) ÷ [4.0] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0001 ÷ 0308 × 0A03 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0001 ÷ 1100 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 0001 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 0001 ÷ 1160 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] @@ -150,10 +192,24 @@ ÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0001 ÷ AC01 ÷ # ÷ [0.2] (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0001 ÷ 0900 ÷ # ÷ [0.2] (Control) ÷ [4.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0001 ÷ 0308 × 0900 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0001 ÷ 0903 ÷ # ÷ [0.2] (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 0001 ÷ 0904 ÷ # ÷ [0.2] (Control) ÷ [4.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0904 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 0001 ÷ 0D4E ÷ # ÷ [0.2] (Control) ÷ [4.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0D4E ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0001 ÷ 0915 ÷ # ÷ [0.2] (Control) ÷ [4.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0915 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] ÷ 0001 ÷ 231A ÷ # ÷ [0.2] (Control) ÷ [4.0] WATCH (ExtPict) ÷ [0.3] ÷ 0001 ÷ 0308 ÷ 231A ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 0001 ÷ 0300 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] ÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0001 ÷ 093C ÷ # ÷ [0.2] (Control) ÷ [4.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 0001 ÷ 0308 × 093C ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING 
DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 0001 ÷ 094D ÷ # ÷ [0.2] (Control) ÷ [4.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 0001 ÷ 0308 × 094D ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] ÷ 0001 ÷ 200D ÷ # ÷ [0.2] (Control) ÷ [4.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 0001 ÷ 0308 × 200D ÷ # ÷ [0.2] (Control) ÷ [4.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 0001 ÷ 0378 ÷ # ÷ [0.2] (Control) ÷ [4.0] (Other) ÷ [0.3] @@ -172,8 +228,8 @@ ÷ 034F × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] ÷ 034F ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] ÷ 034F × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] -÷ 034F × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 034F × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 034F × 0A03 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 034F × 0308 × 0A03 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 034F ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 034F × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 034F ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] @@ -184,10 +240,24 @@ ÷ 034F × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 034F ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 034F × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 034F × 0900 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 034F × 0308 × 0900 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 034F × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 034F × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 034F ÷ 0904 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 034F × 0308 ÷ 0904 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ 
[0.3]
+÷ 034F ÷ 0D4E ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 034F × 0308 ÷ 0D4E ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 034F ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 034F × 0308 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
 ÷ 034F ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 034F × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 034F × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 034F × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 034F × 093C ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 034F × 0308 × 093C ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 034F × 094D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 034F × 0308 × 094D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 034F × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 034F × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 034F ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAPHEME JOINER (Extend) ÷ [999.0] (Other) ÷ [0.3]
@@ -206,8 +276,8 @@
 ÷ 1F1E6 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 1F1E6 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
 ÷ 1F1E6 × 0308 ÷ 0600 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1F1E6 × 0A03 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0A03 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
 ÷ 1F1E6 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ 1F1E6 × 0308 ÷ 1100 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ 1F1E6 ÷ 1160 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
@@ -218,10 +288,24 @@
 ÷ 1F1E6 × 0308 ÷ AC00 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 1F1E6 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 1F1E6 × 0308 ÷ AC01 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1F1E6 × 0900 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0900 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1F1E6 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1F1E6 × 0308 × 0903 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1F1E6 ÷ 0904 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0904 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 1F1E6 ÷ 0D4E ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0D4E ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1F1E6 ÷ 0915 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 1F1E6 × 0308 ÷ 0915 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
 ÷ 1F1E6 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 1F1E6 × 0308 ÷ 231A ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 1F1E6 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 1F1E6 × 0308 × 0300 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 093C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 0308 × 093C ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 094D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 1F1E6 × 0308 × 094D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 1F1E6 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 1F1E6 × 0308 × 200D ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 1F1E6 ÷ 0378 ÷ # ÷ [0.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [999.0] (Other) ÷ [0.3]
@@ -240,8 +324,8 @@
 ÷ 0600 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 0600 × 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
 ÷ 0600 × 0308 ÷ 0600 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 0600 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 0600 × 0308 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0600 × 0A03 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0600 × 0308 × 0A03 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
 ÷ 0600 × 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ 0600 × 0308 ÷ 1100 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ 0600 × 1160 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
@@ -252,48 +336,76 @@
 ÷ 0600 × 0308 ÷ AC00 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 0600 × AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 0600 × 0308 ÷ AC01 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0600 × 0900 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0600 × 0308 × 0900 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0600 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0600 × 0308 × 0903 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0600 × 0904 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0904 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 0600 × 0D4E ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0D4E ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0600 × 0915 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 0600 × 0308 ÷ 0915 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
 ÷ 0600 × 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] WATCH (ExtPict) ÷ [0.3]
 ÷ 0600 × 0308 ÷ 231A ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 0600 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 0600 × 0308 × 0300 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 093C ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0308 × 093C ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 094D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 0600 × 0308 × 094D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 0600 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 0600 × 0308 × 200D ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 0600 × 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.2] (Other) ÷ [0.3]
 ÷ 0600 × 0308 ÷ 0378 ÷ # ÷ [0.2] ARABIC NUMBER SIGN (Prepend) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]
-÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]
-÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
-÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] (CR) ÷ [0.3]
-÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]
-÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] (LF) ÷ [0.3]
-÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]
-÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] (Control) ÷ [0.3]
-÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]
-÷ 0903 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0903 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
-÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
-÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
-÷ 0903 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 0903 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
-÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
-÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
-÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
-÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
-÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
-÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
-÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
-÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
-÷ 0903 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
-÷ 0903 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
-÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
-÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
-÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
-÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] (Other) ÷ [0.3]
-÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]
+÷ 0A03 ÷ 0020 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 0020 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0A03 ÷ 000D ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [5.0] (CR) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 000D ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]
+÷ 0A03 ÷ 000A ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [5.0] (LF) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 000A ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]
+÷ 0A03 ÷ 0001 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [5.0] (Control) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 0001 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]
+÷ 0A03 × 034F ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0A03 × 0308 × 034F ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0A03 ÷ 1F1E6 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0A03 ÷ 0600 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 0600 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0A03 × 0A03 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0A03 × 0308 × 0A03 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0A03 ÷ 1100 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 1100 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0A03 ÷ 1160 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 1160 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0A03 ÷ 11A8 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 11A8 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0A03 ÷ AC00 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0A03 × 0308 ÷ AC00 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0A03 ÷ AC01 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0A03 × 0308 ÷ AC01 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0A03 × 0900 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0A03 × 0308 × 0900 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0A03 × 0903 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0A03 × 0308 × 0903 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0A03 ÷ 0904 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 0904 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 0A03 ÷ 0D4E ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 0D4E ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0A03 ÷ 0915 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 0915 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 0A03 ÷ 231A ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 231A ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0A03 × 0300 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0A03 × 0308 × 0300 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0A03 × 093C ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0A03 × 0308 × 093C ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0A03 × 094D ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 0A03 × 0308 × 094D ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 0A03 × 200D ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0A03 × 0308 × 200D ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0A03 ÷ 0378 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [999.0] (Other) ÷ [0.3]
+÷ 0A03 × 0308 ÷ 0378 ÷ # ÷ [0.2] GURMUKHI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]
 ÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3]
 ÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
 ÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] (CR) ÷ [0.3]
@@ -308,8 +420,8 @@
 ÷ 1100 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 1100 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
 ÷ 1100 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1100 × 0A03 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1100 × 0308 × 0A03 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
 ÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ 1100 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ 1100 × 1160 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
@@ -320,10 +432,24 @@
 ÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1100 × 0900 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1100 × 0308 × 0900 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1100 ÷ 0904 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0904 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 1100 ÷ 0D4E ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0D4E ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1100 ÷ 0915 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 1100 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
 ÷ 1100 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 1100 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 093C ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 0308 × 093C ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 094D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 1100 × 0308 × 094D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 1100 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 1100 × 0308 × 200D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] (Other) ÷ [0.3]
@@ -342,8 +468,8 @@
 ÷ 1160 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 1160 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
 ÷ 1160 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1160 × 0A03 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 1160 × 0308 × 0A03 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
 ÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ 1160 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ 1160 × 1160 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
@@ -354,10 +480,24 @@
 ÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 1160 × 0900 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1160 × 0308 × 0900 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1160 ÷ 0904 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0904 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 1160 ÷ 0D4E ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0D4E ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 1160 ÷ 0915 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 1160 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
 ÷ 1160 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 1160 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 093C ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 0308 × 093C ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 094D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 1160 × 0308 × 094D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 1160 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 1160 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 1160 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] (Other) ÷ [0.3]
@@ -376,8 +516,8 @@
 ÷ 11A8 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ 11A8 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
 ÷ 11A8 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 11A8 × 0A03 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 11A8 × 0308 × 0A03 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
 ÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ 11A8 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ 11A8 ÷ 1160 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
@@ -388,10 +528,24 @@
 ÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 11A8 × 0900 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 11A8 × 0308 × 0900 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 11A8 ÷ 0904 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0904 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 11A8 ÷ 0D4E ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0D4E ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 11A8 ÷ 0915 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 11A8 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
 ÷ 11A8 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 11A8 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 093C ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 0308 × 093C ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 094D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 11A8 × 0308 × 094D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ 11A8 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 11A8 × 0308 × 200D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] (Other) ÷ [0.3]
@@ -410,8 +564,8 @@
 ÷ AC00 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ AC00 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
 ÷ AC00 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC00 × 0A03 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC00 × 0308 × 0A03 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
 ÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ AC00 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ AC00 × 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [7.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
@@ -422,10 +576,24 @@
 ÷ AC00 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC00 × 0900 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ AC00 × 0308 × 0900 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ AC00 ÷ 0904 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0904 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ AC00 ÷ 0D4E ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0D4E ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ AC00 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ AC00 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
 ÷ AC00 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ AC00 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 093C ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 0308 × 093C ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ AC00 × 0308 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ AC00 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ AC00 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] (Other) ÷ [0.3]
@@ -444,8 +612,8 @@
 ÷ AC01 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
 ÷ AC01 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
 ÷ AC01 × 0308 ÷ 0600 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
-÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
-÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC01 × 0A03 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ AC01 × 0308 × 0A03 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
 ÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ AC01 × 0308 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
 ÷ AC01 ÷ 1160 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
@@ -456,14 +624,268 @@
 ÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
 ÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
 ÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ AC01 × 0900 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ AC01 × 0308 × 0900 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ AC01 ÷ 0904 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0904 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ AC01 ÷ 0D4E ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0D4E ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ AC01 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ AC01 × 0308 ÷ 0915 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
 ÷ AC01 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ AC01 × 0308 ÷ 231A ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
 ÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
 ÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 093C ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 0308 × 093C ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ AC01 × 0308 × 094D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
 ÷ AC01 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ AC01 × 0308 × 200D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
 ÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] (Other) ÷ [0.3]
 ÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]
+÷ 0900 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0900 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [5.0] (CR) ÷ [0.3]
+÷ 0900 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]
+÷ 0900 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [5.0] (LF) ÷ [0.3]
+÷ 0900 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]
+÷ 0900 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [5.0] (Control) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]
+÷ 0900 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0900 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0900 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0900 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0900 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0900 × 0A03 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0900 × 0308 × 0A03 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0900 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0900 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0900 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0900 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0900 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0900 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0900 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0900 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0900 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0900 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0900 × 0900 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 × 0308 × 0900 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 ÷ 0904 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0904 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 ÷ 0D4E ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0D4E ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0900 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 0900 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0900 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0900 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 093C ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 0308 × 093C ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0900 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0900 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [999.0] (Other) ÷ [0.3]
+÷ 0900 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]
+÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [5.0] (CR) ÷ [0.3]
+÷ 0903 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]
+÷ 0903 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [5.0] (LF) ÷ [0.3]
+÷ 0903 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]
+÷ 0903 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [5.0] (Control) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]
+÷ 0903 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0903 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0903 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0903 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0903 × 0A03 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0903 × 0308 × 0A03 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0903 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0903 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0903 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0903 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0903 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0903 × 0900 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0903 × 0308 × 0900 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0903 ÷ 0904 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0904 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 0903 ÷ 0D4E ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0D4E ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0903 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 0903 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0903 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 093C ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0308 × 093C ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0903 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] (Other) ÷ [0.3]
+÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]
+÷ 0904 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0904 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0904 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [5.0] (CR) ÷ [0.3]
+÷ 0904 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]
+÷ 0904 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [5.0] (LF) ÷ [0.3]
+÷ 0904 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]
+÷ 0904 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [5.0] (Control) ÷ [0.3]
+÷ 0904 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]
+÷ 0904 × 034F ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0904 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0904 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0904 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0904 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0904 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3]
+÷ 0904 × 0A03 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0904 × 0308 × 0A03 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3]
+÷ 0904 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0904 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3]
+÷ 0904 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0904 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3]
+÷ 0904 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0904 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3]
+÷ 0904 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0904 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3]
+÷ 0904 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0904 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3]
+÷ 0904 × 0900 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0904 × 0308 × 0900 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0904 × 0903 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0904 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0904 ÷ 0904 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 0904 × 0308 ÷ 0904 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3]
+÷ 0904 ÷ 0D4E ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0904 × 0308 ÷ 0D4E ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3]
+÷ 0904 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 0904 × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3]
+÷ 0904 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0904 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3]
+÷ 0904 × 0300 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0904 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3]
+÷ 0904 × 093C ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0904 × 0308 × 093C ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3]
+÷ 0904 × 094D ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 0904 × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3]
+÷ 0904 × 200D ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0904 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3]
+÷ 0904 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [999.0] (Other) ÷ [0.3]
+÷ 0904 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3]
+÷ 0D4E × 0020 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] SPACE (Other) ÷ [0.3]
+÷ 0D4E × 0308 ÷ 0020 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3]
+÷ 0D4E ÷ 000D ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [5.0] (CR) ÷ [0.3]
+÷ 0D4E × 0308 ÷ 000D ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3]
+÷ 0D4E ÷ 000A ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [5.0] (LF) ÷ [0.3]
+÷ 0D4E × 0308 ÷ 000A ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3]
+÷ 0D4E ÷ 0001 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [5.0] (Control) ÷ [0.3]
+÷ 0D4E × 0308 ÷ 0001 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3]
+÷ 0D4E × 034F ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0D4E × 0308 × 034F ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3]
+÷ 0D4E × 1F1E6 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0D4E × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3]
+÷ 0D4E × 0600 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] ARABIC
NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0D4E × 0308 ÷ 0600 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0D4E × 0A03 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0D4E × 0308 × 0A03 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0D4E × 1100 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0D4E × 0308 ÷ 1100 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0D4E × 1160 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0D4E × 0308 ÷ 1160 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0D4E × 11A8 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0D4E × 0308 ÷ 11A8 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0D4E × AC00 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0D4E × 0308 ÷ AC00 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0D4E × AC01 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0D4E × 0308 ÷ AC01 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0D4E × 0900 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0D4E × 0308 × 0900 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0D4E × 0903 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 0D4E × 0308 × 0903 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 0D4E × 0904 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 0D4E × 0308 ÷ 0904 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 0D4E × 0D4E ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0D4E × 0308 ÷ 0D4E ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ 
[999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0D4E × 0915 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0D4E × 0308 ÷ 0915 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0D4E × 231A ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] WATCH (ExtPict) ÷ [0.3] +÷ 0D4E × 0308 ÷ 231A ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0D4E × 0300 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0D4E × 0308 × 0300 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0D4E × 093C ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 0D4E × 0308 × 093C ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 0D4E × 094D ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 0D4E × 0308 × 094D ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 0D4E × 200D ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0D4E × 0308 × 200D ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0D4E × 0378 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.2] (Other) ÷ [0.3] +÷ 0D4E × 0308 ÷ 0378 ÷ # ÷ [0.2] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 0915 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0915 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0915 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [5.0] (CR) ÷ [0.3] +÷ 0915 × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 0915 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [5.0] (LF) ÷ [0.3] +÷ 0915 × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 0915 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [5.0] (Control) ÷ [0.3] +÷ 0915 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI LETTER KA 
(ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 0915 × 034F ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0915 × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 0915 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0915 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 0915 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0915 × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 0915 × 0A03 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0915 × 0308 × 0A03 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0915 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0915 × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0915 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0915 × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0915 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0915 × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0915 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0915 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0915 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0915 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0915 × 0900 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0915 × 0308 × 0900 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0915 × 0903 ÷ # ÷ [0.2] DEVANAGARI LETTER KA 
(ConjunctLinkingScripts_LinkingConsonant) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 0915 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 0915 ÷ 0904 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 0915 × 0308 ÷ 0904 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 0915 ÷ 0D4E ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0915 × 0308 ÷ 0D4E ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0915 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0915 × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0915 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0915 × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 0915 × 0300 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0915 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0915 × 093C ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 0915 × 0308 × 093C ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 0915 × 094D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 0915 × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 0915 × 200D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0915 × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 0915 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] (Other) ÷ [0.3] +÷ 0915 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] COMBINING DIAERESIS 
(Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] ÷ 231A ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 231A × 0308 ÷ 0020 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 231A ÷ 000D ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [5.0] (CR) ÷ [0.3] @@ -478,8 +900,8 @@ ÷ 231A × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] ÷ 231A ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] ÷ 231A × 0308 ÷ 0600 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] -÷ 231A × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 231A × 0308 × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 231A × 0A03 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 231A × 0308 × 0A03 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 231A ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 231A × 0308 ÷ 1100 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 231A ÷ 1160 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] @@ -490,10 +912,24 @@ ÷ 231A × 0308 ÷ AC00 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 231A ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 231A × 0308 ÷ AC01 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 231A × 0900 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 231A × 0308 × 0900 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 231A × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 231A × 0308 × 0903 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 231A ÷ 0904 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 231A × 0308 ÷ 0904 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 231A ÷ 0D4E ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 231A × 0308 ÷ 0D4E ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 231A ÷ 0915 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 231A × 0308 ÷ 0915 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] ÷ 231A ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 231A × 0308 ÷ 231A ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS 
(Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 231A × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] ÷ 231A × 0308 × 0300 ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 231A × 093C ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 231A × 0308 × 093C ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 231A × 094D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 231A × 0308 × 094D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] ÷ 231A × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 231A × 0308 × 200D ÷ # ÷ [0.2] WATCH (ExtPict) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 231A ÷ 0378 ÷ # ÷ [0.2] WATCH (ExtPict) ÷ [999.0] (Other) ÷ [0.3] @@ -512,8 +948,8 @@ ÷ 0300 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] ÷ 0300 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] ÷ 0300 × 0308 ÷ 0600 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] -÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0300 × 0A03 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0300 × 0308 × 0A03 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 0300 × 0308 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 0300 ÷ 1160 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] @@ -524,14 +960,124 @@ ÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0300 × 0900 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0300 × 0308 × 0900 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0300 
× 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 0300 ÷ 0904 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 0300 × 0308 ÷ 0904 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 0300 ÷ 0D4E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0300 × 0308 ÷ 0D4E ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0300 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0300 × 0308 ÷ 0915 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] ÷ 0300 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 0300 × 0308 ÷ 231A ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] ÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0300 × 093C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 0300 × 0308 × 093C ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 0300 × 094D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 0300 × 0308 × 094D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] ÷ 0300 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 0300 × 0308 × 200D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] ÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 093C ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 093C × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 093C ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA 
(Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 093C × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 093C ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 093C × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 093C ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 093C × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 093C × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 093C × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 093C ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 093C × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 093C ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 093C × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 093C × 0A03 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 093C × 0308 × 0A03 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 093C ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 093C × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 093C ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 093C × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 093C ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 093C × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 093C ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 093C × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 093C ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ 
[999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 093C × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 093C × 0900 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 093C × 0308 × 0900 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 093C × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 093C × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 093C ÷ 0904 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 093C × 0308 ÷ 0904 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 093C ÷ 0D4E ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 093C × 0308 ÷ 0D4E ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 093C ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 093C × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 093C ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 093C × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 093C × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 093C × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 093C × 093C ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 093C × 0308 × 093C ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 093C × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 093C × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS 
(Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 093C × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 093C × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 093C ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 093C × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 094D ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 094D × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 094D ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 094D × 0308 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] +÷ 094D ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 094D × 0308 ÷ 000A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (LF) ÷ [0.3] +÷ 094D ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 094D × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [5.0] (Control) ÷ [0.3] +÷ 094D × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 094D × 0308 × 034F ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAPHEME JOINER (Extend) ÷ [0.3] +÷ 094D ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 094D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] +÷ 094D ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 094D × 0308 ÷ 0600 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] +÷ 094D × 0A03 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 094D × 0308 × 0A03 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 094D ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA 
(Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 094D × 0308 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 094D ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 094D × 0308 ÷ 1160 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 094D ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 094D × 0308 ÷ 11A8 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 094D ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 094D × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 094D ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 094D × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 094D × 0900 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 094D × 0308 × 0900 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 094D × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 094D × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 094D ÷ 0904 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 094D × 0308 ÷ 0904 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 094D ÷ 0D4E ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 094D × 0308 ÷ 0D4E ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 094D ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI 
LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 094D × 0308 ÷ 0915 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 094D ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 094D × 0308 ÷ 231A ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] +÷ 094D × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 094D × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 094D × 093C ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 094D × 0308 × 093C ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 094D × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 094D × 0308 × 094D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 094D × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 094D × 0308 × 200D ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] +÷ 094D ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] +÷ 094D × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] ÷ 200D ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 200D × 0308 ÷ 0020 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 200D ÷ 000D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [5.0] (CR) ÷ [0.3] @@ -546,8 +1092,8 @@ ÷ 200D × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] ÷ 200D ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] ÷ 200D × 0308 ÷ 0600 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] -÷ 200D × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 200D × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER 
(ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 200D × 0A03 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 200D × 0308 × 0A03 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 200D ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 200D × 0308 ÷ 1100 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 200D ÷ 1160 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] @@ -558,10 +1104,24 @@ ÷ 200D × 0308 ÷ AC00 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 200D ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 200D × 0308 ÷ AC01 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 200D × 0900 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 200D × 0308 × 0900 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 200D × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 200D × 0308 × 0903 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 200D ÷ 0904 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 200D × 0308 ÷ 0904 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 200D ÷ 0D4E ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 200D × 0308 ÷ 0D4E ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 200D ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 200D × 0308 ÷ 0915 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] ÷ 200D ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 200D × 0308 ÷ 231A ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 200D × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] ÷ 200D × 0308 × 0300 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 200D × 093C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 200D × 0308 
× 093C ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 200D × 094D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 200D × 0308 × 094D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] ÷ 200D × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 200D × 0308 × 200D ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 200D ÷ 0378 ÷ # ÷ [0.2] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] (Other) ÷ [0.3] @@ -580,8 +1140,8 @@ ÷ 0378 × 0308 ÷ 1F1E6 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) ÷ [0.3] ÷ 0378 ÷ 0600 ÷ # ÷ [0.2] (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] ÷ 0378 × 0308 ÷ 0600 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) ÷ [0.3] -÷ 0378 × 0903 ÷ # ÷ [0.2] (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0378 × 0A03 ÷ # ÷ [0.2] (Other) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0378 × 0308 × 0A03 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] GURMUKHI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0378 ÷ 1100 ÷ # ÷ [0.2] (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] ÷ 0378 ÷ 1160 ÷ # ÷ [0.2] (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] @@ -592,10 +1152,24 @@ ÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0378 ÷ AC01 ÷ # ÷ [0.2] (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0378 × 0900 ÷ # ÷ [0.2] (Other) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0378 × 0308 × 0900 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN INVERTED CANDRABINDU (Extend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0378 × 0903 ÷ # ÷ [0.2] (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [0.3] +÷ 0378 ÷ 0904 ÷ # ÷ [0.2] (Other) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 0378 × 0308 ÷ 0904 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER SHORT A (ConjunctLinkingScripts) ÷ [0.3] +÷ 0378 ÷ 0D4E ÷ # ÷ [0.2] (Other) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0378 × 0308 ÷ 0D4E ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] MALAYALAM LETTER DOT REPH (Prepend_ConjunctLinkingScripts) ÷ [0.3] +÷ 0378 ÷ 0915 ÷ # ÷ [0.2] (Other) 
÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0378 × 0308 ÷ 0915 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] ÷ 0378 ÷ 231A ÷ # ÷ [0.2] (Other) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 0378 × 0308 ÷ 231A ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] WATCH (ExtPict) ÷ [0.3] ÷ 0378 × 0300 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] ÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] COMBINING GRAVE ACCENT (Extend_ExtCccZwj) ÷ [0.3] +÷ 0378 × 093C ÷ # ÷ [0.2] (Other) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 0378 × 0308 × 093C ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) ÷ [0.3] +÷ 0378 × 094D ÷ # ÷ [0.2] (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] +÷ 0378 × 0308 × 094D ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [0.3] ÷ 0378 × 200D ÷ # ÷ [0.2] (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 0378 × 0308 × 200D ÷ # ÷ [0.2] (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 0378 ÷ 0378 ÷ # ÷ [0.2] (Other) ÷ [999.0] (Other) ÷ [0.3] @@ -614,7 +1188,7 @@ ÷ 0061 ÷ 1F1E6 × 1F1E7 ÷ 1F1E8 × 1F1E9 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER B (RI) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER C (RI) × [13.0] REGIONAL INDICATOR SYMBOL LETTER D (RI) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] ÷ 0061 × 200D ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [0.3] ÷ 0061 × 0308 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] COMBINING DIAERESIS (Extend_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] -÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] +÷ 0061 × 0903 ÷ 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark_ConjunctLinkingScripts) ÷ [999.0] LATIN SMALL LETTER B (Other) ÷ [0.3] ÷ 0061 ÷ 0600 × 0062 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) ÷ [999.0] ARABIC NUMBER SIGN (Prepend) × [9.2] LATIN SMALL LETTER B (Other) ÷ [0.3] ÷ 1F476 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] BABY (ExtPict) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3] ÷ 0061 × 1F3FF ÷ 1F476 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (Extend) ÷ [999.0] BABY (ExtPict) ÷ [0.3] @@ -624,7 +1198,18 @@ ÷ 0061 × 200D ÷ 1F6D1 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] OCTAGONAL SIGN (ExtPict) ÷ [0.3] ÷ 2701 × 200D × 2701 ÷ # ÷ [0.2] UPPER BLADE SCISSORS (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [11.0] UPPER BLADE SCISSORS (Other) ÷ [0.3] ÷ 0061 × 200D ÷ 2701 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) ÷ [999.0] UPPER BLADE SCISSORS (Other) ÷ [0.3] +÷ 0915 ÷ 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) ÷ [999.0] DEVANAGARI LETTER TA 
(ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0915 × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0915 × 094D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0915 × 094D × 200D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0915 × 093C × 200D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0915 × 093C × 094D × 200D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN NUKTA (Extend_ConjunctLinkingScripts_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] ZERO WIDTH JOINER (ZWJ_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0915 × 094D × 0924 × 094D × 092F ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER YA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0915 × 094D ÷ 0061 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] LATIN SMALL LETTER A (Other) ÷ [0.3] +÷ 0061 × 094D ÷ 0924 ÷ # ÷ [0.2] LATIN SMALL LETTER A (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 003F × 094D ÷ 0924 ÷ # ÷ [0.2] QUESTION MARK (Other) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) ÷ [999.0] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] +÷ 0915 × 094D × 094D × 0924 ÷ # ÷ [0.2] DEVANAGARI LETTER KA (ConjunctLinkingScripts_LinkingConsonant) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.0] DEVANAGARI SIGN VIRAMA (Extend_ConjunctLinkingScripts_ConjunctLinker_ExtCccZwj) × [9.3] DEVANAGARI LETTER TA (ConjunctLinkingScripts_LinkingConsonant) ÷ [0.3] # -# Lines: 602 +# Lines: 1187 # # EOF diff --git a/libcxx/utils/data/unicode/emoji-data.txt b/libcxx/utils/data/unicode/emoji-data.txt index 7942fc89a3559..0ba10e9ce4c9a 100644 --- a/libcxx/utils/data/unicode/emoji-data.txt +++ b/libcxx/utils/data/unicode/emoji-data.txt @@ -1,16 +1,16 @@ # 
emoji-data.txt -# Date: 2022-08-02, 00:26:10 GMT -# © 2022 Unicode®, Inc. +# Date: 2023-02-01, 02:22:54 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # # Emoji Data for UTS #51 -# Used with Emoji Version 15.0 and subsequent minor revisions (if any) +# Used with Emoji Version 15.1 and subsequent minor revisions (if any) # # For documentation and usage, see https://www.unicode.org/reports/tr51 # -# Format: -# <codepoint(s)> ; <property> # <comments> +# Format: +# <code_point(s)> ; <property> # <comments> # Note: there is no guarantee as to the structure of whitespace or comments # # Characters and sequences are listed in code point order. Users should be shown a more natural order. diff --git a/libcxx/utils/generate_extended_grapheme_cluster_table.py b/libcxx/utils/generate_extended_grapheme_cluster_table.py index 6a598399ce470..76d1e78e9239c 100755 --- a/libcxx/utils/generate_extended_grapheme_cluster_table.py +++ b/libcxx/utils/generate_extended_grapheme_cluster_table.py @@ -289,25 +289,16 @@ def generate_cpp_data(prop_name: str, ranges: list[PropertyRange]) -> str: def generate_data_tables() -> str: """ Generate Unicode data for inclusion into <format> from - GraphemeBreakProperty.txt and emoji-data.txt. + - https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt + - https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt + - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt - GraphemeBreakProperty.txt can be found at - https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt - - emoji-data.txt can be found at - https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt - - Both files are expected to be in the same directory as this script. + These files are expected to be stored in the same directory as this script. """ - gbp_data_path = ( - Path(__file__).absolute().parent - / "data" - / "unicode" - / "GraphemeBreakProperty.txt" - ) - emoji_data_path = ( - Path(__file__).absolute().parent / "data" / "unicode" / "emoji-data.txt" - ) + root = Path(__file__).absolute().parent / "data" / "unicode" + gbp_data_path = root / "GraphemeBreakProperty.txt" + emoji_data_path = root / "emoji-data.txt" + gbp_ranges = list() emoji_ranges = list() with gbp_data_path.open(encoding="utf-8") as f: diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 759e49055be5c..f2b8d55c0e11b 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -211,10 +211,9 @@ def add_version_header(tc): "name": "__cpp_lib_bind_back", "values": { "c++23": 202202, - "c++26": 202306, # P2714R1 Bind front and back to NTTP callables + # "c++26": 202306, # P2714R1 Bind front and back to NTTP callables }, "headers": ["functional"], - "unimplemented": True, }, { "name": "__cpp_lib_bind_front", diff --git a/libcxx/utils/generate_indic_conjunct_break_table.py b/libcxx/utils/generate_indic_conjunct_break_table.py new file mode 100755 index 0000000000000..762dfa73b51f7 --- /dev/null +++ b/libcxx/utils/generate_indic_conjunct_break_table.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +# The code is based on +# https://github.com/microsoft/STL/blob/main/tools/unicode_properties_parse/grapheme_break_property_data_gen.py +# +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from io import StringIO +from pathlib import Path +from dataclasses import dataclass +from typing import Optional +import re +import sys + + +@dataclass +class PropertyRange: + lower: int = -1 + upper: int = -1 + prop: str = None + + +@dataclass +class Entry: + lower: int = -1 + offset: int = -1 + prop: int = -1 + + +LINE_REGEX = re.compile( + r"^(?P<lower>[0-9A-F]{4,5})(?:\.\.(?P<upper>[0-9A-F]{4,5}))?\s*;\s*InCB;\s*(?P<prop>\w+)" +) + +def parsePropertyLine(inputLine: str) -> Optional[PropertyRange]: + result = PropertyRange() + if m := LINE_REGEX.match(inputLine): + lower_str, upper_str, result.prop = m.group("lower", "upper", "prop") + result.lower = int(lower_str, base=16) + result.upper = result.lower + if upper_str is not None: + result.upper = int(upper_str, base=16) + return result + + else: + return None
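+ +# For example (illustrative), a DerivedCoreProperties.txt line such as +#   "0915..0939  ; InCB; Consonant # ..." +# parses to PropertyRange(lower=0x915, upper=0x939, prop="Consonant").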
+ + +def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]: + """ + Merges consecutive ranges with the same property to one range. + + Merging the ranges results in fewer ranges in the output table, + reducing binary size and improving lookup performance. + """ + result = list() + for x in input: + if ( + len(result) + and result[-1].prop == x.prop + and result[-1].upper + 1 == x.lower + ): + result[-1].upper = x.upper + continue + result.append(x) + return result + + +PROP_VALUE_ENUMERATOR_TEMPLATE = " __{}" +PROP_VALUE_ENUM_TEMPLATE = """ +enum class __property : uint8_t {{ + // Values generated from the data files. +{enumerators}, + + // The code unit has none of above properties. + __none +}}; +""" + +DATA_ARRAY_TEMPLATE = """ +/// The entries of the indic conjunct break property table. +/// +/// The data is generated from +/// - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt +/// +/// The data has 3 values +/// - bits [0, 1] The property. One of the values generated from the datafiles +/// of \\ref __property +/// - bits [2, 10] The size of the range. +/// - bits [11, 31] The lower bound code point of the range. The upper bound of +/// the range is lower bound + size. +/// +/// The 9 bits for the size allow a maximum range of 512 elements. Some ranges +/// in the Unicode tables are larger. They are stored in multiple consecutive +/// ranges in the data table. An alternative would be to store the sizes in a +/// separate 16-bit value. The original MSVC STL code had such an approach, but +/// this approach uses less space for the data and is about 4% faster in the +/// following benchmark. +/// libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp +// clang-format off +_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[{size}] = {{ +{entries}}}; +// clang-format on + +/// Returns the indic conjunct break property of a code point. +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __property __get_property(const char32_t __code_point) noexcept {{ + // The algorithm searches for the upper bound of the range and, when found, + // steps back one entry. This algorithm is used since the code point can be + // anywhere in the range. After a lower bound is found the next step is to + // compare whether the code unit is indeed in the range. + // + // Since the entry contains a code unit, size, and property the code point + // being sought needs to be adjusted. Just shifting the code point to the + // proper position doesn't work; suppose an entry has property 0, size 1, + // and lower bound 3. This results in the entry 0x1804. + // When searching for code point 3 it will search for 0x1800, find 0x1804, + // and move to the previous entry. Thus the lower bound value will never + // be found. + // The simple solution is to set the bits belonging to the property and + // size. Then the upper bound for code point 3 will return the entry after + // 0x1804. After moving to the previous entry the algorithm arrives at the + // correct entry. + ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 11) | 0x7ffu) - __entries; + if (__i == 0) + return __property::__none; + + --__i; + uint32_t __upper_bound = (__entries[__i] >> 11) + ((__entries[__i] >> 2) & 0b1'1111'1111); + if (__code_point <= __upper_bound) + return static_cast<__property>(__entries[__i] & 0b11); + + return __property::__none; +}} +"""
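+ +# For example (illustrative), an entry covering the range [0x0900, 0x0902] +# with property value 1 packs to (0x900 << 11) | ((0x902 - 0x900) << 2) | 1, +# i.e. 0x00480009, and the lookup key for code point 0x0901 is +# (0x901 << 11) | 0x7ff.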
+ +MSVC_FORMAT_UCD_TABLES_HPP_TEMPLATE = """ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// WARNING, this entire header is generated by +// utils/generate_indic_conjunct_break_table.py +// DO NOT MODIFY! + +// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE +// +// See Terms of Use <https://www.unicode.org/copyright.html> +// for definitions of Unicode Inc.'s Data Files and Software. +// +// NOTICE TO USER: Carefully read the following legal agreement. +// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +// TERMS AND CONDITIONS OF THIS AGREEMENT. +// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +// THE DATA FILES OR SOFTWARE. +// +// COPYRIGHT AND PERMISSION NOTICE +// +// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved. +// Distributed under the Terms of Use in https://www.unicode.org/copyright.html. +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of the Unicode data files and any associated documentation +// (the "Data Files") or Unicode software and any associated documentation +// (the "Software") to deal in the Data Files or Software +// without restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, and/or sell copies of +// the Data Files or Software, and to permit persons to whom the Data Files +// or Software are furnished to do so, provided that either +// (a) this copyright and permission notice appear with all copies +// of the Data Files or Software, or +// (b) this copyright and permission notice appear in associated +// Documentation. +// +// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT OF THIRD PARTY RIGHTS. +// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +// PERFORMANCE OF THE DATA FILES OR SOFTWARE. +// +// Except as contained in this notice, the name of a copyright holder +// shall not be used in advertising or otherwise to promote the sale, +// use or other dealings in these Data Files or Software without prior +// written authorization of the copyright holder. + +#ifndef _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H +#define _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H + +#include <__algorithm/ranges_upper_bound.h> +#include <__config> +#include <__iterator/access.h> +#include <cstddef> +#include <cstdint> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 20 + +namespace __indic_conjunct_break {{ +{content} +}} // namespace __indic_conjunct_break + +#endif //_LIBCPP_STD_VER >= 20 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FORMAT_INDIC_CONJUNCT_BREAK_TABLE_H""" + + +def property_ranges_to_table( + ranges: list[PropertyRange], props: list[str] +) -> list[Entry]: + assert len(props) < 4 + result = list[Entry]() + high = -1 + for range in sorted(ranges, key=lambda x: x.lower): + # Validate overlapping ranges + assert range.lower > high + high = range.upper + + while True: + e = Entry(range.lower, range.upper - range.lower, props.index(range.prop)) + if e.offset <= 511: + result.append(e) + break + e.offset = 511 + result.append(e) + range.lower += 512 + return result + + +cpp_entrytemplate = " 0x{:08x}" + + +def generate_cpp_data(prop_name: str, ranges: list[PropertyRange]) -> str: + result = StringIO() + prop_values = sorted(set(x.prop for x in ranges)) + table = property_ranges_to_table(ranges, prop_values) + enumerator_values = [PROP_VALUE_ENUMERATOR_TEMPLATE.format(x) for x in prop_values] + result.write( + PROP_VALUE_ENUM_TEMPLATE.format(enumerators=",\n".join(enumerator_values)) + ) + result.write( + DATA_ARRAY_TEMPLATE.format( + prop_name=prop_name, + size=len(table), + entries=",\n".join( + [ + cpp_entrytemplate.format(x.lower << 11 | x.offset << 2 | x.prop) + for x in table + ] + ), + ) + ) + + return result.getvalue() + + +def generate_data_tables() -> str: + """ + Generate Unicode data for inclusion into <format> from + - https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt + + These files are expected to be stored in the same directory as this script.
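+ +    Only lines of the form '<code point(s)> ; InCB; <value>' are parsed; in +    Unicode 15.1 the InCB property values are Consonant, Extend, and Linker.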
+ """ + root = Path(__file__).absolute().parent / "data" / "unicode" + derived_core_path = root / "DerivedCoreProperties.txt" + + indic_conjunct_break = list() + with derived_core_path.open(encoding="utf-8") as f: + indic_conjunct_break_ranges = compactPropertyRanges( + [x for line in f if (x := parsePropertyLine(line))] + ) + + indic_conjunct_break_data = generate_cpp_data("Grapheme_Break", indic_conjunct_break_ranges) + return "\n".join([indic_conjunct_break_data]) + + +if __name__ == "__main__": + if len(sys.argv) == 2: + sys.stdout = open(sys.argv[1], "w") + print( + MSVC_FORMAT_UCD_TABLES_HPP_TEMPLATE.lstrip().format( + content=generate_data_tables() + ) + ) diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index 0793c34fd7f0b..6ff16309546ba 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -286,6 +286,12 @@ def _getAndroidDeviceApi(cfg): # Avoid building on platforms that don't support modules properly. or not hasCompileFlag(cfg, "-Wno-reserved-module-identifier"), ), + # The time zone validation tests compare the output of zdump against the + # output generated by 's time zone support. + Feature( + name="has-no-zdump", + when=lambda cfg: runScriptExitCode(cfg, ["zdump --version"]) != 0, + ), ] # Deduce and add the test features that that are implied by the #defines in diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index 35e4c68dcda6c..c0739b37aeb0f 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -1726,15 +1726,15 @@ void PDBLinker::commit(codeview::GUID *guid) { } } -static uint32_t getSecrelReloc(llvm::COFF::MachineTypes machine) { - switch (machine) { - case AMD64: +static uint32_t getSecrelReloc(Triple::ArchType arch) { + switch (arch) { + case Triple::x86_64: return COFF::IMAGE_REL_AMD64_SECREL; - case I386: + case Triple::x86: return COFF::IMAGE_REL_I386_SECREL; - case ARMNT: + case Triple::thumb: return COFF::IMAGE_REL_ARM_SECREL; - case ARM64: + case Triple::aarch64: return COFF::IMAGE_REL_ARM64_SECREL; default: llvm_unreachable("unknown machine type"); @@ -1752,7 +1752,7 @@ static bool findLineTable(const SectionChunk *c, uint32_t addr, DebugLinesSubsectionRef &lines, uint32_t &offsetInLinetable) { ExitOnError exitOnErr; - const uint32_t secrelReloc = getSecrelReloc(c->file->ctx.config.machine); + const uint32_t secrelReloc = getSecrelReloc(c->getArch()); for (SectionChunk *dbgC : c->file->getDebugChunks()) { if (dbgC->getSectionName() != ".debug$S") diff --git a/lld/ELF/DWARF.h b/lld/ELF/DWARF.h index d56895277bcc9..ada38a043bc22 100644 --- a/lld/ELF/DWARF.h +++ b/lld/ELF/DWARF.h @@ -11,6 +11,7 @@ #include "InputFiles.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Object/ELF.h" #include @@ -36,34 +37,28 @@ template class LLDDwarfObj final : public llvm::DWARFObject { return cast(infoSection.sec); } + const llvm::DWARFSection &getAddrSection() const override { + return addrSection; + } + const llvm::DWARFSection &getLineSection() const override { + return lineSection; + } const llvm::DWARFSection &getLoclistsSection() const override { return loclistsSection; } - const llvm::DWARFSection &getRangesSection() const override { return rangesSection; } - const llvm::DWARFSection &getRnglistsSection() const override { return rnglistsSection; } - const llvm::DWARFSection &getStrOffsetsSection() const override { return strOffsetsSection; } - const llvm::DWARFSection &getLineSection() const override 
{ - return lineSection; - } - - const llvm::DWARFSection &getAddrSection() const override { - return addrSection; - } - const LLDDWARFSection &getGnuPubnamesSection() const override { return gnuPubnamesSection; } - const LLDDWARFSection &getGnuPubtypesSection() const override { return gnuPubtypesSection; } @@ -86,18 +81,18 @@ template <class ELFT> class LLDDwarfObj final : public llvm::DWARFObject { uint64_t pos, ArrayRef<RelTy> rels) const; + LLDDWARFSection addrSection; LLDDWARFSection gnuPubnamesSection; LLDDWARFSection gnuPubtypesSection; LLDDWARFSection infoSection; + LLDDWARFSection lineSection; LLDDWARFSection loclistsSection; LLDDWARFSection rangesSection; LLDDWARFSection rnglistsSection; LLDDWARFSection strOffsetsSection; - LLDDWARFSection lineSection; - LLDDWARFSection addrSection; StringRef abbrevSection; - StringRef strSection; StringRef lineStrSection; + StringRef strSection; }; } // namespace lld::elf diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 8dbff7fb86e76..86cc09621a912 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2767,13 +2767,6 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { // Create dynamic sections for dynamic linking and static PIE. config->hasDynSymTab = !ctx.sharedFiles.empty() || config->isPic; - script->addScriptReferencedSymbolsToSymTable(); - - // Prevent LTO from removing any definition referenced by -u. - for (StringRef name : config->undefined) - if (Defined *sym = dyn_cast_or_null<Defined>(symtab.find(name))) - sym->isUsedInRegularObj = true; - // If an entry symbol is in a static archive, pull out that file now. if (Symbol *sym = symtab.find(config->entry)) handleUndefined(sym, "--entry"); @@ -2782,6 +2775,16 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) { for (StringRef pat : args::getStrings(args, OPT_undefined_glob)) handleUndefinedGlob(pat); + // After potential archive member extraction involving ENTRY and + // -u/--undefined-glob, check whether PROVIDE symbols should be defined (the + // RHS may refer to definitions in just-extracted object files). + script->addScriptReferencedSymbolsToSymTable(); + + // Prevent LTO from removing any definition referenced by -u. + for (StringRef name : config->undefined) + if (Defined *sym = dyn_cast_or_null<Defined>(symtab.find(name))) + sym->isUsedInRegularObj = true; + // Mark -init and -fini symbols so that the LTO doesn't eliminate them. if (Symbol *sym = dyn_cast_or_null<Defined>(symtab.find(config->init))) sym->isUsedInRegularObj = true; diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 4427a12008680..550659464a440 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2872,7 +2872,8 @@ createSymbols( } // Returns a newly-created .gdb_index section. -template <class ELFT> GdbIndexSection *GdbIndexSection::create() { +template <class ELFT> +std::unique_ptr<GdbIndexSection> GdbIndexSection::create() { llvm::TimeTraceScope timeScope("Create gdb index"); // Collect InputFiles with .debug_info.
See the comment in @@ -2918,7 +2919,7 @@ template <class ELFT> GdbIndexSection *GdbIndexSection::create() { nameAttrs[i] = readPubNamesAndTypes(dobj, chunks[i].compilationUnits); }); - auto *ret = make<GdbIndexSection>(); + auto ret = std::make_unique<GdbIndexSection>(); ret->chunks = std::move(chunks); std::tie(ret->symbols, ret->size) = createSymbols<ELFT>(nameAttrs, ret->chunks); @@ -3860,6 +3861,7 @@ void InStruct::reset() { ppc32Got2.reset(); ibtPlt.reset(); relaPlt.reset(); + gdbIndex.reset(); shStrTab.reset(); strTab.reset(); symTab.reset(); @@ -3986,10 +3988,10 @@ InStruct elf::in; std::vector<Partition> elf::partitions; Partition *elf::mainPart; -template GdbIndexSection *GdbIndexSection::create<ELF32LE>(); -template GdbIndexSection *GdbIndexSection::create<ELF32BE>(); -template GdbIndexSection *GdbIndexSection::create<ELF64LE>(); -template GdbIndexSection *GdbIndexSection::create<ELF64BE>(); +template std::unique_ptr<GdbIndexSection> GdbIndexSection::create<ELF32LE>(); +template std::unique_ptr<GdbIndexSection> GdbIndexSection::create<ELF32BE>(); +template std::unique_ptr<GdbIndexSection> GdbIndexSection::create<ELF64LE>(); +template std::unique_ptr<GdbIndexSection> GdbIndexSection::create<ELF64BE>(); template void elf::splitSections<ELF32LE>(); template void elf::splitSections<ELF32BE>(); diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 02a669b01d152..68b4cdb1dde04 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -25,6 +25,7 @@ #include "Symbols.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Compiler.h" @@ -821,7 +822,7 @@ class GdbIndexSection final : public SyntheticSection { }; GdbIndexSection(); - template <class ELFT> static GdbIndexSection *create(); + template <class ELFT> static std::unique_ptr<GdbIndexSection> create(); void writeTo(uint8_t *buf) override; size_t getSize() const override { return size; } bool isNeeded() const override; @@ -1358,6 +1359,8 @@ struct InStruct { std::unique_ptr<PPC32Got2Section> ppc32Got2; std::unique_ptr<IBTPltSection> ibtPlt; std::unique_ptr<RelocationBaseSection> relaPlt; + // Non-SHF_ALLOC sections + std::unique_ptr<GdbIndexSection> gdbIndex; std::unique_ptr<StringTableSection> shStrTab; std::unique_ptr<StringTableSection> strTab; std::unique_ptr<SymbolTableBaseSection> symTab; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index fc9084f40044d..021b9bb0d5e22 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -541,9 +541,6 @@ template <class ELFT> void elf::createSyntheticSections() { in.got->hasGotOffRel = true; } - if (config->gdbIndex) - add(*GdbIndexSection::create<ELFT>()); - // We always need to add rel[a].plt to output if it has entries. // Even for static linking it can contain R_[*]_IRELATIVE relocations. in.relaPlt = std::make_unique<RelocationSection<ELFT>>( @@ -568,6 +565,11 @@ template <class ELFT> void elf::createSyntheticSections() { if (config->andFeatures || !ctx.aarch64PauthAbiCoreInfo.empty()) add(*make<GnuPropertySection>()); + if (config->gdbIndex) { + in.gdbIndex = GdbIndexSection::create<ELFT>(); + add(*in.gdbIndex); + } + // .note.GNU-stack is always added when we are creating a re-linkable // object file.
Other linkers are using the presence of this marker // section to control the executable-ness of the stack area, but that diff --git a/lld/MachO/DriverUtils.cpp b/lld/MachO/DriverUtils.cpp index d6f18ecb85b8a..077a639bf7ab1 100644 --- a/lld/MachO/DriverUtils.cpp +++ b/lld/MachO/DriverUtils.cpp @@ -45,11 +45,21 @@ using namespace lld::macho; // Create table mapping all options defined in Options.td static constexpr OptTable::Info optInfo[] = { #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES) \ - {PREFIX, NAME, HELPTEXT, \ - METAVAR, OPT_##ID, opt::Option::KIND##Class, \ - PARAM, FLAGS, VISIBILITY, \ - OPT_##GROUP, OPT_##ALIAS, ALIASARGS, \ + VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, \ + VALUES) \ + {PREFIX, \ + NAME, \ + HELPTEXT, \ + HELPTEXTSFORVARIANTS, \ + METAVAR, \ + OPT_##ID, \ + opt::Option::KIND##Class, \ + PARAM, \ + FLAGS, \ + VISIBILITY, \ + OPT_##GROUP, \ + OPT_##ALIAS, \ + ALIASARGS, \ VALUES}, #include "Options.inc" #undef OPTION diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp index bb08c77b2e11d..0d55d5b3672a4 100644 --- a/lld/MinGW/Driver.cpp +++ b/lld/MinGW/Driver.cpp @@ -69,11 +69,21 @@ enum { // Create table mapping all options defined in Options.td static constexpr opt::OptTable::Info infoTable[] = { #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES) \ - {PREFIX, NAME, HELPTEXT, \ - METAVAR, OPT_##ID, opt::Option::KIND##Class, \ - PARAM, FLAGS, VISIBILITY, \ - OPT_##GROUP, OPT_##ALIAS, ALIASARGS, \ + VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, \ + VALUES) \ + {PREFIX, \ + NAME, \ + HELPTEXT, \ + HELPTEXTSFORVARIANTS, \ + METAVAR, \ + OPT_##ID, \ + opt::Option::KIND##Class, \ + PARAM, \ + FLAGS, \ + VISIBILITY, \ + OPT_##GROUP, \ + OPT_##ALIAS, \ + ALIASARGS, \ VALUES}, #include "Options.inc" #undef OPTION diff --git a/lld/test/COFF/undefined-symbol-cv.s b/lld/test/COFF/undefined-symbol-cv.s index 08a85826b9f96..502f28044c17f 100644 --- a/lld/test/COFF/undefined-symbol-cv.s +++ b/lld/test/COFF/undefined-symbol-cv.s @@ -1,6 +1,14 @@ -# REQUIRES: x86 -# RUN: llvm-mc -triple=x86_64-windows-msvc -filetype=obj -o %t.obj %s -# RUN: not lld-link /out:%t.exe %t.obj 2>&1 | FileCheck %s +# REQUIRES: aarch64, x86 +# RUN: split-file %s %t.dir && cd %t.dir + +# RUN: llvm-mc -triple=x86_64-windows-msvc -filetype=obj -o test-x86_64.obj test-x86_64.s +# RUN: llvm-mc -triple=aarch64-windows-msvc -filetype=obj -o test-aarch64.obj test-aarch64.s +# RUN: llvm-mc -triple=arm64ec-windows-msvc -filetype=obj -o test-arm64ec.obj test-aarch64.s + +# RUN: not lld-link -out:test-x86_64.exe test-x86_64.obj 2>&1 | FileCheck %s +# RUN: not lld-link -out:test-aarch64.exe test-aarch64.obj 2>&1 | FileCheck %s +# RUN: not lld-link -out:test-arm64ec.exe -machine:arm64ec test-arm64ec.obj 2>&1 | FileCheck %s +# RUN: not lld-link -out:test-arm64ec2.exe -machine:arm64ec test-x86_64.obj 2>&1 | FileCheck %s # CHECK: error: undefined symbol: int __cdecl foo(void) # CHECK-NEXT: >>> referenced by file1.cpp:1 @@ -18,6 +26,7 @@ # CHECK-NEXT: >>> referenced by file1.cpp:5 # CHECK-NEXT: >>> {{.*}}.obj:(f2) +#--- test-x86_64.s .cv_file 1 "file1.cpp" "EDA15C78BB573E49E685D8549286F33C" 1 .cv_file 2 "file2.cpp" "EDA15C78BB573E49E685D8549286F33D" 1 @@ -60,3 +69,47 @@ f2: .long 4 .cv_filechecksums .cv_stringtable + +#--- test-aarch64.s + .cv_file 1 "file1.cpp" "EDA15C78BB573E49E685D8549286F33C" 1 + .cv_file 2 "file2.cpp" 
"EDA15C78BB573E49E685D8549286F33D" 1 + + .section .text,"xr",one_only,main +.globl main +main: + .cv_func_id 0 + .cv_loc 0 1 1 0 is_stmt 0 + bl "?foo@@YAHXZ" + .cv_loc 0 1 2 0 + bl "?foo@@YAHXZ" + .cv_loc 0 2 3 0 + b "?bar@@YAHXZ" +.Lfunc_end0: + +f1: + .cv_func_id 1 + .cv_loc 1 1 4 0 is_stmt 0 + bl "?bar@@YAHXZ" +.Lfunc_end1: + + .section .text,"xr",one_only,f2 +.globl f2 +f2: + .cv_func_id 2 + .cv_loc 2 1 5 0 is_stmt 0 + bl "?baz@@YAHXZ" +.Lfunc_end2: + + .section .debug$S,"dr",associative,main + .long 4 + .cv_linetable 0, main, .Lfunc_end0 + .cv_linetable 1, f1, .Lfunc_end1 + + .section .debug$S,"dr",associative,f2 + .long 4 + .cv_linetable 2, f2, .Lfunc_end2 + + .section .debug$S,"dr" + .long 4 + .cv_filechecksums + .cv_stringtable diff --git a/lld/test/ELF/aarch64-reloc-implicit-addend.test b/lld/test/ELF/aarch64-reloc-implicit-addend.test new file mode 100644 index 0000000000000..15f42c4d87b57 --- /dev/null +++ b/lld/test/ELF/aarch64-reloc-implicit-addend.test @@ -0,0 +1,86 @@ +## Test certain REL relocation types generated by legacy armasm. +# RUN: yaml2obj %s -o %t.o +# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s + +# CHECK-COUNT-17: internal linker error: cannot read addend + +--- +!ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_AARCH64 +Sections: + - Name: .abs + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Content: fffffefffffffdfffffffffffffffcffffffffffffff + - Name: .rel.abs + Type: SHT_REL + Link: .symtab + Info: .abs + Relocations: + - {Offset: 0, Symbol: abs, Type: R_AARCH64_ABS16} + - {Offset: 2, Symbol: abs, Type: R_AARCH64_ABS32} + - {Offset: 6, Symbol: abs, Type: R_AARCH64_ABS64} + - {Offset: 14, Symbol: abs, Type: R_AARCH64_ADD_ABS_LO12_NC} + + - Name: .uabs + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + AddressAlign: 4 + Content: 00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff + - Name: .rel.uabs + Type: SHT_REL + Link: .symtab + Info: .uabs + Relocations: + - {Offset: 0, Symbol: abs, Type: R_AARCH64_MOVW_UABS_G0} + - {Offset: 4, Symbol: abs, Type: R_AARCH64_MOVW_UABS_G0_NC} + - {Offset: 8, Symbol: abs, Type: R_AARCH64_MOVW_UABS_G1} + - {Offset: 12, Symbol: abs, Type: R_AARCH64_MOVW_UABS_G1_NC} + - {Offset: 16, Symbol: abs, Type: R_AARCH64_MOVW_UABS_G2} + - {Offset: 20, Symbol: abs, Type: R_AARCH64_MOVW_UABS_G2_NC} + + - Name: .prel + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + AddressAlign: 4 + Content: 00ffffff00ffffff00ffffff00ffffff00ffffff00ffffff + - Name: .rel.prel + Type: SHT_REL + Link: .symtab + Info: .prel + Relocations: + - {Offset: 0, Symbol: .prel, Type: R_AARCH64_PREL64} + - {Offset: 4, Symbol: .prel, Type: R_AARCH64_PREL32} + - {Offset: 8, Symbol: .prel, Type: R_AARCH64_PREL16} + - {Offset: 12, Symbol: .prel, Type: R_AARCH64_LD_PREL_LO19} + - {Offset: 16, Symbol: .prel, Type: R_AARCH64_ADR_PREL_PG_HI21} + - {Offset: 20, Symbol: .prel, Type: R_AARCH64_ADR_PREL_PG_HI21_NC} + + - Name: .branch + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + AddressAlign: 4 + Content: f0fffffff0fffffff0fffffff0ffffff + - Name: .rel.branch + Type: SHT_REL + Link: .symtab + Info: .branch + Relocations: + - {Offset: 0, Symbol: .branch, Type: R_AARCH64_TSTBR14} + - {Offset: 4, Symbol: .branch, Type: R_AARCH64_CONDBR19} + - {Offset: 8, Symbol: .branch, Type: R_AARCH64_CALL26} + - {Offset: 12, Symbol: .branch, Type: R_AARCH64_JUMP26} + +Symbols: + - Name: .branch + Section: .branch + - Name: .prel + Section: .prel + - Name: abs + Index: SHN_ABS + Value: 42 + Binding: STB_GLOBAL diff --git 
a/lld/test/ELF/linkerscript/symbolreferenced.s b/lld/test/ELF/linkerscript/symbolreferenced.s index 6f583d20e2764..6848082690837 100644 --- a/lld/test/ELF/linkerscript/symbolreferenced.s +++ b/lld/test/ELF/linkerscript/symbolreferenced.s @@ -50,6 +50,21 @@ # RUN: not ld.lld -T chain2.t a.o 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: # ERR-COUNT-3: error: chain2.t:1: symbol not found: undef +## _start in a lazy object file references PROVIDE symbols. We extract _start +## earlier to avoid spurious "symbol not found" errors. +# RUN: llvm-mc -filetype=obj -triple=x86_64 undef.s -o undef.o +# RUN: llvm-mc -filetype=obj -triple=x86_64 start.s -o start.o +# RUN: ld.lld -T chain2.t undef.o --start-lib start.o --end-lib -o lazy +# RUN: llvm-nm lazy | FileCheck %s --check-prefix=LAZY +# RUN: ld.lld -e 0 -T chain2.t --undefined-glob '_start*' undef.o --start-lib start.o --end-lib -o lazy +# RUN: llvm-nm lazy | FileCheck %s --check-prefix=LAZY + +# LAZY: T _start +# LAZY-NEXT: t f1 +# LAZY-NEXT: T f2 +# LAZY-NEXT: T newsym +# LAZY-NEXT: T unde + #--- a.s .global _start _start: @@ -89,3 +104,13 @@ PROVIDE(newsym = f1); PROVIDE(f2 = undef); PROVIDE_HIDDEN(f1 = f2); PROVIDE(newsym = f1); + +#--- undef.s +.globl undef +undef: ret + +#--- start.s +.globl _start +_start: ret +.data +.quad newsym diff --git a/lld/test/ELF/pack-dyn-relocs.s b/lld/test/ELF/pack-dyn-relocs.s index 733ddd4ecad39..dd5d366ae2344 100644 --- a/lld/test/ELF/pack-dyn-relocs.s +++ b/lld/test/ELF/pack-dyn-relocs.s @@ -198,24 +198,24 @@ // RUN: llvm-readobj -r %t2.a64 | FileCheck --check-prefix=UNPACKED64 %s // UNPACKED64: Section ({{.+}}) .rela.dyn { -// UNPACKED64-NEXT: 0x30690 R_AARCH64_RELATIVE - 0x0 -// UNPACKED64-NEXT: 0x30698 R_AARCH64_RELATIVE - 0x1 +// UNPACKED64-NEXT: 0x30690 R_AARCH64_RELATIVE - 0x30690 +// UNPACKED64-NEXT: 0x30698 R_AARCH64_RELATIVE - 0x30691 // UNPACKED64-NEXT: 0x306A0 R_AARCH64_RELATIVE - 0x2 // UNPACKED64-NEXT: 0x306A8 R_AARCH64_RELATIVE - 0xFFFFFFFFFFFFFFFF // UNPACKED64-NEXT: 0x306B0 R_AARCH64_RELATIVE - 0x80000000 // UNPACKED64-NEXT: 0x306B8 R_AARCH64_RELATIVE - 0x6 // UNPACKED64-NEXT: 0x306C0 R_AARCH64_RELATIVE - 0x7 -// UNPACKED64-NEXT: 0x306C8 R_AARCH64_RELATIVE - 0x8 +// UNPACKED64-NEXT: 0x306C8 R_AARCH64_RELATIVE - 0x30698 -// UNPACKED64-NEXT: 0x306D8 R_AARCH64_RELATIVE - 0x1 +// UNPACKED64-NEXT: 0x306D8 R_AARCH64_RELATIVE - 0x30691 // UNPACKED64-NEXT: 0x306E0 R_AARCH64_RELATIVE - 0x2 // UNPACKED64-NEXT: 0x306E8 R_AARCH64_RELATIVE - 0x3 // UNPACKED64-NEXT: 0x306F0 R_AARCH64_RELATIVE - 0x4 // UNPACKED64-NEXT: 0x306F8 R_AARCH64_RELATIVE - 0x5 // UNPACKED64-NEXT: 0x30700 R_AARCH64_RELATIVE - 0x6 -// UNPACKED64-NEXT: 0x30708 R_AARCH64_RELATIVE - 0x7 +// UNPACKED64-NEXT: 0x30708 R_AARCH64_RELATIVE - 0x30697 -// UNPACKED64-NEXT: 0x30720 R_AARCH64_RELATIVE - 0x1 +// UNPACKED64-NEXT: 0x30720 R_AARCH64_RELATIVE - 0x30691 // UNPACKED64-NEXT: 0x30728 R_AARCH64_RELATIVE - 0x2 // UNPACKED64-NEXT: 0x30730 R_AARCH64_RELATIVE - 0x3 // UNPACKED64-NEXT: 0x30738 R_AARCH64_RELATIVE - 0x4 @@ -223,9 +223,9 @@ // UNPACKED64-NEXT: 0x30748 R_AARCH64_RELATIVE - 0x6 // UNPACKED64-NEXT: 0x30750 R_AARCH64_RELATIVE - 0x7 // UNPACKED64-NEXT: 0x30758 R_AARCH64_RELATIVE - 0x8 -// UNPACKED64-NEXT: 0x30760 R_AARCH64_RELATIVE - 0x9 +// UNPACKED64-NEXT: 0x30760 R_AARCH64_RELATIVE - 0x30699 -// UNPACKED64-NEXT: 0x30769 R_AARCH64_RELATIVE - 0xA +// UNPACKED64-NEXT: 0x30769 R_AARCH64_RELATIVE - 0x3069A // UNPACKED64-NEXT: 0x306D0 R_AARCH64_ABS64 bar2 0x1 // UNPACKED64-NEXT: 0x30718 R_AARCH64_ABS64 bar2 0x0 @@ 
-247,7 +247,7 @@ // ANDROID64: (DEBUG) 0x0 // ANDROID64-NEXT: (ANDROID_RELA) 0x[[#ANDROID]] -// ANDROID64-NEXT: (ANDROID_RELASZ) 122 (bytes) +// ANDROID64-NEXT: (ANDROID_RELASZ) 136 (bytes) // ANDROID64-NEXT: (RELAENT) 24 (bytes) // ANDROID64-HEADERS: 0x0000000060000011 ANDROID_RELA [[ADDR]] @@ -255,39 +255,39 @@ // ANDROID64: Relocation section '.rela.dyn' at offset {{.*}} contains 33 entries: // ANDROID64-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend -// ANDROID64-NEXT: 00000000000303e8 0000000000000403 R_AARCH64_RELATIVE 0 -// ANDROID64-NEXT: 00000000000303f0 0000000000000403 R_AARCH64_RELATIVE 1 -// ANDROID64-NEXT: 00000000000303f8 0000000000000403 R_AARCH64_RELATIVE 2 -// ANDROID64-NEXT: 0000000000030400 0000000000000403 R_AARCH64_RELATIVE ffffffffffffffff -// ANDROID64-NEXT: 0000000000030408 0000000000000403 R_AARCH64_RELATIVE 80000000 -// ANDROID64-NEXT: 0000000000030410 0000000000000403 R_AARCH64_RELATIVE 6 -// ANDROID64-NEXT: 0000000000030418 0000000000000403 R_AARCH64_RELATIVE 7 -// ANDROID64-NEXT: 0000000000030420 0000000000000403 R_AARCH64_RELATIVE 8 -// ANDROID64-NEXT: 0000000000030478 0000000000000403 R_AARCH64_RELATIVE 1 -// ANDROID64-NEXT: 0000000000030480 0000000000000403 R_AARCH64_RELATIVE 2 -// ANDROID64-NEXT: 0000000000030488 0000000000000403 R_AARCH64_RELATIVE 3 -// ANDROID64-NEXT: 0000000000030490 0000000000000403 R_AARCH64_RELATIVE 4 -// ANDROID64-NEXT: 0000000000030498 0000000000000403 R_AARCH64_RELATIVE 5 -// ANDROID64-NEXT: 00000000000304a0 0000000000000403 R_AARCH64_RELATIVE 6 -// ANDROID64-NEXT: 00000000000304a8 0000000000000403 R_AARCH64_RELATIVE 7 -// ANDROID64-NEXT: 00000000000304b0 0000000000000403 R_AARCH64_RELATIVE 8 -// ANDROID64-NEXT: 00000000000304b8 0000000000000403 R_AARCH64_RELATIVE 9 -// ANDROID64-NEXT: 0000000000030430 0000000000000403 R_AARCH64_RELATIVE 1 -// ANDROID64-NEXT: 0000000000030438 0000000000000403 R_AARCH64_RELATIVE 2 -// ANDROID64-NEXT: 0000000000030440 0000000000000403 R_AARCH64_RELATIVE 3 -// ANDROID64-NEXT: 0000000000030448 0000000000000403 R_AARCH64_RELATIVE 4 -// ANDROID64-NEXT: 0000000000030450 0000000000000403 R_AARCH64_RELATIVE 5 -// ANDROID64-NEXT: 0000000000030458 0000000000000403 R_AARCH64_RELATIVE 6 -// ANDROID64-NEXT: 0000000000030460 0000000000000403 R_AARCH64_RELATIVE 7 -// ANDROID64-NEXT: 00000000000304c1 0000000000000403 R_AARCH64_RELATIVE a -// ANDROID64-NEXT: 0000000000030470 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 0 -// ANDROID64-NEXT: 00000000000304c9 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 0 +// ANDROID64-NEXT: 00000000000303f0 0000000000000403 R_AARCH64_RELATIVE 303f0 +// ANDROID64-NEXT: 00000000000303f8 0000000000000403 R_AARCH64_RELATIVE 303f1 +// ANDROID64-NEXT: 0000000000030400 0000000000000403 R_AARCH64_RELATIVE 2 +// ANDROID64-NEXT: 0000000000030408 0000000000000403 R_AARCH64_RELATIVE ffffffffffffffff +// ANDROID64-NEXT: 0000000000030410 0000000000000403 R_AARCH64_RELATIVE 80000000 +// ANDROID64-NEXT: 0000000000030418 0000000000000403 R_AARCH64_RELATIVE 6 +// ANDROID64-NEXT: 0000000000030420 0000000000000403 R_AARCH64_RELATIVE 7 +// ANDROID64-NEXT: 0000000000030428 0000000000000403 R_AARCH64_RELATIVE 303f8 +// ANDROID64-NEXT: 0000000000030480 0000000000000403 R_AARCH64_RELATIVE 303f1 +// ANDROID64-NEXT: 0000000000030488 0000000000000403 R_AARCH64_RELATIVE 2 +// ANDROID64-NEXT: 0000000000030490 0000000000000403 R_AARCH64_RELATIVE 3 +// ANDROID64-NEXT: 0000000000030498 0000000000000403 R_AARCH64_RELATIVE 4 +// ANDROID64-NEXT: 00000000000304a0 0000000000000403 
R_AARCH64_RELATIVE 5 +// ANDROID64-NEXT: 00000000000304a8 0000000000000403 R_AARCH64_RELATIVE 6 +// ANDROID64-NEXT: 00000000000304b0 0000000000000403 R_AARCH64_RELATIVE 7 +// ANDROID64-NEXT: 00000000000304b8 0000000000000403 R_AARCH64_RELATIVE 8 +// ANDROID64-NEXT: 00000000000304c0 0000000000000403 R_AARCH64_RELATIVE 303f9 +// ANDROID64-NEXT: 0000000000030438 0000000000000403 R_AARCH64_RELATIVE 303f1 +// ANDROID64-NEXT: 0000000000030440 0000000000000403 R_AARCH64_RELATIVE 2 +// ANDROID64-NEXT: 0000000000030448 0000000000000403 R_AARCH64_RELATIVE 3 +// ANDROID64-NEXT: 0000000000030450 0000000000000403 R_AARCH64_RELATIVE 4 +// ANDROID64-NEXT: 0000000000030458 0000000000000403 R_AARCH64_RELATIVE 5 +// ANDROID64-NEXT: 0000000000030460 0000000000000403 R_AARCH64_RELATIVE 6 +// ANDROID64-NEXT: 0000000000030468 0000000000000403 R_AARCH64_RELATIVE 303f7 +// ANDROID64-NEXT: 00000000000304c9 0000000000000403 R_AARCH64_RELATIVE 303fa +// ANDROID64-NEXT: 0000000000030478 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 0 // ANDROID64-NEXT: 00000000000304d1 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 0 -// ANDROID64-NEXT: 00000000000304e9 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 0 -// ANDROID64-NEXT: 0000000000030428 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 1 -// ANDROID64-NEXT: 0000000000030468 0000000200000101 R_AARCH64_ABS64 0000000000000000 zed2 + 0 -// ANDROID64-NEXT: 00000000000304d9 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 1 +// ANDROID64-NEXT: 00000000000304d9 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 0 +// ANDROID64-NEXT: 00000000000304f1 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 0 +// ANDROID64-NEXT: 0000000000030430 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 1 +// ANDROID64-NEXT: 0000000000030470 0000000200000101 R_AARCH64_ABS64 0000000000000000 zed2 + 0 // ANDROID64-NEXT: 00000000000304e1 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 1 +// ANDROID64-NEXT: 00000000000304e9 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 1 // ANDROID64-EMPTY: // RUN: ld.lld -pie --pack-dyn-relocs=relr %t.a64.o %t.a64.so -o %t4.a64 @@ -317,7 +317,7 @@ /// Any relative relocations with odd offset stay in SHT_RELA. // RELR64: Relocation section '.rela.dyn' at offset {{.*}} contains 9 entries: // RELR64-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend -// RELR64-NEXT: 0000000000030569 0000000000000403 R_AARCH64_RELATIVE a +// RELR64-NEXT: 0000000000030569 0000000000000403 R_AARCH64_RELATIVE 3049a // RELR64-NEXT: 00000000000304d0 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 1 // RELR64-NEXT: 0000000000030518 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 0 // RELR64-NEXT: 0000000000030571 0000000100000101 R_AARCH64_ABS64 0000000000000000 bar2 + 0 @@ -355,33 +355,33 @@ // RELR64-NEXT: 0000000000030560 0000000000000403 R_AARCH64_RELATIVE // RELR64-EMPTY: // RELR64-NEXT: Hex dump of section '.data': -// RELR64-NEXT: 0x00030490 00000000 00000000 01000000 00000000 . +// RELR64-NEXT: 0x00030490 90040300 00000000 91040300 00000000 . // RELR64-NEXT: 0x000304a0 02000000 00000000 ffffffff ffffffff . // RELR64-NEXT: 0x000304b0 00000080 00000000 06000000 00000000 . 
.data .balign 2 -.dc.a __ehdr_start -.dc.a __ehdr_start + 1 +.dc.a .data +.dc.a .data + 1 .dc.a __ehdr_start + 2 .dc.a __ehdr_start - 1 .dc.a __ehdr_start + 0x80000000 .dc.a __ehdr_start + 6 .dc.a __ehdr_start + 7 -.dc.a __ehdr_start + 8 +.dc.a .data + 8 .dc.a bar2 + 1 -.dc.a __ehdr_start + 1 +.dc.a .data + 1 .dc.a __ehdr_start + 2 .dc.a __ehdr_start + 3 .dc.a __ehdr_start + 4 .dc.a __ehdr_start + 5 .dc.a __ehdr_start + 6 -.dc.a __ehdr_start + 7 +.dc.a .data + 7 .dc.a zed2 .dc.a bar2 -.dc.a __ehdr_start + 1 +.dc.a .data + 1 .dc.a __ehdr_start + 2 .dc.a __ehdr_start + 3 .dc.a __ehdr_start + 4 @@ -389,9 +389,9 @@ .dc.a __ehdr_start + 6 .dc.a __ehdr_start + 7 .dc.a __ehdr_start + 8 -.dc.a __ehdr_start + 9 +.dc.a .data + 9 .byte 00 -.dc.a __ehdr_start + 10 +.dc.a .data + 10 .dc.a bar2 .dc.a bar2 .dc.a bar2 + 1 diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index df7d4d1cc3d67..d5d763b0a4ae1 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -132,11 +132,21 @@ bool link(ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS, // Create table mapping all options defined in Options.td static constexpr opt::OptTable::Info optInfo[] = { #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES) \ - {PREFIX, NAME, HELPTEXT, \ - METAVAR, OPT_##ID, opt::Option::KIND##Class, \ - PARAM, FLAGS, VISIBILITY, \ - OPT_##GROUP, OPT_##ALIAS, ALIASARGS, \ + VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, \ + VALUES) \ + {PREFIX, \ + NAME, \ + HELPTEXT, \ + HELPTEXTSFORVARIANTS, \ + METAVAR, \ + OPT_##ID, \ + opt::Option::KIND##Class, \ + PARAM, \ + FLAGS, \ + VISIBILITY, \ + OPT_##GROUP, \ + OPT_##ALIAS, \ + ALIASARGS, \ VALUES}, #include "Options.inc" #undef OPTION diff --git a/lldb/bindings/interface/SBMemoryRegionInfoDocstrings.i b/lldb/bindings/interface/SBMemoryRegionInfoDocstrings.i index bd80740f3fdd3..d7c68baf100e2 100644 --- a/lldb/bindings/interface/SBMemoryRegionInfoDocstrings.i +++ b/lldb/bindings/interface/SBMemoryRegionInfoDocstrings.i @@ -2,8 +2,7 @@ "API clients can get information about memory regions in processes." ) lldb::SBMemoryRegionInfo; -%feature("autodoc", " - GetRegionEnd(SBMemoryRegionInfo self) -> lldb::addr_t +%feature("docstring", " Returns whether this memory region has a list of modified (dirty) pages available or not. When calling GetNumDirtyPages(), you will have 0 returned for both "dirty page list is not known" and @@ -11,8 +10,7 @@ memory region). You must use this method to disambiguate." ) lldb::SBMemoryRegionInfo::HasDirtyMemoryPageList; -%feature("autodoc", " - GetNumDirtyPages(SBMemoryRegionInfo self) -> uint32_t +%feature("docstring", " Return the number of dirty (modified) memory pages in this memory region, if available. You must use the SBMemoryRegionInfo::HasDirtyMemoryPageList() method to @@ -20,16 +18,14 @@ on the target system can provide this information." ) lldb::SBMemoryRegionInfo::GetNumDirtyPages; -%feature("autodoc", " - GetDirtyPageAddressAtIndex(SBMemoryRegionInfo self, uint32_t idx) -> lldb::addr_t +%feature("docstring", " Return the address of a modified, or dirty, page of memory. If the provided index is out of range, or this memory region does not have dirty page information, LLDB_INVALID_ADDRESS is returned." ) lldb::SBMemoryRegionInfo::GetDirtyPageAddressAtIndex; -%feature("autodoc", " - GetPageSize(SBMemoryRegionInfo self) -> int +%feature("docstring", " Return the size of pages in this memory region. 0 will be returned if this information was unavailable."
) lldb::SBMemoryRegionInfo::GetPageSize(); diff --git a/lldb/bindings/interface/SBProcessDocstrings.i b/lldb/bindings/interface/SBProcessDocstrings.i index 3ee17e0c7f2fb..1b98a79e4f6d3 100644 --- a/lldb/bindings/interface/SBProcessDocstrings.i +++ b/lldb/bindings/interface/SBProcessDocstrings.i @@ -20,18 +20,18 @@ SBProcess supports thread iteration. For example (from test/lldbutil.py), :: " ) lldb::SBProcess; -%feature("autodoc", " +%feature("docstring", " Writes data into the current process's stdin. API client specifies a Python string as the only argument." ) lldb::SBProcess::PutSTDIN; -%feature("autodoc", " +%feature("docstring", " Reads data from the current process's stdout stream. API client specifies the size of the buffer to read data into. It returns the byte buffer in a Python string." ) lldb::SBProcess::GetSTDOUT; -%feature("autodoc", " +%feature("docstring", " Reads data from the current process's stderr stream. API client specifies the size of the buffer to read data into. It returns the byte buffer in a Python string." @@ -47,34 +47,34 @@ SBProcess supports thread iteration. For example (from test/lldbutil.py), :: "See SBTarget.Launch for argument description and usage." ) lldb::SBProcess::RemoteLaunch; -%feature("autodoc", " +%feature("docstring", " Returns the INDEX'th thread from the list of current threads. The index of a thread is only valid for the current stop. For a persistent thread identifier use either the thread ID or the IndexID. See help on SBThread for more details." ) lldb::SBProcess::GetThreadAtIndex; -%feature("autodoc", " +%feature("docstring", " Returns the thread with the given thread ID." ) lldb::SBProcess::GetThreadByID; -%feature("autodoc", " +%feature("docstring", " Returns the thread with the given thread IndexID." ) lldb::SBProcess::GetThreadByIndexID; -%feature("autodoc", " +%feature("docstring", " Returns the currently selected thread." ) lldb::SBProcess::GetSelectedThread; -%feature("autodoc", " +%feature("docstring", " Lazily create a thread on demand through the current OperatingSystem plug-in, if the current OperatingSystem plug-in supports it." ) lldb::SBProcess::CreateOSPluginThread; -%feature("autodoc", " +%feature("docstring", " Returns the process ID of the process." ) lldb::SBProcess::GetProcessID; -%feature("autodoc", " +%feature("docstring", " Returns an integer ID that is guaranteed to be unique across all process instances. This is not the process ID, just a unique integer for comparison and caching purposes." ) lldb::SBProcess::GetUniqueID; @@ -95,7 +95,7 @@ SBProcess supports thread iteration. For example (from test/lldbutil.py), :: will always increase, but may increase by more than one per stop." ) lldb::SBProcess::GetStopID; -%feature("autodoc", " +%feature("docstring", " Reads memory from the current process's address space and removes any traps that may have been inserted into the memory. It returns the byte buffer in a Python string. Example: :: @@ -105,7 +105,7 @@ SBProcess supports thread iteration. For example (from test/lldbutil.py), :: new_bytes = bytearray(content)" ) lldb::SBProcess::ReadMemory; -%feature("autodoc", " +%feature("docstring", " Writes memory to the current process's address space and maintains any traps that might be present due to software breakpoints. Example: :: @@ -116,8 +116,8 @@ SBProcess supports thread iteration. 
For example (from test/lldbutil.py), :: print('SBProcess.WriteMemory() failed!')" ) lldb::SBProcess::WriteMemory; -%feature("autodoc", " - Reads a NULL terminated C string from the current process's address space. +%feature("docstring", " + Reads a NUL terminated C string from the current process's address space. It returns a python string of the exact length, or truncates the string if the maximum character limit is reached. Example: :: @@ -131,7 +131,7 @@ SBProcess supports thread iteration. For example (from test/lldbutil.py), :: ) lldb::SBProcess::ReadCStringFromMemory; -%feature("autodoc", " +%feature("docstring", " Reads an unsigned integer from memory given a byte size and an address. Returns the unsigned integer that was read. Example: :: @@ -145,7 +145,7 @@ SBProcess supports thread iteration. For example (from test/lldbutil.py), :: ) lldb::SBProcess::ReadUnsignedFromMemory; -%feature("autodoc", " +%feature("docstring", " Reads a pointer from memory from an address and returns the value. Example: :: # Read a pointer from address 0x1000 @@ -158,16 +158,16 @@ SBProcess supports thread iteration. For example (from test/lldbutil.py), :: ) lldb::SBProcess::ReadPointerFromMemory; -%feature("autodoc", " +%feature("docstring", " Returns the implementation object of the process plugin if available. None otherwise." ) lldb::SBProcess::GetScriptedImplementation; -%feature("autodoc", " +%feature("docstring", " Returns the process' extended crash information." ) lldb::SBProcess::GetExtendedCrashInformation; -%feature("autodoc", " +%feature("docstring", " Load the library whose filename is given by image_spec looking in all the paths supplied in the paths argument. If successful, return a token that can be passed to UnloadImage and fill loaded_path with the path that was @@ -175,7 +175,7 @@ SBProcess supports thread iteration. For example (from test/lldbutil.py), :: lldb.LLDB_INVALID_IMAGE_TOKEN." ) lldb::SBProcess::LoadImageUsingPaths; -%feature("autodoc", " +%feature("docstring", " Return the number of different thread-origin extended backtraces this process can support as a uint32_t. When the process is stopped and you have an SBThread, lldb may be @@ -184,12 +184,12 @@ SBProcess supports thread iteration. For example (from test/lldbutil.py), :: queue)." ) lldb::SBProcess::GetNumExtendedBacktraceTypes; -%feature("autodoc", " +%feature("docstring", " Takes an index argument, returns the name of one of the thread-origin extended backtrace methods as a str." ) lldb::SBProcess::GetExtendedBacktraceTypeAtIndex; -%feature("autodoc", " +%feature("docstring", " Get information about the process. Valid process info will only be returned when the process is alive, use IsValid() to check if the info returned is valid. :: @@ -199,7 +199,48 @@ SBProcess supports thread iteration. For example (from test/lldbutil.py), :: process_info.GetProcessID()" ) lldb::SBProcess::GetProcessInfo; -%feature("autodoc", " +%feature("docstring", " + Get the current address mask in this Process of a given type. + There are lldb.eAddressMaskTypeCode and lldb.eAddressMaskTypeData address + masks, and on most Targets, the Data address mask is more general + because there are no alignment restrictions, as there can be with Code + addresses. + lldb.eAddressMaskTypeAny may be used to get the most general mask. + The bits which are not used for addressing are set to 1 in the returned + mask. + In an unusual environment with different address masks for high and low + memory, this may also be specified. This is uncommon, default is
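+    lldb.eAddressMaskRangeLow. + +    Example (illustrative; 'process' is an SBProcess and 'raw_pc' a possibly +    tagged code address): :: + +        # Strip the non-address (metadata) bits manually with the mask, +        # or let FixAddress apply the appropriate mask. +        mask = process.GetAddressMask(lldb.eAddressMaskTypeCode) +        addr = raw_pc & ~mask +        addr = process.FixAddress(raw_pc, lldb.eAddressMaskTypeCode)"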
This is uncommon, default is + lldb.eAddressMaskRangeLow." +) lldb::SBProcess::GetAddressMask; + +%feature("docstring", " + Set the current address mask in this Process for a given type, + lldb.eAddressMaskTypeCode or lldb.eAddressMaskTypeData. Bits that are not + used for addressing should be set to 1 in the mask. + When setting all masks, lldb.eAddressMaskTypeAll may be specified. + In an unusual environment with different address masks for high and low + memory, this may also be specified. This is uncommon, default is + lldb.eAddressMaskRangeLow." +) lldb::SBProcess::SetAddressMask; + +%feature("docstring", " + Set the number of low bits relevant for addressing in this Process + for a given type, lldb.eAddressMaskTypeCode or lldb.eAddressMaskTypeData. + When setting all masks, lldb.eAddressMaskTypeAll may be specified. + In an unusual environment with different address masks for high and low + memory, the address range may also be specified. This is uncommon, + default is lldb.eAddressMaskRangeLow." +) lldb::SBProcess::SetAddressableBits; + +%feature("docstring", " + Given a virtual address, clear the bits that are not used for addressing + (and may be used for metadata, memory tagging, point authentication, etc). + By default the most general mask, lldb.eAddressMaskTypeAny is used to + process the address, but lldb.eAddressMaskTypeData and + lldb.eAddressMaskTypeCode may be specified if the type of address is known." +) lldb::SBProcess::FixAddress; + +%feature("docstring", " Allocates a block of memory within the process, with size and access permissions specified in the arguments. The permissions argument is an or-combination of zero or more of @@ -209,11 +250,11 @@ SBProcess supports thread iteration. For example (from test/lldbutil.py), :: lldb.LLDB_INVALID_ADDRESS if the allocation failed." ) lldb::SBProcess::AllocateMemory; -%feature("autodoc", "Get default process broadcaster class name (lldb.process)." +%feature("docstring", "Get default process broadcaster class name (lldb.process)." ) lldb::SBProcess::GetBroadcasterClass; -%feature("autodoc", " +%feature("docstring", " Deallocates the block of memory (previously allocated using AllocateMemory) given in the argument." ) lldb::SBProcess::DeallocateMemory; diff --git a/lldb/bindings/interface/SBQueueDocstrings.i b/lldb/bindings/interface/SBQueueDocstrings.i index fa472d5bed175..c3baf39a299db 100644 --- a/lldb/bindings/interface/SBQueueDocstrings.i +++ b/lldb/bindings/interface/SBQueueDocstrings.i @@ -2,14 +2,14 @@ "Represents a libdispatch queue in the process." ) lldb::SBQueue; -%feature("autodoc", " +%feature("docstring", " Returns an lldb::queue_id_t type unique identifier number for this queue that will not be used by any other queue during this process' execution. These ID numbers often start at 1 with the first system-created queues and increment from there." ) lldb::SBQueue::GetQueueID; -%feature("autodoc", " +%feature("docstring", " Returns an lldb::QueueKind enumerated value (e.g. eQueueKindUnknown, eQueueKindSerial, eQueueKindConcurrent) describing the type of this queue." diff --git a/lldb/bindings/interface/SBThreadDocstrings.i b/lldb/bindings/interface/SBThreadDocstrings.i index f307212f0114e..76822e49c6384 100644 --- a/lldb/bindings/interface/SBThreadDocstrings.i +++ b/lldb/bindings/interface/SBThreadDocstrings.i @@ -55,24 +55,24 @@ See also :py:class:`SBFrame` ." 
eStopReasonPlanComplete 0" ) lldb::SBThread::GetStopReasonDataAtIndex; -%feature("autodoc", " +%feature("docstring", " Collects a thread's stop reason extended information dictionary and prints it into the SBStream in a JSON format. The format of this JSON dictionary depends on the stop reason and is currently used only for instrumentation plugins." ) lldb::SBThread::GetStopReasonExtendedInfoAsJSON; -%feature("autodoc", " +%feature("docstring", " Returns a collection of historical stack traces that are significant to the current stop reason. Used by ThreadSanitizer, where we provide various stack traces that were involved in a data race or other type of detected issue." ) lldb::SBThread::GetStopReasonExtendedBacktraces; -%feature("autodoc", " +%feature("docstring", " Pass only an (int)length and expect to get a Python string describing the stop reason." ) lldb::SBThread::GetStopDescription; -%feature("autodoc", " +%feature("docstring", " Returns a unique thread identifier (type lldb::tid_t, typically a 64-bit type) for the current SBThread that will remain constant throughout the thread's lifetime in this process and will not be reused by another thread during this @@ -81,7 +81,7 @@ See also :py:class:`SBFrame` ." to associate data from those tools with lldb. See related GetIndexID." ) lldb::SBThread::GetThreadID; -%feature("autodoc", " +%feature("docstring", " Return the index number for this SBThread. The index number is the same thing that a user gives as an argument to 'thread select' in the command line lldb. These numbers start at 1 (for the first thread lldb sees in a debug session) @@ -91,12 +91,12 @@ See also :py:class:`SBFrame` ." This method returns a uint32_t index number, takes no arguments." ) lldb::SBThread::GetIndexID; -%feature("autodoc", " +%feature("docstring", " Return the queue name associated with this thread, if any, as a str. For example, with a libdispatch (aka Grand Central Dispatch) queue." ) lldb::SBThread::GetQueueName; -%feature("autodoc", " +%feature("docstring", " Return the dispatch_queue_id for this thread, if any, as a lldb::queue_id_t. For example, with a libdispatch (aka Grand Central Dispatch) queue." ) lldb::SBThread::GetQueueID; @@ -109,7 +109,7 @@ See also :py:class:`SBFrame` ." anything was printed into the stream (true) or not (false)." ) lldb::SBThread::GetInfoItemByPathAsString; -%feature("autodoc", " +%feature("docstring", " Return the SBQueue for this thread. If this thread is not currently associated with a libdispatch queue, the SBQueue object's IsValid() method will return false. If this SBThread is actually a HistoryThread, we may be able to provide QueueID @@ -141,14 +141,14 @@ See also :py:class:`SBFrame` ." "Do an instruction level single step in the currently selected thread." ) lldb::SBThread::StepInstruction; -%feature("autodoc", " +%feature("docstring", " Force a return from the frame passed in (and any frames younger than it) without executing any more code in those frames. If return_value contains a valid SBValue, that will be set as the return value from frame. Note, at present only scalar return values are supported." ) lldb::SBThread::ReturnFromFrame; -%feature("autodoc", " +%feature("docstring", " Unwind the stack frames from the innermost expression evaluation. This API is equivalent to 'thread return -x'." ) lldb::SBThread::UnwindInnermostExpression; @@ -181,7 +181,7 @@ See also :py:class:`SBFrame` ." or thread-stop-format (stop_format = true)." 
) lldb::SBThread::GetDescription;
-%feature("autodoc","
+%feature("docstring","
     Given an argument of str to specify the type of thread-origin extended
     backtrace to retrieve, query whether the origin of this thread is
     available. An SBThread is returned; SBThread.IsValid will return true
@@ -192,8 +192,7 @@ See also :py:class:`SBFrame` ."
     the returned thread's own thread origin in turn."
 ) lldb::SBThread::GetExtendedBacktraceThread;
-%feature("autodoc","
-    Takes no arguments, returns a uint32_t.
+%feature("docstring","
     If this SBThread is an ExtendedBacktrace thread, get the IndexID of the
     original thread that this ExtendedBacktrace thread represents, if
     available. The thread that was running this backtrace in the past may
@@ -202,29 +201,28 @@ See also :py:class:`SBFrame` ."
     In that case, this ExtendedBacktrace thread's IndexID will be returned."
 ) lldb::SBThread::GetExtendedBacktraceOriginatingIndexID;
-%feature("autodoc","
+%feature("docstring","
     Returns an SBValue object representing the current exception for the thread,
     if there is any. Currently, this works for Obj-C code and returns an SBValue
     representing the NSException object at the throw site or that's currently
     being processed."
 ) lldb::SBThread::GetCurrentException;
-%feature("autodoc","
+%feature("docstring","
     Returns a historical (fake) SBThread representing the stack trace of an
     exception, if there is one for the thread. Currently, this works for Obj-C
     code, and can retrieve the throw-site backtrace of an NSException object
     even when the program is no longer at the throw site."
 ) lldb::SBThread::GetCurrentExceptionBacktrace;
-%feature("autodoc","
-    Takes no arguments, returns a bool.
+%feature("docstring","
     lldb may be able to detect that function calls should not be executed
     on a given thread at a particular point in time. It is recommended that
     this is checked before performing an inferior function call on a given
     thread."
 ) lldb::SBThread::SafeToCallFunctions;
-%feature("autodoc","
-    Retruns a SBValue object representing the siginfo for the current signal.
+%feature("docstring","
+    Returns an SBValue object representing the siginfo for the current signal.
" ) lldb::SBThread::GetSiginfo; diff --git a/lldb/cmake/caches/Apple-lldb-Linux.cmake b/lldb/cmake/caches/Apple-lldb-Linux.cmake index b2d3cf595fe18..bfa660d8654b7 100644 --- a/lldb/cmake/caches/Apple-lldb-Linux.cmake +++ b/lldb/cmake/caches/Apple-lldb-Linux.cmake @@ -1,4 +1,5 @@ include(${CMAKE_CURRENT_LIST_DIR}/Apple-lldb-base.cmake) +set(LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES ON CACHE BOOL "" FORCE) set(LLVM_DISTRIBUTION_COMPONENTS lldb diff --git a/lldb/cmake/caches/Apple-lldb-base.cmake b/lldb/cmake/caches/Apple-lldb-base.cmake index 4d4f02bfae95b..6c3fa4346d7e8 100644 --- a/lldb/cmake/caches/Apple-lldb-base.cmake +++ b/lldb/cmake/caches/Apple-lldb-base.cmake @@ -3,6 +3,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON CACHE BOOL "") set(LLVM_TARGETS_TO_BUILD X86;ARM;AArch64 CACHE STRING "") set(LLVM_ENABLE_ASSERTIONS ON CACHE BOOL "") +set(LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES OFF CACHE BOOL "") set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "") set(LIBCXX_ENABLE_STATIC OFF CACHE BOOL "") diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index 81729b5f65dbe..3758fd4d81b56 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -57,6 +57,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/TypedPointerType.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -1980,13 +1981,14 @@ static void replaceUsesOfBuiltinVar(Value *V, const APInt &AccumulatedOffset, if (auto *Cast = dyn_cast(U)) { replaceUsesOfBuiltinVar(Cast, AccumulatedOffset, ReplacementFunc, GV); InstsToRemove.push_back(Cast); - } else if (auto *GEP = dyn_cast(U)) { + } else if (auto *GEP = dyn_cast(U)) { APInt NewOffset = AccumulatedOffset.sextOrTrunc( DL.getIndexSizeInBits(GEP->getPointerAddressSpace())); if (!GEP->accumulateConstantOffset(DL, NewOffset)) llvm_unreachable("Illegal GEP of a SPIR-V builtin variable"); replaceUsesOfBuiltinVar(GEP, NewOffset, ReplacementFunc, GV); - InstsToRemove.push_back(GEP); + if (auto *AsInst = dyn_cast(U)) + InstsToRemove.push_back(AsInst); } else if (auto *Load = dyn_cast(U)) { // Figure out which index the accumulated offset corresponds to. If we // have a weird offset (e.g., trying to load byte 7), bail out. diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVDecorate.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVDecorate.h index 3cf1728a33f43..99ef788b8a64d 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVDecorate.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVDecorate.h @@ -100,6 +100,7 @@ class SPIRVDecorateGeneric : public SPIRVAnnotationGeneric { case DecorationMaxByteOffset: return VersionNumber::SPIRV_1_1; case DecorationUserSemantic: + case DecorationCounterBuffer: return VersionNumber::SPIRV_1_4; default: diff --git a/llvm-spirv/test/GEPOperator.spvasm b/llvm-spirv/test/GEPOperator.spvasm new file mode 100644 index 0000000000000..d537c46a1afb2 --- /dev/null +++ b/llvm-spirv/test/GEPOperator.spvasm @@ -0,0 +1,55 @@ +; It's possible that access to builtin variable is represented by +; GetElementPtrConstantExpr rather than GetElementPtrInst. This +; test case presents a pattern that results into a ConstantExpr. 
+; REQUIRES: spirv-as +; RUN: spirv-as --target-env spv1.0 -o %t.spv %s +; RUN: spirv-val %t.spv +; RUN: llvm-spirv -r -o - %t.spv | llvm-dis | FileCheck %s + + OpCapability Kernel + OpCapability Addresses + OpCapability Int64 + OpCapability Int8 + OpCapability GenericPointer + OpCapability Linkage + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %foo "test" + OpDecorate %__spirv_BuiltInWorkgroupSize Constant + OpDecorate %__spirv_BuiltInWorkgroupSize Alignment 32 + OpDecorate %__spirv_BuiltInWorkgroupSize LinkageAttributes "__spirv_BuiltInWorkgroupSize" Import + OpDecorate %__spirv_BuiltInWorkgroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %ulong = OpTypeInt 64 0 + %uint = OpTypeInt 32 0 + %uchar = OpTypeInt 8 0 + %v3ulong = OpTypeVector %ulong 3 + %_ptr_CW_uchar = OpTypePointer CrossWorkgroup %uchar + %_ptr_CW_v3ulong = OpTypePointer CrossWorkgroup %v3ulong + %_ptr_CW_ulong = OpTypePointer CrossWorkgroup %ulong + %foo_type = OpTypeFunction %void + %uint_0 = OpConstant %uint 0 + %ulong_8 = OpConstant %ulong 8 + %ulong_16 = OpConstant %ulong 16 +%__spirv_BuiltInWorkgroupSize = OpVariable %_ptr_CW_v3ulong CrossWorkgroup + %c1 = OpSpecConstantOp %_ptr_CW_uchar InBoundsPtrAccessChain %__spirv_BuiltInWorkgroupSize %uint_0 %ulong_8 + %c2 = OpSpecConstantOp %_ptr_CW_uchar InBoundsPtrAccessChain %__spirv_BuiltInWorkgroupSize %uint_0 %ulong_16 + %foo = OpFunction %void None %foo_type + %entry = OpLabel + %pv0 = OpBitcast %_ptr_CW_ulong %__spirv_BuiltInWorkgroupSize + %v0 = OpLoad %ulong %pv0 Aligned 32 + %pv1 = OpBitcast %_ptr_CW_ulong %c1 + %v1 = OpLoad %ulong %pv1 Aligned 8 + %idx1 = OpIMul %ulong %v0 %v1 + %pv2 = OpBitcast %_ptr_CW_ulong %c2 + %v2 = OpLoad %ulong %pv2 Aligned 16 + %idx2 = OpIMul %ulong %idx1 %v2 + OpReturn + OpFunctionEnd + +; CHECK-NOT: getelementptr +; CHECK-NOT: load +; CHECK: %[[#V0:]] = call spir_func i64 @_Z14get_local_sizej(i32 0) +; CHECK: %[[#V1:]] = call spir_func i64 @_Z14get_local_sizej(i32 8) +; CHECK: %[[#Idx1:]] = mul i64 %[[#V0]], %[[#V1]] +; CHECK: %[[#V2:]] = call spir_func i64 @_Z14get_local_sizej(i32 16) +; CHECK: %[[#Idx2:]] = mul i64 %[[#Idx1]], %[[#V2]] diff --git a/llvm-spirv/test/debug-label-skip.ll b/llvm-spirv/test/debug-label-skip.ll index cd61281f5676d..cf52ea68aaf1b 100644 --- a/llvm-spirv/test/debug-label-skip.ll +++ b/llvm-spirv/test/debug-label-skip.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as %s -o %t.bc +; RUN: llvm-as --preserve-input-debuginfo-format %s -o %t.bc ; RUN: llvm-spirv %t.bc -o %t.spv source_filename = "debug-label-bitcode.c" diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 5273fdc405aca..4b9ffdfa9e191 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -674,6 +674,9 @@ option(LLVM_USE_OPROFILE option(LLVM_EXTERNALIZE_DEBUGINFO "Generate dSYM files and strip executables and libraries (Darwin Only)" OFF) +option(LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES + "Preserve exported symbols in executables" ON) + set(LLVM_CODESIGNING_IDENTITY "" CACHE STRING "Sign executables and dylibs with the given identity or skip if empty (Darwin Only)") @@ -807,6 +810,9 @@ option (LLVM_BUILD_EXTERNAL_COMPILER_RT option (LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO "Show target and host info when tools are invoked with --version." ON) +option(LLVM_VERSION_PRINTER_SHOW_BUILD_CONFIG + "Show the optional build config flags when tools are invoked with --version." 
ON)
+
 # You can configure which libraries from LLVM you want to include in the
 # shared library by setting LLVM_DYLIB_COMPONENTS to a semi-colon delimited
 # list of LLVM components. All component names handled by llvm-config are valid.
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index 745935f140517..693fd5669f63f 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -258,15 +258,24 @@ if (NOT DEFINED LLVM_LINKER_DETECTED AND NOT WIN32)
     endif()
   endif()
-  # Apple's linker complains about duplicate libraries, which CMake likes to do
-  # to support ELF platforms. To silence that warning, we can use
-  # -no_warn_duplicate_libraries, but only in versions of the linker that
-  # support that flag.
-  if(NOT LLVM_USE_LINKER AND ${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+  if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
     include(CheckLinkerFlag)
-    check_linker_flag(C "-Wl,-no_warn_duplicate_libraries" LLVM_LINKER_SUPPORTS_NO_WARN_DUPLICATE_LIBRARIES)
-  else()
-    set(LLVM_LINKER_SUPPORTS_NO_WARN_DUPLICATE_LIBRARIES OFF CACHE INTERNAL "")
+    # Darwin linkers support a setting that internalizes all symbol exports,
+    # which helps reduce binary size and is often applicable for executables.
+    check_linker_flag(C "-Wl,-no_exported_symbols" LLVM_LINKER_SUPPORTS_NO_EXPORTED_SYMBOLS)
+
+    if (NOT LLVM_USE_LINKER)
+      # Apple's linker complains about duplicate libraries, which CMake likes to do
+      # to support ELF platforms. To silence that warning, we can use
+      # -no_warn_duplicate_libraries, but only in versions of the linker that
+      # support that flag.
+      check_linker_flag(C "-Wl,-no_warn_duplicate_libraries" LLVM_LINKER_SUPPORTS_NO_WARN_DUPLICATE_LIBRARIES)
+    else()
+      set(LLVM_LINKER_SUPPORTS_NO_WARN_DUPLICATE_LIBRARIES OFF CACHE INTERNAL "")
+    endif()
+
+  else()
+    set(LLVM_LINKER_SUPPORTS_NO_EXPORTED_SYMBOLS OFF CACHE INTERNAL "")
   endif()
 endif()
@@ -1029,6 +1038,17 @@ macro(add_llvm_executable name)
     add_llvm_symbol_exports( ${name} ${LLVM_EXPORTED_SYMBOL_FILE} )
   endif(LLVM_EXPORTED_SYMBOL_FILE)
+  if (DEFINED LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES AND
+      NOT LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES)
+    if(LLVM_LINKER_SUPPORTS_NO_EXPORTED_SYMBOLS)
+      set_property(TARGET ${name} APPEND_STRING PROPERTY
+        LINK_FLAGS " -Wl,-no_exported_symbols")
+    else()
+      message(FATAL_ERROR
+        "LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES cannot be disabled when linker does not support \"-no_exported_symbols\"")
+    endif()
+  endif()
+
   if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB)
     set(USE_SHARED USE_SHARED)
   endif()
diff --git a/llvm/docs/Benchmarking.rst b/llvm/docs/Benchmarking.rst
index 0f88db745a686..7deb221446067 100644
--- a/llvm/docs/Benchmarking.rst
+++ b/llvm/docs/Benchmarking.rst
@@ -10,9 +10,9 @@ For benchmarking a patch we want to reduce all possible sources of
 noise as much as possible. How to do that is very OS dependent.
 Note that low noise is required, but not sufficient. It does not
-exclude measurement bias. See
-https://www.cis.upenn.edu/~cis501/papers/producing-wrong-data.pdf for
-example.
+exclude measurement bias.
+See `"Producing Wrong Data Without Doing Anything Obviously Wrong!" by Mytkowicz, Diwan, Hauswirth and Sweeney (ASPLOS 2009) `_
+for example.
 General
 ================================
diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst
index d2f66d71d39af..f7f8ed2e1a852 100644
--- a/llvm/docs/CMake.rst
+++ b/llvm/docs/CMake.rst
@@ -283,14 +283,14 @@ manual, or execute ``cmake --help-variable VARIABLE_NAME``.
The path to install executables, relative to the *CMAKE_INSTALL_PREFIX*. Defaults to "bin". -**CMAKE_INSTALL_INCLUDEDIR**:PATH - The path to install header files, relative to the *CMAKE_INSTALL_PREFIX*. - Defaults to "include". - **CMAKE_INSTALL_DOCDIR**:PATH The path to install documentation, relative to the *CMAKE_INSTALL_PREFIX*. Defaults to "share/doc". +**CMAKE_INSTALL_INCLUDEDIR**:PATH + The path to install header files, relative to the *CMAKE_INSTALL_PREFIX*. + Defaults to "include". + **CMAKE_INSTALL_MANDIR**:PATH The path to install manpage files, relative to the *CMAKE_INSTALL_PREFIX*. Defaults to "share/man". @@ -328,12 +328,6 @@ enabled sub-projects. Nearly all of these variable names begin with allows for them to be specified as values in CMAKE_BUILD_TYPE without encountering a fatal error during the configuration process. -**LLVM_UNREACHABLE_OPTIMIZE**:BOOL - This flag controls the behavior of `llvm_unreachable()` in release build - (when assertions are disabled in general). When ON (default) then - `llvm_unreachable()` is considered "undefined behavior" and optimized as - such. When OFF it is instead replaced with a guaranteed "trap". - **LLVM_APPEND_VC_REV**:BOOL Embed version control revision info (Git revision id). The version info is provided by the ``LLVM_REVISION`` macro in @@ -341,15 +335,15 @@ enabled sub-projects. Nearly all of these variable names begin with need revision info can disable this option to avoid re-linking most binaries after a branch switch. Defaults to ON. +**LLVM_FORCE_VC_REPOSITORY**:STRING + Set the git repository to include in version info rather than calling git to + determine it. + **LLVM_FORCE_VC_REVISION**:STRING Force a specific Git revision id rather than calling to git to determine it. This is useful in environments where git is not available or non-functional but the VC revision is available through other means. -**LLVM_FORCE_VC_REPOSITORY**:STRING - Set the git repository to include in version info rather than calling git to - determine it. - **LLVM_BUILD_32_BITS**:BOOL Build 32-bit executables and libraries on 64-bit systems. This option is available only on some 64-bit Unix systems. Defaults to OFF. @@ -381,22 +375,6 @@ enabled sub-projects. Nearly all of these variable names begin with *LLVM_CODE_COVERAGE_TARGETS* and *LLVM_COVERAGE_SOURCE_DIRS* for more information on configuring code coverage reports. -**LLVM_CODE_COVERAGE_TARGETS**:STRING - If set to a semicolon separated list of targets, those targets will be used - to drive the code coverage reports. If unset, the target list will be - constructed using the LLVM build's CMake export list. - -**LLVM_COVERAGE_SOURCE_DIRS**:STRING - If set to a semicolon separated list of directories, the coverage reports - will limit code coverage summaries to just the listed directories. If unset, - coverage reports will include all sources identified by the tooling. - -**LLVM_INDIVIDUAL_TEST_COVERAGE**:BOOL - Enable individual test case coverage. When set to ON, code coverage data for - each test case will be generated and stored in a separate directory under the - config.test_exec_root path. This feature allows code coverage analysis of each - individual test case. Defaults to OFF. - **LLVM_BUILD_LLVM_DYLIB**:BOOL If enabled, the target for building the libLLVM shared library is added. This library contains all of LLVM's components in a single shared library. @@ -429,15 +407,22 @@ enabled sub-projects. 
Nearly all of these variable names begin with options, which are passed to the CCACHE_MAXSIZE and CCACHE_DIR environment variables, respectively. +**LLVM_CODE_COVERAGE_TARGETS**:STRING + If set to a semicolon separated list of targets, those targets will be used + to drive the code coverage reports. If unset, the target list will be + constructed using the LLVM build's CMake export list. + +**LLVM_COVERAGE_SOURCE_DIRS**:STRING + If set to a semicolon separated list of directories, the coverage reports + will limit code coverage summaries to just the listed directories. If unset, + coverage reports will include all sources identified by the tooling. + **LLVM_CREATE_XCODE_TOOLCHAIN**:BOOL macOS Only: If enabled CMake will generate a target named 'install-xcode-toolchain'. This target will create a directory at $CMAKE_INSTALL_PREFIX/Toolchains containing an xctoolchain directory which can be used to override the default system tools. -**LLVM__LINKER_FLAGS**:STRING - Defines the set of linker flags that should be applied to a . - **LLVM_DEFAULT_TARGET_TRIPLE**:STRING LLVM target to use for code generation when no target is explicitly specified. It defaults to "host", meaning that it shall pick the architecture @@ -514,11 +499,6 @@ enabled sub-projects. Nearly all of these variable names begin with **LLVM_ENABLE_EXPENSIVE_CHECKS**:BOOL Enable additional time/memory expensive checking. Defaults to OFF. -**LLVM_ENABLE_HTTPLIB**:BOOL - Enables the optional cpp-httplib dependency which is used by llvm-debuginfod - to serve debug info over HTTP. `cpp-httplib `_ - must be installed, or `httplib_ROOT` must be set. Defaults to OFF. - **LLVM_ENABLE_FFI**:BOOL Indicates whether the LLVM Interpreter will be linked with the Foreign Function Interface library (libffi) in order to enable calling external functions. @@ -527,6 +507,11 @@ enabled sub-projects. Nearly all of these variable names begin with FFI_LIBRARY_DIR to the directories where ffi.h and libffi.so can be found, respectively. Defaults to OFF. +**LLVM_ENABLE_HTTPLIB**:BOOL + Enables the optional cpp-httplib dependency which is used by llvm-debuginfod + to serve debug info over HTTP. `cpp-httplib `_ + must be installed, or `httplib_ROOT` must be set. Defaults to OFF. + **LLVM_ENABLE_IDE**:BOOL Tell the build system that an IDE is being used. This in turn disables the creation of certain convenience build system targets, such as the various @@ -539,11 +524,6 @@ enabled sub-projects. Nearly all of these variable names begin with passed to invocations of both so that the project is built using libc++ instead of stdlibc++. Defaults to OFF. -**LLVM_ENABLE_LLVM_LIBC**: BOOL - If the LLVM libc overlay is installed in a location where the host linker - can access it, all built executables will be linked against the LLVM libc - overlay before linking against the system libc. Defaults to OFF. - **LLVM_ENABLE_LIBPFM**:BOOL Enable building with libpfm to support hardware counter measurements in LLVM tools. @@ -554,6 +534,11 @@ enabled sub-projects. Nearly all of these variable names begin with build where a dependency is added from the first stage to the second ensuring that lld is built before stage2 begins. +**LLVM_ENABLE_LLVM_LIBC**: BOOL + If the LLVM libc overlay is installed in a location where the host linker + can access it, all built executables will be linked against the LLVM libc + overlay before linking against the system libc. Defaults to OFF. 
+ **LLVM_ENABLE_LTO**:STRING Add ``-flto`` or ``-flto=`` flags to the compile and link command lines, enabling link-time optimization. Possible values are ``Off``, @@ -581,6 +566,9 @@ enabled sub-projects. Nearly all of these variable names begin with The full list is: ``clang;clang-tools-extra;cross-project-tests;libc;libclc;lld;lldb;openmp;polly;pstl`` +**LLVM_ENABLE_RTTI**:BOOL + Build LLVM with run-time type information. Defaults to OFF. + **LLVM_ENABLE_RUNTIMES**:STRING Build libc++, libc++abi, libunwind or compiler-rt using the just-built compiler. This is the correct way to build runtimes when putting together a toolchain. @@ -593,10 +581,6 @@ enabled sub-projects. Nearly all of these variable names begin with To enable all of them, use: ``LLVM_ENABLE_RUNTIMES=all`` - -**LLVM_ENABLE_RTTI**:BOOL - Build LLVM with run-time type information. Defaults to OFF. - **LLVM_ENABLE_SPHINX**:BOOL If specified, CMake will search for the ``sphinx-build`` executable and will make the ``SPHINX_OUTPUT_HTML`` and ``SPHINX_OUTPUT_MAN`` CMake options available. @@ -634,14 +618,6 @@ enabled sub-projects. Nearly all of these variable names begin with llvm. This will build the experimental target without needing it to add to the list of all the targets available in the LLVM's main CMakeLists.txt. -**LLVM_EXTERNAL_{CLANG,LLD,POLLY}_SOURCE_DIR**:PATH - These variables specify the path to the source directory for the external - LLVM projects Clang, lld, and Polly, respectively, relative to the top-level - source directory. If the in-tree subdirectory for an external project - exists (e.g., llvm/tools/clang for Clang), then the corresponding variable - will not be used. If the variable for an external project does not point - to a valid path, then that project will not be built. - **LLVM_EXTERNAL_PROJECTS**:STRING Semicolon-separated list of additional external projects to build as part of llvm. For each project LLVM_EXTERNAL__SOURCE_DIR have to be specified @@ -650,10 +626,23 @@ enabled sub-projects. Nearly all of these variable names begin with -DLLVM_EXTERNAL_FOO_SOURCE_DIR=/src/foo -DLLVM_EXTERNAL_BAR_SOURCE_DIR=/src/bar``. +**LLVM_EXTERNAL_{CLANG,LLD,POLLY}_SOURCE_DIR**:PATH + These variables specify the path to the source directory for the external + LLVM projects Clang, lld, and Polly, respectively, relative to the top-level + source directory. If the in-tree subdirectory for an external project + exists (e.g., llvm/tools/clang for Clang), then the corresponding variable + will not be used. If the variable for an external project does not point + to a valid path, then that project will not be built. + **LLVM_EXTERNALIZE_DEBUGINFO**:BOOL Generate dSYM files and strip executables and libraries (Darwin Only). Defaults to OFF. +**LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES**:BOOL + When building executables, preserve symbol exports. Defaults to ON. + You can use this option to disable exported symbols from all + executables (Darwin Only). + **LLVM_FORCE_USE_OLD_TOOLCHAIN**:BOOL If enabled, the compiler and standard library versions won't be checked. LLVM may not compile at all, or might fail at runtime due to known bugs in these @@ -675,6 +664,12 @@ enabled sub-projects. Nearly all of these variable names begin with Generate build targets for the LLVM tools. Defaults to ON. You can use this option to disable the generation of build targets for the LLVM tools. +**LLVM_INDIVIDUAL_TEST_COVERAGE**:BOOL + Enable individual test case coverage. 
When set to ON, code coverage data for
+  each test case will be generated and stored in a separate directory under the
+  config.test_exec_root path. This feature allows code coverage analysis of each
+  individual test case. Defaults to OFF.
+
 **LLVM_INSTALL_BINUTILS_SYMLINKS**:BOOL
   Install symlinks from the binutils tool names to the corresponding LLVM
   tools. For example, ar will be symlinked to llvm-ar.
@@ -697,6 +692,11 @@ enabled sub-projects. Nearly all of these variable names begin with
   If enabled, utility binaries like ``FileCheck`` and ``not`` will be installed
   to CMAKE_INSTALL_PREFIX.
+**LLVM_INSTALL_DOXYGEN_HTML_DIR**:STRING
+  The path to install Doxygen-generated HTML documentation to. This path can
+  either be absolute or relative to the *CMAKE_INSTALL_PREFIX*. Defaults to
+  ``${CMAKE_INSTALL_DOCDIR}/llvm/doxygen-html``.
+
 **LLVM_INTEGRATED_CRT_ALLOC**:PATH
   On Windows, allows embedding a different C runtime allocator into the LLVM
   tools and libraries. Using a lock-free allocator such as the ones listed below
@@ -713,17 +713,15 @@ enabled sub-projects. Nearly all of these variable names begin with
   This flag needs to be used along with the static CRT, ie. if building the
   Release target, add -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded.
-**LLVM_INSTALL_DOXYGEN_HTML_DIR**:STRING
-  The path to install Doxygen-generated HTML documentation to. This path can
-  either be absolute or relative to the *CMAKE_INSTALL_PREFIX*. Defaults to
-  ``${CMAKE_INSTALL_DOCDIR}/llvm/doxygen-html``.
-
 **LLVM_LINK_LLVM_DYLIB**:BOOL
   If enabled, tools will be linked with the libLLVM shared library. Defaults
   to OFF. Setting LLVM_LINK_LLVM_DYLIB to ON also sets LLVM_BUILD_LLVM_DYLIB
   to ON.
   This option is not available on Windows.
+**LLVM_<target>_LINKER_FLAGS**:STRING
+  Defines the set of linker flags that should be applied to a <target>.
+
 **LLVM_LIT_ARGS**:STRING
   Arguments given to lit. ``make check`` and ``make clang-test`` are affected.
   By default, ``'-sv --no-progress-bar'`` on Visual C++ and Xcode, ``'-sv'`` on
@@ -765,6 +763,10 @@ enabled sub-projects. Nearly all of these variable names begin with
 **LLVM_PARALLEL_TABLEGEN_JOBS**:STRING
   Define the maximum number of concurrent tablegen jobs.
+**LLVM_PROFDATA_FILE**:PATH
+  Path to a profdata file to pass into clang's -fprofile-instr-use flag. This
+  can only be specified if you're building with clang.
+
 **LLVM_RAM_PER_COMPILE_JOB**:STRING
   Calculates the amount of Ninja compile jobs according to available resources.
   Value has to be in MB, overwrites LLVM_PARALLEL_COMPILE_JOBS. Compile jobs
@@ -783,10 +785,6 @@ enabled sub-projects. Nearly all of these variable names begin with
   Value has to be in MB, overwrites LLVM_PARALLEL_TABLEGEN_JOBS. Tablegen jobs
   will be between one and amount of logical cores.
-**LLVM_PROFDATA_FILE**:PATH
-  Path to a profdata file to pass into clang's -fprofile-instr-use flag. This
-  can only be specified if you're building with clang.
-
 **LLVM_REVERSE_ITERATION**:BOOL
   If enabled, all supported unordered llvm containers would be iterated in
   reverse order. This is useful for uncovering non-determinism caused by
@@ -824,6 +822,12 @@ enabled sub-projects. Nearly all of these variable names begin with
   ``LLVM_USE_SANITIZER`` contains ``Undefined``. This can be used to override
   the default set of UBSan flags.
+**LLVM_UNREACHABLE_OPTIMIZE**:BOOL
+  This flag controls the behavior of `llvm_unreachable()` in release build
+  (when assertions are disabled in general).
When ON (default) then + `llvm_unreachable()` is considered "undefined behavior" and optimized as + such. When OFF it is instead replaced with a guaranteed "trap". + **LLVM_USE_INTEL_JITEVENTS**:BOOL Enable building support for Intel JIT Events API. Defaults to OFF. @@ -887,6 +891,11 @@ Advanced variables These are niche, and changing them from their defaults is more likely to cause things to go wrong. They are also unstable across LLVM versions. +**LLVM_EXAMPLES_INSTALL_DIR**:STRING + The path for examples of using LLVM, relative to the *CMAKE_INSTALL_PREFIX*. + Only matters if *LLVM_BUILD_EXAMPLES* is enabled. + Defaults to "examples". + **LLVM_TOOLS_INSTALL_DIR**:STRING The path to install the main LLVM tools, relative to the *CMAKE_INSTALL_PREFIX*. Defaults to *CMAKE_INSTALL_BINDIR*. @@ -896,11 +905,6 @@ things to go wrong. They are also unstable across LLVM versions. Only matters if *LLVM_INSTALL_UTILS* is enabled. Defaults to *LLVM_TOOLS_INSTALL_DIR*. -**LLVM_EXAMPLES_INSTALL_DIR**:STRING - The path for examples of using LLVM, relative to the *CMAKE_INSTALL_PREFIX*. - Only matters if *LLVM_BUILD_EXAMPLES* is enabled. - Defaults to "examples". - CMake Caches ============ diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 3cd6ef5bab627..2ef84a9f0fbe2 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -5557,6 +5557,8 @@ RISC-V: Sparc: +- ``L``: Print the low-order register of a two-register operand. +- ``H``: Print the high-order register of a two-register operand. - ``r``: No effect. SystemZ: @@ -11110,11 +11112,12 @@ For most of these operations, the type of '' must be an integer type whose bit width is a power of two greater than or equal to eight and less than or equal to a target-specific size limit. For xchg, this may also be a floating point or a pointer type with the same size constraints -as integers. For fadd/fsub/fmax/fmin, this must be a floating point type. The -type of the '````' operand must be a pointer to that type. If -the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not -allowed to modify the number or order of execution of this -``atomicrmw`` with other :ref:`volatile operations `. +as integers. For fadd/fsub/fmax/fmin, this must be a floating-point +or fixed vector of floating-point type. The type of the '````' +operand must be a pointer to that type. If the ``atomicrmw`` is marked +as ``volatile``, then the optimizer is not allowed to modify the +number or order of execution of this ``atomicrmw`` with other +:ref:`volatile operations `. Note: if the alignment is not greater or equal to the size of the `` type, the atomic operation is likely to require a lock and have poor @@ -11445,7 +11448,7 @@ and converts the remaining bits to ``ty2``. Since the source size must be larger than the destination size, ``trunc`` cannot be a *no-op cast*. It will always truncate bits. -If the ``nuw`` keyword is present, and any of the truncated bits are zero, +If the ``nuw`` keyword is present, and any of the truncated bits are non-zero, the result is a :ref:`poison value `. If the ``nsw`` keyword is present, and any of the truncated bits are not the same as the top bit of the truncation result, the result is a :ref:`poison value `. @@ -11748,6 +11751,10 @@ Overview: The '``uitofp``' instruction regards ``value`` as an unsigned integer and converts that value to the ``ty2`` type. +The ``nneg`` (non-negative) flag, if present, specifies that the +operand is non-negative. 
This property may be used by optimization +passes to later convert the ``uitofp`` into a ``sitofp``. + Arguments: """""""""" @@ -11765,6 +11772,9 @@ integer quantity and converts it to the corresponding floating-point value. If the value cannot be exactly represented, it is rounded using the default rounding mode. +If the ``nneg`` flag is set, and the ``uitofp`` argument is negative, +the result is a poison value. + Example: """""""" @@ -11774,6 +11784,9 @@ Example: %X = uitofp i32 257 to float ; yields float:257.0 %Y = uitofp i8 -1 to double ; yields double:255.0 + %a = uitofp nneg i32 256 to i32 ; yields float:256.0 + %b = uitofp nneg i32 -256 to i32 ; yields i32 poison + '``sitofp .. to``' Instruction ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -29345,7 +29358,7 @@ Syntax: Arguments: """""""""" -The first argument is a pointer, which refers to a thread local global. +The first argument is a thread local :ref:`global variable `. Semantics: """""""""" diff --git a/llvm/docs/NewPassManager.rst b/llvm/docs/NewPassManager.rst index 4554d80043754..b75ab918fda0f 100644 --- a/llvm/docs/NewPassManager.rst +++ b/llvm/docs/NewPassManager.rst @@ -162,10 +162,10 @@ certain parts of the pipeline. For example, .. code-block:: c++ PassBuilder PB; - PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM, - PassBuilder::OptimizationLevel Level) { - MPM.addPass(FooPass()); - }; + PB.registerPipelineStartEPCallback( + [&](ModulePassManager &MPM, PassBuilder::OptimizationLevel Level) { + MPM.addPass(FooPass()); + }); will add ``FooPass`` near the very beginning of the pipeline for pass managers created by that ``PassBuilder``. See the documentation for diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 2f17c9d7dda04..6f5eba263def4 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -131,6 +131,7 @@ on support follow. ``Zcb`` Supported ``Zcd`` Supported ``Zcf`` Supported + ``Zcmop`` Supported ``Zcmp`` Supported ``Zcmt`` Assembly Support ``Zdinx`` Supported @@ -155,6 +156,7 @@ on support follow. ``Zihintntl`` Supported ``Zihintpause`` Assembly Support ``Zihpm`` (`See Note <#riscv-i2p1-note>`__) + ``Zimop`` Supported ``Zkn`` Supported ``Zknd`` Supported (`See note <#riscv-scalar-crypto-note2>`__) ``Zkne`` Supported (`See note <#riscv-scalar-crypto-note2>`__) @@ -271,12 +273,6 @@ The primary goal of experimental support is to assist in the process of ratifica ``experimental-ztso`` LLVM implements the `v0.1 proposed specification `__ (see Chapter 25). The mapping from the C/C++ memory model to Ztso has not yet been ratified in any standards document. There are multiple possible mappings, and they are *not* mutually ABI compatible. The mapping LLVM implements is ABI compatible with the default WMO mapping. This mapping may change and there is *explicitly* no ABI stability offered while the extension remains in experimental status. User beware. -``experimental-zimop`` - LLVM implements the `v0.1 proposed specification `__. - -``experimental-zcmop`` - LLVM implements the `v0.2 proposed specification `__. - ``experimental-zaamo``, ``experimental-zalrsc`` LLVM implements the `v0.2 proposed specification `__. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 7588048334d79..d2d542752b555 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -174,6 +174,10 @@ Changes to the LLVM tools * llvm-ar now allows specifying COFF archive format with ``--format`` argument and uses it by default for COFF targets. 
+* llvm-ranlib now supports ``-V`` as an alias for ``--version``.
+  ``-v`` (``--verbose`` in llvm-ar) has been removed.
+  (`#87661 <https://github.com/llvm/llvm-project/pull/87661>`_)
+
 * llvm-objcopy now supports ``--set-symbol-visibility`` and
   ``--set-symbols-visibility`` options for ELF input to change the
   visibility of symbols.
diff --git a/llvm/include/llvm/ADT/ADL.h b/llvm/include/llvm/ADT/ADL.h
index ab1f28ff6b9c0..812d9a4b52d81 100644
--- a/llvm/include/llvm/ADT/ADL.h
+++ b/llvm/include/llvm/ADT/ADL.h
@@ -37,6 +37,22 @@ constexpr auto end_impl(RangeT &&range)
   return end(std::forward<RangeT>(range));
 }
+using std::rbegin;
+
+template <typename RangeT>
+constexpr auto rbegin_impl(RangeT &&range)
+    -> decltype(rbegin(std::forward<RangeT>(range))) {
+  return rbegin(std::forward<RangeT>(range));
+}
+
+using std::rend;
+
+template <typename RangeT>
+constexpr auto rend_impl(RangeT &&range)
+    -> decltype(rend(std::forward<RangeT>(range))) {
+  return rend(std::forward<RangeT>(range));
+}
+
 using std::swap;
 template <typename T>
@@ -72,6 +88,22 @@ constexpr auto adl_end(RangeT &&range)
   return adl_detail::end_impl(std::forward<RangeT>(range));
 }
+/// Returns the reverse-begin iterator to \p range using `std::rbegin` and
+/// functions found through Argument-Dependent Lookup (ADL).
+template <typename RangeT>
+constexpr auto adl_rbegin(RangeT &&range)
+    -> decltype(adl_detail::rbegin_impl(std::forward<RangeT>(range))) {
+  return adl_detail::rbegin_impl(std::forward<RangeT>(range));
+}
+
+/// Returns the reverse-end iterator to \p range using `std::rend` and
+/// functions found through Argument-Dependent Lookup (ADL).
+template <typename RangeT>
+constexpr auto adl_rend(RangeT &&range)
+    -> decltype(adl_detail::rend_impl(std::forward<RangeT>(range))) {
+  return adl_detail::rend_impl(std::forward<RangeT>(range));
+}
+
 /// Swaps \p lhs with \p rhs using `std::swap` and functions found through
 /// Argument-Dependent Lookup (ADL).
 template <typename T>
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index b9b39f3b9dfbc..8d3c029b2e7e9 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -1740,8 +1740,8 @@ class [[nodiscard]] APInt {
     return *this;
   }
-  /// \returns the multiplicative inverse for a given modulo.
-  APInt multiplicativeInverse(const APInt &modulo) const;
+  /// \returns the multiplicative inverse of an odd APInt modulo 2^BitWidth.
+  APInt multiplicativeInverse() const;
   /// @}
   /// \name Building-block Operations for APInt and APFloat
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index 02a3074ae1f0d..08a708e5c5871 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -405,32 +405,23 @@ class mapped_iterator_base
   }
 };
-/// Helper to determine if type T has a member called rbegin().
-template <typename Ty> class has_rbegin_impl {
-  using yes = char[1];
-  using no = char[2];
-
-  template <typename Inner>
-  static yes& test(Inner *I, decltype(I->rbegin()) * = nullptr);
-
-  template <typename>
-  static no& test(...);
-
-public:
-  static const bool value = sizeof(test<Ty>(nullptr)) == sizeof(yes);
-};
+namespace detail {
+template <typename Ty>
+using check_has_free_function_rbegin =
+    decltype(adl_rbegin(std::declval<Ty &>()));
-/// Metafunction to determine if T& or T has a member called rbegin().
-template <typename Ty>
-struct has_rbegin : has_rbegin_impl<std::remove_reference_t<Ty>> {};
+template <typename Ty>
+static constexpr bool HasFreeFunctionRBegin =
+    is_detected<check_has_free_function_rbegin, Ty>::value;
+} // namespace detail
 // Returns an iterator_range over the given container which iterates in reverse.
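// A usage sketch (editorial, not part of the patch): with the ADL-based
// detection above, reverse() also accepts range types whose rbegin()/rend()
// are free functions found by argument-dependent lookup, e.g.
//
//   namespace sample {                        // hypothetical user type
//   struct IntBox { std::vector<int> V; };
//   inline auto rbegin(IntBox &B) { return B.V.rbegin(); }
//   inline auto rend(IntBox &B) { return B.V.rend(); }
//   } // namespace sample
//
//   sample::IntBox B{{1, 2, 3}};
//   for (int I : llvm::reverse(B)) {}         // visits 3, 2, 1
//
// Types with member rbegin()/rend() keep working through std::rbegin, which
// adl_rbegin() considers alongside the ADL candidates.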
template <typename ContainerTy> auto reverse(ContainerTy &&C) {
-  if constexpr (has_rbegin<ContainerTy>::value)
-    return make_range(C.rbegin(), C.rend());
+  if constexpr (detail::HasFreeFunctionRBegin<ContainerTy>)
+    return make_range(adl_rbegin(C), adl_rend(C));
   else
-    return make_range(std::make_reverse_iterator(std::end(C)),
-                      std::make_reverse_iterator(std::begin(C)));
+    return make_range(std::make_reverse_iterator(adl_end(C)),
+                      std::make_reverse_iterator(adl_begin(C)));
 }
 /// An iterator adaptor that filters the elements of given inner iterators.
@@ -2151,7 +2142,7 @@ template <typename Container, typename UnaryFunctor, typename NullaryFunctor,
                               !std::is_constructible<StringRef, NullaryFunctor>::value>>
 inline void interleave(const Container &c, UnaryFunctor each_fn,
                        NullaryFunctor between_fn) {
-  interleave(c.begin(), c.end(), each_fn, between_fn);
+  interleave(adl_begin(c), adl_end(c), each_fn, between_fn);
 }
 /// Overload of interleave for the common case of string separator.
@@ -2159,7 +2150,7 @@ template <typename Container, typename UnaryFunctor, typename StreamT,
           typename T = detail::ValueOfRange<Container>>
 inline void interleave(const Container &c, StreamT &os, UnaryFunctor each_fn,
                        const StringRef &separator) {
-  interleave(c.begin(), c.end(), each_fn, [&] { os << separator; });
+  interleave(adl_begin(c), adl_end(c), each_fn, [&] { os << separator; });
 }
 template <typename Container, typename StreamT,
           typename T = detail::ValueOfRange<Container>>
diff --git a/llvm/include/llvm/Analysis/Interval.h b/llvm/include/llvm/Analysis/Interval.h
deleted file mode 100644
index 9afe659d00dd3..0000000000000
--- a/llvm/include/llvm/Analysis/Interval.h
+++ /dev/null
@@ -1,138 +0,0 @@
-//===- llvm/Analysis/Interval.h - Interval Class Declaration ----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the Interval class, which
-// represents a set of CFG nodes and is a portion of an interval partition.
-//
-// Intervals have some interesting and useful properties, including the
-// following:
-//    1. The header node of an interval dominates all of the elements of the
-//       interval
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ANALYSIS_INTERVAL_H
-#define LLVM_ANALYSIS_INTERVAL_H
-
-#include "llvm/ADT/GraphTraits.h"
-#include <vector>
-
-namespace llvm {
-
-class BasicBlock;
-class raw_ostream;
-
-//===----------------------------------------------------------------------===//
-//
-/// Interval Class - An Interval is a set of nodes defined such that every node
-/// in the interval has all of its predecessors in the interval (except for the
-/// header)
-///
-class Interval {
-  /// HeaderNode - The header BasicBlock, which dominates all BasicBlocks in this
-  /// interval. Also, any loops in this interval must go through the HeaderNode.
-  ///
-  BasicBlock *HeaderNode;
-
-public:
-  using succ_iterator = std::vector<BasicBlock*>::iterator;
-  using pred_iterator = std::vector<BasicBlock*>::iterator;
-  using node_iterator = std::vector<BasicBlock*>::iterator;
-
-  inline Interval(BasicBlock *Header) : HeaderNode(Header) {
-    Nodes.push_back(Header);
-  }
-
-  inline BasicBlock *getHeaderNode() const { return HeaderNode; }
-
-  /// Nodes - The basic blocks in this interval.
-  std::vector<BasicBlock*> Nodes;
-
-  /// Successors - List of BasicBlocks that are reachable directly from nodes in
-  /// this interval, but are not in the interval themselves.
-  /// These nodes necessarily must be header nodes for other intervals.
-  std::vector<BasicBlock*> Successors;
-
-  /// Predecessors - List of BasicBlocks that have this Interval's header block
-  /// as one of their successors.
- std::vector Predecessors; - - /// contains - Find out if a basic block is in this interval - inline bool contains(BasicBlock *BB) const { - for (BasicBlock *Node : Nodes) - if (Node == BB) - return true; - return false; - // I don't want the dependency on - //return find(Nodes.begin(), Nodes.end(), BB) != Nodes.end(); - } - - /// isSuccessor - find out if a basic block is a successor of this Interval - inline bool isSuccessor(BasicBlock *BB) const { - for (BasicBlock *Successor : Successors) - if (Successor == BB) - return true; - return false; - // I don't want the dependency on - //return find(Successors.begin(), Successors.end(), BB) != Successors.end(); - } - - /// Equality operator. It is only valid to compare two intervals from the - /// same partition, because of this, all we have to check is the header node - /// for equality. - inline bool operator==(const Interval &I) const { - return HeaderNode == I.HeaderNode; - } - - /// print - Show contents in human readable format... - void print(raw_ostream &O) const; -}; - -/// succ_begin/succ_end - define methods so that Intervals may be used -/// just like BasicBlocks can with the succ_* functions, and *::succ_iterator. -/// -inline Interval::succ_iterator succ_begin(Interval *I) { - return I->Successors.begin(); -} -inline Interval::succ_iterator succ_end(Interval *I) { - return I->Successors.end(); -} - -/// pred_begin/pred_end - define methods so that Intervals may be used -/// just like BasicBlocks can with the pred_* functions, and *::pred_iterator. -/// -inline Interval::pred_iterator pred_begin(Interval *I) { - return I->Predecessors.begin(); -} -inline Interval::pred_iterator pred_end(Interval *I) { - return I->Predecessors.end(); -} - -template <> struct GraphTraits { - using NodeRef = Interval *; - using ChildIteratorType = Interval::succ_iterator; - - static NodeRef getEntryNode(Interval *I) { return I; } - - /// nodes_iterator/begin/end - Allow iteration over all nodes in the graph - static ChildIteratorType child_begin(NodeRef N) { return succ_begin(N); } - static ChildIteratorType child_end(NodeRef N) { return succ_end(N); } -}; - -template <> struct GraphTraits> { - using NodeRef = Interval *; - using ChildIteratorType = Interval::pred_iterator; - - static NodeRef getEntryNode(Inverse G) { return G.Graph; } - static ChildIteratorType child_begin(NodeRef N) { return pred_begin(N); } - static ChildIteratorType child_end(NodeRef N) { return pred_end(N); } -}; - -} // end namespace llvm - -#endif // LLVM_ANALYSIS_INTERVAL_H diff --git a/llvm/include/llvm/Analysis/IntervalIterator.h b/llvm/include/llvm/Analysis/IntervalIterator.h deleted file mode 100644 index 30e91f1734b65..0000000000000 --- a/llvm/include/llvm/Analysis/IntervalIterator.h +++ /dev/null @@ -1,264 +0,0 @@ -//===- IntervalIterator.h - Interval Iterator Declaration -------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines an iterator that enumerates the intervals in a control flow -// graph of some sort. This iterator is parametric, allowing iterator over the -// following types of graphs: -// -// 1. A Function* object, composed of BasicBlock nodes. -// 2. An IntervalPartition& object, composed of Interval nodes. 
-// -// This iterator is defined to walk the control flow graph, returning intervals -// in depth first order. These intervals are completely filled in except for -// the predecessor fields (the successor information is filled in however). -// -// By default, the intervals created by this iterator are deleted after they -// are no longer any use to the iterator. This behavior can be changed by -// passing a false value into the intervals_begin() function. This causes the -// IOwnMem member to be set, and the intervals to not be deleted. -// -// It is only safe to use this if all of the intervals are deleted by the caller -// and all of the intervals are processed. However, the user of the iterator is -// not allowed to modify or delete the intervals until after the iterator has -// been used completely. The IntervalPartition class uses this functionality. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_INTERVALITERATOR_H -#define LLVM_ANALYSIS_INTERVALITERATOR_H - -#include "llvm/ADT/GraphTraits.h" -#include "llvm/Analysis/Interval.h" -#include "llvm/Analysis/IntervalPartition.h" -#include "llvm/IR/CFG.h" -#include -#include -#include -#include -#include -#include - -namespace llvm { - -class BasicBlock; -class Function; - -// getNodeHeader - Given a source graph node and the source graph, return the -// BasicBlock that is the header node. This is the opposite of -// getSourceGraphNode. -inline BasicBlock *getNodeHeader(BasicBlock *BB) { return BB; } -inline BasicBlock *getNodeHeader(Interval *I) { return I->getHeaderNode(); } - -// getSourceGraphNode - Given a BasicBlock and the source graph, return the -// source graph node that corresponds to the BasicBlock. This is the opposite -// of getNodeHeader. -inline BasicBlock *getSourceGraphNode(Function *, BasicBlock *BB) { - return BB; -} -inline Interval *getSourceGraphNode(IntervalPartition *IP, BasicBlock *BB) { - return IP->getBlockInterval(BB); -} - -// addNodeToInterval - This method exists to assist the generic ProcessNode -// with the task of adding a node to the new interval, depending on the -// type of the source node. In the case of a CFG source graph (BasicBlock -// case), the BasicBlock itself is added to the interval. -inline void addNodeToInterval(Interval *Int, BasicBlock *BB) { - Int->Nodes.push_back(BB); -} - -// addNodeToInterval - This method exists to assist the generic ProcessNode -// with the task of adding a node to the new interval, depending on the -// type of the source node. In the case of a CFG source graph (BasicBlock -// case), the BasicBlock itself is added to the interval. In the case of -// an IntervalPartition source graph (Interval case), all of the member -// BasicBlocks are added to the interval. -inline void addNodeToInterval(Interval *Int, Interval *I) { - // Add all of the nodes in I as new nodes in Int. 
- llvm::append_range(Int->Nodes, I->Nodes); -} - -template, - class IGT = GraphTraits>> -class IntervalIterator { - std::vector> IntStack; - std::set Visited; - OrigContainer_t *OrigContainer; - bool IOwnMem; // If True, delete intervals when done with them - // See file header for conditions of use - -public: - using iterator_category = std::forward_iterator_tag; - - IntervalIterator() = default; // End iterator, empty stack - - IntervalIterator(Function *M, bool OwnMemory) : IOwnMem(OwnMemory) { - OrigContainer = M; - if (!ProcessInterval(&M->front())) { - llvm_unreachable("ProcessInterval should never fail for first interval!"); - } - } - - IntervalIterator(IntervalIterator &&x) - : IntStack(std::move(x.IntStack)), Visited(std::move(x.Visited)), - OrigContainer(x.OrigContainer), IOwnMem(x.IOwnMem) { - x.IOwnMem = false; - } - - IntervalIterator(IntervalPartition &IP, bool OwnMemory) : IOwnMem(OwnMemory) { - OrigContainer = &IP; - if (!ProcessInterval(IP.getRootInterval())) { - llvm_unreachable("ProcessInterval should never fail for first interval!"); - } - } - - ~IntervalIterator() { - if (IOwnMem) - while (!IntStack.empty()) { - delete operator*(); - IntStack.pop_back(); - } - } - - bool operator==(const IntervalIterator &x) const { - return IntStack == x.IntStack; - } - bool operator!=(const IntervalIterator &x) const { return !(*this == x); } - - const Interval *operator*() const { return IntStack.back().first; } - Interval *operator*() { return IntStack.back().first; } - const Interval *operator->() const { return operator*(); } - Interval *operator->() { return operator*(); } - - IntervalIterator &operator++() { // Preincrement - assert(!IntStack.empty() && "Attempting to use interval iterator at end!"); - do { - // All of the intervals on the stack have been visited. Try visiting - // their successors now. - Interval::succ_iterator &SuccIt = IntStack.back().second, - EndIt = succ_end(IntStack.back().first); - while (SuccIt != EndIt) { // Loop over all interval succs - bool Done = ProcessInterval(getSourceGraphNode(OrigContainer, *SuccIt)); - ++SuccIt; // Increment iterator - if (Done) return *this; // Found a new interval! Use it! - } - - // Free interval memory... if necessary - if (IOwnMem) delete IntStack.back().first; - - // We ran out of successors for this interval... pop off the stack - IntStack.pop_back(); - } while (!IntStack.empty()); - - return *this; - } - - IntervalIterator operator++(int) { // Postincrement - IntervalIterator tmp = *this; - ++*this; - return tmp; - } - -private: - // ProcessInterval - This method is used during the construction of the - // interval graph. It walks through the source graph, recursively creating - // an interval per invocation until the entire graph is covered. This uses - // the ProcessNode method to add all of the nodes to the interval. - // - // This method is templated because it may operate on two different source - // graphs: a basic block graph, or a preexisting interval graph. - bool ProcessInterval(NodeTy *Node) { - BasicBlock *Header = getNodeHeader(Node); - if (!Visited.insert(Header).second) - return false; - - Interval *Int = new Interval(Header); - - // Check all of our successors to see if they are in the interval... 
- for (typename GT::ChildIteratorType I = GT::child_begin(Node), - E = GT::child_end(Node); I != E; ++I) - ProcessNode(Int, getSourceGraphNode(OrigContainer, *I)); - - IntStack.push_back(std::make_pair(Int, succ_begin(Int))); - return true; - } - - // ProcessNode - This method is called by ProcessInterval to add nodes to the - // interval being constructed, and it is also called recursively as it walks - // the source graph. A node is added to the current interval only if all of - // its predecessors are already in the graph. This also takes care of keeping - // the successor set of an interval up to date. - // - // This method is templated because it may operate on two different source - // graphs: a basic block graph, or a preexisting interval graph. - void ProcessNode(Interval *Int, NodeTy *Node) { - assert(Int && "Null interval == bad!"); - assert(Node && "Null Node == bad!"); - - BasicBlock *NodeHeader = getNodeHeader(Node); - - if (Visited.count(NodeHeader)) { // Node already been visited? - if (Int->contains(NodeHeader)) { // Already in this interval... - return; - } else { // In other interval, add as successor - if (!Int->isSuccessor(NodeHeader)) // Add only if not already in set - Int->Successors.push_back(NodeHeader); - } - } else { // Otherwise, not in interval yet - for (typename IGT::ChildIteratorType I = IGT::child_begin(Node), - E = IGT::child_end(Node); I != E; ++I) { - if (!Int->contains(*I)) { // If pred not in interval, we can't be - if (!Int->isSuccessor(NodeHeader)) // Add only if not already in set - Int->Successors.push_back(NodeHeader); - return; // See you later - } - } - - // If we get here, then all of the predecessors of BB are in the interval - // already. In this case, we must add BB to the interval! - addNodeToInterval(Int, Node); - Visited.insert(NodeHeader); // The node has now been visited! - - if (Int->isSuccessor(NodeHeader)) { - // If we were in the successor list from before... remove from succ list - llvm::erase(Int->Successors, NodeHeader); - } - - // Now that we have discovered that Node is in the interval, perhaps some - // of its successors are as well? - for (typename GT::ChildIteratorType It = GT::child_begin(Node), - End = GT::child_end(Node); It != End; ++It) - ProcessNode(Int, getSourceGraphNode(OrigContainer, *It)); - } - } -}; - -using function_interval_iterator = IntervalIterator; -using interval_part_interval_iterator = - IntervalIterator; - -inline function_interval_iterator intervals_begin(Function *F, - bool DeleteInts = true) { - return function_interval_iterator(F, DeleteInts); -} -inline function_interval_iterator intervals_end(Function *) { - return function_interval_iterator(); -} - -inline interval_part_interval_iterator - intervals_begin(IntervalPartition &IP, bool DeleteIntervals = true) { - return interval_part_interval_iterator(IP, DeleteIntervals); -} - -inline interval_part_interval_iterator intervals_end(IntervalPartition &IP) { - return interval_part_interval_iterator(); -} - -} // end namespace llvm - -#endif // LLVM_ANALYSIS_INTERVALITERATOR_H diff --git a/llvm/include/llvm/Analysis/IntervalPartition.h b/llvm/include/llvm/Analysis/IntervalPartition.h deleted file mode 100644 index 66a99fb15bfb9..0000000000000 --- a/llvm/include/llvm/Analysis/IntervalPartition.h +++ /dev/null @@ -1,108 +0,0 @@ -//===- IntervalPartition.h - Interval partition Calculation -----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the IntervalPartition class, which -// calculates and represents the interval partition of a function, or a -// preexisting interval partition. -// -// In this way, the interval partition may be used to reduce a flow graph down -// to its degenerate single node interval partition (unless it is irreducible). -// -// TODO: The IntervalPartition class should take a bool parameter that tells -// whether it should add the "tails" of an interval to an interval itself or if -// they should be represented as distinct intervals. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_INTERVALPARTITION_H -#define LLVM_ANALYSIS_INTERVALPARTITION_H - -#include "llvm/Pass.h" -#include -#include - -namespace llvm { - -class BasicBlock; -class Interval; - -//===----------------------------------------------------------------------===// -// -// IntervalPartition - This class builds and holds an "interval partition" for -// a function. This partition divides the control flow graph into a set of -// maximal intervals, as defined with the properties above. Intuitively, an -// interval is a (possibly nonexistent) loop with a "tail" of non-looping -// nodes following it. -// -class IntervalPartition : public FunctionPass { - using IntervalMapTy = std::map; - IntervalMapTy IntervalMap; - - using IntervalListTy = std::vector; - Interval *RootInterval = nullptr; - std::vector Intervals; - -public: - static char ID; // Pass identification, replacement for typeid - - IntervalPartition(); - - // run - Calculate the interval partition for this function - bool runOnFunction(Function &F) override; - - // IntervalPartition ctor - Build a reduced interval partition from an - // existing interval graph. This takes an additional boolean parameter to - // distinguish it from a copy constructor. Always pass in false for now. - IntervalPartition(IntervalPartition &I, bool); - - // print - Show contents in human readable format... - void print(raw_ostream &O, const Module* = nullptr) const override; - - // getRootInterval() - Return the root interval that contains the starting - // block of the function. - inline Interval *getRootInterval() { return RootInterval; } - - // isDegeneratePartition() - Returns true if the interval partition contains - // a single interval, and thus cannot be simplified anymore. - bool isDegeneratePartition() { return Intervals.size() == 1; } - - // TODO: isIrreducible - look for triangle graph. - - // getBlockInterval - Return the interval that a basic block exists in. - inline Interval *getBlockInterval(BasicBlock *BB) { - IntervalMapTy::iterator I = IntervalMap.find(BB); - return I != IntervalMap.end() ? I->second : nullptr; - } - - // getAnalysisUsage - Implement the Pass API - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } - - // Interface to Intervals vector... - const std::vector &getIntervals() const { return Intervals; } - - // releaseMemory - Reset state back to before function was analyzed - void releaseMemory() override; - -private: - // addIntervalToPartition - Add an interval to the internal list of intervals, - // and then add mappings from all of the basic blocks in the interval to the - // interval itself (in the IntervalMap). 
- void addIntervalToPartition(Interval *I); - - // updatePredecessors - Interval generation only sets the successor fields of - // the interval data structures. After interval generation is complete, - // run through all of the intervals and propagate successor info as - // predecessor info. - void updatePredecessors(Interval *Int); -}; - -} // end namespace llvm - -#endif // LLVM_ANALYSIS_INTERVALPARTITION_H diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index bad0a77b0f2da..58c69ac939763 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -190,7 +190,10 @@ enum class TailFoldingStyle { /// Use predicate to control both data and control flow, but modify /// the trip count so that a runtime overflow check can be avoided /// and such that the scalar epilogue loop can always be removed. - DataAndControlFlowWithoutRuntimeCheck + DataAndControlFlowWithoutRuntimeCheck, + /// Use predicated EVL instructions for tail-folding. + /// Indicates that VP intrinsics should be used. + DataWithEVL, }; struct TailFoldingInfo { @@ -1288,12 +1291,11 @@ class TargetTransformInfo { /// passed through \p Args, which helps improve the cost estimation in some /// cases, like in broadcast loads. /// NOTE: For subvector extractions Tp represents the source type. - InstructionCost - getShuffleCost(ShuffleKind Kind, VectorType *Tp, - ArrayRef Mask = std::nullopt, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, - int Index = 0, VectorType *SubTp = nullptr, - ArrayRef Args = std::nullopt) const; + InstructionCost getShuffleCost( + ShuffleKind Kind, VectorType *Tp, ArrayRef Mask = std::nullopt, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0, + VectorType *SubTp = nullptr, ArrayRef Args = std::nullopt, + const Instruction *CxtI = nullptr) const; /// Represents a hint about the context in which a cast is used. 
/// @@ -2005,11 +2007,10 @@ class TargetTransformInfo::Concept { const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0; - virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, - ArrayRef Mask, - TTI::TargetCostKind CostKind, - int Index, VectorType *SubTp, - ArrayRef Args) = 0; + virtual InstructionCost + getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, + TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, + ArrayRef Args, const Instruction *CxtI) = 0; virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, TTI::TargetCostKind CostKind, @@ -2644,8 +2645,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args) override { - return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args); + ArrayRef Args, + const Instruction *CxtI) override { + return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args, + CxtI); } InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, CastContextHint CCH, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 63c2ef8912b29..5b40e49714069 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -579,10 +579,12 @@ class TargetTransformInfoImplBase { return InstructionCost::getInvalid(); } - InstructionCost - getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef Mask, - TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = std::nullopt) const { + InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, + ArrayRef Mask, + TTI::TargetCostKind CostKind, int Index, + VectorType *SubTp, + ArrayRef Args = std::nullopt, + const Instruction *CxtI = nullptr) const { return 1; } @@ -1341,13 +1343,13 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { if (Shuffle->isExtractSubvectorMask(SubIndex)) return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, Mask, CostKind, SubIndex, VecTy, - Operands); + Operands, Shuffle); if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) return TargetTTI->getShuffleCost( TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts), - Operands); + Operands, Shuffle); int ReplicationFactor, VF; if (Shuffle->isReplicationMask(ReplicationFactor, VF)) { @@ -1374,7 +1376,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { return TargetTTI->getShuffleCost( IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy, - AdjustMask, CostKind, 0, nullptr); + AdjustMask, CostKind, 0, nullptr, {}, Shuffle); } // Narrowing shuffle - perform shuffle at original wider width and @@ -1383,13 +1385,13 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { InstructionCost ShuffleCost = TargetTTI->getShuffleCost( IsUnary ? 
TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, - VecSrcTy, AdjustMask, CostKind, 0, nullptr); + VecSrcTy, AdjustMask, CostKind, 0, nullptr, {}, Shuffle); SmallVector ExtractMask(Mask.size()); std::iota(ExtractMask.begin(), ExtractMask.end(), 0); - return ShuffleCost + TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, - VecSrcTy, ExtractMask, - CostKind, 0, VecTy); + return ShuffleCost + TargetTTI->getShuffleCost( + TTI::SK_ExtractSubvector, VecSrcTy, + ExtractMask, CostKind, 0, VecTy, {}, Shuffle); } if (Shuffle->isIdentity()) @@ -1397,35 +1399,39 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { if (Shuffle->isReverse()) return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind, - 0, nullptr, Operands); + 0, nullptr, Operands, Shuffle); if (Shuffle->isSelect()) return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind, - 0, nullptr, Operands); + 0, nullptr, Operands, Shuffle); if (Shuffle->isTranspose()) return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask, - CostKind, 0, nullptr, Operands); + CostKind, 0, nullptr, Operands, + Shuffle); if (Shuffle->isZeroEltSplat()) return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask, - CostKind, 0, nullptr, Operands); + CostKind, 0, nullptr, Operands, + Shuffle); if (Shuffle->isSingleSource()) return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask, - CostKind, 0, nullptr, Operands); + CostKind, 0, nullptr, Operands, + Shuffle); if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) return TargetTTI->getShuffleCost( TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex, - FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands); + FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands, + Shuffle); if (Shuffle->isSplice(SubIndex)) return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind, - SubIndex, nullptr, Operands); + SubIndex, nullptr, Operands, Shuffle); return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask, - CostKind, 0, nullptr, Operands); + CostKind, 0, nullptr, Operands, Shuffle); } case Instruction::ExtractElement: { auto *EEI = dyn_cast(U); diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index 394e4a05fbc0c..10f1333cf8885 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -1005,6 +1005,8 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_svlog2_f32_x", SCALABLE(4), MASKED, " TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8") TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4") +TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8") +TLI_DEFINE_VECFUNC("modff", "armpl_svmodf_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4") TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv") @@ -1033,9 +1035,13 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED, "_Z TLI_DEFINE_VECFUNC("sincos", "armpl_vsincosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8") TLI_DEFINE_VECFUNC("sincosf", "armpl_vsincosq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4") +TLI_DEFINE_VECFUNC("sincos", "armpl_svsincos_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") +TLI_DEFINE_VECFUNC("sincosf", "armpl_svsincos_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") TLI_DEFINE_VECFUNC("sincospi", 
"armpl_vsincospiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8") TLI_DEFINE_VECFUNC("sincospif", "armpl_vsincospiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4") +TLI_DEFINE_VECFUNC("sincospi", "armpl_svsincospi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") +TLI_DEFINE_VECFUNC("sincospif", "armpl_svsincospi_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 8ebd0d3409c89..b4e971fea1a13 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -335,6 +335,7 @@ namespace llvm { // Top-Level Entities bool parseTopLevelEntities(); + bool finalizeDebugInfoFormat(Module *M); void dropUnknownMetadataReferences(); bool validateEndOfModule(bool UpgradeDebugInfo); bool validateEndOfIndex(); diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h index a53e79bf6e39c..298700c8941ee 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.h +++ b/llvm/include/llvm/BinaryFormat/Dwarf.h @@ -613,23 +613,13 @@ enum AcceleratorTable { DW_hash_function_djb = 0u }; -// Uniquify the string hashes and calculate the bucket count for the -// DWARF v5 Accelerator Table. NOTE: This function effectively consumes the -// 'Hashes' input parameter. -inline std::pair -getDebugNamesBucketAndHashCount(MutableArrayRef Hashes) { - uint32_t BucketCount = 0; - - sort(Hashes); - uint32_t UniqueHashCount = llvm::unique(Hashes) - Hashes.begin(); +// Return a suggested bucket count for the DWARF v5 Accelerator Table. +inline uint32_t getDebugNamesBucketCount(uint32_t UniqueHashCount) { if (UniqueHashCount > 1024) - BucketCount = UniqueHashCount / 4; - else if (UniqueHashCount > 16) - BucketCount = UniqueHashCount / 2; - else - BucketCount = std::max(UniqueHashCount, 1); - - return {BucketCount, UniqueHashCount}; + return UniqueHashCount / 4; + if (UniqueHashCount > 16) + return UniqueHashCount / 2; + return std::max(UniqueHashCount, 1); } // Constants for the GNU pubnames/pubtypes extensions supporting gdb index. diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index a7fbf4aeb7449..81c3e4be95e9f 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -868,6 +868,9 @@ class AsmPrinter : public MachineFunctionPass { /// This method emits a comment next to header for the current function. virtual void emitFunctionHeaderComment(); + /// This method emits prefix-like data before the current function. + void emitFunctionPrefix(ArrayRef Prefix); + /// Emit a blob of inline asm to the output streamer. void emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 92fa726c31df1..06a19c75cf873 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -209,28 +209,31 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Align Alignment, bool VariableMask, bool IsGatherScatter, - TTI::TargetCostKind CostKind) { + TTI::TargetCostKind CostKind, + unsigned AddressSpace = 0) { // We cannot scalarize scalable vectors, so return Invalid. 
if (isa(DataTy)) return InstructionCost::getInvalid(); auto *VT = cast(DataTy); + unsigned VF = VT->getNumElements(); + // Assume the target does not have support for gather/scatter operations // and provide a rough estimate. // // First, compute the cost of the individual memory operations. InstructionCost AddrExtractCost = IsGatherScatter - ? getVectorInstrCost(Instruction::ExtractElement, - FixedVectorType::get( - PointerType::get(VT->getElementType(), 0), - VT->getNumElements()), - CostKind, -1, nullptr, nullptr) + ? getScalarizationOverhead( + FixedVectorType::get( + PointerType::get(VT->getElementType(), 0), VF), + /*Insert=*/false, /*Extract=*/true, CostKind) : 0; - InstructionCost LoadCost = - VT->getNumElements() * - (AddrExtractCost + - getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind)); + + // The cost of the scalar loads/stores. + InstructionCost MemoryOpCost = + VF * getMemoryOpCost(Opcode, VT->getElementType(), Alignment, + AddressSpace, CostKind); // Next, compute the cost of packing the result in a vector. InstructionCost PackingCost = @@ -246,17 +249,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // operations accurately is quite difficult and the current solution // provides a very rough estimate only. ConditionalCost = - VT->getNumElements() * - (getVectorInstrCost( - Instruction::ExtractElement, - FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()), - VT->getNumElements()), - CostKind, -1, nullptr, nullptr) + - getCFInstrCost(Instruction::Br, CostKind) + - getCFInstrCost(Instruction::PHI, CostKind)); + getScalarizationOverhead( + FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()), VF), + /*Insert=*/false, /*Extract=*/true, CostKind) + + VF * (getCFInstrCost(Instruction::Br, CostKind) + + getCFInstrCost(Instruction::PHI, CostKind)); } - return LoadCost + PackingCost + ConditionalCost; + return AddrExtractCost + MemoryOpCost + PackingCost + ConditionalCost; } protected: @@ -1018,7 +1018,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = std::nullopt) { + ArrayRef Args = std::nullopt, + const Instruction *CxtI = nullptr) { switch (improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp)) { case TTI::SK_Broadcast: if (auto *FVT = dyn_cast(Tp)) @@ -1369,6 +1370,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) { + // TODO: Pass on AddressSpace when we have test coverage. 
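+    // Until then, the call below leaves getCommonMaskedMemoryOpCost's new
+    // AddressSpace parameter at its default of 0.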
return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, true, false, CostKind); } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 4c9d85fd9f514..be39eb7891f3b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Module.h" @@ -1300,8 +1301,10 @@ class MachineIRBuilder { MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx) { - return buildExtractVectorElement(Res, Val, - buildConstant(LLT::scalar(64), Idx)); + auto TLI = getMF().getSubtarget().getTargetLowering(); + unsigned VecIdxWidth = TLI->getVectorIdxTy(getDataLayout()).getSizeInBits(); + return buildExtractVectorElement( + Res, Val, buildConstant(LLT::scalar(VecIdxWidth), Idx)); } /// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake index fc1f9bf342f8d..977c182e9d2b0 100644 --- a/llvm/include/llvm/Config/config.h.cmake +++ b/llvm/include/llvm/Config/config.h.cmake @@ -290,6 +290,9 @@ /* Whether tools show host and target info when invoked with --version */ #cmakedefine01 LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO +/* Whether tools show optional build config flags when invoked with --version */ +#cmakedefine01 LLVM_VERSION_PRINTER_SHOW_BUILD_CONFIG + /* Define if libxml2 is supported on this platform. */ #cmakedefine LLVM_ENABLE_LIBXML2 ${LLVM_ENABLE_LIBXML2} diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h index f1d4fc72d5a72..9543b78ea6130 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h @@ -804,9 +804,11 @@ class DWARFDebugNames : public DWARFAcceleratorTable { /// Calculates the starting offsets for various sections within the /// .debug_names section. -void findDebugNamesOffsets(DWARFDebugNames::DWARFDebugNamesOffsets &Offsets, - uint64_t HdrSize, const dwarf::DwarfFormat Format, - const DWARFDebugNames::Header &Hdr); +namespace dwarf { +DWARFDebugNames::DWARFDebugNamesOffsets +findDebugNamesOffsets(uint64_t EndOfHeaderOffset, + const DWARFDebugNames::Header &Hdr); +} /// If `Name` is the name of a templated function that includes template /// parameters, returns a substring of `Name` containing no template diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h index 45bf9adcf2a4e..7121b3fe76274 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h @@ -227,14 +227,14 @@ class SymbolLookupSet { } /// Construct a SymbolLookupSet from DenseMap keys. 
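  /// For example, given a DenseMap<SymbolStringPtr, JITEvaluatedSymbol> M
  /// (the value type here is illustrative), fromMapKeys(M) yields a
  /// SymbolLookupSet containing every key of M, each tagged with the
  /// RequiredSymbol lookup flag.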
- template + template static SymbolLookupSet - fromMapKeys(const DenseMap &M, + fromMapKeys(const DenseMap &M, SymbolLookupFlags Flags = SymbolLookupFlags::RequiredSymbol) { SymbolLookupSet Result; Result.Symbols.reserve(M.size()); - for (const auto &KV : M) - Result.add(KV.first, Flags); + for (const auto &[Name, Val] : M) + Result.add(Name, Flags); return Result; } @@ -1237,6 +1237,8 @@ class JITDylib : public ThreadSafeRefCountedBase, // * Pending queries holds any not-yet-completed queries that include this // symbol. struct MaterializingInfo { + friend class ExecutionSession; + std::shared_ptr DefiningEDU; DenseSet DependantEDUs; @@ -1331,6 +1333,10 @@ class JITDylib : public ThreadSafeRefCountedBase, void unlinkMaterializationResponsibility(MaterializationResponsibility &MR); + /// Attempt to reduce memory usage from empty \c UnmaterializedInfos and + /// \c MaterializingInfos tables. + void shrinkMaterializationInfoMemory(); + ExecutionSession &ES; enum { Open, Closing, Closed } State = Open; std::mutex GeneratorsMutex; @@ -1746,6 +1752,11 @@ class ExecutionSession { /// Dump the state of all the JITDylibs in this session. void dump(raw_ostream &OS); + /// Check the internal consistency of ExecutionSession data structures. +#ifdef EXPENSIVE_CHECKS + bool verifySessionState(Twine Phase); +#endif + private: static void logErrorsToStdErr(Error Err) { logAllUnhandledErrors(std::move(Err), errs(), "JIT session error: "); diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h index 0c5a07bde4ecf..e1220966e7e6e 100644 --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -92,6 +92,7 @@ class BasicBlock final : public Value, // Basic blocks are data objects also /// in the new format (\p NewFlag == true), converting to the desired format /// if necessary. void setIsNewDbgInfoFormat(bool NewFlag); + void setNewDbgInfoFormatFlag(bool NewFlag); /// Record that the collection of DbgRecords in \p M "trails" after the last /// instruction of this block. 
These are equivalent to dbg.value intrinsics diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index d96d506a9b05d..60f41b30e91c2 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -120,6 +120,7 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, void convertFromNewDbgValues(); void setIsNewDbgInfoFormat(bool NewVal); + void setNewDbgInfoFormatFlag(bool NewVal); private: friend class TargetLibraryInfoImpl; diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index ce140efca1140..33770cecc6239 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -2073,11 +2073,17 @@ class IRBuilderBase { return CreateCast(Instruction::FPToSI, V, DestTy, Name); } - Value *CreateUIToFP(Value *V, Type *DestTy, const Twine &Name = ""){ + Value *CreateUIToFP(Value *V, Type *DestTy, const Twine &Name = "", + bool IsNonNeg = false) { if (IsFPConstrained) return CreateConstrainedFPCast(Intrinsic::experimental_constrained_uitofp, V, DestTy, nullptr, Name); - return CreateCast(Instruction::UIToFP, V, DestTy, Name); + if (Value *Folded = Folder.FoldCast(Instruction::UIToFP, V, DestTy)) + return Folded; + Instruction *I = Insert(new UIToFPInst(V, DestTy), Name); + if (IsNonNeg) + I->setNonNeg(); + return I; } Value *CreateSIToFP(Value *V, Type *DestTy, const Twine &Name = ""){ diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index e4e5fa15c399e..cfe1b11ade5a4 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -927,13 +927,19 @@ class CastInst : public UnaryInstruction { } }; -/// Instruction that can have a nneg flag (only zext). +/// Instruction that can have a nneg flag (zext/uitofp). class PossiblyNonNegInst : public CastInst { public: enum { NonNeg = (1 << 0) }; static bool classof(const Instruction *I) { - return I->getOpcode() == Instruction::ZExt; + switch (I->getOpcode()) { + case Instruction::ZExt: + case Instruction::UIToFP: + return true; + default: + return false; + } } static bool classof(const Value *V) { diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 41735192ed49c..cc283cd9a05e8 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1835,8 +1835,7 @@ def int_ubsantrap : Intrinsic<[], [llvm_i8_ty], // Return true if ubsan check is allowed. def int_allow_ubsan_check : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i8_ty], - [IntrInaccessibleMemOnly, IntrWriteMem, ImmArg>, NoUndef]>, - ClangBuiltin<"__builtin_allow_ubsan_check">; + [IntrInaccessibleMemOnly, IntrWriteMem, ImmArg>, NoUndef]>; // Return true if runtime check is allowed. 
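// Illustrative IR usage of the intrinsic defined below (the check name is
// hypothetical):
//   %allow = call i1 @llvm.allow.runtime.check(metadata !"mycheck")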
def int_allow_runtime_check : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_metadata_ty], diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index a871fac46b9fd..a7f212da2f5b6 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -20,7 +20,8 @@ def int_dx_flattened_thread_id_in_group : Intrinsic<[llvm_i32_ty], [], [IntrNoMe def int_dx_create_handle : ClangBuiltin<"__builtin_hlsl_create_handle">, Intrinsic<[ llvm_ptr_ty ], [llvm_i8_ty], [IntrWillReturn]>; -def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>; +def int_dx_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>; +def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>; def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; def int_dx_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index ee9a04241ac2e..aff1fc7f085c4 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -182,10 +182,6 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". def int_ppc_fctuwz : ClangBuiltin<"__builtin_ppc_fctuwz">, DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; - def int_ppc_rldimi - : ClangBuiltin<"__builtin_ppc_rldimi">, - DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], - [IntrNoMem, ImmArg>, ImmArg>]>; def int_ppc_rlwimi : ClangBuiltin<"__builtin_ppc_rlwimi">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], @@ -194,6 +190,9 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". : ClangBuiltin<"__builtin_ppc_rlwnm">, DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, ImmArg>]>; + def int_ppc_rldimi + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], + [IntrNoMem, ImmArg>, ImmArg>]>; // XL compatible select functions // TODO: Add llvm_f128_ty support. diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 0eb09b1699aff..f843383f0b00e 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -56,4 +56,5 @@ let TargetPrefix = "spv" in { def int_spv_thread_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>; def int_spv_create_handle : ClangBuiltin<"__builtin_hlsl_create_handle">, Intrinsic<[ llvm_ptr_ty ], [llvm_i8_ty], [IntrWillReturn]>; + def int_spv_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>; } diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h index bb2e667ef6f41..6135e15fd030f 100644 --- a/llvm/include/llvm/IR/Module.h +++ b/llvm/include/llvm/IR/Module.h @@ -245,6 +245,12 @@ class LLVM_EXTERNAL_VISIBILITY Module { else if (!UseNewFormat && IsNewDbgInfoFormat) convertFromNewDbgValues(); } + void setNewDbgInfoFormatFlag(bool NewFlag) { + for (auto &F : *this) { + F.setNewDbgInfoFormatFlag(NewFlag); + } + IsNewDbgInfoFormat = NewFlag; + } /// The Module constructor. Note that there is no default constructor. You /// must provide a name for the module upon construction. 
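// A minimal sketch of how the two Module setters above differ (M is an
// assumed, already-populated Module; illustrative only):
//
//   M.setIsNewDbgInfoFormat(true);   // converts the in-memory debug records
//                                    // when the format actually changes
//   M.setNewDbgInfoFormatFlag(true); // only flips the flag on the module and
//                                    // on each contained function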
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 8e1aab43d37f9..766c90106cc41 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -138,7 +138,6 @@ void initializeInstructionCombiningPassPass(PassRegistry&); void initializeInstructionSelectPass(PassRegistry&); void initializeInterleavedAccessPass(PassRegistry&); void initializeInterleavedLoadCombinePass(PassRegistry &); -void initializeIntervalPartitionPass(PassRegistry&); void initializeJMCInstrumenterPass(PassRegistry&); void initializeKCFIPass(PassRegistry &); void initializeLCSSAVerificationPassPass(PassRegistry&); diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 62ee316324f51..ea2bccca51a80 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -21,7 +21,6 @@ #include "llvm/Analysis/CallPrinter.h" #include "llvm/Analysis/DomPrinter.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/IntervalPartition.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/RegionPass.h" @@ -145,7 +144,6 @@ namespace { (void) llvm::createFixIrreduciblePass(); (void)llvm::createSelectOptimizePass(); - (void)new llvm::IntervalPartition(); (void)new llvm::ScalarEvolutionWrapperPass(); llvm::Function::Create(nullptr, llvm::GlobalValue::ExternalLinkage)->viewCFGOnly(); llvm::RGPassManager RGM; diff --git a/llvm/include/llvm/Object/GOFF.h b/llvm/include/llvm/Object/GOFF.h index 91762457ae056..9fb8876e893d5 100644 --- a/llvm/include/llvm/Object/GOFF.h +++ b/llvm/include/llvm/Object/GOFF.h @@ -73,6 +73,26 @@ class Record { } }; +class TXTRecord : public Record { +public: + /// \brief Maximum length of data; any more must go in continuation. + static const uint8_t TXTMaxDataLength = 56; + + static Error getData(const uint8_t *Record, SmallString<256> &CompleteData); + + static void getElementEsdId(const uint8_t *Record, uint32_t &EsdId) { + get(Record, 4, EsdId); + } + + static void getOffset(const uint8_t *Record, uint32_t &Offset) { + get(Record, 12, Offset); + } + + static void getDataLength(const uint8_t *Record, uint16_t &Length) { + get(Record, 22, Length); + } +}; + class HDRRecord : public Record { public: static Error getData(const uint8_t *Record, SmallString<256> &CompleteData); diff --git a/llvm/include/llvm/Object/GOFFObjectFile.h b/llvm/include/llvm/Object/GOFFObjectFile.h index 7e1ceb95f6672..6871641e97ec8 100644 --- a/llvm/include/llvm/Object/GOFFObjectFile.h +++ b/llvm/include/llvm/Object/GOFFObjectFile.h @@ -29,7 +29,10 @@ namespace llvm { namespace object { class GOFFObjectFile : public ObjectFile { + friend class GOFFSymbolRef; + IndexedMap EsdPtrs; // Indexed by EsdId. + SmallVector TextPtrs; mutable DenseMap>> EsdNamesCache; @@ -38,7 +41,7 @@ class GOFFObjectFile : public ObjectFile { // (EDID, 0) code, r/o data section // (EDID,PRID) r/w data section SmallVector SectionList; - mutable DenseMap SectionDataCache; + mutable DenseMap> SectionDataCache; public: Expected getSymbolName(SymbolRef Symbol) const; @@ -66,6 +69,10 @@ class GOFFObjectFile : public ObjectFile { return true; } + bool isSectionNoLoad(DataRefImpl Sec) const; + bool isSectionReadOnlyData(DataRefImpl Sec) const; + bool isSectionZeroInit(DataRefImpl Sec) const; + private: // SymbolRef. 
Expected getSymbolName(DataRefImpl Symb) const override; @@ -75,27 +82,24 @@ class GOFFObjectFile : public ObjectFile { Expected getSymbolFlags(DataRefImpl Symb) const override; Expected getSymbolType(DataRefImpl Symb) const override; Expected getSymbolSection(DataRefImpl Symb) const override; + uint64_t getSymbolSize(DataRefImpl Symb) const; const uint8_t *getSymbolEsdRecord(DataRefImpl Symb) const; bool isSymbolUnresolved(DataRefImpl Symb) const; bool isSymbolIndirect(DataRefImpl Symb) const; // SectionRef. - void moveSectionNext(DataRefImpl &Sec) const override {} - virtual Expected getSectionName(DataRefImpl Sec) const override { - return StringRef(); - } - uint64_t getSectionAddress(DataRefImpl Sec) const override { return 0; } - uint64_t getSectionSize(DataRefImpl Sec) const override { return 0; } + void moveSectionNext(DataRefImpl &Sec) const override; + virtual Expected getSectionName(DataRefImpl Sec) const override; + uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionSize(DataRefImpl Sec) const override; virtual Expected> - getSectionContents(DataRefImpl Sec) const override { - return ArrayRef(); - } - uint64_t getSectionIndex(DataRefImpl Sec) const override { return 0; } - uint64_t getSectionAlignment(DataRefImpl Sec) const override { return 0; } + getSectionContents(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override { return Sec.d.a; } + uint64_t getSectionAlignment(DataRefImpl Sec) const override; bool isSectionCompressed(DataRefImpl Sec) const override { return false; } - bool isSectionText(DataRefImpl Sec) const override { return false; } - bool isSectionData(DataRefImpl Sec) const override { return false; } + bool isSectionText(DataRefImpl Sec) const override; + bool isSectionData(DataRefImpl Sec) const override; bool isSectionBSS(DataRefImpl Sec) const override { return false; } bool isSectionVirtual(DataRefImpl Sec) const override { return false; } relocation_iterator section_rel_begin(DataRefImpl Sec) const override { @@ -109,6 +113,7 @@ class GOFFObjectFile : public ObjectFile { const uint8_t *getSectionPrEsdRecord(DataRefImpl &Sec) const; const uint8_t *getSectionEdEsdRecord(uint32_t SectionIndex) const; const uint8_t *getSectionPrEsdRecord(uint32_t SectionIndex) const; + uint32_t getSectionDefEsdId(DataRefImpl &Sec) const; // RelocationRef. void moveRelocationNext(DataRefImpl &Rel) const override {} @@ -122,6 +127,29 @@ class GOFFObjectFile : public ObjectFile { SmallVectorImpl &Result) const override {} }; +class GOFFSymbolRef : public SymbolRef { +public: + GOFFSymbolRef(const SymbolRef &B) : SymbolRef(B) { + assert(isa(SymbolRef::getObject())); + } + + const GOFFObjectFile *getObject() const { + return cast(BasicSymbolRef::getObject()); + } + + Expected getSymbolGOFFFlags() const { + return getObject()->getSymbolFlags(getRawDataRefImpl()); + } + + Expected getSymbolGOFFType() const { + return getObject()->getSymbolType(getRawDataRefImpl()); + } + + uint64_t getSize() const { + return getObject()->getSymbolSize(getRawDataRefImpl()); + } +}; + } // namespace object } // namespace llvm diff --git a/llvm/include/llvm/Option/OptParser.td b/llvm/include/llvm/Option/OptParser.td index 7bbee1da643b8..9fd606b0d6fcb 100644 --- a/llvm/include/llvm/Option/OptParser.td +++ b/llvm/include/llvm/Option/OptParser.td @@ -93,6 +93,11 @@ class OptionGroup { // Define the option class. 
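+// Couples one help string with the list of visibilities it applies to; used
+// via HelpTextsForVariants below to give a single option different --help
+// text per driver.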
+class HelpTextVariant visibilities, string text> { + list Visibilities = visibilities; + string Text = text; +} + class Option prefixes, string name, OptionKind kind> { string EnumName = ?; // Uses the def name if undefined. list Prefixes = prefixes; @@ -101,6 +106,7 @@ class Option prefixes, string name, OptionKind kind> { // Used by MultiArg option kind. int NumArgs = 0; string HelpText = ?; + list HelpTextsForVariants = []; string MetaVarName = ?; string Values = ?; code ValuesCode = ?; @@ -155,6 +161,12 @@ class Visibility visibility> { } class Group { OptionGroup Group = group; } class HelpText { string HelpText = text; } +class HelpTextForVariants Visibilities, string text> { + list HelpTextsForVariants = [ + HelpTextVariant + ]; +} + class MetaVarName { string MetaVarName = name; } class Values { string Values = value; } class ValuesCode { code ValuesCode = valuecode; } diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h index bb3b665a16319..d8bf292bac21a 100644 --- a/llvm/include/llvm/Option/OptTable.h +++ b/llvm/include/llvm/Option/OptTable.h @@ -58,6 +58,17 @@ class OptTable { ArrayRef Prefixes; StringLiteral PrefixedName; const char *HelpText; + // Help text for specific visibilities. A list of pairs, where each pair + // is a list of visibilities and a specific help string for those + // visibilities. If no help text is found in this list for the visibility of + // the program, HelpText is used instead. This cannot use std::vector + // because OptTable is used in constexpr contexts. Increase the array sizes + // here if you need more entries and adjust the constants in + // OptParserEmitter::EmitHelpTextsForVariants. + std::array, + const char *>, + 1 /*MaxVisibilityHelp*/> + HelpTextsForVariants; const char *MetaVar; unsigned ID; unsigned char Kind; @@ -145,7 +156,20 @@ class OptTable { /// Get the help text to use to describe this option. const char *getOptionHelpText(OptSpecifier id) const { - return getInfo(id).HelpText; + return getOptionHelpText(id, Visibility(0)); + } + + // Get the help text to use to describe this option. + // If it has visibility specific help text and that visibility is in the + // visibility mask, use that text instead of the generic text. 
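+  // Illustrative: an option whose generic HelpText is "Set the target" could
+  // carry a variant ([SomeDriverVisibility], "Set the target triple") that is
+  // returned whenever VisibilityMask includes SomeDriverVisibility (names
+  // hypothetical).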
+ const char *getOptionHelpText(OptSpecifier id, + Visibility VisibilityMask) const { + auto Info = getInfo(id); + for (auto [Visibilities, Text] : Info.HelpTextsForVariants) + for (auto Visibility : Visibilities) + if (VisibilityMask & Visibility) + return Text; + return Info.HelpText; } /// Get the meta-variable name to use when describing @@ -323,7 +347,8 @@ class OptTable { private: void internalPrintHelp(raw_ostream &OS, const char *Usage, const char *Title, bool ShowHidden, bool ShowAllAliases, - std::function ExcludeOption) const; + std::function ExcludeOption, + Visibility VisibilityMask) const; }; /// Specialization of OptTable @@ -358,30 +383,30 @@ class PrecomputedOptTable : public OptTable { #define LLVM_MAKE_OPT_ID_WITH_ID_PREFIX( \ ID_PREFIX, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES) \ + FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ ID_PREFIX##ID #define LLVM_MAKE_OPT_ID(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, \ ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, \ - METAVAR, VALUES) \ - LLVM_MAKE_OPT_ID_WITH_ID_PREFIX(OPT_, PREFIX, PREFIXED_NAME, ID, KIND, \ - GROUP, ALIAS, ALIASARGS, FLAGS, VISIBILITY, \ - PARAM, HELPTEXT, METAVAR, VALUE) + HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ + LLVM_MAKE_OPT_ID_WITH_ID_PREFIX( \ + OPT_, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ + VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUE) #define LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX( \ ID_PREFIX, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, \ - FLAGS, VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES) \ + FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ llvm::opt::OptTable::Info { \ - PREFIX, PREFIXED_NAME, HELPTEXT, METAVAR, ID_PREFIX##ID, \ - llvm::opt::Option::KIND##Class, PARAM, FLAGS, VISIBILITY, \ - ID_PREFIX##GROUP, ID_PREFIX##ALIAS, ALIASARGS, VALUES \ + PREFIX, PREFIXED_NAME, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, \ + ID_PREFIX##ID, llvm::opt::Option::KIND##Class, PARAM, FLAGS, \ + VISIBILITY, ID_PREFIX##GROUP, ID_PREFIX##ALIAS, ALIASARGS, VALUES \ } #define LLVM_CONSTRUCT_OPT_INFO(PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, \ ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, \ - METAVAR, VALUES) \ + HELPTEXTSFORVARIANTS, METAVAR, VALUES) \ LLVM_CONSTRUCT_OPT_INFO_WITH_ID_PREFIX( \ OPT_, PREFIX, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES) + VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES) #endif // LLVM_OPTION_OPTTABLE_H diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index dfbaafb953fa9..10ff68857c6ed 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -177,9 +177,6 @@ template class CodeGenPassBuilder { // Add Function Pass if constexpr (is_detected::value) { FPM.addPass(std::forward(Pass)); - - for (auto &C : PB.AfterCallbacks) - C(Name); } else { // Add Module Pass if (!FPM.isEmpty()) { @@ -188,9 +185,6 @@ template class CodeGenPassBuilder { } MPM.addPass(std::forward(Pass)); - - for (auto &C : PB.AfterCallbacks) - C(Name); } } @@ -223,9 +217,6 @@ template class CodeGenPassBuilder { // Add Function Pass if constexpr (is_detected::value) { MFPM.addPass(std::forward(Pass)); - - for (auto &C : PB.AfterCallbacks) - C(Name); } else { // Add Module Pass if (!MFPM.isEmpty()) { @@ -235,10 +226,10 @@ 
template class CodeGenPassBuilder { } MPM.addPass(std::forward(Pass)); - - for (auto &C : PB.AfterCallbacks) - C(Name); } + + for (auto &C : PB.AfterCallbacks) + C(Name, MFPM); } private: @@ -462,6 +453,24 @@ template class CodeGenPassBuilder { Error addRegAssignmentFast(AddMachinePass &) const; Error addRegAssignmentOptimized(AddMachinePass &) const; + /// Allow the target to disable a specific pass by default. + /// Backend can declare unwanted passes in constructor. + template void disablePass() { + BeforeCallbacks.emplace_back( + [](StringRef Name) { return ((Name != PassTs::name()) && ...); }); + } + + /// Insert InsertedPass pass after TargetPass pass. + /// Only machine function passes are supported. + template + void insertPass(InsertedPassT &&Pass) { + AfterCallbacks.emplace_back( + [&](StringRef Name, MachineFunctionPassManager &MFPM) mutable { + if (Name == TargetPassT::name()) + MFPM.addPass(std::forward(Pass)); + }); + } + private: DerivedT &derived() { return static_cast(*this); } const DerivedT &derived() const { @@ -481,7 +490,9 @@ template class CodeGenPassBuilder { mutable SmallVector, 4> BeforeCallbacks; - mutable SmallVector, 4> AfterCallbacks; + mutable SmallVector< + llvm::unique_function, 4> + AfterCallbacks; /// Helper variable for `-start-before/-start-after/-stop-before/-stop-after` mutable bool Started = true; diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 110e697702641..0431c182276ec 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -502,7 +502,7 @@ class RecordLookupTrait { }; // Trait for writing IndexedMemProfRecord data to the on-disk hash table. -template class RecordWriterTrait { +class RecordWriterTrait { public: using key_type = uint64_t; using key_type_ref = uint64_t; @@ -517,12 +517,16 @@ template class RecordWriterTrait { // we must use a default constructor with no params for the writer trait so we // have a public member which must be initialized by the user. MemProfSchema *Schema = nullptr; + // The MemProf version to use for the serialization. + IndexedVersion Version; - RecordWriterTrait() = default; + // We do not support the default constructor, which does not set Version. + RecordWriterTrait() = delete; + RecordWriterTrait(IndexedVersion V) : Version(V) {} static hash_value_type ComputeHash(key_type_ref K) { return K; } - static std::pair + std::pair EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { using namespace support; diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h index 99dc9aefbd7d6..b035209406b68 100644 --- a/llvm/include/llvm/Support/CommandLine.h +++ b/llvm/include/llvm/Support/CommandLine.h @@ -2002,6 +2002,16 @@ void PrintVersionMessage(); /// \param Categorized if true print options in categories void PrintHelpMessage(bool Hidden = false, bool Categorized = false); +/// An array of optional enabled settings in the LLVM build configuration, +/// which may be of interest to compiler developers. For example, includes +/// "+assertions" if assertions are enabled. Used by printBuildConfig. +ArrayRef getCompilerBuildConfig(); + +/// Prints the compiler build configuration. +/// Designed for compiler developers, not compiler end-users. +/// Intended to be used in --version output when enabled. 
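+/// Illustrative output (assuming an assertions-enabled build): a trailing
+/// line such as
+///   Build config: +assertions
+/// printed beneath the usual version text.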
+void printBuildConfig(raw_ostream &OS);
+
 //===----------------------------------------------------------------------===//
 // Public interface for accessing registered options.
 //
diff --git a/llvm/include/llvm/Support/ToolOutputFile.h b/llvm/include/llvm/Support/ToolOutputFile.h
index e3fb83fdfd2c3..c16fb03d9b22b 100644
--- a/llvm/include/llvm/Support/ToolOutputFile.h
+++ b/llvm/include/llvm/Support/ToolOutputFile.h
@@ -18,6 +18,19 @@
 namespace llvm {
 
+class CleanupInstaller {
+public:
+  /// The name of the file.
+  std::string Filename;
+
+  /// The flag which indicates whether we should not delete the file.
+  bool Keep;
+
+  StringRef getFilename() { return Filename; }
+  explicit CleanupInstaller(StringRef Filename);
+  ~CleanupInstaller();
+};
+
 /// This class contains a raw_fd_ostream and adds a few extra features commonly
 /// needed for compiler-like tool output files:
 ///   - The file is automatically deleted if the process is killed.
@@ -28,18 +41,7 @@ class ToolOutputFile {
   /// before the raw_fd_ostream is constructed and destructed after the
   /// raw_fd_ostream is destructed. It installs cleanups in its constructor and
   /// uninstalls them in its destructor.
-  class CleanupInstaller {
-  public:
-    /// The name of the file.
-    std::string Filename;
-
-    /// The flag which indicates whether we should not delete the file.
-    bool Keep;
-
-    StringRef getFilename() { return Filename; }
-    explicit CleanupInstaller(StringRef Filename);
-    ~CleanupInstaller();
-  } Installer;
+  CleanupInstaller Installer;
 
   /// Storage for the stream, if we're owning our own stream. This is
   /// intentionally declared after Installer.
diff --git a/llvm/include/llvm/Support/raw_socket_stream.h b/llvm/include/llvm/Support/raw_socket_stream.h
index c219792d82465..bddd47eb75e1a 100644
--- a/llvm/include/llvm/Support/raw_socket_stream.h
+++ b/llvm/include/llvm/Support/raw_socket_stream.h
@@ -17,12 +17,17 @@
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/raw_ostream.h"
 
+#include <atomic>
+#include <chrono>
+
 namespace llvm {
 
 class raw_socket_stream;
 
-// Make sure that calls to WSAStartup and WSACleanup are balanced.
 #ifdef _WIN32
+/// Ensures proper initialization and cleanup of winsock resources
+///
+/// Make sure that calls to WSAStartup and WSACleanup are balanced.
 class WSABalancer {
 public:
   WSABalancer();
@@ -30,22 +35,87 @@ class WSABalancer {
 };
 #endif // _WIN32
 
+/// Manages a passive (i.e., listening) UNIX domain socket
+///
+/// The ListeningSocket class encapsulates a UNIX domain socket that can listen
+/// and accept incoming connections. ListeningSocket is portable and supports
+/// Windows builds beginning with Insider Build 17063. ListeningSocket is
+/// designed for server-side operations, working alongside \p raw_socket_streams
+/// that function as client connections.
+///
+/// Usage example:
+/// \code{.cpp}
+/// std::string Path = "/path/to/socket";
+/// Expected<ListeningSocket> S = ListeningSocket::createUnix(Path);
+///
+/// if (S) {
+///   Expected<std::unique_ptr<raw_socket_stream>> connection = S->accept();
+///   if (connection) {
+///     // Use the accepted raw_socket_stream for communication.
+///   }
+/// }
+/// \endcode
+///
 class ListeningSocket {
-  int FD;
-  std::string SocketPath;
-  ListeningSocket(int SocketFD, StringRef SocketPath);
+
+  std::atomic<int> FD;
+  std::string SocketPath; // Not modified after construction
+
+  /// If a separate thread calls ListeningSocket::shutdown, the ListeningSocket
+  /// file descriptor (FD) could be closed while ::poll is waiting for it to be
+  /// ready to perform I/O operations. ::poll will continue to block even
+  /// after FD is closed so use a self-pipe mechanism to get ::poll to return
+  int PipeFD[2]; // Not modified after construction other than in the move
+                 // constructor
+
+  ListeningSocket(int SocketFD, StringRef SocketPath, int PipeFD[2]);
+
 #ifdef _WIN32
   WSABalancer _;
 #endif // _WIN32
 
 public:
+  ~ListeningSocket();
+  ListeningSocket(ListeningSocket &&LS);
+  ListeningSocket(const ListeningSocket &LS) = delete;
+  ListeningSocket &operator=(const ListeningSocket &) = delete;
+
+  /// Closes the FD, unlinks the socket file, and writes to PipeFD.
+  ///
+  /// After the construction of the ListeningSocket, shutdown is signal safe if
+  /// it is called during the lifetime of the object. shutdown can be called
+  /// concurrently with ListeningSocket::accept as writing to PipeFD will cause
+  /// a blocking call to ::poll to return.
+  ///
+  /// Once shutdown is called there is no way to reinitialize ListeningSocket.
+  void shutdown();
+
+  /// Accepts an incoming connection on the listening socket. This method can
+  /// optionally either block until a connection is available or time out after
+  /// a specified amount of time has passed. By default the method will block
+  /// until the socket has received a connection.
+  ///
+  /// \param Timeout An optional timeout duration in milliseconds. Setting
+  /// Timeout to -1 causes accept to block indefinitely.
+  ///
+  Expected<std::unique_ptr<raw_socket_stream>>
+  accept(std::chrono::milliseconds Timeout = std::chrono::milliseconds(-1));
+
+  /// Creates a listening socket bound to the specified file system path.
+  /// Handles the socket creation, binding, and immediately starts listening
+  /// for incoming connections.
+  ///
+  /// \param SocketPath The file system path where the socket will be created
+  /// \param MaxBacklog The max number of connections in a socket's backlog
+  ///
   static Expected<ListeningSocket> createUnix(
       StringRef SocketPath,
       int MaxBacklog = llvm::hardware_concurrency().compute_thread_count());
-  Expected<std::unique_ptr<raw_socket_stream>> accept();
-  ListeningSocket(ListeningSocket &&LS);
-  ~ListeningSocket();
 };
+
+//===----------------------------------------------------------------------===//
+//  raw_socket_stream
+//===----------------------------------------------------------------------===//
+
 class raw_socket_stream : public raw_fd_stream {
   uint64_t current_pos() const override { return 0; }
 #ifdef _WIN32
@@ -54,7 +124,7 @@ class raw_socket_stream : public raw_fd_stream {
 public:
   raw_socket_stream(int SocketFD);
 
-  /// Create a \p raw_socket_stream connected to the Unix domain socket at \p
+  /// Create a \p raw_socket_stream connected to the UNIX domain socket at \p
   /// SocketPath.
   static Expected<std::unique_ptr<raw_socket_stream>>
   createConnectedUnix(StringRef SocketPath);
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 3208c63fb42d9..dd4e7d790bc6b 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -142,6 +142,7 @@
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
+def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
diff --git a/llvm/include/llvm/TextAPI/InterfaceFile.h b/llvm/include/llvm/TextAPI/InterfaceFile.h
index 10a37e3a0c2f3..23c27cb0f4745 100644
--- a/llvm/include/llvm/TextAPI/InterfaceFile.h
+++ b/llvm/include/llvm/TextAPI/InterfaceFile.h
@@ -299,9 +299,9 @@ class InterfaceFile {
   }
 
   /// Set the runpath search paths.
-  /// \param InputTarget The target applicable to runpath search path.
/// \param RPath The name of runpath. - void addRPath(const Target &InputTarget, StringRef RPath); + /// \param InputTarget The target applicable to runpath search path. + void addRPath(StringRef RPath, const Target &InputTarget); /// Get the list of runpath search paths. /// diff --git a/llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h b/llvm/include/llvm/Transforms/Instrumentation/LowerAllowCheckPass.h similarity index 73% rename from llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h rename to llvm/include/llvm/Transforms/Instrumentation/LowerAllowCheckPass.h index 58f6bbcec5dc9..af974818fec5f 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h +++ b/llvm/include/llvm/Transforms/Instrumentation/LowerAllowCheckPass.h @@ -1,4 +1,4 @@ -//===- RemoveTrapsPass.h ----------------------------------------*- C++ -*-===// +//===- LowerAllowCheckPass.h ------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,8 +11,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_UBSANOPTIMIZATIONPASS_H -#define LLVM_TRANSFORMS_INSTRUMENTATION_UBSANOPTIMIZATIONPASS_H +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_LOWERALLOWCHECKPASS_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_LOWERALLOWCHECKPASS_H #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" @@ -22,9 +22,11 @@ namespace llvm { // This pass is responsible for removing optional traps, like llvm.ubsantrap // from the hot code. -class RemoveTrapsPass : public PassInfoMixin { +class LowerAllowCheckPass : public PassInfoMixin { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool IsRequested(); }; } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 187ace3a0cbed..345e09dce0b2b 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -372,15 +372,6 @@ RecurKind getMinMaxReductionRecurKind(Intrinsic::ID RdxID); /// Returns the comparison predicate used when expanding a min/max reduction. CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK); -/// See RecurrenceDescriptor::isAnyOfPattern for a description of the pattern we -/// are trying to match. In this pattern, we are only ever selecting between two -/// values: 1) an initial start value \p StartVal of the reduction PHI, and 2) a -/// loop invariant value. If any of lane value in \p Left, \p Right is not equal -/// to \p StartVal, select the loop invariant value. This is done by selecting -/// \p Right iff \p Left is equal to \p StartVal. -Value *createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, - Value *Left, Value *Right); - /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. /// The Builder's fast-math-flags must be set to propagate the expected values. 
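/// (Illustratively, RecurKind::SMax would produce an llvm.smax call on Left
/// and Right, with floating-point kinds additionally carrying the builder's
/// fast-math flags; the exact lowering is not spelled out in this header.)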
Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index 581d354fc4766..7c725a3c1216c 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -86,12 +86,9 @@ template <> struct IRTraits { // SampleProfileProber. class PseudoProbeManager { DenseMap GUIDToProbeDescMap; - const ThinOrFullLTOPhase LTOPhase; public: - PseudoProbeManager(const Module &M, - ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None) - : LTOPhase(LTOPhase) { + PseudoProbeManager(const Module &M) { if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) { for (const auto *Operand : FuncInfo->operands()) { @@ -140,13 +137,9 @@ class PseudoProbeManager { // be different and result in different checksums. So we should use the // state from the new (available_externally) function, which is saved in its // attribute. - assert((LTOPhase != ThinOrFullLTOPhase::ThinLTOPostLink || - IsAvailableExternallyLinkage || !Desc || - profileIsHashMismatched(*Desc, Samples) == - F.hasFnAttribute("profile-checksum-mismatch")) && - "In post-link, profile checksum matching state doesn't match the " - "internal function's 'profile-checksum-mismatch' attribute."); - (void)LTOPhase; + // TODO: If the function's profile only exists as nested inlinee profile in + // a different module, we don't have the attr mismatch state(unknown), we + // need to fix it later. if (IsAvailableExternallyLinkage || !Desc) return !F.hasFnAttribute("profile-checksum-mismatch"); diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp index 44d2ff18a6949..11cc6cfccea6a 100644 --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -38,7 +38,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeAAResultsWrapperPassPass(Registry); initializeGlobalsAAWrapperPassPass(Registry); initializeIVUsersWrapperPassPass(Registry); - initializeIntervalPartitionPass(Registry); initializeIRSimilarityIdentifierWrapperPassPass(Registry); initializeLazyBranchProbabilityInfoPassPass(Registry); initializeLazyBlockFrequencyInfoPassPass(Registry); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 35ea03f42f82b..474b8d20fde16 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -76,8 +76,6 @@ add_llvm_component_library(LLVMAnalysis InstructionPrecedenceTracking.cpp InstructionSimplify.cpp InteractiveModelRunner.cpp - Interval.cpp - IntervalPartition.cpp LazyBranchProbabilityInfo.cpp LazyBlockFrequencyInfo.cpp LazyCallGraph.cpp diff --git a/llvm/lib/Analysis/Interval.cpp b/llvm/lib/Analysis/Interval.cpp deleted file mode 100644 index f7fffcb3d5e62..0000000000000 --- a/llvm/lib/Analysis/Interval.cpp +++ /dev/null @@ -1,39 +0,0 @@ -//===- Interval.cpp - Interval class code ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the definition of the Interval class, which represents a -// partition of a control flow graph of some kind. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Interval.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Interval Implementation -//===----------------------------------------------------------------------===// - -void Interval::print(raw_ostream &OS) const { - OS << "-------------------------------------------------------------\n" - << "Interval Contents:\n"; - - // Print out all of the basic blocks in the interval... - for (const BasicBlock *Node : Nodes) - OS << *Node << "\n"; - - OS << "Interval Predecessors:\n"; - for (const BasicBlock *Predecessor : Predecessors) - OS << *Predecessor << "\n"; - - OS << "Interval Successors:\n"; - for (const BasicBlock *Successor : Successors) - OS << *Successor << "\n"; -} diff --git a/llvm/lib/Analysis/IntervalPartition.cpp b/llvm/lib/Analysis/IntervalPartition.cpp deleted file mode 100644 index d9620fd405bc2..0000000000000 --- a/llvm/lib/Analysis/IntervalPartition.cpp +++ /dev/null @@ -1,118 +0,0 @@ -//===- IntervalPartition.cpp - Interval Partition module code -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file contains the definition of the IntervalPartition class, which -// calculates and represent the interval partition of a function. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/IntervalPartition.h" -#include "llvm/Analysis/Interval.h" -#include "llvm/Analysis/IntervalIterator.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" -#include -#include - -using namespace llvm; - -char IntervalPartition::ID = 0; - -IntervalPartition::IntervalPartition() : FunctionPass(ID) { - initializeIntervalPartitionPass(*PassRegistry::getPassRegistry()); -} - -INITIALIZE_PASS(IntervalPartition, "intervals", - "Interval Partition Construction", true, true) - -//===----------------------------------------------------------------------===// -// IntervalPartition Implementation -//===----------------------------------------------------------------------===// - -// releaseMemory - Reset state back to before function was analyzed -void IntervalPartition::releaseMemory() { - for (Interval *I : Intervals) - delete I; - IntervalMap.clear(); - Intervals.clear(); - RootInterval = nullptr; -} - -void IntervalPartition::print(raw_ostream &O, const Module*) const { - for (const Interval *I : Intervals) - I->print(O); -} - -// addIntervalToPartition - Add an interval to the internal list of intervals, -// and then add mappings from all of the basic blocks in the interval to the -// interval itself (in the IntervalMap). -void IntervalPartition::addIntervalToPartition(Interval *I) { - Intervals.push_back(I); - - // Add mappings for all of the basic blocks in I to the IntervalPartition - for (Interval::node_iterator It = I->Nodes.begin(), End = I->Nodes.end(); - It != End; ++It) - IntervalMap.insert(std::make_pair(*It, I)); -} - -// updatePredecessors - Interval generation only sets the successor fields of -// the interval data structures. 
After interval generation is complete, -// run through all of the intervals and propagate successor info as -// predecessor info. -void IntervalPartition::updatePredecessors(Interval *Int) { - BasicBlock *Header = Int->getHeaderNode(); - for (BasicBlock *Successor : Int->Successors) - getBlockInterval(Successor)->Predecessors.push_back(Header); -} - -// IntervalPartition ctor - Build the first level interval partition for the -// specified function... -bool IntervalPartition::runOnFunction(Function &F) { - // Pass false to intervals_begin because we take ownership of it's memory - function_interval_iterator I = intervals_begin(&F, false); - assert(I != intervals_end(&F) && "No intervals in function!?!?!"); - - addIntervalToPartition(RootInterval = *I); - - ++I; // After the first one... - - // Add the rest of the intervals to the partition. - for (function_interval_iterator E = intervals_end(&F); I != E; ++I) - addIntervalToPartition(*I); - - // Now that we know all of the successor information, propagate this to the - // predecessors for each block. - for (Interval *I : Intervals) - updatePredecessors(I); - return false; -} - -// IntervalPartition ctor - Build a reduced interval partition from an -// existing interval graph. This takes an additional boolean parameter to -// distinguish it from a copy constructor. Always pass in false for now. -IntervalPartition::IntervalPartition(IntervalPartition &IP, bool) - : FunctionPass(ID) { - assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!"); - - // Pass false to intervals_begin because we take ownership of it's memory - interval_part_interval_iterator I = intervals_begin(IP, false); - assert(I != intervals_end(IP) && "No intervals in interval partition!?!?!"); - - addIntervalToPartition(RootInterval = *I); - - ++I; // After the first one... - - // Add the rest of the intervals to the partition. - for (interval_part_interval_iterator E = intervals_end(IP); I != E; ++I) - addIntervalToPartition(*I); - - // Now that we know all of the successor information, propagate this to the - // predecessors for each block. - for (Interval *I : Intervals) - updatePredecessors(I); -} diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index c25eede96a185..3bfc9700a1455 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1917,14 +1917,30 @@ isLoopVariantIndirectAddress(ArrayRef UnderlyingObjects, }); } -// Get the dependence distance, stride, type size in whether i is a write for +namespace { +struct DepDistanceStrideAndSizeInfo { + const SCEV *Dist; + uint64_t Stride; + uint64_t TypeByteSize; + bool AIsWrite; + bool BIsWrite; + + DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t Stride, + uint64_t TypeByteSize, bool AIsWrite, + bool BIsWrite) + : Dist(Dist), Stride(Stride), TypeByteSize(TypeByteSize), + AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} +}; +} // namespace + +// Get the dependence distance, stride, type size and whether it is a write for // the dependence between A and B. Returns a DepType, if we can prove there's // no dependence or the analysis fails. Outlined to lambda to limit he scope // of various temporary variables, like A/BPtr, StrideA/BPtr and others. // Returns either the dependence result, if it could already be determined, or a -// tuple with (Distance, Stride, TypeSize, AIsWrite, BIsWrite). +// struct containing (Distance, Stride, TypeSize, AIsWrite, BIsWrite). 
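The LoopAccessAnalysis change here swaps a positional std::tuple for a named struct inside the std::variant, so call sites read field names rather than remembering get<N> positions. A self-contained illustration of the same pattern, with simplified stand-in types:

#include <cassert>
#include <cstdint>
#include <variant>

enum class DepType { NoDep, Unknown };

struct DepInfo {
  int64_t Dist;       // dependence distance
  uint64_t Stride;    // common access stride
  uint64_t TypeBytes; // access size; 0 when the two accesses disagree
  bool AIsWrite, BIsWrite;
};

// Either the analysis already knows the answer, or it hands back the raw
// quantities for the caller to reason about, the same shape as the hunk above.
std::variant<DepType, DepInfo> classify(bool KnownIndependent) {
  if (KnownIndependent)
    return DepType::NoDep;
  return DepInfo{4, 1, 4, /*AIsWrite=*/true, /*BIsWrite=*/false};
}

int main() {
  auto R = classify(false);
  assert(std::holds_alternative<DepInfo>(R));
  // Structured bindings keep the names, unlike std::get<2>(Tuple).
  const auto &[Dist, Stride, TypeBytes, AW, BW] = std::get<DepInfo>(R);
  assert(Dist == 4 && TypeBytes == 4 && AW && !BW);
  return 0;
}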
static std::variant> + DepDistanceStrideAndSizeInfo> getDependenceDistanceStrideAndSize( const AccessAnalysis::MemAccessInfo &A, Instruction *AInst, const AccessAnalysis::MemAccessInfo &B, Instruction *BInst, @@ -1993,7 +2009,8 @@ getDependenceDistanceStrideAndSize( if (!HasSameSize) TypeByteSize = 0; uint64_t Stride = std::abs(StrideAPtr); - return std::make_tuple(Dist, Stride, TypeByteSize, AIsWrite, BIsWrite); + return DepDistanceStrideAndSizeInfo(Dist, Stride, TypeByteSize, AIsWrite, + BIsWrite); } MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( @@ -2012,7 +2029,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( return std::get(Res); const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] = - std::get>(Res); + std::get(Res); bool HasSameSize = TypeByteSize > 0; ScalarEvolution &SE = *PSE.getSE(); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 515b9d0744f6e..e030b9fc7dac4 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -944,10 +944,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, // Calculate the multiplicative inverse of K! / 2^T; // this multiplication factor will perform the exact division by // K! / 2^T. - APInt Mod = APInt::getSignedMinValue(W+1); - APInt MultiplyFactor = OddFactorial.zext(W+1); - MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod); - MultiplyFactor = MultiplyFactor.trunc(W); + APInt MultiplyFactor = OddFactorial.multiplicativeInverse(); // Calculate the product, at width T+W IntegerType *CalculationTy = IntegerType::get(SE.getContext(), @@ -10086,10 +10083,8 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B, // If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent // (N / D) in general. The inverse itself always fits into BW bits, though, // so we immediately truncate it. - APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D - APInt Mod(BW + 1, 0); - Mod.setBit(BW - Mult2); // Mod = N / D - APInt I = AD.multiplicativeInverse(Mod).trunc(BW); + APInt AD = A.lshr(Mult2).trunc(BW - Mult2); // AD = A / D + APInt I = AD.multiplicativeInverse().zext(BW); // 4. 
Compute the minimum unsigned root of the equation: // I * (B / D) mod (N / D) diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 5f933b4587843..33c899fe88999 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -916,9 +916,9 @@ InstructionCost TargetTransformInfo::getAltInstrCost( InstructionCost TargetTransformInfo::getShuffleCost( ShuffleKind Kind, VectorType *Ty, ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args) const { - InstructionCost Cost = - TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind, Index, SubTp, Args); + ArrayRef Args, const Instruction *CxtI) const { + InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind, + Index, SubTp, Args, CxtI); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index b5e8a1d22f264..ca48cfe773815 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -648,6 +648,7 @@ static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred, auto m_V = m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V))); + Value *Y; const APInt *Mask, *C; uint64_t ShAmt; switch (Pred) { @@ -656,16 +657,18 @@ static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred, if (match(LHS, m_V) && match(RHS, m_APInt(C))) { Known = Known.unionWith(KnownBits::makeConstant(*C)); // assume(V & Mask = C) - } else if (match(LHS, m_And(m_V, m_APInt(Mask))) && + } else if (match(LHS, m_c_And(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) { // For one bits in Mask, we can propagate bits from C to V. - Known.Zero |= ~*C & *Mask; - Known.One |= *C & *Mask; + Known.One |= *C; + if (match(Y, m_APInt(Mask))) + Known.Zero |= ~*C & *Mask; // assume(V | Mask = C) - } else if (match(LHS, m_Or(m_V, m_APInt(Mask))) && match(RHS, m_APInt(C))) { + } else if (match(LHS, m_c_Or(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) { // For zero bits in Mask, we can propagate bits from C to V. - Known.Zero |= ~*C & ~*Mask; - Known.One |= *C & ~*Mask; + Known.Zero |= ~*C; + if (match(Y, m_APInt(Mask))) + Known.One |= *C & ~*Mask; // assume(V ^ Mask = C) } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) && match(RHS, m_APInt(C))) { @@ -1724,26 +1727,25 @@ static void computeKnownBitsFromOperator(const Operator *I, const Value *Vec = I->getOperand(0); const Value *Elt = I->getOperand(1); auto *CIdx = dyn_cast(I->getOperand(2)); - // Early out if the index is non-constant or out-of-range. unsigned NumElts = DemandedElts.getBitWidth(); - if (!CIdx || CIdx->getValue().uge(NumElts)) { - Known.resetAll(); - return; + APInt DemandedVecElts = DemandedElts; + bool NeedsElt = true; + // If we know the index we are inserting too, clear it from Vec check. + if (CIdx && CIdx->getValue().ult(NumElts)) { + DemandedVecElts.clearBit(CIdx->getZExtValue()); + NeedsElt = DemandedElts[CIdx->getZExtValue()]; } + Known.One.setAllBits(); Known.Zero.setAllBits(); - unsigned EltIdx = CIdx->getZExtValue(); - // Do we demand the inserted element? - if (DemandedElts[EltIdx]) { + if (NeedsElt) { computeKnownBits(Elt, Known, Depth + 1, Q); // If we don't know any bits, early out. if (Known.isUnknown()) break; } - // We don't need the base vector element that has been inserted. 
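The insertelement changes in this file share one idea: with a constant in-range index, the inserted lane can be cleared from the vector-side query and the scalar is consulted only if that lane is demanded; a non-constant index no longer forces a bail-out, it just keeps everything live. The mask bookkeeping in a standalone toy (4 lanes, plain unsigned masks, not LLVM's APInt API):

#include <cassert>
#include <optional>

struct Split {
  unsigned VecDemanded; // lanes still needed from the base vector
  bool NeedElt;         // whether the inserted scalar matters at all
};

Split splitDemanded(unsigned Demanded, std::optional<unsigned> Idx,
                    unsigned NumElts) {
  unsigned VecDemanded = Demanded;
  bool NeedElt = true; // unknown index: scalar may land in any demanded lane
  if (Idx && *Idx < NumElts) {
    VecDemanded &= ~(1u << *Idx);     // base vector no longer supplies it
    NeedElt = (Demanded >> *Idx) & 1; // scalar matters only if demanded
  }
  return {VecDemanded, NeedElt};
}

int main() {
  // Demand lanes {0,2} of a 4-lane vector; insert at lane 2.
  auto [VecD, NeedElt] = splitDemanded(0b0101, 2u, 4);
  assert(VecD == 0b0001 && NeedElt);
  // Same demand, unknown index: keep every lane and consult the scalar too.
  auto [VecD2, NeedElt2] = splitDemanded(0b0101, std::nullopt, 4);
  assert(VecD2 == 0b0101 && NeedElt2);
  return 0;
}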
- APInt DemandedVecElts = DemandedElts; - DemandedVecElts.clearBit(EltIdx); - if (!!DemandedVecElts) { + + if (!DemandedVecElts.isZero()) { computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q); Known = Known.intersectWith(Known2); } @@ -2813,6 +2815,8 @@ static bool isKnownNonZeroFromOperator(const Operator *I, case Intrinsic::bswap: case Intrinsic::ctpop: return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q); + // NB: We don't do usub_sat here as in any case we can prove its + // non-zero, we will fold it to `sub nuw` in InstCombine. case Intrinsic::ssub_sat: return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, II->getArgOperand(0), II->getArgOperand(1)); @@ -4498,12 +4502,18 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); - if (auto *CFP = dyn_cast_or_null(V)) { + if (auto *CFP = dyn_cast(V)) { Known.KnownFPClasses = CFP->getValueAPF().classify(); Known.SignBit = CFP->isNegative(); return; } + if (isa(V)) { + Known.KnownFPClasses = fcPosZero; + Known.SignBit = false; + return; + } + // Try to handle fixed width vector constants auto *VFVTy = dyn_cast(V->getType()); const Constant *CV = dyn_cast(V); @@ -4515,6 +4525,9 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, // For vectors, verify that each element is not NaN. unsigned NumElts = VFVTy->getNumElements(); for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; + Constant *Elt = CV->getAggregateElement(i); if (!Elt) { Known = KnownFPClass(); @@ -5352,14 +5365,17 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, const Value *Vec = Op->getOperand(0); const Value *Elt = Op->getOperand(1); auto *CIdx = dyn_cast(Op->getOperand(2)); - // Early out if the index is non-constant or out-of-range. unsigned NumElts = DemandedElts.getBitWidth(); - if (!CIdx || CIdx->getValue().uge(NumElts)) - return; + APInt DemandedVecElts = DemandedElts; + bool NeedsElt = true; + // If we know the index we are inserting to, clear it from Vec check. + if (CIdx && CIdx->getValue().ult(NumElts)) { + DemandedVecElts.clearBit(CIdx->getZExtValue()); + NeedsElt = DemandedElts[CIdx->getZExtValue()]; + } - unsigned EltIdx = CIdx->getZExtValue(); // Do we demand the inserted element? - if (DemandedElts[EltIdx]) { + if (NeedsElt) { computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q); // If we don't know any bits, early out. if (Known.isUnknown()) @@ -5368,10 +5384,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts, Known.KnownFPClasses = fcNone; } - // We don't need the base vector element that has been inserted. - APInt DemandedVecElts = DemandedElts; - DemandedVecElts.clearBit(EltIdx); - if (!!DemandedVecElts) { + // Do we need anymore elements from Vec? + if (!DemandedVecElts.isZero()) { KnownFPClass Known2; computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2, Depth + 1, Q); @@ -8390,8 +8404,7 @@ bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P, /// Return true if "icmp Pred LHS RHS" is always true. 
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, - const Value *RHS, const DataLayout &DL, - unsigned Depth) { + const Value *RHS) { if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) return true; @@ -8403,8 +8416,26 @@ static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, const APInt *C; // LHS s<= LHS +_{nsw} C if C >= 0 - if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C)))) + // LHS s<= LHS | C if C >= 0 + if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))) || + match(RHS, m_Or(m_Specific(LHS), m_APInt(C)))) return !C->isNegative(); + + // LHS s<= smax(LHS, V) for any V + if (match(RHS, m_c_SMax(m_Specific(LHS), m_Value()))) + return true; + + // smin(RHS, V) s<= RHS for any V + if (match(LHS, m_c_SMin(m_Specific(RHS), m_Value()))) + return true; + + // Match A to (X +_{nsw} CA) and B to (X +_{nsw} CB) + const Value *X; + const APInt *CLHS, *CRHS; + if (match(LHS, m_NSWAddLike(m_Value(X), m_APInt(CLHS))) && + match(RHS, m_NSWAddLike(m_Specific(X), m_APInt(CRHS)))) + return CLHS->sle(*CRHS); + return false; } @@ -8414,34 +8445,36 @@ static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, cast(RHS)->hasNoUnsignedWrap()) return true; + // LHS u<= LHS | V for any V + if (match(RHS, m_c_Or(m_Specific(LHS), m_Value()))) + return true; + + // LHS u<= umax(LHS, V) for any V + if (match(RHS, m_c_UMax(m_Specific(LHS), m_Value()))) + return true; + // RHS >> V u<= RHS for any V if (match(LHS, m_LShr(m_Specific(RHS), m_Value()))) return true; - // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB) - auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B, - const Value *&X, - const APInt *&CA, const APInt *&CB) { - if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) && - match(B, m_NUWAdd(m_Specific(X), m_APInt(CB)))) - return true; + // RHS u/ C_ugt_1 u<= RHS + const APInt *C; + if (match(LHS, m_UDiv(m_Specific(RHS), m_APInt(C))) && C->ugt(1)) + return true; - // If X & C == 0 then (X | C) == X +_{nuw} C - if (match(A, m_Or(m_Value(X), m_APInt(CA))) && - match(B, m_Or(m_Specific(X), m_APInt(CB)))) { - KnownBits Known(CA->getBitWidth()); - computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr, - /*CxtI*/ nullptr, /*DT*/ nullptr); - if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero)) - return true; - } + // RHS & V u<= RHS for any V + if (match(LHS, m_c_And(m_Specific(RHS), m_Value()))) + return true; - return false; - }; + // umin(RHS, V) u<= RHS for any V + if (match(LHS, m_c_UMin(m_Specific(RHS), m_Value()))) + return true; + // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB) const Value *X; const APInt *CLHS, *CRHS; - if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS)) + if (match(LHS, m_NUWAddLike(m_Value(X), m_APInt(CLHS))) && + match(RHS, m_NUWAddLike(m_Specific(X), m_APInt(CRHS)))) return CLHS->ule(*CRHS); return false; @@ -8453,37 +8486,36 @@ static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, /// ALHS ARHS" is true. Otherwise, return std::nullopt. 
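The new unconditionally-true unsigned facts taught to isTruePredicate above (or/umax on the greater side; and/umin/udiv/lshr on the lesser side) are cheap to sanity-check exhaustively at 8 bits. A throwaway harness:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A) {
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t X = A, V = B;
      assert(X <= (uint8_t)(X | V)); // LHS u<= LHS | V
      assert(X <= std::max(X, V));   // LHS u<= umax(LHS, V)
      assert((uint8_t)(X & V) <= X); // RHS & V u<= RHS
      assert(std::min(X, V) <= X);   // umin(RHS, V) u<= RHS
      assert((X >> (V & 7)) <= X);   // RHS >> V u<= RHS (shift kept in range)
      if (V > 1)
        assert(X / V <= X);          // RHS u/ C u<= RHS for C u> 1
    }
  }
  return 0;
}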
static std::optional isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS, - const Value *ARHS, const Value *BLHS, const Value *BRHS, - const DataLayout &DL, unsigned Depth) { + const Value *ARHS, const Value *BLHS, const Value *BRHS) { switch (Pred) { default: return std::nullopt; case CmpInst::ICMP_SLT: case CmpInst::ICMP_SLE: - if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth) && - isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth)) + if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS) && + isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS)) return true; return std::nullopt; case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: - if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS, DL, Depth) && - isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS, DL, Depth)) + if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS) && + isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS)) return true; return std::nullopt; case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: - if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) && - isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth)) + if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS) && + isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS)) return true; return std::nullopt; case CmpInst::ICMP_UGT: case CmpInst::ICMP_UGE: - if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS, DL, Depth) && - isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS, DL, Depth)) + if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS) && + isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS)) return true; return std::nullopt; } @@ -8527,7 +8559,7 @@ static std::optional isImpliedCondICmps(const ICmpInst *LHS, CmpInst::Predicate RPred, const Value *R0, const Value *R1, const DataLayout &DL, - bool LHSIsTrue, unsigned Depth) { + bool LHSIsTrue) { Value *L0 = LHS->getOperand(0); Value *L1 = LHS->getOperand(1); @@ -8574,7 +8606,7 @@ static std::optional isImpliedCondICmps(const ICmpInst *LHS, return LPred == RPred; if (LPred == RPred) - return isImpliedCondOperands(LPred, L0, L1, R0, R1, DL, Depth); + return isImpliedCondOperands(LPred, L0, L1, R0, R1); return std::nullopt; } @@ -8636,8 +8668,7 @@ llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred, // Both LHS and RHS are icmps. const ICmpInst *LHSCmp = dyn_cast(LHS); if (LHSCmp) - return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, - Depth); + return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue); /// The LHS should be an 'or', 'and', or a 'select' instruction. We expect /// the RHS to be an icmp. @@ -9276,11 +9307,17 @@ void llvm::findValuesAffectedByCondition( if (ICmpInst::isEquality(Pred)) { if (match(B, m_ConstantInt())) { + Value *Y; // (X & C) or (X | C) or (X ^ C). // (X << C) or (X >>_s C) or (X >>_u C). 
if (match(A, m_BitwiseLogic(m_Value(X), m_ConstantInt())) || match(A, m_Shift(m_Value(X), m_ConstantInt()))) AddAffected(X); + else if (match(A, m_And(m_Value(X), m_Value(Y))) || + match(A, m_Or(m_Value(X), m_Value(Y)))) { + AddAffected(X); + AddAffected(Y); + } } } else { // Handle (A + C1) u< C2, which is the canonical form of diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index fe49e52ae4283..f546e05a5d37d 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -63,6 +63,9 @@ static cl::opt AllowIncompleteIR( "metadata will be dropped)")); extern llvm::cl::opt UseNewDbgInfoFormat; +extern cl::opt PreserveInputDbgFormat; +extern bool WriteNewDbgInfoFormatToBitcode; +extern cl::opt WriteNewDbgInfoFormat; static std::string getTypeString(Type *T) { std::string Result; @@ -71,12 +74,20 @@ static std::string getTypeString(Type *T) { return Tmp.str(); } -// Currently, we should always process modules in the old debug info format by -// default regardless of the module's format in IR; convert it to the old format -// here. -bool finalizeDebugInfoFormat(Module *M) { - if (M) +// Whatever debug info format we parsed, we should convert to the expected debug +// info format immediately afterwards. +bool LLParser::finalizeDebugInfoFormat(Module *M) { + // We should have already returned an error if we observed both intrinsics and + // records in this IR. + assert(!(SeenNewDbgInfoFormat && SeenOldDbgInfoFormat) && + "Mixed debug intrinsics/records seen without a parsing error?"); + if (PreserveInputDbgFormat == cl::boolOrDefault::BOU_TRUE) { + UseNewDbgInfoFormat = SeenNewDbgInfoFormat; + WriteNewDbgInfoFormatToBitcode = SeenNewDbgInfoFormat; + WriteNewDbgInfoFormat = SeenNewDbgInfoFormat; + } else if (M) { M->setIsNewDbgInfoFormat(false); + } return false; } @@ -6511,10 +6522,10 @@ bool LLParser::parseBasicBlock(PerFunctionState &PFS) { if (SeenOldDbgInfoFormat) return error(Lex.getLoc(), "debug record should not appear in a module " "containing debug info intrinsics"); + if (!SeenNewDbgInfoFormat) + M->setNewDbgInfoFormatFlag(true); SeenNewDbgInfoFormat = true; Lex.Lex(); - if (!M->IsNewDbgInfoFormat) - M->convertToNewDbgValues(); DbgRecord *DR; if (parseDebugRecord(DR, PFS)) @@ -6805,6 +6816,7 @@ int LLParser::parseInstruction(Instruction *&Inst, BasicBlock *BB, } // Casts. 
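Why the findValuesAffectedByCondition hunk above may mark both operands of (X & Y) == C as affected even when neither is a constant: every set bit of C must be set in X and in Y, and dually for (X | Y) == D. A quick 8-bit exhaustive check:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y) {
      uint8_t C = X & Y;
      assert((X & C) == C && (Y & C) == C); // ones of C propagate to X and Y
      uint8_t D = X | Y;
      assert((~D & X) == 0 && (~D & Y) == 0); // zeros of D propagate likewise
    }
  return 0;
}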
+ case lltok::kw_uitofp: case lltok::kw_zext: { bool NonNeg = EatIfPresent(lltok::kw_nneg); bool Res = parseCast(Inst, PFS, KeywordVal); @@ -6832,7 +6844,6 @@ int LLParser::parseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_fpext: case lltok::kw_bitcast: case lltok::kw_addrspacecast: - case lltok::kw_uitofp: case lltok::kw_sitofp: case lltok::kw_fptoui: case lltok::kw_fptosi: @@ -7928,6 +7939,8 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS, return error(CallLoc, "llvm.dbg intrinsic should not appear in a module " "using non-intrinsic debug info"); } + if (!SeenOldDbgInfoFormat) + M->setNewDbgInfoFormatFlag(false); SeenOldDbgInfoFormat = true; } CI->setAttributes(PAL); @@ -8227,6 +8240,8 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { return tokError("atomicrmw cannot be unordered"); if (!Ptr->getType()->isPointerTy()) return error(PtrLoc, "atomicrmw operand must be a pointer"); + if (Val->getType()->isScalableTy()) + return error(ValLoc, "atomicrmw operand may not be scalable"); if (Operation == AtomicRMWInst::Xchg) { if (!Val->getType()->isIntegerTy() && @@ -8238,7 +8253,7 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { " operand must be an integer, floating point, or pointer type"); } } else if (IsFP) { - if (!Val->getType()->isFloatingPointTy()) { + if (!Val->getType()->isFPOrFPVectorTy()) { return error(ValLoc, "atomicrmw " + AtomicRMWInst::getOperationName(Operation) + " operand must be a floating point type"); diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index aa6c9c95ca240..92c349525aff5 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -108,6 +108,9 @@ cl::opt LoadBitcodeIntoNewDbgInfoFormat( "load-bitcode-into-experimental-debuginfo-iterators", cl::Hidden, cl::desc("Load bitcode directly into the new debug info format (regardless " "of input format)")); +extern cl::opt PreserveInputDbgFormat; +extern bool WriteNewDbgInfoFormatToBitcode; +extern cl::opt WriteNewDbgInfoFormat; namespace { @@ -682,6 +685,11 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { /// (e.g.) blockaddress forward references. bool WillMaterializeAllForwardRefs = false; + /// Tracks whether we have seen debug intrinsics or records in this bitcode; + /// seeing both in a single module is currently a fatal error. + bool SeenDebugIntrinsic = false; + bool SeenDebugRecord = false; + bool StripDebugInfo = false; TBAAVerifier TBAAVerifyHelper; @@ -3774,7 +3782,11 @@ Error BitcodeReader::globalCleanup() { for (Function &F : *TheModule) { MDLoader->upgradeDebugIntrinsics(F); Function *NewFn; - if (UpgradeIntrinsicFunction(&F, NewFn)) + // If PreserveInputDbgFormat=true, then we don't know whether we want + // intrinsics or records, and we won't perform any conversions in either + // case, so don't upgrade intrinsics to records. + if (UpgradeIntrinsicFunction( + &F, NewFn, PreserveInputDbgFormat != cl::boolOrDefault::BOU_TRUE)) UpgradedIntrinsics[&F] = NewFn; // Look for functions that rely on old function attribute behavior. UpgradeFunctionAttributes(F); @@ -4301,10 +4313,13 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata, ParserCallbacks Callbacks) { // Load directly into RemoveDIs format if LoadBitcodeIntoNewDbgInfoFormat - // has been set to true (default action: load into the old debug format). 
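The LLParser and BitcodeReader changes around here implement one policy: under PreserveInputDbgFormat, keep whichever debug-info representation the input actually used; otherwise let the module flag dictate and convert parsed bodies to match. A compressed model of that decision (names are illustrative, and mixed inputs are rejected earlier as a parse error):

#include <cassert>
#include <optional>

// Should the module end up in the new (debug-record) representation?
bool chooseNewFormat(std::optional<bool> PreserveInput, bool SawRecords,
                     bool SawIntrinsics, bool ModuleIsNew) {
  assert(!(SawRecords && SawIntrinsics) && "mixed input is a parse error");
  if (PreserveInput.value_or(false)) {
    // Keep what the input used; with no debug info at all, keep the
    // module's existing setting.
    if (SawRecords || SawIntrinsics)
      return SawRecords;
    return ModuleIsNew;
  }
  // Otherwise the module flag wins and parsed functions are converted.
  return ModuleIsNew;
}

int main() {
  assert(chooseNewFormat(true, /*Records=*/true, false, /*Module=*/false));
  assert(!chooseNewFormat(true, false, /*Intrinsics=*/true, /*Module=*/true));
  assert(chooseNewFormat(std::nullopt, false, /*Intrinsics=*/true, true));
  return 0;
}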
- TheModule->IsNewDbgInfoFormat = - UseNewDbgInfoFormat && - LoadBitcodeIntoNewDbgInfoFormat == cl::boolOrDefault::BOU_TRUE; + // has been set to true and we aren't attempting to preserve the existing + // format in the bitcode (default action: load into the old debug format). + if (PreserveInputDbgFormat != cl::boolOrDefault::BOU_TRUE) { + TheModule->IsNewDbgInfoFormat = + UseNewDbgInfoFormat && + LoadBitcodeIntoNewDbgInfoFormat == cl::boolOrDefault::BOU_TRUE; + } this->ValueTypeCallback = std::move(Callbacks.ValueType); if (ResumeBit) { @@ -5024,7 +5039,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } if (OpNum < Record.size()) { - if (Opc == Instruction::ZExt) { + if (Opc == Instruction::ZExt || Opc == Instruction::UIToFP) { if (Record[OpNum] & (1 << bitc::PNNI_NON_NEG)) cast(I)->setNonNeg(true); } else if (Opc == Instruction::Trunc) { @@ -6453,6 +6468,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { case bitc::FUNC_CODE_DEBUG_RECORD_ASSIGN: { // DbgVariableRecords are placed after the Instructions that they are // attached to. + SeenDebugRecord = true; Instruction *Inst = getLastInstruction(); if (!Inst) return error("Invalid dbg record: missing instruction"); @@ -6613,6 +6629,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { TCK = CallInst::TCK_NoTail; cast(I)->setTailCallKind(TCK); cast(I)->setAttributes(PAL); + if (isa(I)) + SeenDebugIntrinsic = true; if (Error Err = propagateAttributeTypes(cast(I), ArgTyIDs)) { I->deleteValue(); return Err; @@ -6801,20 +6819,48 @@ Error BitcodeReader::materialize(GlobalValue *GV) { if (Error JumpFailed = Stream.JumpToBit(DFII->second)) return JumpFailed; - // Set the debug info mode to "new", possibly creating a mismatch between - // module and function debug modes. This is okay because we'll convert - // everything back to the old mode after parsing if needed. - // FIXME: Remove this once all tools support RemoveDIs. + // Regardless of the debug info format we want to end up in, we need + // IsNewDbgInfoFormat=true to construct any debug records seen in the bitcode. F->IsNewDbgInfoFormat = true; if (Error Err = parseFunctionBody(F)) return Err; F->setIsMaterializable(false); - // Convert new debug info records into intrinsics. - // FIXME: Remove this once all tools support RemoveDIs. - if (!F->getParent()->IsNewDbgInfoFormat) - F->convertFromNewDbgValues(); + // All parsed Functions should load into the debug info format dictated by the + // Module, unless we're attempting to preserve the input debug info format. + if (SeenDebugIntrinsic && SeenDebugRecord) + return error("Mixed debug intrinsics and debug records in bitcode module!"); + if (PreserveInputDbgFormat == cl::boolOrDefault::BOU_TRUE) { + bool SeenAnyDebugInfo = SeenDebugIntrinsic || SeenDebugRecord; + bool NewDbgInfoFormatDesired = + SeenAnyDebugInfo ? SeenDebugRecord : F->getParent()->IsNewDbgInfoFormat; + if (SeenAnyDebugInfo) { + UseNewDbgInfoFormat = SeenDebugRecord; + WriteNewDbgInfoFormatToBitcode = SeenDebugRecord; + WriteNewDbgInfoFormat = SeenDebugRecord; + } + // If the module's debug info format doesn't match the observed input + // format, then set its format now; we don't need to call the conversion + // function because there must be no existing intrinsics to convert. + // Otherwise, just set the format on this function now. 
+ if (NewDbgInfoFormatDesired != F->getParent()->IsNewDbgInfoFormat) + F->getParent()->setNewDbgInfoFormatFlag(NewDbgInfoFormatDesired); + else + F->setNewDbgInfoFormatFlag(NewDbgInfoFormatDesired); + } else { + // If we aren't preserving formats, we use the Module flag to get our + // desired format instead of reading flags, in case we are lazy-loading and + // the format of the module has been changed since it was set by the flags. + // We only need to convert debug info here if we have debug records but + // desire the intrinsic format; everything else is a no-op or handled by the + // autoupgrader. + bool ModuleIsNewDbgInfoFormat = F->getParent()->IsNewDbgInfoFormat; + if (ModuleIsNewDbgInfoFormat || !SeenDebugRecord) + F->setNewDbgInfoFormatFlag(ModuleIsNewDbgInfoFormat); + else + F->setIsNewDbgInfoFormat(ModuleIsNewDbgInfoFormat); + } if (StripDebugInfo) stripDebugInfo(*F); diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp index 2e8e7d0a88af0..5b679fd3b9f92 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -32,14 +32,13 @@ using namespace llvm; void AccelTableBase::computeBucketCount() { - // First get the number of unique hashes. SmallVector Uniques; Uniques.reserve(Entries.size()); for (const auto &E : Entries) Uniques.push_back(E.second.HashValue); - - std::tie(BucketCount, UniqueHashCount) = - llvm::dwarf::getDebugNamesBucketAndHashCount(Uniques); + llvm::sort(Uniques); + UniqueHashCount = llvm::unique(Uniques) - Uniques.begin(); + BucketCount = dwarf::getDebugNamesBucketCount(UniqueHashCount); } void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) { diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 293bb5a3c6f6e..721d144d7f4c6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -927,6 +927,27 @@ void AsmPrinter::emitDebugValue(const MCExpr *Value, unsigned Size) const { void AsmPrinter::emitFunctionHeaderComment() {} +void AsmPrinter::emitFunctionPrefix(ArrayRef Prefix) { + const Function &F = MF->getFunction(); + if (!MAI->hasSubsectionsViaSymbols()) { + for (auto &C : Prefix) + emitGlobalConstant(F.getParent()->getDataLayout(), C); + return; + } + // Preserving prefix-like data on platforms which use subsections-via-symbols + // is a bit tricky. Here we introduce a symbol for the prefix-like data + // and use the .alt_entry attribute to mark the function's real entry point + // as an alternative entry point to the symbol that precedes the function.. + OutStreamer->emitLabel(OutContext.createLinkerPrivateTempSymbol()); + + for (auto &C : Prefix) { + emitGlobalConstant(F.getParent()->getDataLayout(), C); + } + + // Emit an .alt_entry directive for the actual function symbol. + OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_AltEntry); +} + /// EmitFunctionHeader - This method emits the header for the current /// function. void AsmPrinter::emitFunctionHeader() { @@ -966,23 +987,8 @@ void AsmPrinter::emitFunctionHeader() { OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_Cold); // Emit the prefix data. - if (F.hasPrefixData()) { - if (MAI->hasSubsectionsViaSymbols()) { - // Preserving prefix data on platforms which use subsections-via-symbols - // is a bit tricky. 
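The AsmPrinter refactor above funnels both plain prefix data and the KCFI signature/type-hash pair through one helper, so the subsections-via-symbols workaround is written once. A toy emitter showing the shape of that workaround, printing assembler directives instead of using MC (assumed output, for illustration only):

#include <cstdio>
#include <string>
#include <vector>

void emitFunctionPrefix(const std::vector<std::string> &Prefix,
                        bool SubsectionsViaSymbols, const char *FnSym) {
  if (!SubsectionsViaSymbols) {
    for (const auto &C : Prefix)
      std::printf("  .quad %s\n", C.c_str()); // data simply precedes the label
    return;
  }
  // MachO-style targets: a linker-private label owns the prefix bytes, and
  // the real function symbol becomes an alternate entry point after them.
  std::printf("l_prefix$%s:\n", FnSym);
  for (const auto &C : Prefix)
    std::printf("  .quad %s\n", C.c_str());
  std::printf("  .alt_entry %s\n", FnSym);
}

int main() {
  emitFunctionPrefix({"0x1234", "0xdeadbeef"}, true, "_foo"); // KCFI-style pair
  return 0;
}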
Here we introduce a symbol for the prefix data - // and use the .alt_entry attribute to mark the function's real entry point - // as an alternative entry point to the prefix-data symbol. - MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol(); - OutStreamer->emitLabel(PrefixSym); - - emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData()); - - // Emit an .alt_entry directive for the actual function symbol. - OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_AltEntry); - } else { - emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData()); - } - } + if (F.hasPrefixData()) + emitFunctionPrefix({F.getPrefixData()}); // Emit KCFI type information before patchable-function-prefix nops. emitKCFITypeId(*MF); @@ -1014,8 +1020,7 @@ void AsmPrinter::emitFunctionHeader() { auto *PrologueSig = mdconst::extract(MD->getOperand(0)); auto *TypeHash = mdconst::extract(MD->getOperand(1)); - emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig); - emitGlobalConstant(F.getParent()->getDataLayout(), TypeHash); + emitFunctionPrefix({PrologueSig, TypeHash}); } if (isVerbose()) { diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index 09177950fc824..ec5fc06d01fb1 100644 --- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/UniqueVector.h" -#include "llvm/Analysis/Interval.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index d5db79df68622..0aa89ea94335d 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -562,9 +562,9 @@ static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *&Success, Value *&NewLoaded) { Type *OrigTy = NewVal->getType(); - // This code can go away when cmpxchg supports FP types. + // This code can go away when cmpxchg supports FP and vector types. assert(!OrigTy->isPointerTy()); - bool NeedBitcast = OrigTy->isFloatingPointTy(); + bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy(); if (NeedBitcast) { IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits()); NewVal = Builder.CreateBitCast(NewVal, IntTy); @@ -731,7 +731,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, unsigned ValueSize = DL.getTypeStoreSize(ValueType); PMV.ValueType = PMV.IntValueType = ValueType; - if (PMV.ValueType->isFloatingPointTy()) + if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy()) PMV.IntValueType = Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits()); diff --git a/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp index 62135304e859b..938dda37b9f63 100644 --- a/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp +++ b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp @@ -116,7 +116,8 @@ static void expandFPToI(Instruction *FPToI) { // fp80 conversion is implemented by fpext to fp128 first then do the // conversion. FPMantissaWidth = FPMantissaWidth == 63 ? 
112 : FPMantissaWidth; - unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth); + unsigned FloatWidth = + PowerOf2Ceil(FloatVal->getType()->getScalarSizeInBits()); unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1; unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1; Value *ImplicitBit = Builder.CreateShl( @@ -319,6 +320,7 @@ static void expandIToFP(Instruction *IToFP) { // FIXME: As there is no related builtins added in compliler-rt, // here currently utilized the fp32 <-> fp16 lib calls to implement. FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth; + FPMantissaWidth = FPMantissaWidth == 7 ? 23 : FPMantissaWidth; unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth); bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP; @@ -547,7 +549,7 @@ static void expandIToFP(Instruction *IToFP) { Value *A40 = Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext())); A4 = Builder.CreateFPTrunc(A40, IToFP->getType()); - } else if (IToFP->getType()->isHalfTy()) { + } else if (IToFP->getType()->isHalfTy() || IToFP->getType()->isBFloatTy()) { // Deal with "half" situation. This is a workaround since we don't have // floattihf.c currently as referring. Value *A40 = diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 062132c8304b0..40c5119ee7fb3 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -597,8 +597,8 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI, UseMI.getOpcode() == TargetOpcode::G_ZEXT || (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) { const auto &MMO = LoadMI->getMMO(); - // For atomics, only form anyextending loads. - if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT) + // Don't do anything for atomics. + if (MMO.isAtomic()) continue; // Check for legality. if (!isPreLegalize()) { @@ -2925,8 +2925,10 @@ bool CombinerHelper::matchCombineInsertVecElts( } return true; } - // If we didn't end in a G_IMPLICIT_DEF, bail out. - return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF; + // If we didn't end in a G_IMPLICIT_DEF and the source is not fully + // overwritten, bail out. + return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF || + all_of(MatchInfo, [](Register Reg) { return !!Reg; }); } void CombinerHelper::applyCombineInsertVecElts( @@ -5201,10 +5203,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) { // Calculate the multiplicative inverse modulo BW. // 2^W requires W + 1 bits, so we have to extend and then truncate. 
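Several hunks in this patch (here, ScalarEvolution, TargetLowering) replace the zext/multiplicativeInverse(Mod)/trunc dance with the new argumentless APInt::multiplicativeInverse(). For an odd number the inverse modulo 2^W falls out of a short Newton iteration, each step doubling the number of correct low bits; a standalone 64-bit version:

#include <cassert>
#include <cstdint>

uint64_t inverseMod2_64(uint64_t N) {
  assert((N & 1) && "only odd numbers are invertible mod 2^64");
  uint64_t X = N;               // seed: N*N == 1 (mod 8), so 3 bits correct
  for (int I = 0; I < 5; ++I)   // 3 * 2^5 = 96 >= 64 correct bits
    X *= 2 - N * X;             // Newton step: x' = x * (2 - n*x)
  return X;
}

int main() {
  for (uint64_t N : {1ull, 3ull, 5ull, 0x12345ull, 0xdeadbeefdeadbeefull})
    assert(N * inverseMod2_64(N) == 1); // wrapping 64-bit product
  return 0;
}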
- unsigned W = Divisor.getBitWidth(); - APInt Factor = Divisor.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); + APInt Factor = Divisor.multiplicativeInverse(); Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0)); Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0)); return true; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 47e980e05281f..312e564f5d802 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3087,7 +3087,21 @@ bool IRTranslator::translateInsertElement(const User &U, Register Res = getOrCreateVReg(U); Register Val = getOrCreateVReg(*U.getOperand(0)); Register Elt = getOrCreateVReg(*U.getOperand(1)); - Register Idx = getOrCreateVReg(*U.getOperand(2)); + unsigned PreferredVecIdxWidth = TLI->getVectorIdxTy(*DL).getSizeInBits(); + Register Idx; + if (auto *CI = dyn_cast(U.getOperand(2))) { + if (CI->getBitWidth() != PreferredVecIdxWidth) { + APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth); + auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx); + Idx = getOrCreateVReg(*NewIdxCI); + } + } + if (!Idx) + Idx = getOrCreateVReg(*U.getOperand(2)); + if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) { + const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth); + Idx = MIRBuilder.buildZExtOrTrunc(VecIdxTy, Idx).getReg(0); + } MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx); return true; } diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 4410fb7ecd23b..b2114c250ac09 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1137,7 +1137,6 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( MachineFunction *MF = getParent(); MachineBasicBlock *PrevFallthrough = getNextNode(); - DebugLoc DL; // FIXME: this is nowhere MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); NMBB->setCallFrameSize(Succ->getCallFrameSize()); @@ -1218,6 +1217,15 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge( SlotIndexUpdateDelegate SlotUpdater(*MF, Indexes); SmallVector Cond; const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); + + // In original 'this' BB, there must be a branch instruction targeting at + // Succ. We can not find it out since currently getBranchDestBlock was not + // implemented for all targets. However, if the merged DL has column or line + // number, the scope and non-zero column and line number is same with that + // branch instruction so we can safely use it. + DebugLoc DL, MergedDL = findBranchDebugLoc(); + if (MergedDL && (MergedDL.getLine() || MergedDL.getCol())) + DL = MergedDL; TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL); } diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index b9c6765be445a..9305040678324 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1247,7 +1247,7 @@ class HighRegisterPressureDetector { for (auto &MI : *OrigMBB) { if (MI.isDebugInstr()) continue; - for (auto Use : ROMap[&MI].Uses) { + for (auto &Use : ROMap[&MI].Uses) { auto Reg = Use.RegUnit; // Ignore the variable that appears only on one side of phi instruction // because it's used only at the first iteration. 
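The two MachinePipeliner hunks here only add an ampersand, but it matters: `for (auto Use : ...)` copy-constructs every element on each iteration. A minimal demonstration of the cost being removed (hypothetical struct, loosely modeled on the register-pressure pairs):

#include <cstdio>
#include <vector>

struct RegisterMaskPair {
  unsigned RegUnit;
  RegisterMaskPair(unsigned R) : RegUnit(R) {}
  RegisterMaskPair(const RegisterMaskPair &O) : RegUnit(O.RegUnit) {
    std::puts("copied"); // make the cost of by-value iteration visible
  }
};

int main() {
  std::vector<RegisterMaskPair> Uses;
  Uses.reserve(3); // avoid reallocation copies during setup
  for (unsigned R : {1u, 2u, 3u})
    Uses.emplace_back(R);

  for (auto Use : Uses)  // prints "copied" three times
    (void)Use.RegUnit;
  for (auto &Use : Uses) // prints nothing
    (void)Use.RegUnit;
  return 0;
}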
@@ -1345,7 +1345,7 @@ class HighRegisterPressureDetector { DenseMap LastUseMI; for (MachineInstr *MI : llvm::reverse(OrderedInsts)) { - for (auto Use : ROMap.find(MI)->getSecond().Uses) { + for (auto &Use : ROMap.find(MI)->getSecond().Uses) { auto Reg = Use.RegUnit; if (!TargetRegs.contains(Reg)) continue; diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index fd7ea28426470..0744089486313 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -55,6 +55,7 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -1788,6 +1789,60 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); + LLT IdxTy = MRI->getType(MI->getOperand(2).getReg()); + + if (!DstTy.isScalar() && !DstTy.isPointer()) { + report("Destination type must be a scalar or pointer", MI); + break; + } + + if (!SrcTy.isVector()) { + report("First source must be a vector", MI); + break; + } + + auto TLI = MF->getSubtarget().getTargetLowering(); + if (IdxTy.getSizeInBits() != + TLI->getVectorIdxTy(MF->getDataLayout()).getFixedSizeInBits()) { + report("Index type must match VectorIdxTy", MI); + break; + } + + break; + } + case TargetOpcode::G_INSERT_VECTOR_ELT: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT VecTy = MRI->getType(MI->getOperand(1).getReg()); + LLT ScaTy = MRI->getType(MI->getOperand(2).getReg()); + LLT IdxTy = MRI->getType(MI->getOperand(3).getReg()); + + if (!DstTy.isVector()) { + report("Destination type must be a vector", MI); + break; + } + + if (VecTy != DstTy) { + report("Destination type and vector type must match", MI); + break; + } + + if (!ScaTy.isScalar() && !ScaTy.isPointer()) { + report("Inserted element must be a scalar or pointer", MI); + break; + } + + auto TLI = MF->getSubtarget().getTargetLowering(); + if (IdxTy.getSizeInBits() != + TLI->getVectorIdxTy(MF->getDataLayout()).getFixedSizeInBits()) { + report("Index type must match VectorIdxTy", MI); + break; + } + + break; + } case TargetOpcode::G_DYN_STACKALLOC: { const MachineOperand &DstOp = MI->getOperand(0); const MachineOperand &AllocOp = MI->getOperand(1); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0a473180538a5..8fe074666a3dc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1164,19 +1164,20 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N01 = N0.getOperand(1); if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) { + SDNodeFlags NewFlags; + if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() && + Flags.hasNoUnsignedWrap()) + NewFlags.setNoUnsignedWrap(true); + if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) { // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) - return DAG.getNode(Opc, DL, VT, N00, OpNode); + return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags); return SDValue(); } if (TLI.isReassocProfitable(DAG, 
N0, N1)) { // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) // iff (op x, c1) has one use - SDNodeFlags NewFlags; - if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() && - Flags.hasNoUnsignedWrap()) - NewFlags.setNoUnsignedWrap(true); SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags); return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags); } @@ -11823,8 +11824,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { !MST->isCompressingStore() && !MST->isTruncatingStore()) return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(), MST->getBasePtr(), MST->getPointerInfo(), - MST->getOriginalAlign(), MachineMemOperand::MOStore, - MST->getAAInfo()); + MST->getOriginalAlign(), + MST->getMemOperand()->getFlags(), MST->getAAInfo()); // Try transforming N to an indexed store. if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) @@ -11961,7 +11962,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { SDValue NewLd = DAG.getLoad( N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(), MLD->getPointerInfo(), MLD->getOriginalAlign(), - MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges()); + MLD->getMemOperand()->getFlags(), MLD->getAAInfo(), MLD->getRanges()); return CombineTo(N, NewLd, NewLd.getValue(1)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index a8b1f41ee40d9..7685bc73cf965 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -61,7 +61,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif report_fatal_error("Do not know how to soften the result of this " "operator!"); - + case ISD::EXTRACT_ELEMENT: R = SoftenFloatRes_EXTRACT_ELEMENT(N); break; case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; @@ -258,6 +258,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) { } } +SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_ELEMENT(SDNode *N) { + SDValue Src = N->getOperand(0); + assert(Src.getValueType() == MVT::ppcf128 && + "In floats only ppcf128 can be extracted by element!"); + return DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(N), + N->getValueType(0).changeTypeToInteger(), + DAG.getBitcast(MVT::i128, Src), N->getOperand(1)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) { SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e08acd36b41d4..919c0d4fd2007 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -530,6 +530,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); SDValue SoftenFloatRes_ConstantFP(SDNode *N); + SDValue SoftenFloatRes_EXTRACT_ELEMENT(SDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FABS(SDNode *N); SDValue SoftenFloatRes_FMINNUM(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 618bdee7f4053..3f69f7ad54477 100644 --- 
a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3882,7 +3882,11 @@ void SelectionDAGBuilder::visitUIToFP(const User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); - setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); + SDNodeFlags Flags; + if (auto *PNI = dyn_cast(&I)) + Flags.setNonNeg(PNI->hasNonNeg()); + + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N, Flags)); } void SelectionDAGBuilder::visitSIToFP(const User &I) { @@ -4754,8 +4758,12 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, EVT VT = Src0.getValueType(); + auto MMOFlags = MachineMemOperand::MOStore; + if (I.hasMetadata(LLVMContext::MD_nontemporal)) + MMOFlags |= MachineMemOperand::MONonTemporal; + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + MachinePointerInfo(PtrOperand), MMOFlags, LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata()); SDValue StoreNode = DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, @@ -4924,8 +4932,12 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); + auto MMOFlags = MachineMemOperand::MOLoad; + if (I.hasMetadata(LLVMContext::MD_nontemporal)) + MMOFlags |= MachineMemOperand::MONonTemporal; + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + MachinePointerInfo(PtrOperand), MMOFlags, LocationSize::beforeOrAfterPointer(), Alignment, AAInfo, Ranges); SDValue Load = diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5e053f97675d7..409d66adfd67d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6071,11 +6071,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, Divisor.ashrInPlace(Shift); UseSRA = true; } - // Calculate the multiplicative inverse, using Newton's method. - APInt t; - APInt Factor = Divisor; - while ((t = Divisor * Factor) != 1) - Factor *= APInt(Divisor.getBitWidth(), 2) - t; + APInt Factor = Divisor.multiplicativeInverse(); Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT)); Factors.push_back(DAG.getConstant(Factor, dl, SVT)); return true; @@ -6664,10 +6660,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. unsigned W = D.getBitWidth(); - APInt P = D0.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); - assert(!P.isZero() && "No multiplicative inverse!"); // unreachable + APInt P = D0.multiplicativeInverse(); assert((D0 * P).isOne() && "Multiplicative inverse basic check failed."); // Q = floor((2^W - 1) u/ D) @@ -6922,10 +6915,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. 
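Both prepareUREMEqFold and prepareSREMEqFold rest on the number-theoretic trick that the simplified multiplicativeInverse() call feeds: for odd D, `X % D == 0` exactly when `X * inv(D)` is u<= `floor((2^W - 1) / D)` in W-bit arithmetic. Exhaustively verifiable at 8 bits:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned D = 1; D < 256; D += 2) { // odd divisors only
    uint8_t P = D;                        // Newton inverse of D mod 2^8
    for (int I = 0; I < 3; ++I)           // 3 * 2^3 = 24 >= 8 correct bits
      P *= 2 - (uint8_t)(D * P);
    assert((uint8_t)(D * P) == 1);
    uint8_t Q = 255 / D;                  // floor((2^8 - 1) / D)
    for (unsigned X = 0; X < 256; ++X)
      assert(((uint8_t)(X * P) <= Q) == (X % D == 0));
  }
  return 0;
}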
unsigned W = D.getBitWidth(); - APInt P = D0.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); - assert(!P.isZero() && "No multiplicative inverse!"); // unreachable + APInt P = D0.multiplicativeInverse(); assert((D0 * P).isOne() && "Multiplicative inverse basic check failed."); // A = floor((2^(W - 1) - 1) / D0) & -2^K @@ -7651,7 +7641,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, // // For division, we can compute the remainder using the algorithm described // above, subtract it from the dividend to get an exact multiple of Constant. -// Then multiply that extact multiply by the multiplicative inverse modulo +// Then multiply that exact multiply by the multiplicative inverse modulo // (1 << (BitWidth / 2)) to get the quotient. // If Constant is even, we can shift right the dividend and the divisor by the @@ -7786,10 +7776,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N, // Multiply by the multiplicative inverse of the divisor modulo // (1 << BitWidth). - APInt Mod = APInt::getSignedMinValue(BitWidth + 1); - APInt MulFactor = Divisor.zext(BitWidth + 1); - MulFactor = MulFactor.multiplicativeInverse(Mod); - MulFactor = MulFactor.trunc(BitWidth); + APInt MulFactor = Divisor.multiplicativeInverse(); SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend, DAG.getConstant(MulFactor, dl, VT)); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index 9c65d85985f1b..22c9e8cd143c2 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -552,31 +552,22 @@ DWARFDebugNames::NameIndex::extractAbbrev(uint64_t *Offset) { return Abbrev(Code, dwarf::Tag(Tag), AbbrevOffset, std::move(*AttrEncOr)); } -void llvm::findDebugNamesOffsets( - DWARFDebugNames::DWARFDebugNamesOffsets &Offsets, uint64_t HdrSize, - dwarf::DwarfFormat Format, const DWARFDebugNames::Header &Hdr) { - uint32_t DwarfSize = (Format == llvm::dwarf::DwarfFormat::DWARF64) ? 8 : 4; - uint64_t Offset = HdrSize; - Offsets.CUsBase = Offset; - Offset += Hdr.CompUnitCount * DwarfSize; - Offset += Hdr.LocalTypeUnitCount * DwarfSize; - Offset += Hdr.ForeignTypeUnitCount * 8; - - Offsets.BucketsBase = Offset; - Offset += Hdr.BucketCount * 4; - - Offsets.HashesBase = Offset; - if (Hdr.BucketCount > 0) - Offset += Hdr.NameCount * 4; - - Offsets.StringOffsetsBase = Offset; - Offset += Hdr.NameCount * DwarfSize; - - Offsets.EntryOffsetsBase = Offset; - Offset += Hdr.NameCount * DwarfSize; - - Offset += Hdr.AbbrevTableSize; - Offsets.EntriesBase = Offset; +DWARFDebugNames::DWARFDebugNamesOffsets +dwarf::findDebugNamesOffsets(uint64_t EndOfHeaderOffset, + const DWARFDebugNames::Header &Hdr) { + uint64_t DwarfSize = getDwarfOffsetByteSize(Hdr.Format); + DWARFDebugNames::DWARFDebugNamesOffsets Ret; + Ret.CUsBase = EndOfHeaderOffset; + Ret.BucketsBase = Ret.CUsBase + Hdr.CompUnitCount * DwarfSize + + Hdr.LocalTypeUnitCount * DwarfSize + + Hdr.ForeignTypeUnitCount * 8; + Ret.HashesBase = Ret.BucketsBase + Hdr.BucketCount * 4; + Ret.StringOffsetsBase = + Ret.HashesBase + (Hdr.BucketCount > 0 ? 
Hdr.NameCount * 4 : 0); + Ret.EntryOffsetsBase = Ret.StringOffsetsBase + Hdr.NameCount * DwarfSize; + Ret.EntriesBase = + Ret.EntryOffsetsBase + Hdr.NameCount * DwarfSize + Hdr.AbbrevTableSize; + return Ret; } Error DWARFDebugNames::NameIndex::extract() { @@ -586,7 +577,7 @@ Error DWARFDebugNames::NameIndex::extract() { return E; const unsigned SectionOffsetSize = dwarf::getDwarfOffsetByteSize(Hdr.Format); - findDebugNamesOffsets(Offsets, hdrSize, Hdr.Format, Hdr); + Offsets = dwarf::findDebugNamesOffsets(hdrSize, Hdr); uint64_t Offset = Offsets.EntryOffsetsBase + (Hdr.NameCount * SectionOffsetSize); diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 37c32f86e8d87..4841a2d8c4fdb 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -87,13 +87,13 @@ FailedToMaterialize::FailedToMaterialize( // FIXME: Use a new dep-map type for FailedToMaterialize errors so that we // don't have to manually retain/release. - for (auto &KV : *this->Symbols) - KV.first->Retain(); + for (auto &[JD, Syms] : *this->Symbols) + JD->Retain(); } FailedToMaterialize::~FailedToMaterialize() { - for (auto &KV : *Symbols) - KV.first->Release(); + for (auto &[JD, Syms] : *Symbols) + JD->Release(); } std::error_code FailedToMaterialize::convertToErrorCode() const { @@ -1002,6 +1002,17 @@ void JITDylib::unlinkMaterializationResponsibility( }); } +void JITDylib::shrinkMaterializationInfoMemory() { + // DenseMap::erase never shrinks its storage; use clear to heuristically free + // memory since we may have long-lived JDs after linking is done. + + if (UnmaterializedInfos.empty()) + UnmaterializedInfos.clear(); + + if (MaterializingInfos.empty()) + MaterializingInfos.clear(); +} + void JITDylib::setLinkOrder(JITDylibSearchOrder NewLinkOrder, bool LinkAgainstThisJITDylibFirst) { ES.runSessionLocked([&]() { @@ -1112,6 +1123,8 @@ Error JITDylib::remove(const SymbolNameSet &Names) { Symbols.erase(SymI); } + shrinkMaterializationInfoMemory(); + return Error::success(); }); } @@ -1313,6 +1326,8 @@ JITDylib::removeTracker(ResourceTracker &RT) { Symbols.erase(I); } + shrinkMaterializationInfoMemory(); + return Result; } @@ -1392,7 +1407,6 @@ void JITDylib::transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT) { } Error JITDylib::defineImpl(MaterializationUnit &MU) { - LLVM_DEBUG({ dbgs() << " " << MU.getSymbols() << "\n"; }); SymbolNameSet Duplicates; @@ -1605,6 +1619,11 @@ Error ExecutionSession::endSession() { LLVM_DEBUG(dbgs() << "Ending ExecutionSession " << this << "\n"); auto JDsToRemove = runSessionLocked([&] { + +#ifdef EXPENSIVE_CHECKS + verifySessionState("Entering ExecutionSession::endSession"); +#endif + SessionOpen = false; return JDs; }); @@ -1662,7 +1681,6 @@ Expected ExecutionSession::createJITDylib(std::string Name) { } Error ExecutionSession::removeJITDylibs(std::vector JDsToRemove) { - // Set JD to 'Closing' state and remove JD from the ExecutionSession. runSessionLocked([&] { for (auto &JD : JDsToRemove) { @@ -1951,6 +1969,196 @@ void ExecutionSession::dump(raw_ostream &OS) { }); } +#ifdef EXPENSIVE_CHECKS +bool ExecutionSession::verifySessionState(Twine Phase) { + return runSessionLocked([&]() { + bool AllOk = true; + + // We'll collect these and verify them later to avoid redundant checks. 
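The verifier being added here follows a pattern worth noting for EXPENSIVE_CHECKS-style code: route every violation through one logging helper that flips an all-OK flag, so a single run reports every broken invariant instead of asserting on the first. The skeleton, reduced to a toy invariant over standard containers:

#include <cstdio>
#include <map>
#include <set>
#include <string>

// Toy invariant: every element of Attached must also be a key of Table.
bool verifyState(const std::map<std::string, int> &Table,
                 const std::set<std::string> &Attached) {
  bool AllOk = true;
  auto LogFailure = [&]() -> std::FILE * {
    if (AllOk) // print the banner once, on the first failure
      std::fprintf(stderr, "ERROR: bad state detected\n");
    AllOk = false;
    return stderr;
  };
  for (const auto &Name : Attached)
    if (!Table.count(Name))
      std::fprintf(LogFailure(), "  %s attached but not in table\n",
                   Name.c_str());
  return AllOk; // caller typically wraps this in an assert
}

int main() {
  std::map<std::string, int> Table{{"foo", 1}};
  bool Ok = verifyState(Table, {"foo", "bar"}); // reports "bar", returns false
  return Ok ? 0 : 1;
}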
+ DenseSet EDUsToCheck; + + for (auto &JD : JDs) { + + auto LogFailure = [&]() -> raw_fd_ostream & { + auto &Stream = errs(); + if (AllOk) + Stream << "ERROR: Bad ExecutionSession state detected " << Phase + << "\n"; + Stream << " In JITDylib " << JD->getName() << ", "; + AllOk = false; + return Stream; + }; + + if (JD->State != JITDylib::Open) { + LogFailure() + << "state is not Open, but JD is in ExecutionSession list."; + } + + // Check symbol table. + // 1. If the entry state isn't resolved then check that no address has + // been set. + // 2. Check that if the hasMaterializerAttached flag is set then there is + // an UnmaterializedInfo entry, and vice-versa. + for (auto &[Sym, Entry] : JD->Symbols) { + // Check that unresolved symbols have null addresses. + if (Entry.getState() < SymbolState::Resolved) { + if (Entry.getAddress()) { + LogFailure() << "symbol " << Sym << " has state " + << Entry.getState() + << " (not-yet-resolved) but non-null address " + << Entry.getAddress() << ".\n"; + } + } + + // Check that the hasMaterializerAttached flag is correct. + auto UMIItr = JD->UnmaterializedInfos.find(Sym); + if (Entry.hasMaterializerAttached()) { + if (UMIItr == JD->UnmaterializedInfos.end()) { + LogFailure() << "symbol " << Sym + << " entry claims materializer attached, but " + "UnmaterializedInfos has no corresponding entry.\n"; + } + } else if (UMIItr != JD->UnmaterializedInfos.end()) { + LogFailure() + << "symbol " << Sym + << " entry claims no materializer attached, but " + "UnmaterializedInfos has an unexpected entry for it.\n"; + } + } + + // Check that every UnmaterializedInfo entry has a corresponding entry + // in the Symbols table. + for (auto &[Sym, UMI] : JD->UnmaterializedInfos) { + auto SymItr = JD->Symbols.find(Sym); + if (SymItr == JD->Symbols.end()) { + LogFailure() + << "symbol " << Sym + << " has UnmaterializedInfos entry, but no Symbols entry.\n"; + } + } + + // Check consistency of the MaterializingInfos table. + for (auto &[Sym, MII] : JD->MaterializingInfos) { + + auto SymItr = JD->Symbols.find(Sym); + if (SymItr == JD->Symbols.end()) { + // If there's no Symbols entry for this MaterializingInfos entry then + // report that. + LogFailure() + << "symbol " << Sym + << " has MaterializingInfos entry, but no Symbols entry.\n"; + } else { + // Otherwise check consistency between Symbols and MaterializingInfos. + + // Ready symbols should not have MaterializingInfos. + if (SymItr->second.getState() == SymbolState::Ready) { + LogFailure() + << "symbol " << Sym + << " is in Ready state, should not have MaterializingInfo.\n"; + } + + // Pending queries should be for subsequent states. + auto CurState = static_cast( + static_cast>( + SymItr->second.getState()) + 1); + for (auto &Q : MII.PendingQueries) { + if (Q->getRequiredState() != CurState) { + if (Q->getRequiredState() > CurState) + CurState = Q->getRequiredState(); + else + LogFailure() << "symbol " << Sym + << " has stale or misordered queries.\n"; + } + } + + // If there's a DefiningEDU then check that... + // 1. The JD matches. + // 2. The symbol is in the EDU's Symbols map. + // 3. The symbol table entry is in the Emitted state. + if (MII.DefiningEDU) { + + EDUsToCheck.insert(MII.DefiningEDU.get()); + + if (MII.DefiningEDU->JD != JD.get()) { + LogFailure() << "symbol " << Sym + << " has DefiningEDU with incorrect JD" + << (llvm::is_contained(JDs, MII.DefiningEDU->JD) + ? 
" (JD not currently in ExecutionSession" + : "") + << "\n"; + } + + if (SymItr->second.getState() != SymbolState::Emitted) { + LogFailure() + << "symbol " << Sym + << " has DefiningEDU, but is not in Emitted state.\n"; + } + } + + // Check that JDs for any DependantEDUs are also in the session -- + // that guarantees that we'll also visit them during this loop. + for (auto &DepEDU : MII.DependantEDUs) { + if (!llvm::is_contained(JDs, DepEDU->JD)) { + LogFailure() << "symbol " << Sym << " has DependantEDU " + << (void *)DepEDU << " with JD (" << DepEDU->JD + << ") that isn't in ExecutionSession.\n"; + } + } + } + } + } + + // Check EDUs. + for (auto *EDU : EDUsToCheck) { + assert(EDU->JD->State == JITDylib::Open && "EDU->JD is not Open"); + + auto LogFailure = [&]() -> raw_fd_ostream & { + AllOk = false; + auto &Stream = errs(); + Stream << "In EDU defining " << EDU->JD->getName() << ": { "; + for (auto &[Sym, Flags] : EDU->Symbols) + Stream << Sym << " "; + Stream << "}, "; + return Stream; + }; + + if (EDU->Symbols.empty()) + LogFailure() << "no symbols defined.\n"; + else { + for (auto &[Sym, Flags] : EDU->Symbols) { + if (!Sym) + LogFailure() << "null symbol defined.\n"; + else { + if (!EDU->JD->Symbols.count(SymbolStringPtr(Sym))) { + LogFailure() << "symbol " << Sym + << " isn't present in JD's symbol table.\n"; + } + } + } + } + + for (auto &[DepJD, Symbols] : EDU->Dependencies) { + if (!llvm::is_contained(JDs, DepJD)) { + LogFailure() << "dependant symbols listed for JD that isn't in " + "ExecutionSession.\n"; + } else { + for (auto &DepSym : Symbols) { + if (!DepJD->Symbols.count(SymbolStringPtr(DepSym))) { + LogFailure() + << "dependant symbol " << DepSym + << " does not appear in symbol table for dependant JD " + << DepJD->getName() << ".\n"; + } + } + } + } + } + + return AllOk; + }); +} +#endif // EXPENSIVE_CHECKS + void ExecutionSession::dispatchOutstandingMUs() { LLVM_DEBUG(dbgs() << "Dispatching MaterializationUnits...\n"); while (true) { @@ -2482,6 +2690,8 @@ void ExecutionSession::OL_completeLookup( return true; }); + JD.shrinkMaterializationInfoMemory(); + // Handle failure. if (Err) { @@ -2925,6 +3135,8 @@ void ExecutionSession::IL_makeEDUReady( JD.MaterializingInfos.erase(MII); } + + JD.shrinkMaterializationInfoMemory(); } void ExecutionSession::IL_makeEDUEmitted( @@ -3060,6 +3272,9 @@ ExecutionSession::IL_emit(MaterializationResponsibility &MR, return make_error("JITDylib " + TargetJD.getName() + " is defunct", inconvertibleErrorCode()); +#ifdef EXPENSIVE_CHECKS + verifySessionState("entering ExecutionSession::IL_emit"); +#endif // Walk all EDUs: // 1. Verifying that dependencies are available (not removed or in the error @@ -3217,6 +3432,10 @@ ExecutionSession::IL_emit(MaterializationResponsibility &MR, IL_makeEDUEmitted(std::move(EDUInfo.EDU), CompletedQueries); } +#ifdef EXPENSIVE_CHECKS + verifySessionState("exiting ExecutionSession::IL_emit"); +#endif + return std::move(CompletedQueries); } @@ -3305,6 +3524,11 @@ std::pair> ExecutionSession::IL_failSymbols(JITDylib &JD, const SymbolNameVector &SymbolsToFail) { + +#ifdef EXPENSIVE_CHECKS + verifySessionState("entering ExecutionSession::IL_failSymbols"); +#endif + JITDylib::AsynchronousSymbolQuerySet FailedQueries; auto FailedSymbolsMap = std::make_shared(); auto ExtractFailedQueries = [&](JITDylib::MaterializingInfo &MI) { @@ -3379,12 +3603,17 @@ ExecutionSession::IL_failSymbols(JITDylib &JD, for (auto &DependantEDU : MI.DependantEDUs) { // Remove DependantEDU from all of its users DependantEDUs lists. 
- for (auto &[JD, Syms] : DependantEDU->Dependencies) { - for (auto Sym : Syms) { - assert(JD->Symbols.count(SymbolStringPtr(Sym)) && "Sym not in JD?"); - assert(JD->MaterializingInfos.count(SymbolStringPtr(Sym)) && + for (auto &[DepJD, DepSyms] : DependantEDU->Dependencies) { + for (auto DepSym : DepSyms) { + // Skip self-reference to avoid invalidating the MI.DependantEDUs + // map. We'll clear this later. + if (DepJD == &JD && DepSym == Name) + continue; + assert(DepJD->Symbols.count(SymbolStringPtr(DepSym)) && + "DepSym not in DepJD?"); + assert(DepJD->MaterializingInfos.count(SymbolStringPtr(DepSym)) && "DependantEDU not registered with symbol it depends on"); - auto SymMI = JD->MaterializingInfos[SymbolStringPtr(Sym)]; + auto &SymMI = DepJD->MaterializingInfos[SymbolStringPtr(DepSym)]; assert(SymMI.DependantEDUs.count(DependantEDU) && "DependantEDU missing from DependantEDUs list"); SymMI.DependantEDUs.erase(DependantEDU); @@ -3422,6 +3651,8 @@ ExecutionSession::IL_failSymbols(JITDylib &JD, ExtractFailedQueries(DepMI); DepJD.MaterializingInfos.erase(SymbolStringPtr(DepName)); } + + DepJD.shrinkMaterializationInfoMemory(); } MI.DependantEDUs.clear(); @@ -3435,6 +3666,12 @@ ExecutionSession::IL_failSymbols(JITDylib &JD, JD.MaterializingInfos.erase(Name); } + JD.shrinkMaterializationInfoMemory(); + +#ifdef EXPENSIVE_CHECKS + verifySessionState("exiting ExecutionSession::IL_failSymbols"); +#endif + return std::make_pair(std::move(FailedQueries), std::move(FailedSymbolsMap)); } diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp index a0d9dfa9e2b55..919b9462e32d4 100644 --- a/llvm/lib/Frontend/Offloading/Utility.cpp +++ b/llvm/lib/Frontend/Offloading/Utility.cpp @@ -40,8 +40,8 @@ offloading::getOffloadingEntryInitializer(Module &M, Constant *Addr, Constant *AddrName = ConstantDataArray::getString(M.getContext(), Name); - StringRef Prefix = Triple.isNVPTX() ? "$omp_offloading$entry_name" - : ".omp_offloading.entry_name"; + StringRef Prefix = + Triple.isNVPTX() ? "$offloading$entry_name" : ".offloading.entry_name"; // Create the constant string used to look up the symbol in the device. auto *Str = @@ -70,7 +70,7 @@ void offloading::emitOffloadingEntry(Module &M, Constant *Addr, StringRef Name, getOffloadingEntryInitializer(M, Addr, Name, Size, Flags, Data); StringRef Prefix = - Triple.isNVPTX() ? "$omp_offloading$entry$" : ".omp_offloading.entry."; + Triple.isNVPTX() ? 
"$offloading$entry$" : ".offloading.entry."; auto *Entry = new GlobalVariable( M, getEntryTy(M), /*isConstant=*/true, GlobalValue::WeakAnyLinkage, EntryInitializer, diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 16507a69ea850..7fd8474c2ec89 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4791,6 +4791,9 @@ void OpenMPIRBuilder::writeTeamsForKernel(const Triple &T, Function &Kernel, updateNVPTXMetadata(Kernel, "maxclusterrank", UB, true); updateNVPTXMetadata(Kernel, "minctasm", LB, false); } + if (T.isAMDGPU()) + Kernel.addFnAttr("amdgpu-max-num-workgroups", llvm::utostr(LB) + ",1,1"); + Kernel.addFnAttr("omp_target_num_teams", std::to_string(LB)); } diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index ae99267f5ba87..6e62767c99e2a 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -30,11 +30,19 @@ using namespace llvm; #define DEBUG_TYPE "ir" STATISTIC(NumInstrRenumberings, "Number of renumberings across all blocks"); -cl::opt - UseNewDbgInfoFormat("experimental-debuginfo-iterators", - cl::desc("Enable communicating debuginfo positions " - "through iterators, eliminating intrinsics"), - cl::init(true)); +cl::opt UseNewDbgInfoFormat( + "experimental-debuginfo-iterators", + cl::desc("Enable communicating debuginfo positions through iterators, " + "eliminating intrinsics. Has no effect if " + "--preserve-input-debuginfo-format=true."), + cl::init(true)); +cl::opt PreserveInputDbgFormat( + "preserve-input-debuginfo-format", cl::Hidden, + cl::desc("When set to true, IR files will be processed and printed in " + "their current debug info format, regardless of default behaviour " + "or other flags passed. Has no effect if input IR does not " + "contain debug records or intrinsics. Ignored in llvm-link, " + "llvm-lto, and llvm-lto2.")); bool WriteNewDbgInfoFormatToBitcode /*set default value in cl::init() below*/; cl::opt WriteNewDbgInfoFormatToBitcode2( @@ -147,6 +155,9 @@ void BasicBlock::setIsNewDbgInfoFormat(bool NewFlag) { else if (!NewFlag && IsNewDbgInfoFormat) convertFromNewDbgValues(); } +void BasicBlock::setNewDbgInfoFormatFlag(bool NewFlag) { + IsNewDbgInfoFormat = NewFlag; +} ValueSymbolTable *BasicBlock::getValueSymbolTable() { if (Function *F = getParent()) diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index eb126f182eadc..b5fda9bb3d129 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -103,6 +103,12 @@ void Function::setIsNewDbgInfoFormat(bool NewFlag) { else if (!NewFlag && IsNewDbgInfoFormat) convertFromNewDbgValues(); } +void Function::setNewDbgInfoFormatFlag(bool NewFlag) { + for (auto &BB : *this) { + BB.setNewDbgInfoFormatFlag(NewFlag); + } + IsNewDbgInfoFormat = NewFlag; +} //===----------------------------------------------------------------------===// // Argument Implementation diff --git a/llvm/lib/IR/IRPrintingPasses.cpp b/llvm/lib/IR/IRPrintingPasses.cpp index 84fb8e6c66b8f..43252c57afca9 100644 --- a/llvm/lib/IR/IRPrintingPasses.cpp +++ b/llvm/lib/IR/IRPrintingPasses.cpp @@ -25,7 +25,8 @@ using namespace llvm; cl::opt WriteNewDbgInfoFormat( "write-experimental-debuginfo", - cl::desc("Write debug info in the new non-intrinsic format"), + cl::desc("Write debug info in the new non-intrinsic format. 
Has no effect " + "if --preserve-input-debuginfo-format=true."), cl::init(false)); namespace { diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 0602a55b9fe7f..b9efe9cdcfe31 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -388,7 +388,7 @@ void Instruction::setIsExact(bool b) { } void Instruction::setNonNeg(bool b) { - assert(isa(this) && "Must be zext"); + assert(isa(this) && "Must be zext/uitofp"); SubclassOptionalData = (SubclassOptionalData & ~PossiblyNonNegInst::NonNeg) | (b * PossiblyNonNegInst::NonNeg); } @@ -408,7 +408,7 @@ bool Instruction::hasNoSignedWrap() const { } bool Instruction::hasNonNeg() const { - assert(isa(this) && "Must be zext"); + assert(isa(this) && "Must be zext/uitofp"); return (SubclassOptionalData & PossiblyNonNegInst::NonNeg) != 0; } @@ -441,6 +441,7 @@ void Instruction::dropPoisonGeneratingFlags() { cast(this)->setIsInBounds(false); break; + case Instruction::UIToFP: case Instruction::ZExt: setNonNeg(false); break; diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp index 7b4449cd825f9..ccc624d854429 100644 --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -44,6 +44,7 @@ bool Operator::hasPoisonGeneratingFlags() const { // Note: inrange exists on constexpr only return GEP->isInBounds() || GEP->getInRange() != std::nullopt; } + case Instruction::UIToFP: case Instruction::ZExt: if (auto *NNI = dyn_cast(this)) return NNI->hasNonNeg(); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index f7d97b2b00a3f..4f8dddbd3f25d 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1734,8 +1734,28 @@ void Verifier::visitModuleFlags() { // Scan each flag, and track the flags and requirements. DenseMap SeenIDs; SmallVector Requirements; - for (const MDNode *MDN : Flags->operands()) + uint64_t PAuthABIPlatform = -1; + uint64_t PAuthABIVersion = -1; + for (const MDNode *MDN : Flags->operands()) { visitModuleFlag(MDN, SeenIDs, Requirements); + if (MDN->getNumOperands() != 3) + continue; + if (const auto *FlagName = dyn_cast_or_null(MDN->getOperand(1))) { + if (FlagName->getString() == "aarch64-elf-pauthabi-platform") { + if (const auto *PAP = + mdconst::dyn_extract_or_null(MDN->getOperand(2))) + PAuthABIPlatform = PAP->getZExtValue(); + } else if (FlagName->getString() == "aarch64-elf-pauthabi-version") { + if (const auto *PAV = + mdconst::dyn_extract_or_null(MDN->getOperand(2))) + PAuthABIVersion = PAV->getZExtValue(); + } + } + } + + if ((PAuthABIPlatform == uint64_t(-1)) != (PAuthABIVersion == uint64_t(-1))) + CheckFailed("either both or no 'aarch64-elf-pauthabi-platform' and " + "'aarch64-elf-pauthabi-version' module flags must be present"); // Validate that the requirements in the module are valid. 
for (const MDNode *Requirement : Requirements) { @@ -4247,9 +4267,10 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) { " operand must have integer or floating point type!", &RMWI, ElTy); } else if (AtomicRMWInst::isFPOperation(Op)) { - Check(ElTy->isFloatingPointTy(), + Check(ElTy->isFPOrFPVectorTy() && !isa<ScalableVectorType>(ElTy), "atomicrmw " + AtomicRMWInst::getOperationName(Op) + - " operand must have floating point type!", + " operand must have floating-point or fixed vector of floating-point " + "type!", &RMWI, ElTy); } else { Check(ElTy->isIntegerTy(), @@ -4342,6 +4363,11 @@ void Verifier::visitEHPadPredecessors(Instruction &I) { if (auto *II = dyn_cast<InvokeInst>(TI)) { Check(II->getUnwindDest() == BB && II->getNormalDest() != BB, "EH pad must be jumped to via an unwind edge", ToPad, II); + auto *CalledFn = + dyn_cast<Function>(II->getCalledOperand()->stripPointerCasts()); + if (CalledFn && CalledFn->isIntrinsic() && II->doesNotThrow() && + !IntrinsicInst::mayLowerToFunctionCall(CalledFn->getIntrinsicID())) + continue; if (auto Bundle = II->getOperandBundle(LLVMContext::OB_funclet)) FromPad = Bundle->Inputs[0]; else @@ -6197,6 +6223,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { &Call); break; } + case Intrinsic::threadlocal_address: { + const Value &Arg0 = *Call.getArgOperand(0); + Check(isa<GlobalVariable>(Arg0), + "llvm.threadlocal.address first argument must be a GlobalVariable"); + Check(cast<GlobalVariable>(Arg0).isThreadLocal(), + "llvm.threadlocal.address operand isThreadLocal() must not be false"); + break; + } }; // Verify that there aren't any unmediated control transfers between funclets. diff --git a/llvm/lib/Object/GOFFObjectFile.cpp b/llvm/lib/Object/GOFFObjectFile.cpp index d3dfd5d1540cf..3b8704f28fdbb 100644 --- a/llvm/lib/Object/GOFFObjectFile.cpp +++ b/llvm/lib/Object/GOFFObjectFile.cpp @@ -165,6 +165,11 @@ GOFFObjectFile::GOFFObjectFile(MemoryBufferRef Object, Error &Err) LLVM_DEBUG(dbgs() << " -- ESD " << EsdId << "\n"); break; } + case GOFF::RT_TXT: + // Save TXT records.
+ TextPtrs.emplace_back(I); + LLVM_DEBUG(dbgs() << " -- TXT\n"); + break; case GOFF::RT_END: LLVM_DEBUG(dbgs() << " -- END (GOFF record type) unhandled\n"); break; @@ -361,6 +366,13 @@ GOFFObjectFile::getSymbolSection(DataRefImpl Symb) const { std::to_string(SymEdId)); } +uint64_t GOFFObjectFile::getSymbolSize(DataRefImpl Symb) const { + const uint8_t *Record = getSymbolEsdRecord(Symb); + uint32_t Length; + ESDRecord::getLength(Record, Length); + return Length; +} + const uint8_t *GOFFObjectFile::getSectionEdEsdRecord(DataRefImpl &Sec) const { SectionEntryImpl EsdIds = SectionList[Sec.d.a]; const uint8_t *EsdRecord = EsdPtrs[EsdIds.d.a]; @@ -391,6 +403,154 @@ GOFFObjectFile::getSectionPrEsdRecord(uint32_t SectionIndex) const { return EsdRecord; } +uint32_t GOFFObjectFile::getSectionDefEsdId(DataRefImpl &Sec) const { + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + uint32_t Length; + ESDRecord::getLength(EsdRecord, Length); + if (Length == 0) { + const uint8_t *PrEsdRecord = getSectionPrEsdRecord(Sec); + if (PrEsdRecord) + EsdRecord = PrEsdRecord; + } + + uint32_t DefEsdId; + ESDRecord::getEsdId(EsdRecord, DefEsdId); + LLVM_DEBUG(dbgs() << "Got def EsdId: " << DefEsdId << '\n'); + return DefEsdId; +} + +void GOFFObjectFile::moveSectionNext(DataRefImpl &Sec) const { + Sec.d.a++; + if ((Sec.d.a) >= SectionList.size()) + Sec.d.a = 0; +} + +Expected GOFFObjectFile::getSectionName(DataRefImpl Sec) const { + DataRefImpl EdSym; + SectionEntryImpl EsdIds = SectionList[Sec.d.a]; + EdSym.d.a = EsdIds.d.a; + Expected Name = getSymbolName(EdSym); + if (Name) { + StringRef Res = *Name; + LLVM_DEBUG(dbgs() << "Got section: " << Res << '\n'); + LLVM_DEBUG(dbgs() << "Final section name: " << Res << '\n'); + Name = Res; + } + return Name; +} + +uint64_t GOFFObjectFile::getSectionAddress(DataRefImpl Sec) const { + uint32_t Offset; + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + ESDRecord::getOffset(EsdRecord, Offset); + return Offset; +} + +uint64_t GOFFObjectFile::getSectionSize(DataRefImpl Sec) const { + uint32_t Length; + uint32_t DefEsdId = getSectionDefEsdId(Sec); + const uint8_t *EsdRecord = EsdPtrs[DefEsdId]; + ESDRecord::getLength(EsdRecord, Length); + LLVM_DEBUG(dbgs() << "Got section size: " << Length << '\n'); + return static_cast(Length); +} + +// Unravel TXT records and expand fill characters to produce +// a contiguous sequence of bytes. +Expected> +GOFFObjectFile::getSectionContents(DataRefImpl Sec) const { + if (SectionDataCache.count(Sec.d.a)) { + auto &Buf = SectionDataCache[Sec.d.a]; + return ArrayRef(Buf); + } + uint64_t SectionSize = getSectionSize(Sec); + uint32_t DefEsdId = getSectionDefEsdId(Sec); + + const uint8_t *EdEsdRecord = getSectionEdEsdRecord(Sec); + bool FillBytePresent; + ESDRecord::getFillBytePresent(EdEsdRecord, FillBytePresent); + uint8_t FillByte = '\0'; + if (FillBytePresent) + ESDRecord::getFillByteValue(EdEsdRecord, FillByte); + + // Initialize section with fill byte. + SmallVector Data(SectionSize, FillByte); + + // Replace section with content from text records. 
+ for (const uint8_t *TxtRecordInt : TextPtrs) { + const uint8_t *TxtRecordPtr = TxtRecordInt; + uint32_t TxtEsdId; + TXTRecord::getElementEsdId(TxtRecordPtr, TxtEsdId); + LLVM_DEBUG(dbgs() << "Got txt EsdId: " << TxtEsdId << '\n'); + + if (TxtEsdId != DefEsdId) + continue; + + uint32_t TxtDataOffset; + TXTRecord::getOffset(TxtRecordPtr, TxtDataOffset); + + uint16_t TxtDataSize; + TXTRecord::getDataLength(TxtRecordPtr, TxtDataSize); + + LLVM_DEBUG(dbgs() << "Record offset " << TxtDataOffset << ", data size " + << TxtDataSize << "\n"); + + SmallString<256> CompleteData; + CompleteData.reserve(TxtDataSize); + if (Error Err = TXTRecord::getData(TxtRecordPtr, CompleteData)) + return std::move(Err); + assert(CompleteData.size() == TxtDataSize && "Wrong length of data"); + std::copy(CompleteData.data(), CompleteData.data() + TxtDataSize, + Data.begin() + TxtDataOffset); + } + SectionDataCache[Sec.d.a] = Data; + return ArrayRef(SectionDataCache[Sec.d.a]); +} + +uint64_t GOFFObjectFile::getSectionAlignment(DataRefImpl Sec) const { + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + GOFF::ESDAlignment Pow2Alignment; + ESDRecord::getAlignment(EsdRecord, Pow2Alignment); + return 1ULL << static_cast(Pow2Alignment); +} + +bool GOFFObjectFile::isSectionText(DataRefImpl Sec) const { + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + GOFF::ESDExecutable Executable; + ESDRecord::getExecutable(EsdRecord, Executable); + return Executable == GOFF::ESD_EXE_CODE; +} + +bool GOFFObjectFile::isSectionData(DataRefImpl Sec) const { + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + GOFF::ESDExecutable Executable; + ESDRecord::getExecutable(EsdRecord, Executable); + return Executable == GOFF::ESD_EXE_DATA; +} + +bool GOFFObjectFile::isSectionNoLoad(DataRefImpl Sec) const { + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + GOFF::ESDLoadingBehavior LoadingBehavior; + ESDRecord::getLoadingBehavior(EsdRecord, LoadingBehavior); + return LoadingBehavior == GOFF::ESD_LB_NoLoad; +} + +bool GOFFObjectFile::isSectionReadOnlyData(DataRefImpl Sec) const { + if (!isSectionData(Sec)) + return false; + + const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec); + GOFF::ESDLoadingBehavior LoadingBehavior; + ESDRecord::getLoadingBehavior(EsdRecord, LoadingBehavior); + return LoadingBehavior == GOFF::ESD_LB_Initial; +} + +bool GOFFObjectFile::isSectionZeroInit(DataRefImpl Sec) const { + // GOFF uses fill characters and fill characters are applied + // on getSectionContents() - so we say false to zero init. 
+ return false; +} + section_iterator GOFFObjectFile::section_begin() const { DataRefImpl Sec; moveSectionNext(Sec); @@ -473,6 +633,13 @@ Error ESDRecord::getData(const uint8_t *Record, return getContinuousData(Record, DataSize, 72, CompleteData); } +Error TXTRecord::getData(const uint8_t *Record, + SmallString<256> &CompleteData) { + uint16_t Length; + getDataLength(Record, Length); + return getContinuousData(Record, Length, 24, CompleteData); +} + Error ENDRecord::getData(const uint8_t *Record, SmallString<256> &CompleteData) { uint16_t Length = getNameLength(Record); diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp index cf69f6173b6d4..b8b6b90c253f2 100644 --- a/llvm/lib/Option/OptTable.cpp +++ b/llvm/lib/Option/OptTable.cpp @@ -710,7 +710,8 @@ void OptTable::printHelp(raw_ostream &OS, const char *Usage, const char *Title, OS, Usage, Title, ShowHidden, ShowAllAliases, [VisibilityMask](const Info &CandidateInfo) -> bool { return (CandidateInfo.Visibility & VisibilityMask) == 0; - }); + }, + VisibilityMask); } void OptTable::printHelp(raw_ostream &OS, const char *Usage, const char *Title, @@ -726,13 +727,14 @@ void OptTable::printHelp(raw_ostream &OS, const char *Usage, const char *Title, if (CandidateInfo.Flags & FlagsToExclude) return true; return false; - }); + }, + Visibility(0)); } void OptTable::internalPrintHelp( raw_ostream &OS, const char *Usage, const char *Title, bool ShowHidden, - bool ShowAllAliases, - std::function ExcludeOption) const { + bool ShowAllAliases, std::function ExcludeOption, + Visibility VisibilityMask) const { OS << "OVERVIEW: " << Title << "\n\n"; OS << "USAGE: " << Usage << "\n\n"; @@ -754,11 +756,11 @@ void OptTable::internalPrintHelp( // If an alias doesn't have a help text, show a help text for the aliased // option instead. 
- const char *HelpText = getOptionHelpText(Id); + const char *HelpText = getOptionHelpText(Id, VisibilityMask); if (!HelpText && ShowAllAliases) { const Option Alias = getOption(Id).getAlias(); if (Alias.isValid()) - HelpText = getOptionHelpText(Alias.getID()); + HelpText = getOptionHelpText(Alias.getID(), VisibilityMask); } if (HelpText && (strlen(HelpText) != 0)) { diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index e99f7db1e368c..5095a58f007fd 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -183,13 +183,13 @@ #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/KCFI.h" +#include "llvm/Transforms/Instrumentation/LowerAllowCheckPass.h" #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Instrumentation/PoisonChecking.h" #include "llvm/Transforms/Instrumentation/SPIRITTAnnotations.h" -#include "llvm/Transforms/Instrumentation/RemoveTrapsPass.h" #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 51bbcda05688f..2bf88cc4b3e15 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -380,6 +380,7 @@ FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass()) FUNCTION_PASS("loop-simplify", LoopSimplifyPass()) FUNCTION_PASS("loop-sink", LoopSinkPass()) FUNCTION_PASS("loop-versioning", LoopVersioningPass()) +FUNCTION_PASS("lower-allow-check", LowerAllowCheckPass()) FUNCTION_PASS("lower-atomic", LowerAtomicPass()) FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass()) FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass()) @@ -439,7 +440,6 @@ FUNCTION_PASS("print", UniformityInfoPrinterPass(dbgs())) FUNCTION_PASS("reassociate", ReassociatePass()) FUNCTION_PASS("redundant-dbg-inst-elim", RedundantDbgInstEliminationPass()) FUNCTION_PASS("reg2mem", RegToMemPass()) -FUNCTION_PASS("remove-traps", RemoveTrapsPass()) FUNCTION_PASS("safe-stack", SafeStackPass(TM)) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index a1bc180a53ca3..7c56cde3e6ced 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -414,6 +414,144 @@ static void setSummary(IndexedInstrProf::Summary *TheSummary, TheSummary->setEntry(I, Res[I]); } +// Serialize Schema. +static void writeMemProfSchema(ProfOStream &OS, + const memprof::MemProfSchema &Schema) { + OS.write(static_cast(Schema.size())); + for (const auto Id : Schema) + OS.write(static_cast(Id)); +} + +// Serialize MemProfRecordData. Return RecordTableOffset. 
+static uint64_t writeMemProfRecords( + ProfOStream &OS, + llvm::MapVector + &MemProfRecordData, + memprof::MemProfSchema *Schema) { + auto RecordWriter = + std::make_unique(memprof::Version1); + RecordWriter->Schema = Schema; + OnDiskChainedHashTableGenerator + RecordTableGenerator; + for (auto &[GUID, Record] : MemProfRecordData) { + // Insert the key (func hash) and value (memprof record). + RecordTableGenerator.insert(GUID, Record, *RecordWriter.get()); + } + // Release the memory of this MapVector as it is no longer needed. + MemProfRecordData.clear(); + + // The call to Emit invokes RecordWriterTrait::EmitData which destructs + // the memprof record copies owned by the RecordTableGenerator. This works + // because the RecordTableGenerator is not used after this point. + return RecordTableGenerator.Emit(OS.OS, *RecordWriter); +} + +// Serialize MemProfFrameData. Return FrameTableOffset. +static uint64_t writeMemProfFrames( + ProfOStream &OS, + llvm::MapVector &MemProfFrameData) { + auto FrameWriter = std::make_unique(); + OnDiskChainedHashTableGenerator + FrameTableGenerator; + for (auto &[FrameId, Frame] : MemProfFrameData) { + // Insert the key (frame id) and value (frame contents). + FrameTableGenerator.insert(FrameId, Frame); + } + // Release the memory of this MapVector as it is no longer needed. + MemProfFrameData.clear(); + + return FrameTableGenerator.Emit(OS.OS, *FrameWriter); +} + +static Error writeMemProfV0( + ProfOStream &OS, + llvm::MapVector + &MemProfRecordData, + llvm::MapVector &MemProfFrameData) { + uint64_t HeaderUpdatePos = OS.tell(); + OS.write(0ULL); // Reserve space for the memprof record table offset. + OS.write(0ULL); // Reserve space for the memprof frame payload offset. + OS.write(0ULL); // Reserve space for the memprof frame table offset. + + auto Schema = memprof::PortableMemInfoBlock::getSchema(); + writeMemProfSchema(OS, Schema); + + uint64_t RecordTableOffset = + writeMemProfRecords(OS, MemProfRecordData, &Schema); + + uint64_t FramePayloadOffset = OS.tell(); + uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData); + + uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset}; + OS.patch({{HeaderUpdatePos, Header, std::size(Header)}}); + + return Error::success(); +} + +static Error writeMemProfV1( + ProfOStream &OS, + llvm::MapVector + &MemProfRecordData, + llvm::MapVector &MemProfFrameData) { + OS.write(memprof::Version1); + uint64_t HeaderUpdatePos = OS.tell(); + OS.write(0ULL); // Reserve space for the memprof record table offset. + OS.write(0ULL); // Reserve space for the memprof frame payload offset. + OS.write(0ULL); // Reserve space for the memprof frame table offset. 
+ + auto Schema = memprof::PortableMemInfoBlock::getSchema(); + writeMemProfSchema(OS, Schema); + + uint64_t RecordTableOffset = + writeMemProfRecords(OS, MemProfRecordData, &Schema); + + uint64_t FramePayloadOffset = OS.tell(); + uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData); + + uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset}; + OS.patch({{HeaderUpdatePos, Header, std::size(Header)}}); + + return Error::success(); +} + +// The MemProf profile data includes a simple schema +// with the format described below followed by the hashtable: +// uint64_t Version +// uint64_t RecordTableOffset = RecordTableGenerator.Emit +// uint64_t FramePayloadOffset = Stream offset before emitting the frame table +// uint64_t FrameTableOffset = FrameTableGenerator.Emit +// uint64_t Num schema entries +// uint64_t Schema entry 0 +// uint64_t Schema entry 1 +// .... +// uint64_t Schema entry N - 1 +// OnDiskChainedHashTable MemProfRecordData +// OnDiskChainedHashTable MemProfFrameData +static Error writeMemProf( + ProfOStream &OS, + llvm::MapVector + &MemProfRecordData, + llvm::MapVector &MemProfFrameData, + memprof::IndexedVersion MemProfVersionRequested) { + + switch (MemProfVersionRequested) { + case memprof::Version0: + return writeMemProfV0(OS, MemProfRecordData, MemProfFrameData); + case memprof::Version1: + return writeMemProfV1(OS, MemProfRecordData, MemProfFrameData); + case memprof::Version2: + // TODO: Implement. Fall through to the error handling below for now. + break; + } + + return make_error( + instrprof_error::unsupported_version, + formatv("MemProf version {} not supported; " + "requires version between {} and {}, inclusive", + MemProfVersionRequested, memprof::MinimumSupportedVersion, + memprof::MaximumSupportedVersion)); +} + Error InstrProfWriter::writeImpl(ProfOStream &OS) { using namespace IndexedInstrProf; using namespace support; @@ -517,86 +655,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { // Write the hash table. uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj); - // Write the MemProf profile data if we have it. This includes a simple schema - // with the format described below followed by the hashtable: - // uint64_t Version - // uint64_t RecordTableOffset = RecordTableGenerator.Emit - // uint64_t FramePayloadOffset = Stream offset before emitting the frame table - // uint64_t FrameTableOffset = FrameTableGenerator.Emit - // uint64_t Num schema entries - // uint64_t Schema entry 0 - // uint64_t Schema entry 1 - // .... - // uint64_t Schema entry N - 1 - // OnDiskChainedHashTable MemProfRecordData - // OnDiskChainedHashTable MemProfFrameData + // Write the MemProf profile data if we have it. uint64_t MemProfSectionStart = 0; if (static_cast(ProfileKind & InstrProfKind::MemProf)) { - if (MemProfVersionRequested < memprof::MinimumSupportedVersion || - MemProfVersionRequested > memprof::MaximumSupportedVersion) { - return make_error( - instrprof_error::unsupported_version, - formatv("MemProf version {} not supported; " - "requires version between {} and {}, inclusive", - MemProfVersionRequested, memprof::MinimumSupportedVersion, - memprof::MaximumSupportedVersion)); - } - MemProfSectionStart = OS.tell(); - - if (MemProfVersionRequested >= memprof::Version1) - OS.write(MemProfVersionRequested); - - OS.write(0ULL); // Reserve space for the memprof record table offset. - OS.write(0ULL); // Reserve space for the memprof frame payload offset. - OS.write(0ULL); // Reserve space for the memprof frame table offset. 
- - auto Schema = memprof::PortableMemInfoBlock::getSchema(); - OS.write(static_cast(Schema.size())); - for (const auto Id : Schema) { - OS.write(static_cast(Id)); - } - - auto RecordWriter = - std::make_unique>(); - RecordWriter->Schema = &Schema; - OnDiskChainedHashTableGenerator< - memprof::RecordWriterTrait> - RecordTableGenerator; - for (auto &I : MemProfRecordData) { - // Insert the key (func hash) and value (memprof record). - RecordTableGenerator.insert(I.first, I.second); - } - // Release the memory of this MapVector as it is no longer needed. - MemProfRecordData.clear(); - - // The call to Emit invokes RecordWriterTrait::EmitData which destructs - // the memprof record copies owned by the RecordTableGenerator. This works - // because the RecordTableGenerator is not used after this point. - uint64_t RecordTableOffset = - RecordTableGenerator.Emit(OS.OS, *RecordWriter); - - uint64_t FramePayloadOffset = OS.tell(); - - auto FrameWriter = std::make_unique(); - OnDiskChainedHashTableGenerator - FrameTableGenerator; - for (auto &I : MemProfFrameData) { - // Insert the key (frame id) and value (frame contents). - FrameTableGenerator.insert(I.first, I.second); - } - // Release the memory of this MapVector as it is no longer needed. - MemProfFrameData.clear(); - - uint64_t FrameTableOffset = FrameTableGenerator.Emit(OS.OS, *FrameWriter); - - uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, - FrameTableOffset}; - uint64_t HeaderUpdatePos = MemProfSectionStart; - if (MemProfVersionRequested >= memprof::Version1) - // The updates go just after the version field. - HeaderUpdatePos += sizeof(uint64_t); - OS.patch({{HeaderUpdatePos, Header, std::size(Header)}}); + if (auto E = writeMemProf(OS, MemProfRecordData, MemProfFrameData, + MemProfVersionRequested)) + return E; } // BinaryIdSection has two parts: diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index ac0a8702c3f9c..96aeedf2e6913 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -10,8 +10,7 @@ namespace llvm { namespace memprof { -namespace { -size_t serializedSizeV0(const IndexedAllocationInfo &IAI) { +static size_t serializedSizeV0(const IndexedAllocationInfo &IAI) { size_t Size = 0; // The number of frames to serialize. 
Size += sizeof(uint64_t); @@ -22,7 +21,7 @@ size_t serializedSizeV0(const IndexedAllocationInfo &IAI) { return Size; } -size_t serializedSizeV2(const IndexedAllocationInfo &IAI) { +static size_t serializedSizeV2(const IndexedAllocationInfo &IAI) { size_t Size = 0; // The CallStackId Size += sizeof(CallStackId); @@ -30,7 +29,6 @@ size_t serializedSizeV2(const IndexedAllocationInfo &IAI) { Size += PortableMemInfoBlock::serializedSize(); return Size; } -} // namespace size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const { switch (Version) { @@ -43,8 +41,7 @@ size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const { llvm_unreachable("unsupported MemProf version"); } -namespace { -size_t serializedSizeV0(const IndexedMemProfRecord &Record) { +static size_t serializedSizeV0(const IndexedMemProfRecord &Record) { size_t Result = sizeof(GlobalValue::GUID); for (const IndexedAllocationInfo &N : Record.AllocSites) Result += N.serializedSize(Version0); @@ -59,7 +56,7 @@ size_t serializedSizeV0(const IndexedMemProfRecord &Record) { return Result; } -size_t serializedSizeV2(const IndexedMemProfRecord &Record) { +static size_t serializedSizeV2(const IndexedMemProfRecord &Record) { size_t Result = sizeof(GlobalValue::GUID); for (const IndexedAllocationInfo &N : Record.AllocSites) Result += N.serializedSize(Version2); @@ -70,7 +67,6 @@ size_t serializedSizeV2(const IndexedMemProfRecord &Record) { Result += Record.CallSiteIds.size() * sizeof(CallStackId); return Result; } -} // namespace size_t IndexedMemProfRecord::serializedSize(IndexedVersion Version) const { switch (Version) { @@ -83,9 +79,8 @@ size_t IndexedMemProfRecord::serializedSize(IndexedVersion Version) const { llvm_unreachable("unsupported MemProf version"); } -namespace { -void serializeV0(const IndexedMemProfRecord &Record, - const MemProfSchema &Schema, raw_ostream &OS) { +static void serializeV0(const IndexedMemProfRecord &Record, + const MemProfSchema &Schema, raw_ostream &OS) { using namespace support; endian::Writer LE(OS, llvm::endianness::little); @@ -107,8 +102,8 @@ void serializeV0(const IndexedMemProfRecord &Record, } } -void serializeV2(const IndexedMemProfRecord &Record, - const MemProfSchema &Schema, raw_ostream &OS) { +static void serializeV2(const IndexedMemProfRecord &Record, + const MemProfSchema &Schema, raw_ostream &OS) { using namespace support; endian::Writer LE(OS, llvm::endianness::little); @@ -124,7 +119,6 @@ void serializeV2(const IndexedMemProfRecord &Record, for (const auto &CSId : Record.CallSiteIds) LE.write(CSId); } -} // namespace void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version) { @@ -140,9 +134,8 @@ void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, llvm_unreachable("unsupported MemProf version"); } -namespace { -IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema, - const unsigned char *Ptr) { +static IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema, + const unsigned char *Ptr) { using namespace support; IndexedMemProfRecord Record; @@ -185,8 +178,8 @@ IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema, return Record; } -IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema, - const unsigned char *Ptr) { +static IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema, + const unsigned char *Ptr) { using namespace support; IndexedMemProfRecord Record; @@ -214,7 +207,6 @@ IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema, return 
Record; } -} // namespace IndexedMemProfRecord IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp index 5dc1ff8978154..e93fbc72f54eb 100644 --- a/llvm/lib/ProfileData/RawMemProfReader.cpp +++ b/llvm/lib/ProfileData/RawMemProfReader.cpp @@ -614,11 +614,11 @@ Error RawMemProfReader::readRawProfile( // Read in the MemInfoBlocks. Merge them based on stack id - we assume that // raw profiles in the same binary file are from the same process so the // stackdepot ids are the same. - for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) { - if (CallstackProfileData.count(Value.first)) { - CallstackProfileData[Value.first].Merge(Value.second); + for (const auto &[Id, MIB] : readMemInfoBlocks(Next + Header->MIBOffset)) { + if (CallstackProfileData.count(Id)) { + CallstackProfileData[Id].Merge(MIB); } else { - CallstackProfileData[Value.first] = Value.second; + CallstackProfileData[Id] = MIB; } } diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index c20609748dc97..8825025ec3213 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -1240,53 +1240,17 @@ APInt APInt::sqrt() const { return x_old + 1; } -/// Computes the multiplicative inverse of this APInt for a given modulo. The -/// iterative extended Euclidean algorithm is used to solve for this value, -/// however we simplify it to speed up calculating only the inverse, and take -/// advantage of div+rem calculations. We also use some tricks to avoid copying -/// (potentially large) APInts around. -/// WARNING: a value of '0' may be returned, -/// signifying that no multiplicative inverse exists! -APInt APInt::multiplicativeInverse(const APInt& modulo) const { - assert(ult(modulo) && "This APInt must be smaller than the modulo"); - - // Using the properties listed at the following web page (accessed 06/21/08): - // http://www.numbertheory.org/php/euclid.html - // (especially the properties numbered 3, 4 and 9) it can be proved that - // BitWidth bits suffice for all the computations in the algorithm implemented - // below. More precisely, this number of bits suffice if the multiplicative - // inverse exists, but may not suffice for the general extended Euclidean - // algorithm. - - APInt r[2] = { modulo, *this }; - APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; - APInt q(BitWidth, 0); - - unsigned i; - for (i = 0; r[i^1] != 0; i ^= 1) { - // An overview of the math without the confusing bit-flipping: - // q = r[i-2] / r[i-1] - // r[i] = r[i-2] % r[i-1] - // t[i] = t[i-2] - t[i-1] * q - udivrem(r[i], r[i^1], q, r[i]); - t[i] -= t[i^1] * q; - } - - // If this APInt and the modulo are not coprime, there is no multiplicative - // inverse, so return 0. We check this by looking at the next-to-last - // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean - // algorithm. - if (r[i] != 1) - return APInt(BitWidth, 0); - - // The next-to-last t is the multiplicative inverse. However, we are - // interested in a positive inverse. Calculate a positive one from a negative - // one if necessary. A simple addition of the modulo suffices because - // abs(t[i]) is known to be less than *this/2 (see the link above). - if (t[i].isNegative()) - t[i] += modulo; - - return std::move(t[i]); +/// \returns the multiplicative inverse of an odd APInt modulo 2^BitWidth. 
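+/// +/// Convergence note: for odd A, A*A == 1 (mod 8), so Factor = A starts out +/// correct in its low three bits, and each Newton step X <- X*(2 - A*X) +/// doubles the number of correct low bits (if A*X == 1 (mod 2^k) then +/// A*(X*(2 - A*X)) == 1 (mod 2^(2k))), so the loop terminates after at most +/// about log2(BitWidth) iterations.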
+APInt APInt::multiplicativeInverse() const { + assert((*this)[0] && + "multiplicative inverse is only defined for odd numbers!"); + + // Use Newton's method. + APInt Factor = *this; + APInt T; + while (!(T = *this * Factor).isOne()) + Factor *= 2 - std::move(T); + return Factor; } /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index c076ae8b84317..056340bbab5ae 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -2734,6 +2734,52 @@ void cl::PrintHelpMessage(bool Hidden, bool Categorized) { CommonOptions->CategorizedHiddenPrinter.printHelp(); } +ArrayRef cl::getCompilerBuildConfig() { + static const StringRef Config[] = { + // Placeholder to ensure the array always has elements, since it's an + // error to have a zero-sized array. Slice this off before returning. + "", + // Actual compiler build config feature list: +#if LLVM_IS_DEBUG_BUILD + "+unoptimized", +#endif +#ifndef NDEBUG + "+assertions", +#endif +#ifdef EXPENSIVE_CHECKS + "+expensive-checks", +#endif +#if __has_feature(address_sanitizer) + "+asan", +#endif +#if __has_feature(dataflow_sanitizer) + "+dfsan", +#endif +#if __has_feature(hwaddress_sanitizer) + "+hwasan", +#endif +#if __has_feature(memory_sanitizer) + "+msan", +#endif +#if __has_feature(thread_sanitizer) + "+tsan", +#endif +#if __has_feature(undefined_behavior_sanitizer) + "+ubsan", +#endif + }; + return ArrayRef(Config).drop_front(1); +} + +// Utility function for printing the build config. +void cl::printBuildConfig(raw_ostream &OS) { +#if LLVM_VERSION_PRINTER_SHOW_BUILD_CONFIG + OS << "Build config: "; + llvm::interleaveComma(cl::getCompilerBuildConfig(), OS); + OS << '\n'; +#endif +} + /// Utility function for printing version number. 
void cl::PrintVersionMessage() { CommonOptions->VersionPrinterInstance.print(CommonOptions->ExtraVersionPrinters); diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 67e6e5b962b1a..7a19d24d1ff48 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -134,6 +134,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zcd", {1, 0}}, {"zce", {1, 0}}, {"zcf", {1, 0}}, + {"zcmop", {1, 0}}, {"zcmp", {1, 0}}, {"zcmt", {1, 0}}, @@ -162,6 +163,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = { {"zihintntl", {1, 0}}, {"zihintpause", {2, 0}}, {"zihpm", {2, 0}}, + {"zimop", {1, 0}}, {"zk", {1, 0}}, {"zkn", {1, 0}}, @@ -233,15 +235,11 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { {"zalasr", {0, 1}}, {"zalrsc", {0, 2}}, - {"zcmop", {0, 2}}, - {"zfbfmin", {1, 0}}, {"zicfilp", {0, 4}}, {"zicfiss", {0, 4}}, - {"zimop", {0, 1}}, - {"ztso", {0, 1}}, {"zvfbfmin", {1, 0}}, @@ -264,25 +262,25 @@ static constexpr RISCVProfile SupportedProfiles[] = { "sscounterenw_sstvala_sstvecd_svade_svbare_svinval_svpbmt"}, {"rva23u64", "rv64imafdcv_zic64b_zicbom_zicbop_zicboz_ziccamoa_ziccif_zicclsm_ziccrse_" - "zicntr_zicond_zihintntl_zihintpause_zihpm_zimop0p1_za64rs_zawrs_zfa_" - "zfhmin_zcb_zcmop0p2_zba_zbb_zbs_zkt_zvbb_zvfhmin_zvkt"}, + "zicntr_zicond_zihintntl_zihintpause_zihpm_zimop_za64rs_zawrs_zfa_zfhmin_" + "zcb_zcmop_zba_zbb_zbs_zkt_zvbb_zvfhmin_zvkt"}, {"rva23s64", "rv64imafdcvh_zic64b_zicbom_zicbop_zicboz_ziccamoa_ziccif_zicclsm_ziccrse_" - "zicntr_zicond_zifencei_zihintntl_zihintpause_zihpm_zimop0p1_za64rs_zawrs_" - "zfa_zfhmin_zcb_zcmop0p2_zba_zbb_zbs_zkt_zvbb_zvfhmin_zvkt_shcounterenw_" + "zicntr_zicond_zifencei_zihintntl_zihintpause_zihpm_zimop_za64rs_zawrs_" + "zfa_zfhmin_zcb_zcmop_zba_zbb_zbs_zkt_zvbb_zvfhmin_zvkt_shcounterenw_" "shgatpa_shtvala_shvsatpa_shvstvala_shvstvecd_ssccptr_sscofpmf_" "sscounterenw_ssnpm0p8_ssstateen_sstc_sstvala_sstvecd_ssu64xl_svade_" "svbare_svinval_svnapot_svpbmt"}, {"rvb23u64", "rv64imafdc_zic64b_zicbom_zicbop_zicboz_ziccamoa_ziccif_" "zicclsm_ziccrse_zicntr_zicond_zihintntl_zihintpause_zihpm_" - "zimop0p1_za64rs_zawrs_zfa_zcb_zcmop0p2_zba_zbb_zbs_zkt"}, + "zimop_za64rs_zawrs_zfa_zcb_zcmop_zba_zbb_zbs_zkt"}, {"rvb23s64", "rv64imafdc_zic64b_zicbom_zicbop_zicboz_ziccamoa_ziccif_zicclsm_ziccrse_" - "zicntr_zicond_zifencei_zihintntl_zihintpause_zihpm_zimop0p1_za64rs_zawrs_" - "zfa_zcb_zcmop0p2_zba_zbb_zbs_zkt_ssccptr_sscofpmf_sscounterenw_sstc_" - "sstvala_sstvecd_ssu64xl_svade_svbare_svinval_svnapot_svpbmt"}, - {"rvm23u32", "rv32im_zicbop_zicond_zicsr_zihintntl_zihintpause_zimop0p1_" - "zca_zcb_zce_zcmop0p2_zcmp_zcmt_zba_zbb_zbs"}, + "zicntr_zicond_zifencei_zihintntl_zihintpause_zihpm_zimop_za64rs_zawrs_" + "zfa_zcb_zcmop_zba_zbb_zbs_zkt_ssccptr_sscofpmf_sscounterenw_sstc_sstvala_" + "sstvecd_ssu64xl_svade_svbare_svinval_svnapot_svpbmt"}, + {"rvm23u32", "rv32im_zicbop_zicond_zicsr_zihintntl_zihintpause_zimop_zca_" + "zcb_zce_zcmop_zcmp_zcmt_zba_zbb_zbs"}, }; static void verifyTables() { diff --git a/llvm/lib/Support/ToolOutputFile.cpp b/llvm/lib/Support/ToolOutputFile.cpp index 01f7095f3499d..7a07286882fee 100644 --- a/llvm/lib/Support/ToolOutputFile.cpp +++ b/llvm/lib/Support/ToolOutputFile.cpp @@ -17,14 +17,14 @@ using namespace llvm; static bool isStdout(StringRef Filename) { return Filename == "-"; } -ToolOutputFile::CleanupInstaller::CleanupInstaller(StringRef Filename) +CleanupInstaller::CleanupInstaller(StringRef Filename) : 
Filename(std::string(Filename)), Keep(false) { // Arrange for the file to be deleted if the process is killed. if (!isStdout(Filename)) sys::RemoveFileOnSignal(Filename); } -ToolOutputFile::CleanupInstaller::~CleanupInstaller() { +CleanupInstaller::~CleanupInstaller() { if (isStdout(Filename)) return; diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index 854d531ab371f..4f0336a85daaa 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -157,7 +157,9 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr) { SmallString<256> RealPath; sys::fs::real_path(PathNameUTF8, RealPath); - return std::string(RealPath); + if (RealPath.size()) + return std::string(RealPath); + return std::string(PathNameUTF8.data()); } UniqueID file_status::getUniqueID() const { diff --git a/llvm/lib/Support/raw_socket_stream.cpp b/llvm/lib/Support/raw_socket_stream.cpp index afb0ed11b2c24..1dcf6352f2cc2 100644 --- a/llvm/lib/Support/raw_socket_stream.cpp +++ b/llvm/lib/Support/raw_socket_stream.cpp @@ -14,8 +14,14 @@ #include "llvm/Support/raw_socket_stream.h" #include "llvm/Config/config.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" + +#include +#include +#include #ifndef _WIN32 +#include #include #include #else @@ -45,7 +51,6 @@ WSABalancer::WSABalancer() { } WSABalancer::~WSABalancer() { WSACleanup(); } - #endif // _WIN32 static std::error_code getLastSocketErrorCode() { @@ -56,104 +61,231 @@ static std::error_code getLastSocketErrorCode() { #endif } -ListeningSocket::ListeningSocket(int SocketFD, StringRef SocketPath) - : FD(SocketFD), SocketPath(SocketPath) {} +static sockaddr_un setSocketAddr(StringRef SocketPath) { + struct sockaddr_un Addr; + memset(&Addr, 0, sizeof(Addr)); + Addr.sun_family = AF_UNIX; + strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1); + return Addr; +} + +static Expected<int> getSocketFD(StringRef SocketPath) { +#ifdef _WIN32 + SOCKET Socket = socket(AF_UNIX, SOCK_STREAM, 0); + if (Socket == INVALID_SOCKET) { +#else + int Socket = socket(AF_UNIX, SOCK_STREAM, 0); + if (Socket == -1) { +#endif // _WIN32 + return llvm::make_error<StringError>(getLastSocketErrorCode(), + "Create socket failed"); + } + + struct sockaddr_un Addr = setSocketAddr(SocketPath); + if (::connect(Socket, (struct sockaddr *)&Addr, sizeof(Addr)) == -1) + return llvm::make_error<StringError>(getLastSocketErrorCode(), + "Connect socket failed"); + +#ifdef _WIN32 + return _open_osfhandle(Socket, 0); +#else + return Socket; +#endif // _WIN32 +} + +ListeningSocket::ListeningSocket(int SocketFD, StringRef SocketPath, + int PipeFD[2]) + : FD(SocketFD), SocketPath(SocketPath), PipeFD{PipeFD[0], PipeFD[1]} {} ListeningSocket::ListeningSocket(ListeningSocket &&LS) - : FD(LS.FD), SocketPath(LS.SocketPath) { + : FD(LS.FD.load()), SocketPath(LS.SocketPath), + PipeFD{LS.PipeFD[0], LS.PipeFD[1]} { + LS.FD = -1; + LS.SocketPath.clear(); + LS.PipeFD[0] = -1; + LS.PipeFD[1] = -1; } Expected<ListeningSocket> ListeningSocket::createUnix(StringRef SocketPath, int MaxBacklog) { + // Handle instances where the target socket address already exists and + // differentiate between a preexisting file with and without a bound socket + // + // ::bind will return std::errc::address_in_use if a file at the socket address + // already exists (e.g., the file was not properly unlinked due to a crash) + // even if another socket has not yet bound to that address + if (llvm::sys::fs::exists(SocketPath)) { + Expected<int> MaybeFD = getSocketFD(SocketPath); + if (!MaybeFD) {
+ + // Regardless of the error, notify the caller that a file already exists + // at the desired socket address and that there is no bound socket at that + // address. The file must be removed before ::bind can use the address + consumeError(MaybeFD.takeError()); + return llvm::make_error<StringError>( + std::make_error_code(std::errc::file_exists), + "Socket address unavailable"); + } + ::close(std::move(*MaybeFD)); + + // Notify caller that the provided socket address already has a bound socket + return llvm::make_error<StringError>( + std::make_error_code(std::errc::address_in_use), + "Socket address unavailable"); + } + #ifdef _WIN32 WSABalancer _; - SOCKET MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0); - if (MaybeWinsocket == INVALID_SOCKET) { + SOCKET Socket = socket(AF_UNIX, SOCK_STREAM, 0); + if (Socket == INVALID_SOCKET) #else - int MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0); - if (MaybeWinsocket == -1) { + int Socket = socket(AF_UNIX, SOCK_STREAM, 0); + if (Socket == -1) #endif return llvm::make_error<StringError>(getLastSocketErrorCode(), "socket create failed"); - } - struct sockaddr_un Addr; - memset(&Addr, 0, sizeof(Addr)); - Addr.sun_family = AF_UNIX; - strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1); - - if (bind(MaybeWinsocket, (struct sockaddr *)&Addr, sizeof(Addr)) == -1) { - std::error_code Err = getLastSocketErrorCode(); - if (Err == std::errc::address_in_use) - ::close(MaybeWinsocket); - return llvm::make_error<StringError>(Err, "Bind error"); + struct sockaddr_un Addr = setSocketAddr(SocketPath); + if (::bind(Socket, (struct sockaddr *)&Addr, sizeof(Addr)) == -1) { + // Grab error code from call to ::bind before calling ::close + std::error_code EC = getLastSocketErrorCode(); + ::close(Socket); + return llvm::make_error<StringError>(EC, "Bind error"); } - if (listen(MaybeWinsocket, MaxBacklog) == -1) { + + // Mark socket as passive so incoming connections can be accepted + if (::listen(Socket, MaxBacklog) == -1) return llvm::make_error<StringError>(getLastSocketErrorCode(), "Listen error"); - } - int UnixSocket; + + int PipeFD[2]; #ifdef _WIN32 - UnixSocket = _open_osfhandle(MaybeWinsocket, 0); + // Reserve 1 byte for the pipe and use default textmode + if (::_pipe(PipeFD, 1, 0) == -1) #else - UnixSocket = MaybeWinsocket; + if (::pipe(PipeFD) == -1) +#endif // _WIN32 + return llvm::make_error<StringError>(getLastSocketErrorCode(), + "pipe failed"); + +#ifdef _WIN32 + return ListeningSocket{_open_osfhandle(Socket, 0), SocketPath, PipeFD}; +#else + return ListeningSocket{Socket, SocketPath, PipeFD}; #endif // _WIN32 - return ListeningSocket{UnixSocket, SocketPath}; } -Expected<std::unique_ptr<raw_socket_stream>> ListeningSocket::accept() { - int AcceptFD; +Expected<std::unique_ptr<raw_socket_stream>> +ListeningSocket::accept(std::chrono::milliseconds Timeout) { + + struct pollfd FDs[2]; + FDs[0].events = POLLIN; #ifdef _WIN32 SOCKET WinServerSock = _get_osfhandle(FD); + FDs[0].fd = WinServerSock; +#else + FDs[0].fd = FD; +#endif + FDs[1].events = POLLIN; + FDs[1].fd = PipeFD[0]; + + // Keep track of how much time has passed in case poll is interrupted by a + // signal and needs to be called again + int RemainingTime = Timeout.count(); + std::chrono::milliseconds ElapsedTime = std::chrono::milliseconds(0); + int PollStatus = -1; + + while (PollStatus == -1 && (Timeout.count() == -1 || ElapsedTime < Timeout)) { + if (Timeout.count() != -1) + RemainingTime -= ElapsedTime.count(); + + auto Start = std::chrono::steady_clock::now(); +#ifdef _WIN32 + PollStatus = WSAPoll(FDs, 2, RemainingTime); + if (PollStatus == SOCKET_ERROR) { +#else + PollStatus = ::poll(FDs, 2, RemainingTime); + if (PollStatus ==
-1) { +#endif + // Ignore error if caused by interrupting signal + std::error_code PollErrCode = getLastSocketErrorCode(); + if (PollErrCode != std::errc::interrupted) + return llvm::make_error<StringError>(PollErrCode, "FD poll failed"); + } + + if (PollStatus == 0) + return llvm::make_error<StringError>( + std::make_error_code(std::errc::timed_out), + "No client requests within timeout window"); + + if (FDs[0].revents & POLLNVAL) + return llvm::make_error<StringError>( + std::make_error_code(std::errc::bad_file_descriptor), + "File descriptor closed by another thread"); + + if (FDs[1].revents & POLLIN) + return llvm::make_error<StringError>( + std::make_error_code(std::errc::operation_canceled), + "Accept canceled"); + + auto Stop = std::chrono::steady_clock::now(); + ElapsedTime += + std::chrono::duration_cast<std::chrono::milliseconds>(Stop - Start); + } + + int AcceptFD; +#ifdef _WIN32 SOCKET WinAcceptSock = ::accept(WinServerSock, NULL, NULL); AcceptFD = _open_osfhandle(WinAcceptSock, 0); #else AcceptFD = ::accept(FD, NULL, NULL); -#endif //_WIN32 +#endif + if (AcceptFD == -1) return llvm::make_error<StringError>(getLastSocketErrorCode(), - "Accept failed"); + "Socket accept failed"); return std::make_unique<raw_socket_stream>(AcceptFD); } -ListeningSocket::~ListeningSocket() { - if (FD == -1) +void ListeningSocket::shutdown() { + int ObservedFD = FD.load(); + + if (ObservedFD == -1) return; - ::close(FD); - unlink(SocketPath.c_str()); -} -static Expected<int> GetSocketFD(StringRef SocketPath) { -#ifdef _WIN32 - SOCKET MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0); - if (MaybeWinsocket == INVALID_SOCKET) { -#else - int MaybeWinsocket = socket(AF_UNIX, SOCK_STREAM, 0); - if (MaybeWinsocket == -1) { -#endif // _WIN32 - return llvm::make_error<StringError>(getLastSocketErrorCode(), - "Create socket failed"); - } + // If FD still equals ObservedFD, set FD to -1; if it does not, another + // thread is responsible for shutdown, so return + if (!FD.compare_exchange_strong(ObservedFD, -1)) + return; - struct sockaddr_un Addr; - memset(&Addr, 0, sizeof(Addr)); - Addr.sun_family = AF_UNIX; - strncpy(Addr.sun_path, SocketPath.str().c_str(), sizeof(Addr.sun_path) - 1); + ::close(ObservedFD); + ::unlink(SocketPath.c_str()); - int status = connect(MaybeWinsocket, (struct sockaddr *)&Addr, sizeof(Addr)); - if (status == -1) { - return llvm::make_error<StringError>(getLastSocketErrorCode(), - "Connect socket failed"); - } -#ifdef _WIN32 - return _open_osfhandle(MaybeWinsocket, 0); -#else - return MaybeWinsocket; -#endif // _WIN32 + // Ensure ::poll returns if shutdown is called by a separate thread + char Byte = 'A'; + ::write(PipeFD[1], &Byte, 1); } +ListeningSocket::~ListeningSocket() { + shutdown(); + + // Close the pipe's FDs in the destructor instead of within + // ListeningSocket::shutdown to avoid unnecessary synchronization issues that + // would occur as PipeFD's values would have to be changed to -1 + // + // The move constructor sets PipeFD to -1 + if (PipeFD[0] != -1) + ::close(PipeFD[0]); + if (PipeFD[1] != -1) + ::close(PipeFD[1]); +} + +//===----------------------------------------------------------------------===// +// raw_socket_stream +//===----------------------------------------------------------------------===// + raw_socket_stream::raw_socket_stream(int SocketFD) : raw_fd_stream(SocketFD, true) {} @@ -162,11 +294,10 @@ raw_socket_stream::createConnectedUnix(StringRef SocketPath) { #ifdef _WIN32 WSABalancer _; #endif // _WIN32 - Expected<int> FD = GetSocketFD(SocketPath); + Expected<int> FD = getSocketFD(SocketPath); if (!FD) return FD.takeError(); return std::make_unique<raw_socket_stream>(*FD); }
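// A minimal usage sketch of the API above (illustrative only, not part of the
// patch). It assumes the ListeningSocket / raw_socket_stream declarations
// match the definitions in this file; the socket path and the function name
// demoAcceptWithCancellation are hypothetical. It exercises the new behavior:
// accept() blocks in poll() with a timeout, and shutdown() called from a
// second thread cancels the wait by writing a byte into the internal pipe.
#include "llvm/Support/raw_socket_stream.h"
#include <chrono>
#include <thread>
#include <utility>

llvm::Error demoAcceptWithCancellation() {
  using namespace llvm;
  Expected<ListeningSocket> MaybeLS =
      ListeningSocket::createUnix("/tmp/llvm-demo.sock", /*MaxBacklog=*/1);
  if (!MaybeLS)
    return MaybeLS.takeError();
  ListeningSocket LS = std::move(*MaybeLS);

  // Cancel the pending accept() from a second thread after one second.
  std::thread Canceller([&LS] {
    std::this_thread::sleep_for(std::chrono::seconds(1));
    LS.shutdown(); // Wakes the poll() inside accept() via the pipe.
  });

  // Blocks for at most five seconds: returns errc::operation_canceled if
  // shutdown() fires first, or errc::timed_out if nothing happens in time.
  Expected<std::unique_ptr<raw_socket_stream>> Client =
      LS.accept(std::chrono::milliseconds(5000));
  Canceller.join();
  if (!Client)
    return Client.takeError();

  (*Client)->write("hello", 5); // raw_socket_stream is a raw_fd_stream.
  return Error::success();
}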
raw_socket_stream::~raw_socket_stream() {} - diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 6425aa9b091f7..741c97a3dc009 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -212,7 +212,7 @@ def FeatureStrictAlign : SubtargetFeature<"strict-align", "Disallow all unaligned memory " "access">; -foreach i = {1-7,9-15,18,20-28,30} in +foreach i = {1-7,9-15,18,20-28} in def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true", "Reserve X"#i#", making it unavailable " "as a GPR">; @@ -391,9 +391,18 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", "equivalent when the immediate does " "not fit in the encoding.">; -def FeatureAddrLSLFast : SubtargetFeature< - "addr-lsl-fast", "HasAddrLSLFast", "true", - "Address operands with logical shift of up to 3 places are cheap">; +// Address operands with shift amount 2 or 3 are fast on all Arm chips except +// some old Apple cores (A7-A10?) which handle all shifts slowly. Cortex-A57 +// and derived designs through Cortex-X1 take an extra micro-op for shifts +// of 1 or 4. Other Arm chips handle all shifted operands at the same speed +// as unshifted operands. +// +// We don't try to model the behavior of the old Apple cores because new code +// targeting A7 is very unlikely to actually run on an A7. The Cortex cores +// are modeled by FeatureAddrLSLSlow14. +def FeatureAddrLSLSlow14 : SubtargetFeature< + "addr-lsl-slow-14", "HasAddrLSLSlow14", "true", + "Address operands with shift amount of 1 or 4 are slow">; def FeatureALULSLFast : SubtargetFeature< "alu-lsl-fast", "HasALULSLFast", "true", @@ -885,6 +894,7 @@ def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", FeatureBalanceFPOps, FeatureFuseAdrpAdd, FeatureFuseLiterals, + FeatureAddrLSLSlow14, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -903,6 +913,7 @@ def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", FeatureFuseAES, FeatureFuseAdrpAdd, FeatureFuseLiterals, + FeatureAddrLSLSlow14, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -910,6 +921,7 @@ def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", "Cortex-A73 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -917,6 +929,7 @@ def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", "Cortex-A75 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -924,7 +937,7 @@ def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", "Cortex-A76 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -934,7 +947,7 @@ def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -944,7 +957,7 @@ def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -956,7 +969,7 @@ def TuneA78AE : 
SubtargetFeature<"a78ae", "ARMProcFamily", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -968,7 +981,7 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -979,7 +992,6 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -990,7 +1002,6 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715", FeatureFuseAES, FeaturePostRAScheduler, FeatureCmpBccFusion, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureEnableSelectOptimize, @@ -1001,7 +1012,6 @@ def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720", FeatureFuseAES, FeaturePostRAScheduler, FeatureCmpBccFusion, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureEnableSelectOptimize, @@ -1012,7 +1022,6 @@ def TuneA720AE : SubtargetFeature<"a720ae", "ARMProcFamily", "CortexA720", FeatureFuseAES, FeaturePostRAScheduler, FeatureCmpBccFusion, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureEnableSelectOptimize, @@ -1028,7 +1037,7 @@ def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1039,7 +1048,6 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1047,7 +1055,6 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3", "Cortex-X3 ARM processors", [ - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureFuseAES, @@ -1057,7 +1064,6 @@ def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3", def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4", "Cortex-X4 ARM processors", [ - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureFuseAES, @@ -1215,7 +1221,6 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", FeatureFuseAdrpAdd, FeatureFuseLiterals, FeatureStorePairSuppress, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive]>; @@ -1234,7 +1239,6 @@ def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", FeatureFuseAdrpAdd, FeatureFuseLiterals, FeatureStorePairSuppress, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureZCZeroing]>; @@ -1244,7 +1248,6 @@ def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureZCZeroing, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureStorePairSuppress]>; @@ -1254,7 +1257,6 @@ def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", FeaturePredictableSelectIsExpensive, FeatureZCZeroing, FeatureStorePairSuppress, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureSlowSTRQro]>; @@ -1268,7 +1270,7 @@ def TuneNeoverseN1 : 
SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1 "Neoverse N1 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1278,7 +1280,6 @@ def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2 "Neoverse N2 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1288,7 +1289,6 @@ def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Ne "Neoverse 512-TVB ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1298,7 +1298,7 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1 "Neoverse V1 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1309,7 +1309,6 @@ def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2 "Neoverse V2 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1321,7 +1320,6 @@ def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", FeaturePredictableSelectIsExpensive, FeatureZCZeroing, FeatureStorePairSuppress, - FeatureAddrLSLFast, FeatureALULSLFast]>; def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99", @@ -1381,7 +1379,6 @@ def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", FeaturePostRAScheduler, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureAggressiveFMA, FeatureArithmeticBccFusion, @@ -1397,7 +1394,6 @@ def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A", FeaturePostRAScheduler, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureAggressiveFMA, FeatureArithmeticBccFusion, @@ -1414,7 +1410,6 @@ def TuneAmpere1B : SubtargetFeature<"ampere1b", "ARMProcFamily", "Ampere1B", FeaturePostRAScheduler, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureAggressiveFMA, FeatureArithmeticBccFusion, diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 4fa719ad67cf3..f6ccd0ecfdc89 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -268,13 +268,19 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) { if (Sign->getZExtValue()) Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_PAC; - if (Flags == 0) - return; + uint64_t PAuthABIPlatform = -1; + if (const auto *PAP = mdconst::extract_or_null( + M.getModuleFlag("aarch64-elf-pauthabi-platform"))) + PAuthABIPlatform = PAP->getZExtValue(); + uint64_t PAuthABIVersion = -1; + if (const auto *PAV = mdconst::extract_or_null( + M.getModuleFlag("aarch64-elf-pauthabi-version"))) + PAuthABIVersion = PAV->getZExtValue(); // Emit a .note.gnu.property section with the flags. 
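+  // With the change below the note can describe two properties: the BTI/PAC
+  // feature bits (GNU_PROPERTY_AARCH64_FEATURE_1_AND) and, when the pauthabi
+  // module flags above are set, a GNU_PROPERTY_AARCH64_FEATURE_PAUTH record
+  // carrying the platform/version pair.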
auto *TS = static_cast(OutStreamer->getTargetStreamer()); - TS->emitNoteSection(Flags); + TS->emitNoteSection(Flags, PAuthABIPlatform, PAuthABIVersion); } void AArch64AsmPrinter::emitFunctionHeaderComment() { diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 1e1c6ece85b24..10cad6d192440 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -114,6 +114,13 @@ def ext: GICombineRule < (apply [{ applyEXT(*${root}, ${matchinfo}); }]) >; +def insertelt_nonconst: GICombineRule < + (defs root:$root, shuffle_matchdata:$matchinfo), + (match (wip_match_opcode G_INSERT_VECTOR_ELT):$root, + [{ return matchNonConstInsert(*${root}, MRI); }]), + (apply [{ applyNonConstInsert(*${root}, MRI, B); }]) +>; + def shuf_to_ins_matchdata : GIDefMatchData<"std::tuple">; def shuf_to_ins: GICombineRule < (defs root:$root, shuf_to_ins_matchdata:$matchinfo), @@ -140,8 +147,7 @@ def form_duplane : GICombineRule < >; def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, - form_duplane, - shuf_to_ins]>; + form_duplane, shuf_to_ins]>; // Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's def vector_unmerge_lowering : GICombineRule < @@ -269,7 +275,7 @@ def AArch64PostLegalizerLowering lower_vector_fcmp, form_truncstore, vector_sext_inreg_to_shift, unmerge_ext_to_unmerge, lower_mull, - vector_unmerge_lowering]> { + vector_unmerge_lowering, insertelt_nonconst]> { } // Post-legalization combines which are primarily optimizations. diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 163ed520a8a67..51bec3604026b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -462,7 +462,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { SDValue &Offset, SDValue &SignExtend, SDValue &DoShift); bool isWorthFoldingALU(SDValue V, bool LSL = false) const; - bool isWorthFoldingAddr(SDValue V) const; + bool isWorthFoldingAddr(SDValue V, unsigned Size) const; bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, SDValue &Offset, SDValue &SignExtend); @@ -674,17 +674,22 @@ static bool isWorthFoldingSHL(SDValue V) { /// Determine whether it is worth folding V into an extended register addressing /// mode. -bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V) const { +bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const { // Trivial if we are optimizing for code size or if there is only // one use of the value. if (CurDAG->shouldOptForSize() || V.hasOneUse()) return true; - // If a subtarget has a fastpath LSL we can fold a logical shift into - // the addressing mode and save a cycle. - if (Subtarget->hasAddrLSLFast() && V.getOpcode() == ISD::SHL && - isWorthFoldingSHL(V)) + + // If a subtarget has a slow shift, folding a shift into multiple loads + // costs additional micro-ops. + if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16)) + return false; + + // Check whether we're going to emit the address arithmetic anyway because + // it's used by a non-address operation.
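+  // E.g. a (shl x, 3) feeding only 8-byte loads/stores can be folded into
+  // each access as "[base, x, lsl #3]", so the standalone shift disappears
+  // and folding is a clear win.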
+ if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V)) return true; - if (Subtarget->hasAddrLSLFast() && V.getOpcode() == ISD::ADD) { + if (V.getOpcode() == ISD::ADD) { const SDValue LHS = V.getOperand(0); const SDValue RHS = V.getOperand(1); if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) @@ -1203,7 +1208,7 @@ bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, if (ShiftVal != 0 && ShiftVal != LegalShiftVal) return false; - return isWorthFoldingAddr(N); + return isWorthFoldingAddr(N, Size); } bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, @@ -1231,7 +1236,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, } // Remember if it is worth folding N when it produces extended register. - bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N); + bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size); // Try to match a shifted extend on the RHS. if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && @@ -1261,7 +1266,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, MVT::i32); - if (isWorthFoldingAddr(LHS)) + if (isWorthFoldingAddr(LHS, Size)) return true; } @@ -1273,7 +1278,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, MVT::i32); - if (isWorthFoldingAddr(RHS)) + if (isWorthFoldingAddr(RHS, Size)) return true; } @@ -1343,7 +1348,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, } // Remember if it is worth folding N when it produces extended register. - bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N); + bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size); // Try to match a shifted extend on the RHS. if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8218960406ec1..744b2cdef504d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1716,40 +1716,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setMaxAtomicSizeInBitsSupported(128); if (Subtarget->isWindowsArm64EC()) { - // FIXME: are there other intrinsics we need to add here? 
- setLibcallName(RTLIB::MEMCPY, "#memcpy"); - setLibcallName(RTLIB::MEMSET, "#memset"); - setLibcallName(RTLIB::MEMMOVE, "#memmove"); - setLibcallName(RTLIB::REM_F32, "#fmodf"); - setLibcallName(RTLIB::REM_F64, "#fmod"); - setLibcallName(RTLIB::FMA_F32, "#fmaf"); - setLibcallName(RTLIB::FMA_F64, "#fma"); - setLibcallName(RTLIB::SQRT_F32, "#sqrtf"); - setLibcallName(RTLIB::SQRT_F64, "#sqrt"); - setLibcallName(RTLIB::CBRT_F32, "#cbrtf"); - setLibcallName(RTLIB::CBRT_F64, "#cbrt"); - setLibcallName(RTLIB::LOG_F32, "#logf"); - setLibcallName(RTLIB::LOG_F64, "#log"); - setLibcallName(RTLIB::LOG2_F32, "#log2f"); - setLibcallName(RTLIB::LOG2_F64, "#log2"); - setLibcallName(RTLIB::LOG10_F32, "#log10f"); - setLibcallName(RTLIB::LOG10_F64, "#log10"); - setLibcallName(RTLIB::EXP_F32, "#expf"); - setLibcallName(RTLIB::EXP_F64, "#exp"); - setLibcallName(RTLIB::EXP2_F32, "#exp2f"); - setLibcallName(RTLIB::EXP2_F64, "#exp2"); - setLibcallName(RTLIB::EXP10_F32, "#exp10f"); - setLibcallName(RTLIB::EXP10_F64, "#exp10"); - setLibcallName(RTLIB::SIN_F32, "#sinf"); - setLibcallName(RTLIB::SIN_F64, "#sin"); - setLibcallName(RTLIB::COS_F32, "#cosf"); - setLibcallName(RTLIB::COS_F64, "#cos"); - setLibcallName(RTLIB::POW_F32, "#powf"); - setLibcallName(RTLIB::POW_F64, "#pow"); - setLibcallName(RTLIB::LDEXP_F32, "#ldexpf"); - setLibcallName(RTLIB::LDEXP_F64, "#ldexp"); - setLibcallName(RTLIB::FREXP_F32, "#frexpf"); - setLibcallName(RTLIB::FREXP_F64, "#frexp"); + // FIXME: are there intrinsics we need to exclude from this? + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { + auto code = static_cast(i); + auto libcallName = getLibcallName(code); + if ((libcallName != nullptr) && (libcallName[0] != '#')) { + setLibcallName(code, Saver.save(Twine("#") + libcallName).data()); + } + } } } @@ -17956,16 +17930,14 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); + const auto &Subtarget = DAG.getSubtarget(); if (!VT.isVector()) return SDValue(); - // The combining code currently only works for NEON vectors. In particular, - // it does not work for SVE when dealing with vectors wider than 128 bits. - // It also doesn't work for streaming mode because it causes generating - // bsl instructions that are invalid in streaming mode. - if (TLI.useSVEForFixedLengthVectorVT( - VT, !DAG.getSubtarget().isNeonAvailable())) + // The combining code works for NEON, SVE2 and SME. + if (TLI.useSVEForFixedLengthVectorVT(VT, !Subtarget.isNeonAvailable()) || + (VT.isScalableVector() && !Subtarget.hasSVE2())) return SDValue(); SDValue N0 = N->getOperand(0); @@ -18020,6 +17992,14 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, uint64_t BitMask = Bits == 64 ? 
-1ULL : ((1ULL << Bits) - 1); for (int i = 1; i >= 0; --i) for (int j = 1; j >= 0; --j) { + APInt Val1, Val2; + + if (ISD::isConstantSplatVector(N0->getOperand(i).getNode(), Val1) && + ISD::isConstantSplatVector(N1->getOperand(j).getNode(), Val2) && + (BitMask & ~Val1.getZExtValue()) == Val2.getZExtValue()) { + return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i), + N0->getOperand(1 - i), N1->getOperand(1 - j)); + } BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(i)); BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(j)); if (!BVN0 || !BVN1) @@ -18035,9 +18015,8 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, break; } } - if (FoundMatch) - return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0), + return DAG.getNode(AArch64ISD::BSP, DL, VT, N0->getOperand(i), N0->getOperand(1 - i), N1->getOperand(1 - j)); } @@ -24581,6 +24560,18 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false)) return R; return performFlagSettingCombine(N, DCI, AArch64ISD::SBC); + case AArch64ISD::BICi: { + APInt DemandedBits = + APInt::getAllOnes(N->getValueType(0).getScalarSizeInBits()); + APInt DemandedElts = + APInt::getAllOnes(N->getValueType(0).getVectorNumElements()); + + if (DAG.getTargetLoweringInfo().SimplifyDemandedBits( + SDValue(N, 0), DemandedBits, DemandedElts, DCI)) + return SDValue(); + + break; + } case ISD::XOR: return performXorCombine(N, DAG, DCI, Subtarget); case ISD::MUL: @@ -26025,7 +26016,7 @@ static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) { Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { // Android provides a fixed TLS slot for the stack cookie. See the definition // of TLS_SLOT_STACK_GUARD in - // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h + // https://android.googlesource.com/platform/bionic/+/main/libc/platform/bionic/tls_defines.h if (Subtarget->isTargetAndroid()) return UseTlsOffset(IRB, 0x28); @@ -27621,6 +27612,24 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode( // used - simplify to just Val. return TLO.CombineTo(Op, ShiftR->getOperand(0)); } + case AArch64ISD::BICi: { + // Fold BICi if all destination bits already known to be zeroed + SDValue Op0 = Op.getOperand(0); + KnownBits KnownOp0 = + TLO.DAG.computeKnownBits(Op0, OriginalDemandedElts, Depth + 1); + // Op0 &= ~(ConstantOperandVal(1) << ConstantOperandVal(2)) + uint64_t BitsToClear = Op->getConstantOperandVal(1) + << Op->getConstantOperandVal(2); + APInt AlreadyZeroedBitsToClear = BitsToClear & KnownOp0.Zero; + if (APInt(Known.getBitWidth(), BitsToClear) + .isSubsetOf(AlreadyZeroedBitsToClear)) + return TLO.CombineTo(Op, Op0); + + Known = KnownOp0 & + KnownBits::makeConstant(APInt(Known.getBitWidth(), ~BitsToClear)); + + return false; + } case ISD::INTRINSIC_WO_CHAIN: { if (auto ElementSize = IsSVECntIntrinsic(Op)) { unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 3465f3be88754..18439dc7f0102 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -1003,6 +1003,9 @@ class AArch64TargetLowering : public TargetLowering { /// make the right decision when generating code for different targets. 
const AArch64Subtarget *Subtarget; + llvm::BumpPtrAllocator BumpAlloc; + llvm::StringSaver Saver{BumpAlloc}; + bool isExtFreeImpl(const Instruction *Ext) const override; void addTypeForNEON(MVT VT); diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td index 0002db52b1995..de94cf64c9801 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td @@ -547,10 +547,10 @@ let Predicates = [HasLSE] in { let Predicates = [HasRCPC3, HasNEON] in { // LDAP1 loads def : Pat<(vector_insert (v2i64 VecListOne128:$Rd), - (i64 (acquiring_load GPR64sp:$Rn)), VectorIndexD:$idx), + (i64 (acquiring_load GPR64sp:$Rn)), (i64 VectorIndexD:$idx)), (LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>; def : Pat<(vector_insert (v2f64 VecListOne128:$Rd), - (f64 (bitconvert (i64 (acquiring_load GPR64sp:$Rn)))), VectorIndexD:$idx), + (f64 (bitconvert (i64 (acquiring_load GPR64sp:$Rn)))), (i64 VectorIndexD:$idx)), (LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>; def : Pat<(v1i64 (scalar_to_vector (i64 (acquiring_load GPR64sp:$Rn)))), diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 8360bef8e2f82..1f437d0ed6f8d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -7983,7 +7983,7 @@ class SIMDInsFromMain { + (vector_insert (vectype V128:$Rd), regtype:$Rn, (i64 idxtype:$idx)))]> { let Inst{14-11} = 0b0011; } @@ -7997,8 +7997,8 @@ class SIMDInsFromElement; + (elttype (vector_extract (vectype V128:$Rn), (i64 idxtype:$idx2))), + (i64 idxtype:$idx)))]>; class SIMDInsMainMovAlias diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index d0c5e6b99e9ee..9783b33219460 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2993,7 +2993,7 @@ bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, return false; Shift = AArch64_AM::getShiftValue(Shift); if (!OptSize) { - if ((Shift != 2 && Shift != 3) || !Subtarget.hasAddrLSLFast()) + if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14()) return false; if (avoidSlowSTRQ(MemI)) return false; @@ -4807,29 +4807,20 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::STRBui; break; - case 2: + case 2: { + bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC); if (AArch64::FPR16RegClass.hasSubClassEq(RC)) Opc = AArch64::STRHui; - else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { + else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVEorSME() && "Unexpected register store without SVE store instructions"); - Opc = AArch64::STR_PXI; - StackID = TargetStackID::ScalableVector; - } else if (AArch64::PNRRegClass.hasSubClassEq(RC)) { - assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && + assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && "Unexpected register store without SVE2p1 or SME2"); - if (SrcReg.isVirtual()) { - auto NewSrcReg = - MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass); - BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), NewSrcReg) - .addReg(SrcReg); - SrcReg = NewSrcReg; - } else - SrcReg = (SrcReg - AArch64::PN0) + AArch64::P0; Opc = AArch64::STR_PXI; StackID = TargetStackID::ScalableVector; } break; + } case 4: if 
(AArch64::GPR32allRegClass.hasSubClassEq(RC)) { Opc = AArch64::STRWui; @@ -4990,26 +4981,22 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRBui; break; - case 2: + case 2: { + bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC); if (AArch64::FPR16RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRHui; - else if (AArch64::PPRRegClass.hasSubClassEq(RC)) { + else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) { assert(Subtarget.hasSVEorSME() && "Unexpected register load without SVE load instructions"); - Opc = AArch64::LDR_PXI; - StackID = TargetStackID::ScalableVector; - } else if (AArch64::PNRRegClass.hasSubClassEq(RC)) { - assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && + assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) && "Unexpected register load without SVE2p1 or SME2"); - PNRReg = DestReg; - if (DestReg.isVirtual()) - DestReg = MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass); - else - DestReg = (DestReg - AArch64::PN0) + AArch64::P0; + if (IsPNR) + PNRReg = DestReg; Opc = AArch64::LDR_PXI; StackID = TargetStackID::ScalableVector; } break; + } case 4: if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { Opc = AArch64::LDRWui; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index b1f514f75207f..e1624f70185e1 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6601,6 +6601,15 @@ def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndex VectorIndexB:$imm, GPR32:$Rm), dsub)>; +def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))), + (EXTRACT_SUBREG + (INSvi8lane (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)), + VectorIndexB:$imm, (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0)), + dsub)>; +def : Pat<(v16i8 (vector_insert (v16i8 V128:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))), + (INSvi8lane V128:$Rn, VectorIndexB:$imm, + (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0))>; + // Copy an element at a constant index in one vector into a constant indexed // element of another. 
// FIXME refactor to a shared class/dev parameterized on vector type, vector @@ -6633,26 +6642,26 @@ def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane multiclass Neon_INS_elt_pattern { def : Pat<(VT128 (vector_insert V128:$src, - (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), - imm:$Immd)), + (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))), + (i64 imm:$Immd))), (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>; def : Pat<(VT128 (vector_insert V128:$src, - (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), - imm:$Immd)), + (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))), + (i64 imm:$Immd))), (INS V128:$src, imm:$Immd, (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>; def : Pat<(VT64 (vector_insert V64:$src, - (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)), - imm:$Immd)), + (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))), + (i64 imm:$Immd))), (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd, V128:$Rn, imm:$Immn), dsub)>; def : Pat<(VT64 (vector_insert V64:$src, - (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)), - imm:$Immd)), + (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))), + (i64 imm:$Immd))), (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd, (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn), @@ -6671,14 +6680,14 @@ defm : Neon_INS_elt_pattern; // Insert from bitcast // vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0) -def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)), +def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))), (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>; -def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)), +def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))), (EXTRACT_SUBREG (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)), imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0), dsub)>; -def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)), +def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), (i64 imm:$Immd))), (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>; // bitcast of an extract @@ -8100,7 +8109,7 @@ def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))), class Ld1Lane128Pat : Pat<(vector_insert (VTy VecListOne128:$Rd), - (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), + (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)), (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; def : Ld1Lane128Pat; @@ -8123,14 +8132,14 @@ class Ld1Lane128IdxOpPat : Pat<(vector_insert (VTy VecListOne128:$Rd), - (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), + (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)), (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>; class Ld1Lane64IdxOpPat : Pat<(vector_insert (VTy VecListOne64:$Rd), - (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), + (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)), (EXTRACT_SUBREG (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), (IdxOp VecIndex:$idx), GPR64sp:$Rn), @@ -8170,7 +8179,7 @@ let Predicates = [IsNeonAvailable] in { class Ld1Lane64Pat : Pat<(vector_insert (VTy VecListOne64:$Rd), - (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), + (STy (scalar_load GPR64sp:$Rn)), (i64 
VecIndex:$idx)), (EXTRACT_SUBREG (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), VecIndex:$idx, GPR64sp:$Rn), diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp index 6dfb2b9df7135..a9bd8d877fb2e 100644 --- a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp @@ -190,17 +190,23 @@ AArch64LoopIdiomTransformPass::run(Loop &L, LoopAnalysisManager &AM, bool AArch64LoopIdiomTransform::run(Loop *L) { CurLoop = L; - if (DisableAll || L->getHeader()->getParent()->hasOptSize()) + Function &F = *L->getHeader()->getParent(); + if (DisableAll || F.hasOptSize()) return false; + if (F.hasFnAttribute(Attribute::NoImplicitFloat)) { + LLVM_DEBUG(dbgs() << DEBUG_TYPE << " is disabled on " << F.getName() + << " due to its NoImplicitFloat attribute"); + return false; + } + // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. if (!L->getLoopPreheader()) return false; - LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F[" - << CurLoop->getHeader()->getParent()->getName() - << "] Loop %" << CurLoop->getHeader()->getName() << "\n"); + LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F[" << F.getName() << "] Loop %" + << CurLoop->getHeader()->getName() << "\n"); return recognizeByteCompare(); } diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index fef1748021b07..80d0f9c57f4b3 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -953,17 +953,6 @@ class PNRAsmOperand: AsmOperandClass { let ParserMethod = "tryParseSVEPredicateVector"; } -let RenderMethod = "addPNRasPPRRegOperands" in { - def PNRasPPROpAny : PNRAsmOperand<"PNRasPPRPredicateAny", "PNR", 0>; - def PNRasPPROp8 : PNRAsmOperand<"PNRasPPRPredicateB", "PNR", 8>; -} - -class PNRasPPRRegOp : SVERegOp {} - -def PNRasPPRAny : PNRasPPRRegOp<"", PNRasPPROpAny, ElementSizeNone, PPR>; -def PNRasPPR8 : PNRasPPRRegOp<"b", PNRasPPROp8, ElementSizeB, PPR>; - def PNRAsmOpAny: PNRAsmOperand<"PNPredicateAny", "PNR", 0>; def PNRAsmOp8 : PNRAsmOperand<"PNPredicateB", "PNR", 8>; def PNRAsmOp16 : PNRAsmOperand<"PNPredicateH", "PNR", 16>; @@ -1004,6 +993,29 @@ let Namespace = "AArch64" in { def psub1 : SubRegIndex<16, -1>; } +class PPRorPNRClass : RegisterClass< + "AArch64", + [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16, + (add PPR, PNR)> { + let Size = 16; +} + +class PPRorPNRAsmOperand: AsmOperandClass { + let Name = "SVE" # name # "Reg"; + let PredicateMethod = "isSVEPredicateOrPredicateAsCounterRegOfWidth<" + # Width # ", " # "AArch64::" + # RegClass # "RegClassID>"; + let DiagnosticType = "InvalidSVE" # name # "Reg"; + let RenderMethod = "addPPRorPNRRegOperands"; + let ParserMethod = "tryParseSVEPredicateOrPredicateAsCounterVector"; +} + +def PPRorPNR : PPRorPNRClass; +def PPRorPNRAsmOp8 : PPRorPNRAsmOperand<"PPRorPNRB", "PPRorPNR", 8>; +def PPRorPNRAsmOpAny : PPRorPNRAsmOperand<"PPRorPNRAny", "PPRorPNR", 0>; +def PPRorPNRAny : PPRRegOp<"", PPRorPNRAsmOpAny, ElementSizeNone, PPRorPNR>; +def PPRorPNR8 : PPRRegOp<"b", PPRorPNRAsmOp8, ElementSizeB, PPRorPNR>; + // Pairs of SVE predicate vector registers. 
def PSeqPairs : RegisterTuples<[psub0, psub1], [(rotl PPR, 0), (rotl PPR, 1)]>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index dd5e11c0f5e35..9c747198c12d8 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -487,6 +487,27 @@ def AArch64fmaxnm_m1 : VSelectCommPredOrPassthruPatFrags; def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags; +def AArch64fadd : PatFrags<(ops node:$op1, node:$op2), + [(fadd node:$op1, node:$op2), + (AArch64fadd_p (SVEAllActive), node:$op1, node:$op2)]>; + +def AArch64fmul : PatFrags<(ops node:$op1, node:$op2), + [(fmul node:$op1, node:$op2), + (AArch64fmul_p (SVEAllActive), node:$op1, node:$op2)]>; + +def AArch64fsub : PatFrags<(ops node:$op1, node:$op2), + [(fsub node:$op1, node:$op2), + (AArch64fsub_p (SVEAllActive), node:$op1, node:$op2)]>; + +def AArch64mul : PatFrag<(ops node:$op1, node:$op2), + (AArch64mul_p (SVEAnyPredicate), node:$op1, node:$op2)>; + +def AArch64smulh : PatFrag<(ops node:$op1, node:$op2), + (AArch64smulh_p (SVEAnyPredicate), node:$op1, node:$op2)>; + +def AArch64umulh : PatFrag<(ops node:$op1, node:$op2), + (AArch64umulh_p (SVEAnyPredicate), node:$op1, node:$op2)>; + let Predicates = [HasSVE] in { def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>; def RDFFRS_PPz : sve_int_rdffr_pred<0b1, "rdffrs">; @@ -705,9 +726,9 @@ let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in { } // End HasSVEorSME, UseExperimentalZeroingPseudos let Predicates = [HasSVEorSME] in { - defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>; - defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>; - defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>; + defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", AArch64fadd>; + defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", AArch64fsub>; + defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", AArch64fmul>; } // End HasSVEorSME let Predicates = [HasSVE] in { @@ -3402,9 +3423,9 @@ let Predicates = [HasSVE2orSME] in { defm SQRDMULH_ZZZ : sve2_int_mul<0b101, "sqrdmulh", int_aarch64_sve_sqrdmulh>; // SVE2 integer multiply vectors (unpredicated) - defm MUL_ZZZ : sve2_int_mul<0b000, "mul", null_frag, AArch64mul_p>; - defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", null_frag, AArch64smulh_p>; - defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", null_frag, AArch64umulh_p>; + defm MUL_ZZZ : sve2_int_mul<0b000, "mul", AArch64mul>; + defm SMULH_ZZZ : sve2_int_mul<0b010, "smulh", AArch64smulh>; + defm UMULH_ZZZ : sve2_int_mul<0b011, "umulh", AArch64umulh>; defm PMUL_ZZZ : sve2_int_mul_single<0b001, "pmul", int_aarch64_sve_pmul>; // SVE2 complex integer dot product (indexed) @@ -4013,20 +4034,10 @@ let Predicates = [HasSVEorSME] in { // Aliases for existing SVE instructions for which predicate-as-counter are // accepted as an operand to the instruction -def : InstAlias<"ldr $Pt, [$Rn, $imm9, mul vl]", - (LDR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>; -def : InstAlias<"ldr $Pt, [$Rn]", - (LDR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, 0), 0>; - -def : InstAlias<"str $Pt, [$Rn, $imm9, mul vl]", - (STR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>; -def : InstAlias<"str $Pt, [$Rn]", - (STR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, 0), 0>; - def : InstAlias<"mov $Pd, $Pn", - (ORR_PPzPP PNRasPPR8:$Pd, PNRasPPR8:$Pn, PNRasPPR8:$Pn, PNRasPPR8:$Pn), 0>; + (ORR_PPzPP PPRorPNR8:$Pd, PPRorPNR8:$Pn, PPRorPNR8:$Pn, PPRorPNR8:$Pn), 0>; -def : 
InstAlias<"pfalse\t$Pd", (PFALSE PNRasPPR8:$Pd), 0>; +def : InstAlias<"pfalse\t$Pd", (PFALSE PPRorPNR8:$Pd), 0>; } @@ -4059,9 +4070,9 @@ defm BFADD_ZPmZZ : sve2p1_bf_2op_p_zds<0b0000, "bfadd", "BFADD_ZPZZ", AArch64fad defm BFSUB_ZPmZZ : sve2p1_bf_2op_p_zds<0b0001, "bfsub", "BFSUB_ZPZZ", AArch64fsub_m1, DestructiveBinaryComm>; defm BFMUL_ZPmZZ : sve2p1_bf_2op_p_zds<0b0010, "bfmul", "BFMUL_ZPZZ", AArch64fmul_m1, DestructiveBinaryComm>; -defm BFADD_ZZZ : sve2p1_bf_3op_u_zd<0b000, "bfadd", fadd, AArch64fadd_p>; -defm BFSUB_ZZZ : sve2p1_bf_3op_u_zd<0b001, "bfsub", fsub, AArch64fsub_p>; -defm BFMUL_ZZZ : sve2p1_bf_3op_u_zd<0b010, "bfmul", fmul, AArch64fmul_p>; +defm BFADD_ZZZ : sve2p1_bf_3op_u_zd<0b000, "bfadd", AArch64fadd>; +defm BFSUB_ZZZ : sve2p1_bf_3op_u_zd<0b001, "bfsub", AArch64fsub>; +defm BFMUL_ZZZ : sve2p1_bf_3op_u_zd<0b010, "bfmul", AArch64fmul>; defm BFADD_ZPZZ : sve2p1_bf_bin_pred_zds; defm BFSUB_ZPZZ : sve2p1_bf_bin_pred_zds; diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 9e43f206efcf7..19ef6f4fb32e7 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -15,6 +15,12 @@ using namespace llvm; #define DEBUG_TYPE "aarch64-selectiondag-info" +static cl::opt + LowerToSMERoutines("aarch64-lower-to-sme-routines", cl::Hidden, + cl::desc("Enable AArch64 SME memory operations " + "to lower to librt functions"), + cl::init(true)); + SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode, SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, @@ -76,15 +82,79 @@ SDValue AArch64SelectionDAGInfo::EmitMOPS(AArch64ISD::NodeType SDOpcode, } } +SDValue AArch64SelectionDAGInfo::EmitStreamingCompatibleMemLibCall( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, RTLIB::Libcall LC) const { + const AArch64Subtarget &STI = + DAG.getMachineFunction().getSubtarget(); + const AArch64TargetLowering *TLI = STI.getTargetLowering(); + SDValue Symbol; + TargetLowering::ArgListEntry DstEntry; + DstEntry.Ty = PointerType::getUnqual(*DAG.getContext()); + DstEntry.Node = Dst; + TargetLowering::ArgListTy Args; + Args.push_back(DstEntry); + EVT PointerVT = TLI->getPointerTy(DAG.getDataLayout()); + + switch (LC) { + case RTLIB::MEMCPY: { + TargetLowering::ArgListEntry Entry; + Entry.Ty = PointerType::getUnqual(*DAG.getContext()); + Symbol = DAG.getExternalSymbol("__arm_sc_memcpy", PointerVT); + Entry.Node = Src; + Args.push_back(Entry); + break; + } + case RTLIB::MEMMOVE: { + TargetLowering::ArgListEntry Entry; + Entry.Ty = PointerType::getUnqual(*DAG.getContext()); + Symbol = DAG.getExternalSymbol("__arm_sc_memmove", PointerVT); + Entry.Node = Src; + Args.push_back(Entry); + break; + } + case RTLIB::MEMSET: { + TargetLowering::ArgListEntry Entry; + Entry.Ty = Type::getInt32Ty(*DAG.getContext()); + Symbol = DAG.getExternalSymbol("__arm_sc_memset", PointerVT); + Src = DAG.getZExtOrTrunc(Src, DL, MVT::i32); + Entry.Node = Src; + Args.push_back(Entry); + break; + } + default: + return SDValue(); + } + + TargetLowering::ArgListEntry SizeEntry; + SizeEntry.Node = Size; + SizeEntry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Args.push_back(SizeEntry); + assert(Symbol->getOpcode() == ISD::ExternalSymbol && + "Function name is not set"); + + TargetLowering::CallLoweringInfo CLI(DAG); + PointerType *RetTy = PointerType::getUnqual(*DAG.getContext()); + 
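+  // The __arm_sc_* helpers follow the usual mem* signatures in the SME ABI
+  // (e.g. void *__arm_sc_memcpy(void *dst, const void *src, size_t n)),
+  // so the call is typed as returning the destination pointer.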
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee( + TLI->getLibcallCallingConv(LC), RetTy, Symbol, std::move(Args)); + return TLI->LowerCallTo(CLI).second; +} + SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { const AArch64Subtarget &STI = DAG.getMachineFunction().getSubtarget(); + if (STI.hasMOPS()) return EmitMOPS(AArch64ISD::MOPS_MEMCOPY, DAG, DL, Chain, Dst, Src, Size, Alignment, isVolatile, DstPtrInfo, SrcPtrInfo); + + SMEAttrs Attrs(DAG.getMachineFunction().getFunction()); + if (LowerToSMERoutines && !Attrs.hasNonStreamingInterfaceAndBody()) + return EmitStreamingCompatibleMemLibCall(DAG, DL, Chain, Dst, Src, Size, + RTLIB::MEMCPY); return SDValue(); } @@ -95,10 +165,14 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( const AArch64Subtarget &STI = DAG.getMachineFunction().getSubtarget(); - if (STI.hasMOPS()) { + if (STI.hasMOPS()) return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size, Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{}); - } + + SMEAttrs Attrs(DAG.getMachineFunction().getFunction()); + if (LowerToSMERoutines && !Attrs.hasNonStreamingInterfaceAndBody()) + return EmitStreamingCompatibleMemLibCall(DAG, dl, Chain, Dst, Src, Size, + RTLIB::MEMSET); return SDValue(); } @@ -108,10 +182,15 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemmove( MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { const AArch64Subtarget &STI = DAG.getMachineFunction().getSubtarget(); - if (STI.hasMOPS()) { + + if (STI.hasMOPS()) return EmitMOPS(AArch64ISD::MOPS_MEMMOVE, DAG, dl, Chain, Dst, Src, Size, Alignment, isVolatile, DstPtrInfo, SrcPtrInfo); - } + + SMEAttrs Attrs(DAG.getMachineFunction().getFunction()); + if (LowerToSMERoutines && !Attrs.hasNonStreamingInterfaceAndBody()) + return EmitStreamingCompatibleMemLibCall(DAG, dl, Chain, Dst, Src, Size, + RTLIB::MEMMOVE); return SDValue(); } diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h index 73f93724d6fc7..514de44778630 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -47,6 +47,11 @@ class AArch64SelectionDAGInfo : public SelectionDAGTargetInfo { SDValue Chain, SDValue Op1, SDValue Op2, MachinePointerInfo DstPtrInfo, bool ZeroData) const override; + + SDValue EmitStreamingCompatibleMemLibCall(SelectionDAG &DAG, const SDLoc &DL, + SDValue Chain, SDValue Dst, + SDValue Src, SDValue Size, + RTLIB::Libcall LC) const; }; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ee7137b92445b..20150d7386753 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3815,18 +3815,29 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) { return LegalizationCost * LT.first; } -InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, - VectorType *Tp, - ArrayRef Mask, - TTI::TargetCostKind CostKind, - int Index, VectorType *SubTp, - ArrayRef Args) { +InstructionCost AArch64TTIImpl::getShuffleCost( + TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, + TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, + ArrayRef Args, const Instruction *CxtI) { 
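+  // CxtI, when available, is the IR shuffle being costed; the check below
+  // uses it to spot an interleaving shuffle whose single user is a store,
+  // i.e. a pattern that will be selected as ST3/ST4.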
std::pair LT = getTypeLegalizationCost(Tp); + // If we have a Mask, and the LT is being legalized somehow, split the Mask // into smaller vectors and sum the cost of each shuffle. if (!Mask.empty() && isa(Tp) && LT.second.isVector() && Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() && Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) { + + // Check for ST3/ST4 instructions, which are represented in llvm IR as + // store(interleaving-shuffle). The shuffle cost could potentially be free, + // but we model it with a cost of LT.first so that ST3/ST4 have a higher + // cost than just the store. + if (CxtI && CxtI->hasOneUse() && isa(*CxtI->user_begin()) && + (ShuffleVectorInst::isInterleaveMask( + Mask, 4, Tp->getElementCount().getKnownMinValue() * 2) || + ShuffleVectorInst::isInterleaveMask( + Mask, 3, Tp->getElementCount().getKnownMinValue() * 2))) + return LT.first; + unsigned TpNumElts = Mask.size(); unsigned LTNumElts = LT.second.getVectorNumElements(); unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts; @@ -3874,7 +3885,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, if (NumSources <= 2) Cost += getShuffleCost(NumSources <= 1 ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, - NTp, NMask, CostKind, 0, nullptr, Args); + NTp, NMask, CostKind, 0, nullptr, Args, CxtI); else if (any_of(enumerate(NMask), [&](const auto &ME) { return ME.value() % LTNumElts == ME.index(); })) @@ -3913,6 +3924,14 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, all_of(Mask, [](int E) { return E < 8; })) return getPerfectShuffleCost(Mask); + // Check for identity masks, which we can treat as free. + if (!Mask.empty() && LT.second.isFixedLengthVector() && + (Kind == TTI::SK_PermuteTwoSrc || Kind == TTI::SK_PermuteSingleSrc) && + all_of(enumerate(Mask), [](const auto &M) { + return M.value() < 0 || M.value() == (int)M.index(); + })) + return 0; + if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose || Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc || Kind == TTI::SK_Reverse || Kind == TTI::SK_Splice) { @@ -4055,7 +4074,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, // Restore optimal kind.
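+  // (Kind was rewritten from SK_ExtractSubvector to SK_PermuteSingleSrc
+  // earlier in this function so the generic mask costing could apply; undo
+  // that before deferring to the base implementation.)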
if (IsExtractSubvector) Kind = TTI::SK_ExtractSubvector; - return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp); + return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args, + CxtI); } static bool containsDecreasingPointers(Loop *TheLoop, diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index de39dea2be43e..dba384481f6a3 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -393,7 +393,8 @@ class AArch64TTIImpl : public BasicTTIImplBase { ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = std::nullopt); + ArrayRef Args = std::nullopt, + const Instruction *CxtI = nullptr); InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 21643ebb41384..a3b966aa61550 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -276,6 +276,8 @@ class AArch64AsmParser : public MCTargetAsmParser { ParseStatus tryParseSVEDataVector(OperandVector &Operands); template ParseStatus tryParseSVEPredicateVector(OperandVector &Operands); + ParseStatus + tryParseSVEPredicateOrPredicateAsCounterVector(OperandVector &Operands); template ParseStatus tryParseVectorList(OperandVector &Operands, bool ExpectMatch = false); @@ -1241,6 +1243,7 @@ class AArch64Operand : public MCParsedAsmOperand { case AArch64::PPR_p8to15RegClassID: case AArch64::PNRRegClassID: case AArch64::PNR_p8to15RegClassID: + case AArch64::PPRorPNRRegClassID: RK = RegKind::SVEPredicateAsCounter; break; default: @@ -1264,6 +1267,7 @@ class AArch64Operand : public MCParsedAsmOperand { case AArch64::PPR_p8to15RegClassID: case AArch64::PNRRegClassID: case AArch64::PNR_p8to15RegClassID: + case AArch64::PPRorPNRRegClassID: RK = RegKind::SVEPredicateVector; break; default: @@ -1290,6 +1294,20 @@ class AArch64Operand : public MCParsedAsmOperand { return DiagnosticPredicateTy::NearMatch; } + template + DiagnosticPredicate isSVEPredicateOrPredicateAsCounterRegOfWidth() const { + if (Kind != k_Register || (Reg.Kind != RegKind::SVEPredicateAsCounter && + Reg.Kind != RegKind::SVEPredicateVector)) + return DiagnosticPredicateTy::NoMatch; + + if ((isSVEPredicateAsCounterReg() || + isSVEPredicateVectorRegOfWidth()) && + Reg.ElementWidth == ElementWidth) + return DiagnosticPredicateTy::Match; + + return DiagnosticPredicateTy::NearMatch; + } + template DiagnosticPredicate isSVEPredicateAsCounterRegOfWidth() const { if (Kind != k_Register || Reg.Kind != RegKind::SVEPredicateAsCounter) @@ -1770,6 +1788,15 @@ class AArch64Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::createReg(AArch64::Z0 + getReg() - Base)); } + void addPPRorPNRRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + unsigned Reg = getReg(); + // Normalise to PPR + if (Reg >= AArch64::PN0 && Reg <= AArch64::PN15) + Reg = Reg - AArch64::PN0 + AArch64::P0; + Inst.addOperand(MCOperand::createReg(Reg)); + } + void addPNRasPPRRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand( @@ -4167,6 +4194,15 @@ ParseStatus AArch64AsmParser::tryParseVectorRegister(MCRegister &Reg, return ParseStatus::NoMatch; } +ParseStatus 
AArch64AsmParser::tryParseSVEPredicateOrPredicateAsCounterVector( + OperandVector &Operands) { + ParseStatus Status = + tryParseSVEPredicateVector(Operands); + if (!Status.isSuccess()) + Status = tryParseSVEPredicateVector(Operands); + return Status; +} + /// tryParseSVEPredicateVector - Parse a SVE predicate register operand. template ParseStatus @@ -6019,6 +6055,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, return Error(Loc, "Invalid restricted vector register, expected z0.d..z15.d"); case Match_InvalidSVEPattern: return Error(Loc, "invalid predicate pattern"); + case Match_InvalidSVEPPRorPNRAnyReg: + case Match_InvalidSVEPPRorPNRBReg: case Match_InvalidSVEPredicateAnyReg: case Match_InvalidSVEPredicateBReg: case Match_InvalidSVEPredicateHReg: @@ -6131,9 +6169,6 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, case Match_AddSubLSLImm3ShiftLarge: return Error(Loc, "expected 'lsl' with optional integer in range [0, 7]"); - case Match_InvalidSVEPNRasPPRPredicateBReg: - return Error(Loc, - "Expected predicate-as-counter register name with .B suffix"); default: llvm_unreachable("unexpected error code!"); } @@ -6653,6 +6688,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidZPR_4b16: case Match_InvalidZPR_4b32: case Match_InvalidZPR_4b64: + case Match_InvalidSVEPPRorPNRAnyReg: + case Match_InvalidSVEPPRorPNRBReg: case Match_InvalidSVEPredicateAnyReg: case Match_InvalidSVEPattern: case Match_InvalidSVEVecLenSpecifier: @@ -6714,7 +6751,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidSVEVectorListStrided4x16: case Match_InvalidSVEVectorListStrided4x32: case Match_InvalidSVEVectorListStrided4x64: - case Match_InvalidSVEPNRasPPRPredicateBReg: case Match_MSR: case Match_MRS: { if (ErrorInfo >= Operands.size()) diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index a21b4b77166ed..ddb875e73ff5a 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -143,6 +143,9 @@ DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned RegMask, static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const MCDisassembler *Decoder); +static DecodeStatus DecodePPRorPNRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const MCDisassembler *Decoder); static DecodeStatus DecodePNRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const MCDisassembler *Decoder); @@ -741,6 +744,18 @@ static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo, return Success; } +static DecodeStatus DecodePPRorPNRRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const MCDisassembler *Decoder) { + if (RegNo > 15) + return Fail; + + unsigned Register = + AArch64MCRegisterClasses[AArch64::PPRorPNRRegClassID].getRegister(RegNo); + Inst.addOperand(MCOperand::createReg(Register)); + return Success; +} + static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index a8f2c45279e61..61f5bc2464ee5 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -191,7 +191,6 @@ 
class AArch64InstructionSelector : public InstructionSelector { MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &MIRBuilder); - bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI); bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy, MachineRegisterInfo &MRI); /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a @@ -3498,8 +3497,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return selectShuffleVector(I, MRI); case TargetOpcode::G_EXTRACT_VECTOR_ELT: return selectExtractElt(I, MRI); - case TargetOpcode::G_INSERT_VECTOR_ELT: - return selectInsertElt(I, MRI); case TargetOpcode::G_CONCAT_VECTORS: return selectConcatVectors(I, MRI); case TargetOpcode::G_JUMP_TABLE: @@ -5330,65 +5327,6 @@ bool AArch64InstructionSelector::selectUSMovFromExtend( return true; } -bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I, - MachineRegisterInfo &MRI) { - assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); - - // Get information on the destination. - Register DstReg = I.getOperand(0).getReg(); - const LLT DstTy = MRI.getType(DstReg); - unsigned VecSize = DstTy.getSizeInBits(); - - // Get information on the element we want to insert into the destination. - Register EltReg = I.getOperand(2).getReg(); - const LLT EltTy = MRI.getType(EltReg); - unsigned EltSize = EltTy.getSizeInBits(); - if (EltSize < 8 || EltSize > 64) - return false; - - // Find the definition of the index. Bail out if it's not defined by a - // G_CONSTANT. - Register IdxReg = I.getOperand(3).getReg(); - auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI); - if (!VRegAndVal) - return false; - unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); - - // Perform the lane insert. - Register SrcReg = I.getOperand(1).getReg(); - const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); - - if (VecSize < 128) { - // If the vector we're inserting into is smaller than 128 bits, widen it - // to 128 to do the insert. - MachineInstr *ScalarToVec = - emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB); - if (!ScalarToVec) - return false; - SrcReg = ScalarToVec->getOperand(0).getReg(); - } - - // Create an insert into a new FPR128 register. - // Note that if our vector is already 128 bits, we end up emitting an extra - // register. - MachineInstr *InsMI = - emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB); - - if (VecSize < 128) { - // If we had to widen to perform the insert, then we have to demote back to - // the original size to get the result we want. - if (!emitNarrowVector(DstReg, InsMI->getOperand(0).getReg(), MIB, MRI)) - return false; - } else { - // No widening needed. - InsMI->getOperand(0).setReg(DstReg); - constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); - } - - I.eraseFromParent(); - return true; -} - MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8( Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) { unsigned int Op; @@ -6907,10 +6845,8 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( MI.getParent()->getParent()->getFunction().hasOptSize()) return true; - // It's better to avoid folding and recomputing shifts when we don't have a - // fastpath. - if (!STI.hasAddrLSLFast()) - return false; + // FIXME: Consider checking HasAddrLSLSlow14 and HasALULSLFast as + // appropriate. // We have a fastpath, so folding a shift in and potentially computing it // many times may be beneficial. 
Check if this is only used in memory ops. diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 043f142f3099b..96ded69905f7c 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -886,9 +886,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampMaxNumElements(1, p0, 2); getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) - .legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64})) + .legalIf( + typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64, v2p0})) .moreElementsToNextPow2(0) - .widenVectorEltsToVectorMinSize(0, 64); + .widenVectorEltsToVectorMinSize(0, 64) + .clampNumElements(0, v8s8, v16s8) + .clampNumElements(0, v4s16, v8s16) + .clampNumElements(0, v2s32, v4s32) + .clampMaxNumElements(0, s64, 2) + .clampMaxNumElements(0, p0, 2); getActionDefinitionsBuilder(G_BUILD_VECTOR) .legalFor({{v8s8, s8}, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index 3cee2de4f5df8..b571f56bf9e17 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -475,6 +476,55 @@ void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) { MI.eraseFromParent(); } +bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) { + assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); + + auto ValAndVReg = + getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI); + return !ValAndVReg; +} + +void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &Builder) { + auto &Insert = cast(MI); + Builder.setInstrAndDebugLoc(Insert); + + Register Offset = Insert.getIndexReg(); + LLT VecTy = MRI.getType(Insert.getReg(0)); + LLT EltTy = MRI.getType(Insert.getElementReg()); + LLT IdxTy = MRI.getType(Insert.getIndexReg()); + + // Create a stack slot and store the vector into it + MachineFunction &MF = Builder.getMF(); + Align Alignment( + std::min(VecTy.getSizeInBytes().getKnownMinValue(), 16)); + int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(), + Alignment, false); + LLT FramePtrTy = LLT::pointer(0, 64); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx); + auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx); + + Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8)); + + // Get the pointer to the element, and be sure not to hit undefined behavior + // if the index is out of bounds. 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 3cee2de4f5df8..b571f56bf9e17 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -36,6 +36,7 @@
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -475,6 +476,55 @@ void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
   MI.eraseFromParent();
 }
 
+bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
+
+  auto ValAndVReg =
+      getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
+  return !ValAndVReg;
+}
+
+void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
+                         MachineIRBuilder &Builder) {
+  auto &Insert = cast<GInsertVectorElement>(MI);
+  Builder.setInstrAndDebugLoc(Insert);
+
+  Register Offset = Insert.getIndexReg();
+  LLT VecTy = MRI.getType(Insert.getReg(0));
+  LLT EltTy = MRI.getType(Insert.getElementReg());
+  LLT IdxTy = MRI.getType(Insert.getIndexReg());
+
+  // Create a stack slot and store the vector into it
+  MachineFunction &MF = Builder.getMF();
+  Align Alignment(
+      std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
+  int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
+                                                     Alignment, false);
+  LLT FramePtrTy = LLT::pointer(0, 64);
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
+  auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);
+
+  Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));
+
+  // Get the pointer to the element, and be sure not to hit undefined behavior
+  // if the index is out of bounds.
+  assert(isPowerOf2_64(VecTy.getNumElements()) &&
+         "Expected a power-2 vector size");
+  auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
+  Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
+  auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
+  Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
+  Register EltPtr =
+      Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
+          .getReg(0);
+
+  // Write the inserted element
+  Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
+  // Reload the whole vector.
+  Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
+  Insert.eraseFromParent();
+}
+
 /// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
 /// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
 ///
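Note: applyNonConstInsert is the interesting piece here: a variable-index insert cannot be selected as a lane INS, so it round-trips through a stack slot, with the index masked to keep the store in bounds. The same idea in plain C++, assuming a power-of-two lane count as the combine's assert requires:

    #include <cstring>
    template <typename T, unsigned N>
    void insertVariable(T (&Vec)[N], T Elt, unsigned Idx) {
      static_assert((N & (N - 1)) == 0, "power-of-two lane count");
      T Slot[N];                          // the CreateStackObject equivalent
      std::memcpy(Slot, Vec, sizeof Vec); // spill the whole vector
      Slot[Idx & (N - 1)] = Elt;          // masked index: never out of bounds
      std::memcpy(Vec, Slot, sizeof Vec); // reload the updated vector
    }

Masking changes the result for an out-of-range index, but an out-of-bounds G_INSERT_VECTOR_ELT produces poison anyway, so storing to any in-bounds lane is acceptable.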
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 0fc4d7f199106..58d000b6b2a9e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/LowLevelTypeUtils.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -398,7 +399,10 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
 
 void AArch64RegisterBankInfo::applyMappingImpl(
     MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const {
-  switch (OpdMapper.getMI().getOpcode()) {
+  MachineInstr &MI = OpdMapper.getMI();
+  MachineRegisterInfo &MRI = OpdMapper.getMRI();
+
+  switch (MI.getOpcode()) {
   case TargetOpcode::G_OR:
   case TargetOpcode::G_BITCAST:
   case TargetOpcode::G_LOAD:
@@ -407,6 +411,14 @@ void AArch64RegisterBankInfo::applyMappingImpl(
             OpdMapper.getInstrMapping().getID() <= 4) &&
            "Don't know how to handle that ID");
     return applyDefaultMapping(OpdMapper);
+  case TargetOpcode::G_INSERT_VECTOR_ELT: {
+    // Extend smaller gpr operands to 32 bit.
+    Builder.setInsertPt(*MI.getParent(), MI.getIterator());
+    auto Ext = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(2).getReg());
+    MRI.setRegBank(Ext.getReg(0), getRegBank(AArch64::GPRRegBankID));
+    MI.getOperand(2).setReg(Ext.getReg(0));
+    return applyDefaultMapping(OpdMapper);
+  }
   default:
     llvm_unreachable("Don't know how to handle that operation");
   }
@@ -752,6 +764,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   }
 
   unsigned NumOperands = MI.getNumOperands();
+  unsigned MappingID = DefaultMappingID;
 
   // Track the size and bank of each register. We don't do partial mappings.
   SmallVector OpSize(NumOperands);
@@ -1002,8 +1015,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     // The element may be either a GPR or FPR. Preserve that behaviour.
     if (getRegBank(MI.getOperand(2).getReg(), MRI, TRI) == &AArch64::FPRRegBank)
       OpRegBankIdx[2] = PMI_FirstFPR;
-    else
+    else {
+      // If the type is i8/i16, and the regbank will be GPR, then we change the
+      // type to i32 in applyMappingImpl.
+      LLT Ty = MRI.getType(MI.getOperand(2).getReg());
+      if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16)
+        MappingID = 1;
       OpRegBankIdx[2] = PMI_FirstGPR;
+    }
 
     // Index needs to be a GPR.
     OpRegBankIdx[3] = PMI_FirstGPR;
@@ -1124,6 +1143,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     }
   }
 
-  return getInstructionMapping(DefaultMappingID, Cost,
-                               getOperandsMapping(OpdsMapping), NumOperands);
+  return getInstructionMapping(MappingID, Cost, getOperandsMapping(OpdsMapping),
+                               NumOperands);
 }
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index e1d6dd7a056bc..dc5383ce941ed 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -58,8 +58,17 @@ void AArch64TargetStreamer::finish() {
     emitNoteSection(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
 }
 
-void AArch64TargetStreamer::emitNoteSection(unsigned Flags) {
-  if (Flags == 0)
+void AArch64TargetStreamer::emitNoteSection(unsigned Flags,
+                                            uint64_t PAuthABIPlatform,
+                                            uint64_t PAuthABIVersion) {
+  assert((PAuthABIPlatform == uint64_t(-1)) ==
+         (PAuthABIVersion == uint64_t(-1)));
+  uint64_t DescSz = 0;
+  if (Flags != 0)
+    DescSz += 4 * 4;
+  if (PAuthABIPlatform != uint64_t(-1))
+    DescSz += 4 + 4 + 8 * 2;
+  if (DescSz == 0)
     return;
 
   MCStreamer &OutStreamer = getStreamer();
@@ -80,15 +89,25 @@ void AArch64TargetStreamer::emitNoteSection(unsigned Flags) {
   // Emit the note header.
   OutStreamer.emitValueToAlignment(Align(8));
   OutStreamer.emitIntValue(4, 4);      // data size for "GNU\0"
-  OutStreamer.emitIntValue(4 * 4, 4);  // Elf_Prop size
+  OutStreamer.emitIntValue(DescSz, 4); // Elf_Prop array size
   OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4);
   OutStreamer.emitBytes(StringRef("GNU", 4)); // note name
 
   // Emit the PAC/BTI properties.
-  OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4);
-  OutStreamer.emitIntValue(4, 4);     // data size
-  OutStreamer.emitIntValue(Flags, 4); // data
-  OutStreamer.emitIntValue(0, 4);     // pad
+  if (Flags != 0) {
+    OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4);
+    OutStreamer.emitIntValue(4, 4);     // data size
+    OutStreamer.emitIntValue(Flags, 4); // data
+    OutStreamer.emitIntValue(0, 4);     // pad
+  }
+
+  // Emit the PAuth ABI compatibility info
+  if (PAuthABIPlatform != uint64_t(-1)) {
+    OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_PAUTH, 4);
+    OutStreamer.emitIntValue(8 * 2, 4); // data size
+    OutStreamer.emitIntValue(PAuthABIPlatform, 8);
+    OutStreamer.emitIntValue(PAuthABIVersion, 8);
+  }
 
   OutStreamer.endSection(Nt);
   OutStreamer.switchSection(Cur);
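Note: with both properties present, the note's descriptor is DescSz = 16 + 24 = 40 bytes. A sketch of the resulting byte stream (the offsets are my own annotation, assuming the usual 8-byte Elf_Prop alignment for 64-bit ELF):

    // +0   namesz = 4
    // +4   descsz = 40
    // +8   type   = NT_GNU_PROPERTY_TYPE_0
    // +12  "GNU\0"
    // +16  pr_type   = GNU_PROPERTY_AARCH64_FEATURE_1_AND
    // +20  pr_datasz = 4
    // +24  flags (4 bytes), then 4 bytes of padding
    // +32  pr_type   = GNU_PROPERTY_AARCH64_FEATURE_PAUTH
    // +36  pr_datasz = 16
    // +40  platform  (8 bytes)
    // +48  version   (8 bytes)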
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index 7676d88a82b5c..e8a9dc445b96b 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -35,7 +35,8 @@ class AArch64TargetStreamer : public MCTargetStreamer {
   void emitCurrentConstantPool();
 
   /// Callback used to implement the .note.gnu.property section.
-  void emitNoteSection(unsigned Flags);
+  void emitNoteSection(unsigned Flags, uint64_t PAuthABIPlatform = -1,
+                       uint64_t PAuthABIVersion = -1);
 
   /// Callback used to implement the .inst directive.
   virtual void emitInst(uint32_t Inst);
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 44d9a8ac7cb67..3363aab4b093c 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1301,7 +1301,7 @@ multiclass sve2_clamp {
 }
 
 class sve2_int_perm_sel_p
-    : I<(outs PPRAny:$Pd), (ins PPRAny:$Pn, ppr_ty:$Pm,
+    : I<(outs PPRorPNRAny:$Pd), (ins PPRorPNRAny:$Pn, ppr_ty:$Pm,
         MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
         asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>, Sched<[]> {
@@ -1345,19 +1345,6 @@ multiclass sve2_int_perm_sel_p {
     let Inst{20-18} = 0b000;
   }
 
-  def : InstAlias(NAME # _B) PNRasPPRAny:$Pd,
-      PNRasPPRAny:$Pn, PPR8:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm), 0>;
-  def : InstAlias(NAME # _H) PNRasPPRAny:$Pd,
-      PNRasPPRAny:$Pn, PPR16:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_7:$imm), 0>;
-  def : InstAlias(NAME # _S) PNRasPPRAny:$Pd,
-      PNRasPPRAny:$Pn, PPR32:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_3:$imm), 0>;
-  def : InstAlias(NAME # _D) PNRasPPRAny:$Pd,
-      PNRasPPRAny:$Pn, PPR64:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_1:$imm), 0>;
-
   def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm), MatrixIndexGPR32Op12_15:$idx)),
             (!cast(NAME # _B) $Pn, $Pm, $idx, 0)>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 1e76d58669da1..fb0c6188edb34 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -477,12 +477,6 @@ class SVE_2_Op_Pred_All_Active_Pt;
 
-class SVE_2_Op_Pred_Any_Predicate
-: Pat<(vtd (op (pt (SVEAnyPredicate)), vt1:$Op1, vt2:$Op2)),
-      (inst $Op1, $Op2)>;
-
 class SVE_3_Op_Pat
 : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
@@ -550,11 +544,6 @@ class SVE_3_Op_Pat_Shift_Imm_SelZero
-: Pat<(vtd (op vt1:$Op1)),
-      (inst (IMPLICIT_DEF), (ptrue 31), $Op1)>;
-
 class SVE_2_Op_AllActive_Pat
 : Pat<(vtd (op vt1:$Op1, vt2:$Op2)),
@@ -740,7 +729,7 @@ let hasNoSchedulingInfo = 1 in {
 //===----------------------------------------------------------------------===//
 
 class sve_int_pfalse opc, string asm>
-: I<(outs PPR8:$Pd), (ins),
+: I<(outs PPRorPNR8:$Pd), (ins),
   asm, "\t$Pd",
   "",
   []>, Sched<[]> {
@@ -1848,7 +1837,7 @@ multiclass sve_int_sel_vvv {
 //===----------------------------------------------------------------------===//
 
 class sve_int_pred_log opc, string asm>
-: I<(outs PPR8:$Pd), (ins PPRAny:$Pg, PPR8:$Pn, PPR8:$Pm),
+: I<(outs PPRorPNR8:$Pd), (ins PPRorPNRAny:$Pg, PPRorPNR8:$Pn, PPRorPNR8:$Pm),
   asm, "\t$Pd, $Pg/z, $Pn, $Pm",
   "",
   []>, Sched<[]> {
@@ -2278,8 +2267,7 @@ class sve_fp_3op_u_zd sz, bits<3> opc, string asm, ZPRRegOp zprty>
   let mayRaiseFPException = 1;
 }
 
-multiclass sve_fp_3op_u_zd opc, string asm, SDPatternOperator op,
-                           SDPatternOperator predicated_op = null_frag> {
+multiclass sve_fp_3op_u_zd opc, string asm, SDPatternOperator op> {
  def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
  def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
  def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>;
@@ -2287,18 +2275,12 @@ multiclass sve_fp_3op_u_zd opc, string asm, SDPatternOperator op,
  def : SVE_2_Op_Pat(NAME # _H)>;
  def : SVE_2_Op_Pat(NAME # _S)>;
  def : SVE_2_Op_Pat(NAME # _D)>;
-
-  def : SVE_2_Op_Pred_All_Active(NAME # _H)>;
-  def : SVE_2_Op_Pred_All_Active(NAME # _S)>;
-  def : SVE_2_Op_Pred_All_Active(NAME # _D)>;
 }
 
-multiclass sve2p1_bf_3op_u_zd opc1, string asm, SDPatternOperator op,
-                              SDPatternOperator predicated_op = null_frag> {
-  def NAME : sve_fp_3op_u_zd<0b00, opc1, asm, ZPR16>;
-  def : SVE_2_Op_Pat(NAME)>;
+multiclass sve2p1_bf_3op_u_zd opc, string asm, SDPatternOperator op> {
+  def NAME : sve_fp_3op_u_zd<0b00, opc, asm, ZPR16>;
 
-  def : SVE_2_Op_Pred_All_Active(NAME)>;
+  def : SVE_2_Op_Pat(NAME)>;
 }
 
 multiclass sve_fp_3op_u_zd_ftsmul opc, string asm, SDPatternOperator op> {
@@ -3684,8 +3666,7 @@ class sve2_int_mul sz, bits<3> opc, string asm, ZPRRegOp zprty>
   let hasSideEffects = 0;
 }
 
-multiclass sve2_int_mul opc, string asm, SDPatternOperator op,
-                        SDPatternOperator op_pred = null_frag> {
+multiclass sve2_int_mul opc, string asm, SDPatternOperator op> {
   def _B : sve2_int_mul<0b00, opc, asm, ZPR8>;
   def _H : sve2_int_mul<0b01, opc, asm, ZPR16>;
   def _S : sve2_int_mul<0b10, opc, asm, ZPR32>;
@@ -3695,11 +3676,6 @@ multiclass sve2_int_mul opc, string asm, SDPatternOperator op,
   def : SVE_2_Op_Pat(NAME # _H)>;
   def : SVE_2_Op_Pat(NAME # _S)>;
   def : SVE_2_Op_Pat(NAME # _D)>;
-
-  def : SVE_2_Op_Pred_Any_Predicate(NAME # _B)>;
-  def : SVE_2_Op_Pred_Any_Predicate(NAME # _H)>;
-  def : SVE_2_Op_Pred_Any_Predicate(NAME # _S)>;
-  def : SVE_2_Op_Pred_Any_Predicate(NAME # _D)>;
 }
 
 multiclass sve2_int_mul_single opc, string asm, SDPatternOperator op> {
@@ -4934,7 +4910,6 @@ multiclass sve2_int_bitwise_ternary_op opc, string asm, SDPatternOperato
   def : SVE_3_Op_Pat(NAME)>;
   def : SVE_3_Op_Pat(NAME)>;
 
-  def : SVE_3_Op_BSP_Pat(NAME)>;
   def : SVE_3_Op_BSP_Pat(NAME)>;
   def : SVE_3_Op_BSP_Pat(NAME)>;
@@ -6689,7 +6664,7 @@ multiclass sve_mem_z_spill {
 }
 
 class sve_mem_p_spill
-: I<(outs), (ins PPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9),
+: I<(outs), (ins PPRorPNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9),
   asm, "\t$Pt, [$Rn, $imm9, mul vl]",
   "",
   []>, Sched<[]> {
@@ -6712,7 +6687,7 @@ multiclass sve_mem_p_spill {
   def NAME : sve_mem_p_spill;
 
   def : InstAlias(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
+                  (!cast(NAME) PPRorPNRAny:$Pt, GPR64sp:$Rn, 0), 1>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -7858,7 +7833,7 @@ multiclass sve_mem_z_fill {
 }
 
 class sve_mem_p_fill
-: I<(outs PPRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9),
+: I<(outs PPRorPNRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9),
   asm, "\t$Pt, [$Rn, $imm9, mul vl]",
   "",
   []>, Sched<[]> {
@@ -7881,7 +7856,7 @@ multiclass sve_mem_p_fill {
   def NAME : sve_mem_p_fill;
 
   def : InstAlias(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
+                  (!cast(NAME) PPRorPNRAny:$Pt, GPR64sp:$Rn, 0), 1>;
 }
 
 class sve2_mem_gldnt_vs_base opc, dag iops, string asm,
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index d399e0ac0794f..015ca4cb92b25 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -53,6 +53,9 @@ SMEAttrs::SMEAttrs(StringRef FuncName) : Bitmask(0) {
   if (FuncName == "__arm_tpidr2_restore")
     Bitmask |= SMEAttrs::SM_Compatible | encodeZAState(StateValue::In) |
                SMEAttrs::SME_ABI_Routine;
+  if (FuncName == "__arm_sc_memcpy" || FuncName == "__arm_sc_memset" ||
+      FuncName == "__arm_sc_memmove" || FuncName == "__arm_sc_memchr")
+    Bitmask |= SMEAttrs::SM_Compatible;
 }
 
 SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
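Note: the __arm_sc_* routines are the streaming-compatible string functions from the SME ACLE, so tagging them SM_Compatible lets calls from streaming code avoid being wrapped in a streaming-mode switch. An illustrative declaration pair, spelled from the ACLE keyword attributes rather than from this patch (it compiles only with an SME-aware Clang):

    extern "C" void *__arm_sc_memcpy(void *Dst, const void *Src,
                                     unsigned long N) __arm_streaming_compatible;

    void CopyWhileStreaming(void *Dst, const void *Src,
                            unsigned long N) __arm_streaming {
      __arm_sc_memcpy(Dst, Src, N); // no SMSTOP/SMSTART around the call
    }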
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 9083150b33848..1114a8c40114e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -1086,7 +1086,7 @@ void SplitPtrStructs::processConditionals() {
       if (MaybeRsrc)
         for (Value *V : Seen)
           FoundRsrcs[cast<PHINode>(V)] = NewRsrc;
-    } else if (auto *SI = dyn_cast<SelectInst>(I)) {
+    } else if (isa<SelectInst>(I)) {
       if (MaybeRsrc) {
         ConditionalTemps.push_back(cast<Instruction>(Rsrc));
         Rsrc->replaceAllUsesWith(*MaybeRsrc);
@@ -1777,8 +1777,8 @@ void SplitPtrStructs::processFunction(Function &F) {
     Originals.push_back(&I);
   for (Instruction *I : Originals) {
     auto [Rsrc, Off] = visit(I);
-    assert((Rsrc && Off) ||
-           (!Rsrc && !Off) && "Can't have a resource but no offset");
+    assert(((Rsrc && Off) || (!Rsrc && !Off)) &&
+           "Can't have a resource but no offset");
     if (Rsrc)
       RsrcParts[I] = Rsrc;
     if (Off)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index fa77b94fc22de..8f0eae362ecae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -642,6 +642,17 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
   Policy.ShouldTrackLaneMasks = true;
 }
 
+void GCNSubtarget::mirFileLoaded(MachineFunction &MF) const {
+  if (isWave32()) {
+    // Fix implicit $vcc operands after MIParser has verified that they match
+    // the instruction definitions.
+    for (auto &MBB : MF) {
+      for (auto &MI : MBB)
+        InstrInfo.fixImplicitOperands(MI);
+    }
+  }
+}
+
 bool GCNSubtarget::hasMadF16() const {
   return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16_e64) != -1;
 }
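Note: the assert change in AMDGPULowerBufferFatPointers.cpp is the usual operator-precedence cleanup: '&&' binds tighter than '||', so in the old spelling the message string attached only to the right-hand operand. Here the truth value happened to be unchanged (a string literal is always non-null, hence true), but the general pattern is worth the parentheses:

    #include <cassert>
    void Demo(bool A, bool B) {
      assert(A || B && "msg");   // parses as A || (B && "msg"): same truth
                                 // value in this form, but compilers warn
      assert((A || B) && "msg"); // intended grouping, warning-free
      // The dangerous dual is assert(A && B || "msg"): that one can never
      // fire, because the non-null literal makes the || always true.
    }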
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 31077dbc0b2cc..84320d296a037 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1127,7 +1127,8 @@ InstructionCost
 GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *VT,
                            ArrayRef<int> Mask,
                            TTI::TargetCostKind CostKind, int Index,
                            VectorType *SubTp,
-                           ArrayRef<const Value *> Args) {
+                           ArrayRef<const Value *> Args,
+                           const Instruction *CxtI) {
   Kind = improveShuffleKindFromMask(Kind, Mask, VT, Index, SubTp);
 
   // Treat extractsubvector as single op permutation.
   bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index cd8e9fd10bbf2..0dab3a9827794 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -234,7 +234,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
                                  ArrayRef<int> Mask,
                                  TTI::TargetCostKind CostKind, int Index,
                                  VectorType *SubTp,
-                                 ArrayRef<const Value *> Args = std::nullopt);
+                                 ArrayRef<const Value *> Args = std::nullopt,
+                                 const Instruction *CxtI = nullptr);
 
   bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 4da10beabe316..e24a18a2842f6 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -923,6 +923,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   void overrideSchedPolicy(MachineSchedPolicy &Policy,
                            unsigned NumRegionInstrs) const override;
 
+  void mirFileLoaded(MachineFunction &MF) const override;
+
   unsigned getMaxNumUserSGPRs() const {
     return AMDGPU::getMaxNumUserSGPRs(*this);
   }
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 3be894ad3bef2..ee87f7f0e555e 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1212,7 +1212,8 @@ InstructionCost
 ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                            ArrayRef<int> Mask,
                            TTI::TargetCostKind CostKind, int Index,
                            VectorType *SubTp,
-                           ArrayRef<const Value *> Args) {
+                           ArrayRef<const Value *> Args,
+                           const Instruction *CxtI) {
   Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
 
   // Treat extractsubvector as single op permutation.
   bool IsExtractSubvector = Kind == TTI::SK_ExtractSubvector;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index bb4b321b53009..04b32194f806f 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -220,7 +220,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
                                  ArrayRef<int> Mask,
                                  TTI::TargetCostKind CostKind, int Index,
                                  VectorType *SubTp,
-                                 ArrayRef<const Value *> Args = std::nullopt);
+                                 ArrayRef<const Value *> Args = std::nullopt,
+                                 const Instruction *CxtI = nullptr);
 
   bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                              TTI::ReductionFlags Flags) const;
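Note: the `const Instruction *CxtI` parameter has to be threaded through every target's getShuffleCost in lockstep because the TTI dispatch is resolved at compile time; a defaulted trailing parameter keeps existing callers building while letting cost models look at IR context. The shape of the idiom, reduced to a toy:

    #include <cstdio>
    struct Instruction { bool HasOneUse; };
    // A defaulted context argument: old call sites keep working, new ones
    // can pass the instruction so the model can price it more precisely.
    static int getShuffleCost(int Kind, const Instruction *CxtI = nullptr) {
      if (CxtI && CxtI->HasOneUse)
        return 1; // e.g. cheaper when the single user can absorb the shuffle
      return 2;
    }
    int main() {
      Instruction I{true};
      std::printf("%d %d\n", getShuffleCost(0), getShuffleCost(0, &I)); // 2 1
    }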
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 85eabdb17ad19..5b0b799880a35 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -12,6 +12,8 @@
 
 #include "Thumb1InstrInfo.h"
 #include "ARMSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -47,24 +49,57 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
          "Thumb1 can only copy GPR registers");
 
-  if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg)
-      || !ARM::tGPRRegClass.contains(DestReg))
+  if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) ||
+      !ARM::tGPRRegClass.contains(DestReg))
     BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
         .addReg(SrcReg, getKillRegState(KillSrc))
         .add(predOps(ARMCC::AL));
   else {
-    // FIXME: Can also use 'mov hi, $src; mov $dst, hi',
-    // with hi as either r10 or r11.
-
     const TargetRegisterInfo *RegInfo = st.getRegisterInfo();
-    if (MBB.computeRegisterLiveness(RegInfo, ARM::CPSR, I)
-        == MachineBasicBlock::LQR_Dead) {
+    LiveRegUnits UsedRegs(*RegInfo);
+    UsedRegs.addLiveOuts(MBB);
+
+    auto InstUpToI = MBB.end();
+    while (InstUpToI != I)
+      // The pre-decrement is on purpose here.
+      // We want to have the liveness right before I.
+      UsedRegs.stepBackward(*--InstUpToI);
+
+    if (UsedRegs.available(ARM::CPSR)) {
       BuildMI(MBB, I, DL, get(ARM::tMOVSr), DestReg)
           .addReg(SrcReg, getKillRegState(KillSrc))
           ->addRegisterDead(ARM::CPSR, RegInfo);
       return;
     }
 
+    // Use high register to move source to destination
+    // if movs is not an option.
+    BitVector Allocatable = RegInfo->getAllocatableSet(
+        MF, RegInfo->getRegClass(ARM::hGPRRegClassID));
+
+    Register TmpReg = ARM::NoRegister;
+    // Prefer R12 as it is known to not be preserved anyway
+    if (UsedRegs.available(ARM::R12) && Allocatable.test(ARM::R12)) {
+      TmpReg = ARM::R12;
+    } else {
+      for (Register Reg : Allocatable.set_bits()) {
+        if (UsedRegs.available(Reg)) {
+          TmpReg = Reg;
+          break;
+        }
+      }
+    }
+
+    if (TmpReg) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVr), TmpReg)
+          .addReg(SrcReg, getKillRegState(KillSrc))
+          .add(predOps(ARMCC::AL));
+      BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+          .addReg(TmpReg, getKillRegState(true))
+          .add(predOps(ARMCC::AL));
+      return;
+    }
+
     // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it
     BuildMI(MBB, I, DL, get(ARM::tPUSH))
         .add(predOps(ARMCC::AL))
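Note: the Thumb1 change replaces the coarse computeRegisterLiveness query with a LiveRegUnits scan: start from the block's live-outs and step backward to the insertion point, after which any unit still marked live is unavailable. A toy bitset version of the same walk (illustrative, not the LiveRegUnits API):

    #include <bitset>
    #include <vector>
    struct Inst { std::bitset<16> Defs, Uses; };
    // Availability of register unit R immediately before instruction At.
    static bool availableBefore(const std::vector<Inst> &Block, unsigned At,
                                unsigned R, std::bitset<16> LiveOuts) {
      std::bitset<16> Live = LiveOuts;
      for (unsigned I = Block.size(); I-- > At;) { // backward, like *--It
        Live &= ~Block[I].Defs; // a def kills liveness above it
        Live |= Block[I].Uses;  // a use makes the unit live above it
      }
      return !Live.test(R);
    }

This is what lets the copy use MOVS only when CPSR is genuinely free, and otherwise borrow R12 or another free high register before falling back to the push/pop sequence.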
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index fc2834cb0b45c..083f25f49dec4 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -286,25 +286,6 @@ MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI,
   return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
 }
 
-bool Thumb2InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
-                                           const MachineBasicBlock *MBB,
-                                           const MachineFunction &MF) const {
-  // BTI clearing instructions shall not take part in scheduling regions as
-  // they must stay in their intended place. Although PAC isn't BTI clearing,
-  // it can be transformed into PACBTI after the pre-RA Machine Scheduling
-  // has taken place, so its movement must also be restricted.
-  switch (MI.getOpcode()) {
-  case ARM::t2BTI:
-  case ARM::t2PAC:
-  case ARM::t2PACBTI:
-  case ARM::t2SG:
-    return true;
-  default:
-    break;
-  }
-  return ARMBaseInstrInfo::isSchedulingBoundary(MI, MBB, MF);
-}
-
 void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI,
                                   const DebugLoc &dl, Register DestReg,
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
index 8915da8c5bf3c..4bb412f09dcbe 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h
@@ -68,10 +68,6 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo {
                                        unsigned OpIdx1,
                                        unsigned OpIdx2) const override;
 
-  bool isSchedulingBoundary(const MachineInstr &MI,
-                            const MachineBasicBlock *MBB,
-                            const MachineFunction &MF) const override;
-
 private:
   void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
 };
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 458b8717256f2..f47fcff5d6025 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -230,7 +230,8 @@ InstructionCost
 HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
                                ArrayRef<int> Mask,
                                TTI::TargetCostKind CostKind, int Index,
                                Type *SubTp,
-                               ArrayRef<const Value *> Args) {
+                               ArrayRef<const Value *> Args,
+                               const Instruction *CxtI) {
   return 1;
 }
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index fdb34f308e641..9689f2f5bb865 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -122,7 +122,8 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
                                  ArrayRef<int> Mask,
                                  TTI::TargetCostKind CostKind, int Index,
                                  Type *SubTp,
-                                 ArrayRef<const Value *> Args = std::nullopt);
+                                 ArrayRef<const Value *> Args = std::nullopt,
+                                 const Instruction *CxtI = nullptr);
   InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                          const Value *Ptr, bool VariableMask,
                                          Align Alignment,
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index dc2d61a6e4740..2993726d2b649 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -206,22 +206,19 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
   if (StackSize == 0 && !MFI.adjustsStack())
     return;
 
-  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF, true);
-  uint64_t SecondSPAdjustAmount = RealStackSize - FirstSPAdjustAmount;
+  uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
   // Split the SP adjustment to reduce the offsets of callee saved spill.
   if (FirstSPAdjustAmount)
     StackSize = FirstSPAdjustAmount;
 
   // Adjust stack.
   adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
-  if (FirstSPAdjustAmount != 2048 || SecondSPAdjustAmount == 0) {
-    // Emit ".cfi_def_cfa_offset StackSize".
-    unsigned CFIIndex =
-        MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
-    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex)
-        .setMIFlag(MachineInstr::FrameSetup);
-  }
+  // Emit ".cfi_def_cfa_offset StackSize".
+  unsigned CFIIndex =
+      MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
+  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex)
+      .setMIFlag(MachineInstr::FrameSetup);
 
   const auto &CSI = MFI.getCalleeSavedInfo();
 
@@ -258,25 +255,14 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
   }
 
   // Emit the second SP adjustment after saving callee saved registers.
-  if (FirstSPAdjustAmount && SecondSPAdjustAmount) {
-    if (hasFP(MF)) {
-      assert(SecondSPAdjustAmount > 0 &&
-             "SecondSPAdjustAmount should be greater than zero");
-      adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount,
-                MachineInstr::FrameSetup);
-    } else {
-      // FIXME: RegScavenger will place the spill instruction before the
-      // prologue if a VReg is created in the prologue. This will pollute the
-      // caller's stack data. Therefore, until there is better way, we just use
-      // the `addi.w/d` instruction for stack adjustment to ensure that VReg
-      // will not be created.
-      for (int Val = SecondSPAdjustAmount; Val > 0; Val -= 2048)
-        BuildMI(MBB, MBBI, DL,
-                TII->get(IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W), SPReg)
-            .addReg(SPReg)
-            .addImm(Val < 2048 ? -Val : -2048)
-            .setMIFlag(MachineInstr::FrameSetup);
+  if (FirstSPAdjustAmount) {
+    uint64_t SecondSPAdjustAmount = RealStackSize - FirstSPAdjustAmount;
+    assert(SecondSPAdjustAmount > 0 &&
+           "SecondSPAdjustAmount should be greater than zero");
+    adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount,
+              MachineInstr::FrameSetup);
 
+    if (!hasFP(MF)) {
      // If we are using a frame-pointer, and thus emitted ".cfi_def_cfa fp, 0",
      // don't emit an sp-based .cfi_def_cfa_offset
      // Emit ".cfi_def_cfa_offset RealStackSize"
@@ -369,27 +355,20 @@ void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
 // st.d $ra, $sp, 2024
 // st.d $fp, $sp, 2016
 // addi.d $sp, $sp, -16
-uint64_t
-LoongArchFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF,
-                                               bool IsPrologue) const {
+uint64_t LoongArchFrameLowering::getFirstSPAdjustAmount(
    const MachineFunction &MF) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
 
   // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
   // 12-bit and there exists a callee-saved register needing to be pushed.
-  if (!isInt<12>(MFI.getStackSize())) {
+  if (!isInt<12>(MFI.getStackSize()) && (CSI.size() > 0)) {
     // FirstSPAdjustAmount is chosen as (2048 - StackAlign) because 2048 will
     // cause sp = sp + 2048 in the epilogue to be split into multiple
     // instructions. Offsets smaller than 2048 can fit in a single load/store
     // instruction, and we have to stick with the stack alignment.
     // So (2048 - StackAlign) will satisfy the stack alignment.
-    //
-    // FIXME: This place may seem odd. When using multiple ADDI instructions to
-    // adjust the stack in Prologue, and there are no callee-saved registers, we
-    // can take advantage of the logic of split sp ajustment to reduce code
-    // changes.
-    return CSI.size() > 0 ? 2048 - getStackAlign().value()
-                          : (IsPrologue ? 2048 : 0);
+    return 2048 - getStackAlign().value();
   }
   return 0;
 }
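Note: a worked example of the split adjustment, with 16-byte stack alignment and a 2064-byte frame that has callee-saved registers: FirstSPAdjustAmount = 2048 - 16 = 2032, which fits the simm12 of a single addi and keeps the CSR spill offsets small; the second adjustment is then 2064 - 2032 = 32. A minimal model of the helper's new behaviour (isInt<12> reduced to its positive case; names mirror the source):

    #include <cstdint>
    static uint64_t firstSPAdjust(uint64_t StackSize, bool HasCSRs,
                                  uint64_t StackAlign = 16) {
      bool FitsSImm12 = StackSize < 2048;
      if (!FitsSImm12 && HasCSRs)
        return 2048 - StackAlign; // stays aligned, and the epilogue re-add
                                  // still fits in one addi.w/addi.d
      return 0; // no split: a single adjustment suffices
    }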
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
index 57d2565c32c09..bc2ac02c91f81 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -52,8 +52,7 @@ class LoongArchFrameLowering : public TargetFrameLowering {
   bool hasFP(const MachineFunction &MF) const override;
   bool hasBP(const MachineFunction &MF) const;
 
-  uint64_t getFirstSPAdjustAmount(const MachineFunction &MF,
-                                  bool IsPrologue = false) const;
+  uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const;
 
   bool enableShrinkWrapping(const MachineFunction &MF) const override;
diff --git a/llvm/lib/Target/M68k/M68kInstrArithmetic.td b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
index 3532e56e74170..e2d4e49ddf27b 100644
--- a/llvm/lib/Target/M68k/M68kInstrArithmetic.td
+++ b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
@@ -15,8 +15,8 @@
 ///   ADD        [~]   ADDA      [~]   ADDI        [~]   ADDQ [ ]   ADDX [~]
 ///   CLR        [ ]   CMP       [~]   CMPA        [~]   CMPI [~]   CMPM [ ]
 ///   CMP2       [ ]   DIVS/DIVU [~]   DIVSL/DIVUL [ ]   EXT  [~]   EXTB [ ]
-///   MULS/MULU  [~]   NEG       [~]   NEGX        [~]   SUB  [~]   SUBA [~]
-///   SUBI       [~]   SUBQ      [ ]   SUBX        [~]
+///   MULS/MULU  [~]   NEG       [~]   NEGX        [~]   NOT  [~]   SUB  [~]
+///   SUBA       [~]   SUBI      [~]   SUBQ        [ ]   SUBX [~]
 ///
 ///  Map:
 ///
@@ -769,7 +769,7 @@ def : Pat<(mulhu i16:$dst, Mxi16immSExt16:$opd),
 
 //===----------------------------------------------------------------------===//
-// NEG/NEGX
+// NEG/NEGX/NOT
 //===----------------------------------------------------------------------===//
 
 /// ------------+------------+------+---------+---------
@@ -809,12 +809,26 @@ class MxNegX_D
   }
 }
 
+class MxNot_D
+    : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src),
+             "not."#TYPE.Prefix#"\t$dst",
+             [(set TYPE.VT:$dst, (not TYPE.VT:$src))]> {
+  let Inst = (descend 0b01000110,
+    /*SIZE*/!cast("MxEncSize"#TYPE.Size).Value,
+    //MODE without last bit
+    0b00,
+    //REGISTER prefixed by D/A bit
+    (operand "$dst", 4)
+  );
+}
+
 } // let Constraints
 } // let Defs = [CCR]
 
 foreach S = [8, 16, 32] in {
   def NEG#S#d  : MxNeg_D("MxType"#S#"d")>;
   def NEGX#S#d : MxNegX_D("MxType"#S#"d")>;
+  def NOT#S#d  : MxNot_D("MxType"#S#"d")>;
 }
 
 def : Pat<(MxSub 0, i8 :$src), (NEG8d MxDRD8 :$src)>;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 57e1019adb741..3fa35efc2d159 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -607,7 +607,8 @@ InstructionCost
 PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
                            ArrayRef<int> Mask,
                            TTI::TargetCostKind CostKind, int Index,
                            Type *SubTp,
-                           ArrayRef<const Value *> Args) {
+                           ArrayRef<const Value *> Args,
+                           const Instruction *CxtI) {
 
   InstructionCost CostFactor =
       vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index c3ade9968c336..36006dd7df739 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -112,7 +112,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
                                  ArrayRef<int> Mask,
                                  TTI::TargetCostKind CostKind, int Index,
                                  Type *SubTp,
-                                 ArrayRef<const Value *> Args = std::nullopt);
+                                 ArrayRef<const Value *> Args = std::nullopt,
+                                 const Instruction *CxtI = nullptr);
   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                    TTI::CastContextHint CCH,
                                    TTI::TargetCostKind CostKind,
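Note: on the M68k NOT encoding added above, the instruction word follows the classic 68000 pattern 0100 0110 SS MMM RRR, with SS = 00/01/10 for byte/word/long and MMM = 000 for the data-register direct form. A quick self-check of that reading (the bit offsets are my own, derived from the descend pattern):

    #include <cstdint>
    #include <cstdio>
    static uint16_t encodeNotD(unsigned SizeBits, unsigned DReg) {
      return uint16_t((0b01000110u << 8) | (SizeBits << 6) | DReg);
    }
    int main() {
      std::printf("%04x %04x\n",
                  encodeNotD(0b00, 0),  // not.b %d0 -> 4600
                  encodeNotD(0b10, 3)); // not.l %d3 -> 4683
    }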
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index 8534024b6002b..86e44343b5086 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -290,16 +290,7 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
 
   switch (Opc) {
   case TargetOpcode::G_ADD:
-  case TargetOpcode::G_SUB: {
-    if (MRI.getType(MI.getOperand(0).getReg()).isVector()) {
-      LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-      return getInstructionMapping(
-          DefaultMappingID, /*Cost=*/1,
-          getVRBValueMapping(Ty.getSizeInBits().getKnownMinValue()),
-          NumOperands);
-    }
-  }
-    LLVM_FALLTHROUGH;
+  case TargetOpcode::G_SUB:
   case TargetOpcode::G_SHL:
   case TargetOpcode::G_ASHR:
   case TargetOpcode::G_LSHR:
@@ -320,10 +311,6 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case TargetOpcode::G_PTR_ADD:
   case TargetOpcode::G_PTRTOINT:
   case TargetOpcode::G_INTTOPTR:
-  case TargetOpcode::G_SEXTLOAD:
-  case TargetOpcode::G_ZEXTLOAD:
-    return getInstructionMapping(DefaultMappingID, /*Cost=*/1, GPRValueMapping,
-                                 NumOperands);
   case TargetOpcode::G_FADD:
   case TargetOpcode::G_FSUB:
   case TargetOpcode::G_FMUL:
@@ -334,10 +321,34 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
   case TargetOpcode::G_FMAXNUM:
   case TargetOpcode::G_FMINNUM: {
     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
-    return getInstructionMapping(DefaultMappingID, /*Cost=*/1,
-                                 getFPValueMapping(Ty.getSizeInBits()),
-                                 NumOperands);
+    TypeSize Size = Ty.getSizeInBits();
+
+    const ValueMapping *Mapping;
+    if (Ty.isVector())
+      Mapping = getVRBValueMapping(Size.getKnownMinValue());
+    else if (isPreISelGenericFloatingPointOpcode(Opc))
+      Mapping = getFPValueMapping(Size.getFixedValue());
+    else
+      Mapping = GPRValueMapping;
+
+#ifndef NDEBUG
+    // Make sure all the operands are using similar size and type.
+    for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
+      LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
+      assert(Ty.isVector() == OpTy.isVector() &&
+             "Operand has incompatible type");
+      // Don't check size for GPR.
+      if (OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc))
+        assert(Size == OpTy.getSizeInBits() && "Operand has incompatible size");
+    }
+#endif // End NDEBUG
+
+    return getInstructionMapping(DefaultMappingID, 1, Mapping, NumOperands);
   }
+  case TargetOpcode::G_SEXTLOAD:
+  case TargetOpcode::G_ZEXTLOAD:
+    return getInstructionMapping(DefaultMappingID, /*Cost=*/1, GPRValueMapping,
+                                 NumOperands);
   case TargetOpcode::G_IMPLICIT_DEF: {
     Register Dst = MI.getOperand(0).getReg();
     LLT DstTy = MRI.getType(Dst);
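Note: after this rewrite, a single block picks the value mapping for the whole arithmetic group, which is also why G_ADD/G_SUB no longer need a vector special case. The decision reduces to a three-way choice (a sketch, not the RegisterBankInfo API):

    enum Bank { GPR, FPR, VRB };
    struct Ty { bool IsVector; unsigned MinBits; };
    // Vectors go to the vector bank keyed by known-minimum size (which is
    // what makes this scalable-vector friendly), genuine FP opcodes go to
    // FPR by fixed size, and everything else stays on GPR.
    static Bank pickBank(Ty T, bool IsFPOpcode) {
      if (T.IsVector)
        return VRB;
      if (IsFPOpcode)
        return FPR;
      return GPR;
    }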
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 5bf594c0b5eae..9982a73ee914d 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -80,7 +80,8 @@ class RISCVAsmPrinter : public AsmPrinter {
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
                              const char *ExtraCode, raw_ostream &OS) override;
 
-  void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
+  // Returns whether Inst is compressed.
+  bool EmitToStreamer(MCStreamer &S, const MCInst &Inst);
 
   bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
                                    const MachineInstr *MI);
@@ -180,12 +181,13 @@ void RISCVAsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM,
     SM.recordStatepoint(*MILabel, MI);
 }
 
-void RISCVAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
+bool RISCVAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
   MCInst CInst;
   bool Res = RISCVRVC::compress(CInst, Inst, *STI);
   if (Res)
     ++RISCVNumInstrsCompressed;
   AsmPrinter::EmitToStreamer(*OutStreamer, Res ? CInst : Inst);
+  return Res;
 }
 
 // Simple pseudo-instructions have their lowering (with expansion to real
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 6ef2289bb4bee..794455aa73040 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -104,7 +104,7 @@ def FeatureStdExtZihpm
                        "'Zihpm' (Hardware Performance Counters)",
                        [FeatureStdExtZicsr]>;
 
-def FeatureStdExtZimop : SubtargetFeature<"experimental-zimop", "HasStdExtZimop", "true",
+def FeatureStdExtZimop : SubtargetFeature<"zimop", "HasStdExtZimop", "true",
                                           "'Zimop' (May-Be-Operations)">;
 def HasStdExtZimop : Predicate<"Subtarget->hasStdExtZimop()">,
                      AssemblerPredicate<(all_of FeatureStdExtZimop),
@@ -390,7 +390,7 @@ def HasStdExtCOrZcfOrZce
                          "'C' (Compressed Instructions) or "
                          "'Zcf' (Compressed Single-Precision Floating-Point Instructions)">;
 
-def FeatureStdExtZcmop : SubtargetFeature<"experimental-zcmop", "HasStdExtZcmop", "true",
+def FeatureStdExtZcmop : SubtargetFeature<"zcmop", "HasStdExtZcmop", "true",
                                           "'Zcmop' (Compressed May-Be-Operations)",
                                           [FeatureStdExtZca]>;
 def HasStdExtZcmop : Predicate<"Subtarget->hasStdExtZcmop()">,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 279d8a435a04c..6e97575c167cd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13543,6 +13543,7 @@ enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
 /// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
 /// sub | sub_vl -> vwsub(u) | vwsub(u)_w
 /// mul | mul_vl -> vwmul(u) | vwmul_su
+/// shl | shl_vl -> vwsll
 /// fadd -> vfwadd | vfwadd_w
 /// fsub -> vfwsub | vfwsub_w
 /// fmul -> vfwmul
@@ -13552,7 +13553,7 @@ enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
 /// NodeExtensionHelper for `a` and one for `b`.
 ///
 /// This class abstracts away how the extension is materialized and
-/// how its Mask, VL, number of users affect the combines.
+/// how its number of users affect the combines.
 ///
 /// In particular:
 /// - VWADD_W is conceptually == add(op0, sext(op1))
@@ -13576,15 +13577,6 @@ struct NodeExtensionHelper {
   /// This boolean captures whether we care if this operand would still be
   /// around after the folding happens.
   bool EnforceOneUse;
-  /// Records if this operand's mask needs to match the mask of the operation
-  /// that it will fold into.
-  bool CheckMask;
-  /// Value of the Mask for this operand.
-  /// It may be SDValue().
-  SDValue Mask;
-  /// Value of the vector length operand.
-  /// It may be SDValue().
-  SDValue VL;
   /// Original value that this NodeExtensionHelper represents.
   SDValue OrigOperand;
 
@@ -13605,7 +13597,8 @@ struct NodeExtensionHelper {
   /// Check if this instance represents a splat.
   bool isSplat() const {
-    return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL;
+    return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
+           OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
   }
 
   /// Get the extended opcode.
@@ -13649,6 +13642,8 @@ struct NodeExtensionHelper {
     case RISCVISD::VZEXT_VL:
     case RISCVISD::FP_EXTEND_VL:
       return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
+    case ISD::SPLAT_VECTOR:
+      return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
     case RISCVISD::VMV_V_X_VL:
       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
                          DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
@@ -13721,6 +13716,9 @@ struct NodeExtensionHelper {
     case ISD::MUL:
     case RISCVISD::MUL_VL:
       return RISCVISD::VWMULU_VL;
+    case ISD::SHL:
+    case RISCVISD::SHL_VL:
+      return RISCVISD::VWSLL_VL;
     default:
       llvm_unreachable("Unexpected opcode");
     }
@@ -13781,6 +13779,47 @@ struct NodeExtensionHelper {
   /// Check if this node needs to be fully folded or extended for all users.
   bool needToPromoteOtherUsers() const { return EnforceOneUse; }
 
+  void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
+                                      const RISCVSubtarget &Subtarget) {
+    unsigned Opc = OrigOperand.getOpcode();
+    MVT VT = OrigOperand.getSimpleValueType();
+
+    assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
+           "Unexpected Opcode");
+
+    // The passthru must be undef for tail agnostic.
+    if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
+      return;
+
+    // Get the scalar value.
+    SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
+                                          : OrigOperand.getOperand(1);
+
+    // See if we have enough sign bits or zero bits in the scalar to use a
+    // widening opcode by splatting to smaller element size.
+    unsigned EltBits = VT.getScalarSizeInBits();
+    unsigned ScalarBits = Op.getValueSizeInBits();
+    // Make sure we're getting all element bits from the scalar register.
+    // FIXME: Support implicit sign extension of vmv.v.x?
+    if (ScalarBits < EltBits)
+      return;
+
+    unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
+    // If the narrow type cannot be expressed with a legal VMV,
+    // this is not a valid candidate.
+    if (NarrowSize < 8)
+      return;
+
+    if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
+      SupportsSExt = true;
+
+    if (DAG.MaskedValueIsZero(Op,
+                              APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
+      SupportsZExt = true;
+
+    EnforceOneUse = false;
+  }
+
   /// Helper method to set the various fields of this struct based on the
   /// type of \p Root.
   void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
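Note: the narrowing test in fillUpExtensionSupportForSplat, stated in scalar terms: a splat into 2*N-bit elements can feed a widening op on an N-bit splat if the scalar survives the round trip through N bits. For N = 16:

    #include <cstdint>
    // ComputeMaxSignificantBits(Op) <= 16  <=>  sign-extending the low half
    // reproduces the value.
    static bool narrowableForSExt(int32_t X) { return X == int16_t(X); }
    // MaskedValueIsZero(Op, bits 16..31)  <=>  the top half is zero.
    static bool narrowableForZExt(int32_t X) { return (uint32_t(X) >> 16) == 0; }
    // X = -5    -> sext-narrowable only
    // X = 7     -> both
    // X = 70000 -> neither (it needs all 32 bits)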
@@ -13789,8 +13828,10 @@ struct NodeExtensionHelper {
     SupportsSExt = false;
     SupportsFPExt = false;
     EnforceOneUse = true;
-    CheckMask = true;
     unsigned Opc = OrigOperand.getOpcode();
+    // For the nodes we handle below, we end up using their inputs directly: see
+    // getSource(). However since they either don't have a passthru or we check
+    // that their passthru is undef, we can safely ignore their mask and VL.
     switch (Opc) {
     case ISD::ZERO_EXTEND:
     case ISD::SIGN_EXTEND: {
@@ -13806,72 +13847,29 @@ struct NodeExtensionHelper {
 
       SupportsZExt = Opc == ISD::ZERO_EXTEND;
       SupportsSExt = Opc == ISD::SIGN_EXTEND;
-
-      SDLoc DL(Root);
-      std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
       break;
     }
     case RISCVISD::VZEXT_VL:
       SupportsZExt = true;
-      Mask = OrigOperand.getOperand(1);
-      VL = OrigOperand.getOperand(2);
       break;
     case RISCVISD::VSEXT_VL:
       SupportsSExt = true;
-      Mask = OrigOperand.getOperand(1);
-      VL = OrigOperand.getOperand(2);
       break;
     case RISCVISD::FP_EXTEND_VL:
       SupportsFPExt = true;
-      Mask = OrigOperand.getOperand(1);
-      VL = OrigOperand.getOperand(2);
       break;
-    case RISCVISD::VMV_V_X_VL: {
-      // Historically, we didn't care about splat values not disappearing during
-      // combines.
-      EnforceOneUse = false;
-      CheckMask = false;
-      VL = OrigOperand.getOperand(2);
-
-      // The operand is a splat of a scalar.
-
-      // The pasthru must be undef for tail agnostic.
-      if (!OrigOperand.getOperand(0).isUndef())
-        break;
-
-      // Get the scalar value.
-      SDValue Op = OrigOperand.getOperand(1);
-
-      // See if we have enough sign bits or zero bits in the scalar to use a
-      // widening opcode by splatting to smaller element size.
-      MVT VT = Root->getSimpleValueType(0);
-      unsigned EltBits = VT.getScalarSizeInBits();
-      unsigned ScalarBits = Op.getValueSizeInBits();
-      // Make sure we're getting all element bits from the scalar register.
-      // FIXME: Support implicit sign extension of vmv.v.x?
-      if (ScalarBits < EltBits)
-        break;
-
-      unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
-      // If the narrow type cannot be expressed with a legal VMV,
-      // this is not a valid candidate.
-      if (NarrowSize < 8)
-        break;
-
-      if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
-        SupportsSExt = true;
-      if (DAG.MaskedValueIsZero(Op,
-                                APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
-        SupportsZExt = true;
+    case ISD::SPLAT_VECTOR:
+    case RISCVISD::VMV_V_X_VL:
+      fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
       break;
-    }
     default:
       break;
     }
   }
 
   /// Check if \p Root supports any extension folding combines.
-  static bool isSupportedRoot(const SDNode *Root) {
+  static bool isSupportedRoot(const SDNode *Root,
+                              const RISCVSubtarget &Subtarget) {
     switch (Root->getOpcode()) {
     case ISD::ADD:
     case ISD::SUB:
@@ -13897,6 +13895,11 @@ struct NodeExtensionHelper {
     case RISCVISD::VFWADD_W_VL:
     case RISCVISD::VFWSUB_W_VL:
       return true;
+    case ISD::SHL:
+      return Root->getValueType(0).isScalableVector() &&
+             Subtarget.hasStdExtZvbb();
+    case RISCVISD::SHL_VL:
+      return Subtarget.hasStdExtZvbb();
     default:
       return false;
     }
@@ -13905,8 +13908,9 @@ struct NodeExtensionHelper {
   /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
   NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
                       const RISCVSubtarget &Subtarget) {
-    assert(isSupportedRoot(Root) && "Trying to build an helper with an "
-                                    "unsupported root");
+    assert(isSupportedRoot(Root, Subtarget) &&
+           "Trying to build an helper with an "
+           "unsupported root");
     assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
     assert(DAG.getTargetLoweringInfo().isTypeLegal(Root->getValueType(0)));
     OrigOperand = Root->getOperand(OperandIdx);
@@ -13930,8 +13934,6 @@ struct NodeExtensionHelper {
                       Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWSUB_W_VL;
       SupportsFPExt = Opc == RISCVISD::VFWADD_W_VL || Opc == RISCVISD::VFWSUB_W_VL;
-      std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget);
-      CheckMask = true;
       // There's no existing extension here, so we don't have to worry about
       // making sure it gets removed.
       EnforceOneUse = false;
@@ -13944,26 +13946,17 @@ struct NodeExtensionHelper {
     }
   }
 
-  /// Check if this operand is compatible with the given vector length \p VL.
-  bool isVLCompatible(SDValue VL) const {
-    return this->VL != SDValue() && this->VL == VL;
-  }
-
-  /// Check if this operand is compatible with the given \p Mask.
-  bool isMaskCompatible(SDValue Mask) const {
-    return !CheckMask || (this->Mask != SDValue() && this->Mask == Mask);
-  }
-
   /// Helper function to get the Mask and VL from \p Root.
   static std::pair<SDValue, SDValue>
   getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
               const RISCVSubtarget &Subtarget) {
-    assert(isSupportedRoot(Root) && "Unexpected root");
+    assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
     switch (Root->getOpcode()) {
     case ISD::ADD:
     case ISD::SUB:
     case ISD::MUL:
-    case ISD::OR: {
+    case ISD::OR:
+    case ISD::SHL: {
       SDLoc DL(Root);
       MVT VT = Root->getSimpleValueType(0);
       return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
@@ -13973,13 +13966,6 @@ struct NodeExtensionHelper {
     }
   }
 
-  /// Check if the Mask and VL of this operand are compatible with \p Root.
-  bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG,
-                              const RISCVSubtarget &Subtarget) const {
-    auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
-    return isMaskCompatible(Mask) && isVLCompatible(VL);
-  }
-
   /// Helper function to check if \p N is commutative with respect to the
   /// foldings that are supported by this class.
   static bool isCommutative(const SDNode *N) {
@@ -14001,6 +13987,8 @@ struct NodeExtensionHelper {
     case RISCVISD::VWSUBU_W_VL:
     case RISCVISD::FSUB_VL:
     case RISCVISD::VFWSUB_W_VL:
+    case ISD::SHL:
+    case RISCVISD::SHL_VL:
       return false;
     default:
       llvm_unreachable("Unexpected opcode");
@@ -14054,6 +14042,7 @@ struct CombineResult {
     case ISD::SUB:
     case ISD::MUL:
     case ISD::OR:
+    case ISD::SHL:
       Merge = DAG.getUNDEF(Root->getValueType(0));
       break;
     }
@@ -14079,9 +14068,6 @@ canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
                                  const NodeExtensionHelper &RHS,
                                  uint8_t AllowExtMask, SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget) {
-  if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
-      !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
-    return std::nullopt;
   if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
     return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
                          Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
@@ -14090,7 +14076,7 @@ canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
     return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
                          Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
                          /*RHSExt=*/{ExtKind::SExt});
-  if ((AllowExtMask & ExtKind::FPExt) && RHS.SupportsFPExt)
+  if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
     return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
                          Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
                          /*RHSExt=*/{ExtKind::FPExt});
@@ -14120,9 +14106,6 @@ static std::optional
 canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
               const NodeExtensionHelper &RHS, SelectionDAG &DAG,
               const RISCVSubtarget &Subtarget) {
-  if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
-    return std::nullopt;
-
   if (RHS.SupportsFPExt)
     return CombineResult(
         NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
@@ -14190,9 +14173,6 @@ canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
 
   if (!LHS.SupportsSExt || !RHS.SupportsZExt)
     return std::nullopt;
-  if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
-      !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
-    return std::nullopt;
   return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
                        Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
                        /*RHSExt=*/{ExtKind::ZExt});
@@ -14224,6 +14204,11 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
     // mul -> vwmulsu
     Strategies.push_back(canFoldToVW_SU);
     break;
+  case ISD::SHL:
+  case RISCVISD::SHL_VL:
+    // shl -> vwsll
+    Strategies.push_back(canFoldToVWWithZEXT);
+    break;
  case RISCVISD::VWADD_W_VL:
  case RISCVISD::VWSUB_W_VL:
    // vwadd_w|vwsub_w -> vwadd|vwsub
@@ -14251,6 +14236,7 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
 /// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
 /// sub | sub_vl -> vwsub(u) | vwsub(u)_w
 /// mul | mul_vl -> vwmul(u) | vwmul_su
+/// shl | shl_vl -> vwsll
 /// fadd_vl ->  vfwadd | vfwadd_w
 /// fsub_vl ->  vfwsub | vfwsub_w
 /// fmul_vl ->  vfwmul
@@ -14265,7 +14251,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
   if (DCI.isBeforeLegalize())
     return SDValue();
 
-  if (!NodeExtensionHelper::isSupportedRoot(N))
+  if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
     return SDValue();
 
   SmallVector Worklist;
@@ -14276,7 +14262,7 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
   while (!Worklist.empty()) {
     SDNode *Root = Worklist.pop_back_val();
 
-    if (!NodeExtensionHelper::isSupportedRoot(Root))
+    if (!NodeExtensionHelper::isSupportedRoot(Root, Subtarget))
       return SDValue();
 
     NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
@@ -16371,9 +16357,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                       VPSN->getMemOperand(), IndexType);
     break;
   }
+  case RISCVISD::SHL_VL:
+    if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+      return V;
+    [[fallthrough]];
   case RISCVISD::SRA_VL:
-  case RISCVISD::SRL_VL:
-  case RISCVISD::SHL_VL: {
+  case RISCVISD::SRL_VL: {
     SDValue ShAmt = N->getOperand(1);
     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
@@ -16393,6 +16382,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     [[fallthrough]];
   case ISD::SRL:
   case ISD::SHL: {
+    if (N->getOpcode() == ISD::SHL) {
+      if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+        return V;
+    }
     SDValue ShAmt = N->getOperand(1);
     if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
       // We don't need the upper 32 bits of a 64-bit element for a shift amount.
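Note: the scalar identity behind the new shl folding: shifting a zero-extended narrow value left is exactly what vwsll computes, producing a 2*SEW result from SEW inputs (hence the Zvbb gate). For 16-bit elements widened to 32:

    #include <cstdint>
    // zext(a) << s in the wide type == vwsll per element (s < 32 assumed).
    static uint32_t vwsllModel(uint16_t A, unsigned S) {
      return uint32_t(A) << S;
    }

Only the zero-extended form is folded (canFoldToVWWithZEXT above); a sign-extended operand would change the bits shifted into the high half.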
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 52c794446af08..a5c8524d05cbc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -155,7 +155,7 @@ def OPC_BRANCH : RISCVOpcode<"BRANCH", 0b1100011>;
 def OPC_JALR   : RISCVOpcode<"JALR",   0b1100111>;
 def OPC_JAL    : RISCVOpcode<"JAL",    0b1101111>;
 def OPC_SYSTEM : RISCVOpcode<"SYSTEM", 0b1110011>;
-def OPC_OP_P   : RISCVOpcode<"OP_P",   0b1110111>;
+def OPC_OP_VE  : RISCVOpcode<"OP_VE",  0b1110111>;
 def OPC_CUSTOM_3 : RISCVOpcode<"CUSTOM_3", 0b1111011>;
 
 class RVInstCommonTSFlags);
   unsigned NF = RISCVRI::getNF(RegClass->TSFlags);
 
-  unsigned Opc;
-  unsigned SubRegIdx;
-  unsigned VVOpc, VIOpc;
-  switch (LMul) {
-  default:
-    llvm_unreachable("Impossible LMUL for vector register copy.");
-  case RISCVII::LMUL_1:
-    Opc = RISCV::VMV1R_V;
-    SubRegIdx = RISCV::sub_vrm1_0;
-    VVOpc = RISCV::PseudoVMV_V_V_M1;
-    VIOpc = RISCV::PseudoVMV_V_I_M1;
-    break;
-  case RISCVII::LMUL_2:
-    Opc = RISCV::VMV2R_V;
-    SubRegIdx = RISCV::sub_vrm2_0;
-    VVOpc = RISCV::PseudoVMV_V_V_M2;
-    VIOpc = RISCV::PseudoVMV_V_I_M2;
-    break;
-  case RISCVII::LMUL_4:
-    Opc = RISCV::VMV4R_V;
-    SubRegIdx = RISCV::sub_vrm4_0;
-    VVOpc = RISCV::PseudoVMV_V_V_M4;
-    VIOpc = RISCV::PseudoVMV_V_I_M4;
-    break;
-  case RISCVII::LMUL_8:
-    assert(NF == 1);
-    Opc = RISCV::VMV8R_V;
-    SubRegIdx = RISCV::sub_vrm1_0; // There is no sub_vrm8_0.
-    VVOpc = RISCV::PseudoVMV_V_V_M8;
-    VIOpc = RISCV::PseudoVMV_V_I_M8;
-    break;
-  }
-
-  bool UseVMV_V_V = false;
-  bool UseVMV_V_I = false;
-  MachineBasicBlock::const_iterator DefMBBI;
-  if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
-    UseVMV_V_V = true;
-    Opc = VVOpc;
-
-    if (DefMBBI->getOpcode() == VIOpc) {
-      UseVMV_V_I = true;
-      Opc = VIOpc;
+  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
+  uint16_t DstEncoding = TRI->getEncodingValue(DstReg);
+  auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(LMul);
+  assert(!Fractional && "It is impossible be fractional lmul here.");
+  unsigned NumRegs = NF * LMulVal;
+  bool ReversedCopy =
+      forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs);
+  if (ReversedCopy) {
+    // If the src and dest overlap when copying a tuple, we need to copy the
+    // registers in reverse.
+    SrcEncoding += NumRegs - 1;
+    DstEncoding += NumRegs - 1;
+  }
+
+  unsigned I = 0;
+  auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
+      -> std::tuple {
+    if (ReversedCopy) {
+      // For reversed copying, if there are enough aligned registers(8/4/2), we
+      // can do a larger copy(LMUL8/4/2).
+      // Besides, we have already known that DstEncoding is larger than
+      // SrcEncoding in forwardCopyWillClobberTuple, so the difference between
+      // DstEncoding and SrcEncoding should be >= LMUL value we try to use to
+      // avoid clobbering.
+      uint16_t Diff = DstEncoding - SrcEncoding;
+      if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
+          DstEncoding % 8 == 7)
+        return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
+                RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
+      if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
+          DstEncoding % 4 == 3)
+        return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
+                RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
+      if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
+          DstEncoding % 2 == 1)
+        return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
+                RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
+      // Or we should do LMUL1 copying.
+      return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
+              RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
     }
-  }
 
-  if (NF == 1) {
-    auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
-    if (UseVMV_V_V)
-      MIB.addReg(DstReg, RegState::Undef);
-    if (UseVMV_V_I)
-      MIB = MIB.add(DefMBBI->getOperand(2));
-    else
-      MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
-    if (UseVMV_V_V) {
-      const MCInstrDesc &Desc = DefMBBI->getDesc();
-      MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
-      MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
-      MIB.addImm(0);                                            // tu, mu
-      MIB.addReg(RISCV::VL, RegState::Implicit);
-      MIB.addReg(RISCV::VTYPE, RegState::Implicit);
+    // For forward copying, if source register encoding and destination register
+    // encoding are aligned to 8/4/2, we can do a LMUL8/4/2 copying.
+    if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
+      return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
+              RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
+    if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
+      return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
+              RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
+    if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
+      return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
+              RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
+    // Or we should do LMUL1 copying.
+    return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
+            RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
+  };
+  auto FindRegWithEncoding = [&TRI](const TargetRegisterClass &RegClass,
+                                    uint16_t Encoding) {
+    ArrayRef Regs = RegClass.getRegisters();
+    const auto *FoundReg = llvm::find_if(Regs, [&](MCPhysReg Reg) {
+      return TRI->getEncodingValue(Reg) == Encoding;
+    });
+    // We should be always able to find one valid register.
+    assert(FoundReg != Regs.end());
+    return *FoundReg;
+  };
+  while (I != NumRegs) {
+    // For non-segment copying, we only do this once as the registers are always
+    // aligned.
+    // For segment copying, we may do this several times. If the registers are
+    // aligned to larger LMUL, we can eliminate some copyings.
+    auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
+        GetCopyInfo(SrcEncoding, DstEncoding);
+    auto [NumCopied, _] = RISCVVType::decodeVLMUL(LMulCopied);
+
+    MachineBasicBlock::const_iterator DefMBBI;
+    if (LMul == LMulCopied &&
+        isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
+      Opc = VVOpc;
+      if (DefMBBI->getOpcode() == VIOpc)
+        Opc = VIOpc;
     }
-    return;
-  }
 
-  int I = 0, End = NF, Incr = 1;
-  unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
-  unsigned DstEncoding = TRI->getEncodingValue(DstReg);
-  unsigned LMulVal;
-  bool Fractional;
-  std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul);
-  assert(!Fractional && "It is impossible be fractional lmul here.");
-  if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
-    I = NF - 1;
-    End = -1;
-    Incr = -1;
-  }
-
-  for (; I != End; I += Incr) {
-    auto MIB =
-        BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I));
-    if (UseVMV_V_V)
-      MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I), RegState::Undef);
+    // Emit actual copying.
+    // For reversed copying, the encoding should be decreased.
+    MCRegister ActualSrcReg = FindRegWithEncoding(
+        RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
+    MCRegister ActualDstReg = FindRegWithEncoding(
+        RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);
+
+    auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
+    bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;
+    bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;
+    if (UseVMV)
+      MIB.addReg(ActualDstReg, RegState::Undef);
     if (UseVMV_V_I)
       MIB = MIB.add(DefMBBI->getOperand(2));
     else
-      MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
-                       getKillRegState(KillSrc));
-    if (UseVMV_V_V) {
+      MIB = MIB.addReg(ActualSrcReg, getKillRegState(KillSrc));
+    if (UseVMV) {
       const MCInstrDesc &Desc = DefMBBI->getDesc();
       MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc)));  // AVL
       MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
@@ -398,6 +412,11 @@ void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB,
       MIB.addReg(RISCV::VL, RegState::Implicit);
       MIB.addReg(RISCV::VTYPE, RegState::Implicit);
     }
+
+    // If we are copying reversely, we should decrease the encoding.
+    SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
+    DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
+    I += NumCopied;
   }
 }
 
@@ -504,90 +523,17 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   }
 
   // VR->VR copies.
-  if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_1);
-    return;
-  }
-
-  if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_2);
-    return;
-  }
-
-  if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_4);
-    return;
-  }
-
-  if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_8);
-    return;
-  }
-
-  if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_1,
-                      /*NF=*/2);
-    return;
-  }
-
-  if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_2,
-                      /*NF=*/2);
-    return;
-  }
-
-  if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_4,
-                      /*NF=*/2);
-    return;
-  }
-
-  if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_1,
-                      /*NF=*/3);
-    return;
-  }
-
-  if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_2,
-                      /*NF=*/3);
-    return;
-  }
-
-  if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_1,
-                      /*NF=*/4);
-    return;
-  }
-
-  if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_2,
-                      /*NF=*/4);
-    return;
-  }
-
-  if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_1,
-                      /*NF=*/5);
-    return;
-  }
-
-  if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_1,
-                      /*NF=*/6);
-    return;
-  }
-
-  if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_1,
-                      /*NF=*/7);
-    return;
-  }
-
-  if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
-    copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RISCVII::LMUL_1,
-                      /*NF=*/8);
-    return;
+  static const TargetRegisterClass *RVVRegClasses[] = {
+      &RISCV::VRRegClass,     &RISCV::VRM2RegClass,   &RISCV::VRM4RegClass,
+      &RISCV::VRM8RegClass,   &RISCV::VRN2M1RegClass, &RISCV::VRN2M2RegClass,
+      &RISCV::VRN2M4RegClass, &RISCV::VRN3M1RegClass, &RISCV::VRN3M2RegClass,
+      &RISCV::VRN4M1RegClass, &RISCV::VRN4M2RegClass, &RISCV::VRN5M1RegClass,
+      &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass};
+  for (const auto &RegClass : RVVRegClasses) {
+    if (RegClass->contains(DstReg, SrcReg)) {
+      copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
+      return;
+    }
   }
 
   llvm_unreachable("Impossible reg-to-reg copy");
@@ -3052,24 +2998,13 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
 #undef CASE_WIDEOP_OPCODE_LMULS
 #undef CASE_WIDEOP_OPCODE_COMMON
 
-void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
-                                           MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator II,
-                                           const DebugLoc &DL, Register DestReg,
-                                           int64_t Amount,
-                                           MachineInstr::MIFlag Flag) const {
-  assert(Amount > 0 && "There is no need to get VLEN scaled value.");
-  assert(Amount % 8 == 0 &&
-         "Reserve the stack by the multiple of one vector size.");
-
+void
RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, const DebugLoc &DL, + Register DestReg, uint32_t Amount, + MachineInstr::MIFlag Flag) const { MachineRegisterInfo &MRI = MF.getRegInfo(); - assert(isInt<32>(Amount / 8) && - "Expect the number of vector registers within 32-bits."); - uint32_t NumOfVReg = Amount / 8; - - BuildMI(MBB, II, DL, get(RISCV::PseudoReadVLENB), DestReg).setMIFlag(Flag); - if (llvm::has_single_bit(NumOfVReg)) { - uint32_t ShiftAmount = Log2_32(NumOfVReg); + if (llvm::has_single_bit(Amount)) { + uint32_t ShiftAmount = Log2_32(Amount); if (ShiftAmount == 0) return; BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) @@ -3077,23 +3012,23 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, .addImm(ShiftAmount) .setMIFlag(Flag); } else if (STI.hasStdExtZba() && - ((NumOfVReg % 3 == 0 && isPowerOf2_64(NumOfVReg / 3)) || - (NumOfVReg % 5 == 0 && isPowerOf2_64(NumOfVReg / 5)) || - (NumOfVReg % 9 == 0 && isPowerOf2_64(NumOfVReg / 9)))) { + ((Amount % 3 == 0 && isPowerOf2_64(Amount / 3)) || + (Amount % 5 == 0 && isPowerOf2_64(Amount / 5)) || + (Amount % 9 == 0 && isPowerOf2_64(Amount / 9)))) { // We can use Zba SHXADD+SLLI instructions for multiply in some cases. unsigned Opc; uint32_t ShiftAmount; - if (NumOfVReg % 9 == 0) { + if (Amount % 9 == 0) { Opc = RISCV::SH3ADD; - ShiftAmount = Log2_64(NumOfVReg / 9); - } else if (NumOfVReg % 5 == 0) { + ShiftAmount = Log2_64(Amount / 9); + } else if (Amount % 5 == 0) { Opc = RISCV::SH2ADD; - ShiftAmount = Log2_64(NumOfVReg / 5); - } else if (NumOfVReg % 3 == 0) { + ShiftAmount = Log2_64(Amount / 5); + } else if (Amount % 3 == 0) { Opc = RISCV::SH1ADD; - ShiftAmount = Log2_64(NumOfVReg / 3); + ShiftAmount = Log2_64(Amount / 3); } else { - llvm_unreachable("Unexpected number of vregs"); + llvm_unreachable("implied by if-clause"); } if (ShiftAmount) BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) @@ -3104,9 +3039,9 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, .addReg(DestReg, RegState::Kill) .addReg(DestReg) .setMIFlag(Flag); - } else if (llvm::has_single_bit(NumOfVReg - 1)) { + } else if (llvm::has_single_bit(Amount - 1)) { Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); - uint32_t ShiftAmount = Log2_32(NumOfVReg - 1); + uint32_t ShiftAmount = Log2_32(Amount - 1); BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) .addReg(DestReg) .addImm(ShiftAmount) @@ -3115,9 +3050,9 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, .addReg(ScaledRegister, RegState::Kill) .addReg(DestReg, RegState::Kill) .setMIFlag(Flag); - } else if (llvm::has_single_bit(NumOfVReg + 1)) { + } else if (llvm::has_single_bit(Amount + 1)) { Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass); - uint32_t ShiftAmount = Log2_32(NumOfVReg + 1); + uint32_t ShiftAmount = Log2_32(Amount + 1); BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister) .addReg(DestReg) .addImm(ShiftAmount) @@ -3128,7 +3063,7 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, .setMIFlag(Flag); } else if (STI.hasStdExtM() || STI.hasStdExtZmmul()) { Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass); - movImm(MBB, II, DL, N, NumOfVReg, Flag); + movImm(MBB, II, DL, N, Amount, Flag); BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg) .addReg(DestReg, RegState::Kill) .addReg(N, RegState::Kill) @@ -3136,14 +3071,14 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF, } else { Register Acc; uint32_t PrevShiftAmount = 0; - 
for (uint32_t ShiftAmount = 0; NumOfVReg >> ShiftAmount; ShiftAmount++) { - if (NumOfVReg & (1U << ShiftAmount)) { + for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) { + if (Amount & (1U << ShiftAmount)) { if (ShiftAmount) BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg) .addReg(DestReg, RegState::Kill) .addImm(ShiftAmount - PrevShiftAmount) .setMIFlag(Flag); - if (NumOfVReg >> (ShiftAmount + 1)) { + if (Amount >> (ShiftAmount + 1)) { // If we don't have an accumulator yet, create it and copy DestReg. if (!Acc) { Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index dd049fca05971..81d9c9db783c0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -69,7 +69,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { void copyPhysRegVector(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc, - RISCVII::VLMUL LMul, unsigned NF = 1) const; + const TargetRegisterClass *RegClass) const; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc) const override; @@ -229,10 +229,12 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { unsigned OpIdx, const TargetRegisterInfo *TRI) const override; - void getVLENFactoredAmount( - MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator II, const DebugLoc &DL, Register DestReg, - int64_t Amount, MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const; + /// Generate code to multiply the value in DestReg by Amt; handles all + /// the common optimizations for this idiom, and provides a fallback for + /// subtargets which don't support multiply instructions. 
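The common optimizations that doc comment alludes to can be sketched standalone. This is only an illustration of the selection order in mulImm above, assuming Amount > 0 as the callers guarantee; pickMulImmStrategy and its return strings are hypothetical names, and the real code emits MachineInstrs rather than labels:

#include <bit>
#include <cstdint>
#include <string>

std::string pickMulImmStrategy(uint32_t Amount, bool HasZba, bool HasMul) {
  if (std::has_single_bit(Amount))
    return "slli";                      // x * 2^k is a plain shift (k == 0 emits nothing)
  if (HasZba && ((Amount % 9 == 0 && std::has_single_bit(Amount / 9)) ||
                 (Amount % 5 == 0 && std::has_single_bit(Amount / 5)) ||
                 (Amount % 3 == 0 && std::has_single_bit(Amount / 3))))
    return "shXadd + slli";             // x * {3,5,9} * 2^k via sh1add/sh2add/sh3add
  if (std::has_single_bit(Amount - 1))
    return "slli + add";                // x * (2^k + 1)
  if (std::has_single_bit(Amount + 1))
    return "slli + sub";                // x * (2^k - 1)
  if (HasMul)
    return "movImm + mul";              // materialize Amount, then multiply
  return "shift-and-add accumulation";  // generic bit-by-bit fallback loop
}

For example, with Zba available, Amount == 12 decomposes as sh1add (x * 3) followed by slli by 2: two instructions instead of a materialize-and-multiply sequence.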
+ void mulImm(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator II, const DebugLoc &DL, + Register DestReg, uint32_t Amt, MachineInstr::MIFlag Flag) const; bool useMachineCombiner() const override { return true; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index fd8777fdc121c..2cfad7f7c0611 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1419,7 +1419,7 @@ defm : BccPat; defm : BccPat; defm : BccPat; -let Predicates = [HasStdExtC, OptForMinSize] in { +let Predicates = [HasStdExtCOrZca, OptForMinSize] in { def : BrccCompressOpt; def : BrccCompressOpt; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index af20b11514ca1..cf9a31c23a06e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -761,6 +761,11 @@ class GetVTypePredicates { true : [HasVInstructions]); } +class GetVTypeScalarPredicates { + list Predicates = !cond(!eq(vti.Scalar, bf16) : [HasStdExtZfbfmin], + true : []); +} + class VPseudoUSLoadNoMask : Pseudo<(outs RetClass:$rd), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index b4c6ba7e9723d..da761ae856706 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -1454,8 +1454,9 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { // Vector Splats //===----------------------------------------------------------------------===// -foreach fvti = AllFloatVectors in { - let Predicates = GetVTypePredicates.Predicates in +foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { + let Predicates = !listconcat(GetVTypePredicates.Predicates, + GetVTypeScalarPredicates.Predicates) in def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl undef, fvti.ScalarRegClass:$rs1, srcvalue)), (!cast("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 73d52d5ecafb5..b721dcd989885 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -2599,7 +2599,12 @@ foreach fvti = AllFloatVectors in { fvti.RegClass:$merge, fvti.RegClass:$rs2, (fvti.Scalar fvti.ScalarRegClass:$rs1), (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; + } +} +foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { + let Predicates = !listconcat(GetVTypePredicates.Predicates, + GetVTypeScalarPredicates.Predicates) in { // 13.16. Vector Floating-Point Move Instruction // If we're splatting fpimm0, use vmv.v.x vd, x0. def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td index 6fbfde5ef488c..dd13a07d606d0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td @@ -8,8 +8,6 @@ // // This file describes the RISC-V instructions from the standard Compressed // May-Be-Operations Extension (Zcmop). -// This version is still experimental as the 'Zcmop' extension hasn't been -// ratified yet. It is based on v0.2 of the specification. 
// //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td index f8ec099ca8197..6b26550a29026 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td @@ -8,8 +8,6 @@ // // This file describes the RISC-V instructions from the standard // May-Be-Operations Extension (Zimop). -// This version is still experimental as the 'Zimop' extension hasn't been -// ratified yet. It is based on v0.1 of the specification. // //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index c1facc790fc0f..575f9b41accb5 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -64,7 +64,7 @@ multiclass VROR_IV_V_X_I funct6> // op vd, vs2, vs1 class PALUVVNoVm funct6, RISCVVFormat opv, string opcodestr> : VALUVVNoVm { - let Inst{6-0} = OPC_OP_P.Value; + let Inst{6-0} = OPC_OP_VE.Value; } // op vd, vs2, vs1 @@ -74,13 +74,13 @@ class PALUVVNoVmTernary funct6, RISCVVFormat opv, string opcodestr> opcodestr, "$vd, $vs2, $vs1"> { let Constraints = "$vd = $vd_wb"; let vm = 1; - let Inst{6-0} = OPC_OP_P.Value; + let Inst{6-0} = OPC_OP_VE.Value; } // op vd, vs2, imm class PALUVINoVm funct6, string opcodestr, Operand optype> : VALUVINoVm { - let Inst{6-0} = OPC_OP_P.Value; + let Inst{6-0} = OPC_OP_VE.Value; let Inst{14-12} = OPMVV.Value; } @@ -91,7 +91,7 @@ class PALUVINoVmBinary funct6, string opcodestr, Operand optype> opcodestr, "$vd, $vs2, $imm"> { let Constraints = "$vd = $vd_wb"; let vm = 1; - let Inst{6-0} = OPC_OP_P.Value; + let Inst{6-0} = OPC_OP_VE.Value; let Inst{14-12} = OPMVV.Value; } @@ -103,7 +103,7 @@ class PALUVs2NoVmBinary funct6, bits<5> vs1, RISCVVFormat opv, opcodestr, "$vd, $vs2"> { let Constraints = "$vd = $vd_wb"; let vm = 1; - let Inst{6-0} = OPC_OP_P.Value; + let Inst{6-0} = OPC_OP_VE.Value; } multiclass VAES_MV_V_S funct6_vv, bits<6> funct6_vs, bits<5> vs1, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 11c3f2d57eb00..46e79272d60eb 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -195,10 +195,30 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, Register ScratchReg = DestReg; if (DestReg == SrcReg) ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); - TII->getVLENFactoredAmount(MF, MBB, II, DL, ScratchReg, ScalableValue, Flag); - BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg) - .addReg(SrcReg).addReg(ScratchReg, RegState::Kill) - .setMIFlag(Flag); + + assert(ScalableValue > 0 && "There is no need to get VLEN scaled value."); + assert(ScalableValue % 8 == 0 && + "Reserve the stack by the multiple of one vector size."); + assert(isInt<32>(ScalableValue / 8) && + "Expect the number of vector registers within 32-bits."); + uint32_t NumOfVReg = ScalableValue / 8; + BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg) + .setMIFlag(Flag); + + if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() && + (NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) { + unsigned Opc = NumOfVReg == 2 ? RISCV::SH1ADD : + (NumOfVReg == 4 ? 
RISCV::SH2ADD : RISCV::SH3ADD); + BuildMI(MBB, II, DL, TII->get(Opc), DestReg) + .addReg(ScratchReg, RegState::Kill) + .addReg(SrcReg, getKillRegState(KillSrcReg)) + .setMIFlag(Flag); + } else { + TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag); + BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg) + .addReg(SrcReg).addReg(ScratchReg, RegState::Kill) + .setMIFlag(Flag); + } SrcReg = DestReg; KillSrcReg = true; } @@ -239,6 +259,31 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, return; } + // Use shNadd if doing so lets us materialize a 12-bit immediate with a single + // instruction. This saves one instruction over the full lui/addi+add fallback + // path. We avoid anything which can be done with a single lui as it might + // be compressible. Note that the sh1add case is fully covered by the 2x addi + // case just above and is thus omitted. + if (ST.hasStdExtZba() && (Val & 0xFFF) != 0) { + unsigned Opc = 0; + if (isShiftedInt<12, 3>(Val)) { + Opc = RISCV::SH3ADD; + Val = Val >> 3; + } else if (isShiftedInt<12, 2>(Val)) { + Opc = RISCV::SH2ADD; + Val = Val >> 2; + } + if (Opc) { + Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass); + TII->movImm(MBB, II, DL, ScratchReg, Val, Flag); + BuildMI(MBB, II, DL, TII->get(Opc), DestReg) + .addReg(ScratchReg, RegState::Kill) + .addReg(SrcReg, getKillRegState(KillSrcReg)) + .setMIFlag(Flag); + return; + } + } + unsigned Opc = RISCV::ADD; if (Val < 0) { Val = -Val; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index 943c4f2627cf2..7e04e9154b524 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -14,12 +14,45 @@ #define LLVM_LIB_TARGET_RISCV_RISCVREGISTERINFO_H #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/TargetParser/RISCVTargetParser.h" #define GET_REGINFO_HEADER #include "RISCVGenRegisterInfo.inc" namespace llvm { +namespace RISCVRI { +enum { + // The IsVRegClass value of this RegisterClass. + IsVRegClassShift = 0, + IsVRegClassShiftMask = 0b1 << IsVRegClassShift, + // The VLMul value of this RegisterClass. This value is valid iff IsVRegClass + // is true. + VLMulShift = IsVRegClassShift + 1, + VLMulShiftMask = 0b111 << VLMulShift, + + // The NF value of this RegisterClass. This value is valid iff IsVRegClass is + // true. + NFShift = VLMulShift + 3, + NFShiftMask = 0b111 << NFShift, +}; + +/// \returns the IsVRegClass for the register class. +static inline bool isVRegClass(uint64_t TSFlags) { + return (TSFlags & IsVRegClassShiftMask) >> IsVRegClassShift; +} + +/// \returns the LMUL for the register class. +static inline RISCVII::VLMUL getLMul(uint64_t TSFlags) { + return static_cast((TSFlags & VLMulShiftMask) >> VLMulShift); +} + +/// \returns the NF for the register class. 
+static inline unsigned getNF(uint64_t TSFlags) { + return static_cast((TSFlags & NFShiftMask) >> NFShift) + 1; +} +} // namespace RISCVRI + struct RISCVRegisterInfo : public RISCVGenRegisterInfo { RISCVRegisterInfo(unsigned HwMode); @@ -116,30 +149,18 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { } static bool isVRRegClass(const TargetRegisterClass *RC) { - return RISCV::VRRegClass.hasSubClassEq(RC) || - RISCV::VRM2RegClass.hasSubClassEq(RC) || - RISCV::VRM4RegClass.hasSubClassEq(RC) || - RISCV::VRM8RegClass.hasSubClassEq(RC); + return RISCVRI::isVRegClass(RC->TSFlags) && + RISCVRI::getNF(RC->TSFlags) == 1; } static bool isVRNRegClass(const TargetRegisterClass *RC) { - return RISCV::VRN2M1RegClass.hasSubClassEq(RC) || - RISCV::VRN2M2RegClass.hasSubClassEq(RC) || - RISCV::VRN2M4RegClass.hasSubClassEq(RC) || - RISCV::VRN3M1RegClass.hasSubClassEq(RC) || - RISCV::VRN3M2RegClass.hasSubClassEq(RC) || - RISCV::VRN4M1RegClass.hasSubClassEq(RC) || - RISCV::VRN4M2RegClass.hasSubClassEq(RC) || - RISCV::VRN5M1RegClass.hasSubClassEq(RC) || - RISCV::VRN6M1RegClass.hasSubClassEq(RC) || - RISCV::VRN7M1RegClass.hasSubClassEq(RC) || - RISCV::VRN8M1RegClass.hasSubClassEq(RC); + return RISCVRI::isVRegClass(RC->TSFlags) && RISCVRI::getNF(RC->TSFlags) > 1; } static bool isRVVRegClass(const TargetRegisterClass *RC) { - return isVRRegClass(RC) || isVRNRegClass(RC); + return RISCVRI::isVRegClass(RC->TSFlags); } }; -} +} // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 90c4a7193ee33..316daf2763ca1 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -132,8 +132,21 @@ def XLenRI : RegInfoByHwMode< [RV32, RV64], [RegInfo<32,32,32>, RegInfo<64,64,64>]>; +class RISCVRegisterClass regTypes, int align, dag regList> + : RegisterClass<"RISCV", regTypes, align, regList> { + bit IsVRegClass = 0; + int VLMul = 1; + int NF = 1; + + let Size = !if(IsVRegClass, !mul(VLMul, NF, 64), 0); + + let TSFlags{0} = IsVRegClass; + let TSFlags{3-1} = !logtwo(VLMul); + let TSFlags{6-4} = !sub(NF, 1); +} + class GPRRegisterClass - : RegisterClass<"RISCV", [XLenVT, XLenFVT, i32], 32, regList> { + : RISCVRegisterClass<[XLenVT, XLenFVT, i32], 32, regList> { let RegInfos = XLenRI; } @@ -229,7 +242,7 @@ let RegAltNameIndices = [ABIRegAltName] in { // meaning caller-save regs are listed before callee-save. // We start by allocating argument registers in reverse order since they are // compressible. -def FPR16 : RegisterClass<"RISCV", [f16, bf16], 16, (add +def FPR16 : RISCVRegisterClass<[f16, bf16], 16, (add (sequence "F%u_H", 15, 10), // fa5-fa0 (sequence "F%u_H", 0, 7), // ft0-f7 (sequence "F%u_H", 16, 17), // fa6-fa7 @@ -238,7 +251,7 @@ def FPR16 : RegisterClass<"RISCV", [f16, bf16], 16, (add (sequence "F%u_H", 18, 27) // fs2-fs11 )>; -def FPR32 : RegisterClass<"RISCV", [f32], 32, (add +def FPR32 : RISCVRegisterClass<[f32], 32, (add (sequence "F%u_F", 15, 10), (sequence "F%u_F", 0, 7), (sequence "F%u_F", 16, 17), @@ -247,14 +260,14 @@ def FPR32 : RegisterClass<"RISCV", [f32], 32, (add (sequence "F%u_F", 18, 27) )>; -def FPR32C : RegisterClass<"RISCV", [f32], 32, (add +def FPR32C : RISCVRegisterClass<[f32], 32, (add (sequence "F%u_F", 15, 10), (sequence "F%u_F", 8, 9) )>; // The order of registers represents the preferred allocation sequence, // meaning caller-save regs are listed before callee-save. 
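Stepping back to the RISCVRI helpers above: a register class now carries three facts in TSFlags, namely bit 0 (is this an RVV class), bits 1-3 (log2 of LMUL), and bits 4-6 (NF - 1), matching the "let TSFlags{...}" assignments in RISCVRegisterClass. A self-contained check of that layout; packRVVTSFlags is a hypothetical stand-in for what TableGen computes:

#include <cassert>
#include <cstdint>

// Same bit layout as the RISCVRegisterClass TableGen definition:
//   TSFlags{0}   = IsVRegClass
//   TSFlags{3-1} = log2(VLMul)
//   TSFlags{6-4} = NF - 1
constexpr uint64_t packRVVTSFlags(bool IsVReg, unsigned Log2LMul, unsigned NF) {
  return (uint64_t)IsVReg | ((uint64_t)Log2LMul << 1) | ((uint64_t)(NF - 1) << 4);
}

int main() {
  // VRN4M2: a segment register class with LMUL = 2 (log2 = 1) and NF = 4.
  uint64_t TSFlags = packRVVTSFlags(true, 1, 4);
  assert((TSFlags & 0b1) != 0);                    // isVRegClass
  assert(((TSFlags & 0b1110) >> 1) == 1);          // getLMul -> LMUL_2
  assert((((TSFlags & 0b1110000) >> 4) + 1) == 4); // getNF
}

This packing is what lets isVRRegClass, isVRNRegClass, and isRVVRegClass below shrink from long hasSubClassEq chains to two flag tests.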
-def FPR64 : RegisterClass<"RISCV", [f64], 64, (add +def FPR64 : RISCVRegisterClass<[f64], 64, (add (sequence "F%u_D", 15, 10), (sequence "F%u_D", 0, 7), (sequence "F%u_D", 16, 17), @@ -263,7 +276,7 @@ def FPR64 : RegisterClass<"RISCV", [f64], 64, (add (sequence "F%u_D", 18, 27) )>; -def FPR64C : RegisterClass<"RISCV", [f64], 64, (add +def FPR64C : RISCVRegisterClass<[f64], 64, (add (sequence "F%u_D", 15, 10), (sequence "F%u_D", 8, 9) )>; @@ -464,8 +477,8 @@ let isConstant = true in def VLENB : RISCVReg<0, "vlenb">, DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>; -def VCSR : RegisterClass<"RISCV", [XLenVT], 32, - (add VTYPE, VL, VLENB)> { +def VCSR : RISCVRegisterClass<[XLenVT], 32, + (add VTYPE, VL, VLENB)> { let RegInfos = XLenRI; let isAllocatable = 0; } @@ -483,12 +496,11 @@ foreach m = [1, 2, 4] in { } class VReg regTypes, dag regList, int Vlmul> - : RegisterClass<"RISCV", - regTypes, - 64, // The maximum supported ELEN is 64. - regList> { - int VLMul = Vlmul; - int Size = !mul(Vlmul, 64); + : RISCVRegisterClass { + let IsVRegClass = 1; + let VLMul = Vlmul; } defvar VMaskVTs = [vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t, @@ -537,13 +549,11 @@ def VRM8 : VReg; def VRM8NoV0 : VReg; -def VMV0 : RegisterClass<"RISCV", VMaskVTs, 64, (add V0)> { - let Size = 64; -} +def VMV0 : VReg; let RegInfos = XLenRI in { -def GPRF16 : RegisterClass<"RISCV", [f16], 16, (add GPR)>; -def GPRF32 : RegisterClass<"RISCV", [f32], 32, (add GPR)>; +def GPRF16 : RISCVRegisterClass<[f16], 16, (add GPR)>; +def GPRF32 : RISCVRegisterClass<[f32], 32, (add GPR)>; } // RegInfos = XLenRI // Dummy zero register for use in the register pair containing X0 (as X1 is @@ -580,7 +590,7 @@ let RegAltNameIndices = [ABIRegAltName] in { let RegInfos = RegInfoByHwMode<[RV32, RV64], [RegInfo<64, 64, 32>, RegInfo<128, 128, 64>]>, DecoderMethod = "DecodeGPRPairRegisterClass" in -def GPRPair : RegisterClass<"RISCV", [XLenPairFVT], 64, (add +def GPRPair : RISCVRegisterClass<[XLenPairFVT], 64, (add X10_X11, X12_X13, X14_X15, X16_X17, X6_X7, X28_X29, X30_X31, @@ -594,13 +604,17 @@ def VM : VReg; foreach m = LMULList in { foreach nf = NFList.L in { - def "VRN" # nf # "M" # m # "NoV0": VReg<[untyped], - (add !cast("VN" # nf # "M" # m # "NoV0")), - !mul(nf, m)>; - def "VRN" # nf # "M" # m: VReg<[untyped], - (add !cast("VN" # nf # "M" # m # "NoV0"), - !cast("VN" # nf # "M" # m # "V0")), - !mul(nf, m)>; + let NF = nf in { + def "VRN" # nf # "M" # m # "NoV0" + : VReg<[untyped], + (add !cast("VN" # nf # "M" # m # "NoV0")), + m>; + def "VRN" # nf # "M" # m + : VReg<[untyped], + (add !cast("VN" # nf # "M" # m # "NoV0"), + !cast("VN" # nf # "M" # m # "V0")), + m>; + } } } diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index ba108912d9340..85f8f5f654fe7 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -254,6 +254,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; + bool isTargetAndroid() const { return getTargetTriple().isAndroid(); } bool isTargetFuchsia() const { return getTargetTriple().isOSFuchsia(); } bool useConstantPoolForLargeInts() const; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 27a4d78d6df77..bc9756c5e6dda 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -245,6 
+245,10 @@ RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, return TTI::TCC_Free; } +bool RISCVTTIImpl::hasActiveVectorLength(unsigned, Type *DataTy, Align) const { + return ST->hasVInstructions(); +} + TargetTransformInfo::PopcntSupportKind RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); @@ -325,7 +329,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args) { + ArrayRef Args, + const Instruction *CxtI) { Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp); std::pair LT = getTypeLegalizationCost(Tp); @@ -859,6 +864,21 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, } break; } + case Intrinsic::get_active_lane_mask: { + if (ST->hasVInstructions()) { + Type *ExpRetTy = VectorType::get( + ICA.getArgTypes()[0], cast(RetTy)->getElementCount()); + auto LT = getTypeLegalizationCost(ExpRetTy); + + // vid.v v8 // considered hoisted + // vsaddu.vx v8, v8, a0 + // vmsltu.vx v0, v8, a1 + return LT.first * + getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX}, + LT.second, CostKind); + } + break; + } // TODO: add more intrinsic case Intrinsic::experimental_stepvector: { auto LT = getTypeLegalizationCost(RetTy); @@ -1322,10 +1342,14 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // vmandn.mm v8, v8, v9 // vmand.mm v9, v0, v9 // vmor.mm v0, v9, v8 - return LT.first * 3; + return LT.first * + getRISCVInstructionCost( + {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM}, + LT.second, CostKind); } // vselect and max/min are supported natively. - return LT.first * 1; + return LT.first * + getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind); } if (ValTy->getScalarSizeInBits() == 1) { @@ -1334,13 +1358,21 @@ InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, // vmandn.mm v8, v8, v9 // vmand.mm v9, v0, v9 // vmor.mm v0, v9, v8 - return LT.first * 5; + MVT InterimVT = LT.second.changeVectorElementType(MVT::i8); + return LT.first * + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI}, + InterimVT, CostKind) + + LT.first * getRISCVInstructionCost( + {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM}, + LT.second, CostKind); } // vmv.v.x v10, a0 // vmsne.vi v0, v10, 0 // vmerge.vvm v8, v9, v8, v0 - return LT.first * 3; + return LT.first * getRISCVInstructionCost( + {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM}, + LT.second, CostKind); } if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index ac32aea4ce2b8..e0c0e6517b6f1 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -78,6 +78,22 @@ class RISCVTTIImpl : public BasicTTIImplBase { const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); + /// \name EVL Support for predicated vectorization. + /// Whether the target supports the %evl parameter of VP intrinsic efficiently + /// in hardware, for the given opcode and type/alignment. (see LLVM Language + /// Reference - "Vector Predication Intrinsics", + /// https://llvm.org/docs/LangRef.html#vector-predication-intrinsics and + /// "IR-level VP intrinsics", + /// https://llvm.org/docs/Proposals/VectorPredication.html#ir-level-vp-intrinsics). 
+ /// \param Opcode the opcode of the instruction checked for predicated version + /// support. + /// \param DataType the type of the instruction with the \p Opcode checked for + /// prediction support. + /// \param Alignment the alignment for memory access operation checked for + /// predicated version support. + bool hasActiveVectorLength(unsigned Opcode, Type *DataType, + Align Alignment) const; + TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth); bool shouldExpandReduction(const IntrinsicInst *II) const; @@ -130,7 +146,8 @@ class RISCVTTIImpl : public BasicTTIImplBase { ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = std::nullopt); + ArrayRef Args = std::nullopt, + const Instruction *CxtI = nullptr); InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp index 4eee8062f2824..1de4616fd5b77 100644 --- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp @@ -43,6 +43,8 @@ using namespace llvm; namespace { class SPIRVAsmPrinter : public AsmPrinter { + unsigned NLabels = 0; + public: explicit SPIRVAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) @@ -109,10 +111,9 @@ void SPIRVAsmPrinter::emitEndOfAsmFile(Module &M) { uint32_t DecSPIRVVersion = ST->getSPIRVVersion(); uint32_t Major = DecSPIRVVersion / 10; uint32_t Minor = DecSPIRVVersion - Major * 10; - // TODO: calculate Bound more carefully from maximum used register number, - // accounting for generated OpLabels and other related instructions if - // needed. - unsigned Bound = 2 * (ST->getBound() + 1); + // Bound is an approximation that accounts for the maximum used register + // number and number of generated OpLabels + unsigned Bound = 2 * (ST->getBound() + 1) + NLabels; bool FlagToRestore = OutStreamer->getUseAssemblerInfoForParsing(); OutStreamer->setUseAssemblerInfoForParsing(true); if (MCAssembler *Asm = OutStreamer->getAssemblerPtr()) @@ -158,6 +159,7 @@ void SPIRVAsmPrinter::emitOpLabel(const MachineBasicBlock &MBB) { LabelInst.setOpcode(SPIRV::OpLabel); LabelInst.addOperand(MCOperand::createReg(MAI->getOrCreateMBBRegister(MBB))); outputMCInst(LabelInst); + ++NLabels; } void SPIRVAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 674bf394fc927..55090c9ec271b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -460,15 +460,36 @@ void SPIRVEmitIntrinsics::preprocessCompositeConstants(IRBuilder<> &B) { } Instruction *SPIRVEmitIntrinsics::visitSwitchInst(SwitchInst &I) { - IRBuilder<> B(I.getParent()); + BasicBlock *ParentBB = I.getParent(); + IRBuilder<> B(ParentBB); + B.SetInsertPoint(&I); SmallVector Args; - for (auto &Op : I.operands()) - if (Op.get()->getType()->isSized()) + SmallVector BBCases; + for (auto &Op : I.operands()) { + if (Op.get()->getType()->isSized()) { Args.push_back(Op); - B.SetInsertPoint(&I); - B.CreateIntrinsic(Intrinsic::spv_switch, {I.getOperand(0)->getType()}, - {Args}); - return &I; + } else if (BasicBlock *BB = dyn_cast(Op.get())) { + BBCases.push_back(BB); + Args.push_back(BlockAddress::get(BB->getParent(), BB)); + } else { + report_fatal_error("Unexpected switch operand"); + } + } + CallInst *NewI = B.CreateIntrinsic(Intrinsic::spv_switch, + 
{I.getOperand(0)->getType()}, {Args}); + // Remove the switch to avoid its unneeded and undesirable unwrapping into + // branches and conditionals. + I.replaceAllUsesWith(NewI); + I.eraseFromParent(); + // Insert an artificial and temporary instruction to preserve a valid CFG; + // it will be removed after the IR translation pass. + B.SetInsertPoint(ParentBB); + IndirectBrInst *BrI = B.CreateIndirectBr( + Constant::getNullValue(PointerType::getUnqual(ParentBB->getContext())), + BBCases.size()); + for (BasicBlock *BBCase : BBCases) + BrI->addDestination(BBCase); + return BrI; } Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) { diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index ac799374adce8..37f575e884ef4 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -284,7 +284,12 @@ class SPIRVGlobalRegistry { // Return the VReg holding the result of the given OpTypeXXX instruction. Register getSPIRVTypeID(const SPIRVType *SpirvType) const; - void setCurrentFunc(MachineFunction &MF) { CurMF = &MF; } + // Return the previous value of the current machine function. + MachineFunction *setCurrentFunc(MachineFunction &MF) { + MachineFunction *Ret = CurMF; + CurMF = &MF; + return Ret; + } // Whether the given VReg has an OpTypeXXX instruction mapped to it with the // given opcode (e.g. OpTypeFloat). diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp index d450078d793fb..8db54c74f2369 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp @@ -160,12 +160,15 @@ void validateFunCallMachineDef(const SPIRVSubtarget &STI, : nullptr; if (DefElemType) { const Type *DefElemTy = GR.getTypeForSPIRVType(DefElemType); - // Switch GR context to the call site instead of the (default) definition - // side - GR.setCurrentFunc(*FunCall.getParent()->getParent()); + // validatePtrTypes() works in the context of the call site. + // When we process records of forward calls, we need to switch context to + // the (forward) call site and then restore it back to the current + // machine function. + MachineFunction *CurMF = + GR.setCurrentFunc(*FunCall.getParent()->getParent()); validatePtrTypes(STI, CallMRI, GR, FunCall, OpIdx, DefElemType, DefElemTy); - GR.setCurrentFunc(*FunDef->getParent()->getParent()); + GR.setCurrentFunc(*CurMF); } } } @@ -215,6 +218,11 @@ void validateAccessChain(const SPIRVSubtarget &STI, MachineRegisterInfo *MRI, // TODO: the logic of inserting additional bitcasts is to be moved // to pre-IRTranslation passes eventually void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const { + // finalizeLowering() is called twice (see GlobalISel/InstructionSelect.cpp); + // we'd like to avoid the needless second processing pass. 
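The guard that follows memoizes already-lowered functions in the new ProcessedMF set. The idiom in isolation, as a minimal sketch (Func, Processed, and processOnce are illustrative stand-ins, not SPIRV backend names):

#include <cassert>
#include <set>

struct Func {}; // stand-in for MachineFunction

// Once-per-object guard in the style of the ProcessedMF set used below:
// the first call does the work, repeated calls bail out early.
static std::set<const Func *> Processed;

bool processOnce(const Func &F) {
  if (Processed.find(&F) != Processed.end())
    return false; // already lowered, skip the needless second pass
  // ... the real finalizeLowering work would happen here ...
  Processed.insert(&F);
  return true;
}

int main() {
  Func F;
  assert(processOnce(F));
  assert(!processOnce(F)); // second invocation is a no-op
}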
+ if (ProcessedMF.find(&MF) != ProcessedMF.end()) + return; + MachineRegisterInfo *MRI = &MF.getRegInfo(); SPIRVGlobalRegistry &GR = *STI.getSPIRVGlobalRegistry(); GR.setCurrentFunc(MF); @@ -302,5 +310,6 @@ void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const { } } } + ProcessedMF.insert(&MF); TargetLowering::finalizeLowering(MF); } diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h index b01571bfc1eeb..8c1de7d97d1a3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.h +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.h @@ -16,6 +16,7 @@ #include "SPIRVGlobalRegistry.h" #include "llvm/CodeGen/TargetLowering.h" +#include namespace llvm { class SPIRVSubtarget; @@ -23,6 +24,9 @@ class SPIRVSubtarget; class SPIRVTargetLowering : public TargetLowering { const SPIRVSubtarget &STI; + // Record of already processed machine functions + mutable std::set ProcessedMF; + public: explicit SPIRVTargetLowering(const TargetMachine &TM, const SPIRVSubtarget &ST) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 49749b5634530..45a70da7f8690 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -432,10 +432,10 @@ bool SPIRVInstructionSelector::spvSelect(Register ResVReg, case TargetOpcode::G_FMINNUM: case TargetOpcode::G_FMINIMUM: - return selectExtInst(ResVReg, ResType, I, CL::fmin, GL::FMin); + return selectExtInst(ResVReg, ResType, I, CL::fmin, GL::NMin); case TargetOpcode::G_FMAXNUM: case TargetOpcode::G_FMAXIMUM: - return selectExtInst(ResVReg, ResType, I, CL::fmax, GL::FMax); + return selectExtInst(ResVReg, ResType, I, CL::fmax, GL::NMax); case TargetOpcode::G_FCOPYSIGN: return selectExtInst(ResVReg, ResType, I, CL::copysign); diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index b133f0ae85de2..7e155a36aadbc 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -438,186 +438,75 @@ static void processInstrsWithTypeFolding(MachineFunction &MF, } } +// Find basic blocks of the switch and replace registers in spv_switch() by its +// MBB equivalent. static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR, MachineIRBuilder MIB) { - // Before IRTranslator pass, calls to spv_switch intrinsic are inserted before - // each switch instruction. IRTranslator lowers switches to G_ICMP + G_BRCOND - // + G_BR triples. A switch with two cases may be transformed to this MIR - // sequence: - // - // intrinsic(@llvm.spv.switch), %CmpReg, %Const0, %Const1 - // %Dst0 = G_ICMP intpred(eq), %CmpReg, %Const0 - // G_BRCOND %Dst0, %bb.2 - // G_BR %bb.5 - // bb.5.entry: - // %Dst1 = G_ICMP intpred(eq), %CmpReg, %Const1 - // G_BRCOND %Dst1, %bb.3 - // G_BR %bb.4 - // bb.2.sw.bb: - // ... - // bb.3.sw.bb1: - // ... - // bb.4.sw.epilog: - // ... - // - // Sometimes (in case of range-compare switches), additional G_SUBs - // instructions are inserted before G_ICMPs. Those need to be additionally - // processed. - // - // This function modifies spv_switch call's operands to include destination - // MBBs (default and for each constant value). - // - // At the end, the function removes redundant [G_SUB] + G_ICMP + G_BRCOND + - // G_BR sequences. - - MachineRegisterInfo &MRI = MF.getRegInfo(); - - // Collect spv_switches and G_ICMPs across all MBBs in MF. 
- std::vector RelevantInsts; - - // Collect redundant MIs from [G_SUB] + G_ICMP + G_BRCOND + G_BR sequences. - // After updating spv_switches, the instructions can be removed. - std::vector PostUpdateArtifacts; - - // Temporary set of compare registers. G_SUBs and G_ICMPs relating to - // spv_switch use these registers. - DenseSet CompareRegs; + DenseMap BB2MBB; + SmallVector>> + Switches; for (MachineBasicBlock &MBB : MF) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + BB2MBB[MBB.getBasicBlock()] = &MBB; for (MachineInstr &MI : MBB) { + if (!isSpvIntrinsic(MI, Intrinsic::spv_switch)) + continue; // Calls to spv_switch intrinsics representing IR switches. - if (isSpvIntrinsic(MI, Intrinsic::spv_switch)) { - assert(MI.getOperand(1).isReg()); - CompareRegs.insert(MI.getOperand(1).getReg()); - RelevantInsts.push_back(&MI); - } - - // G_SUBs coming from range-compare switch lowering. G_SUBs are found - // after spv_switch but before G_ICMP. - if (MI.getOpcode() == TargetOpcode::G_SUB && MI.getOperand(1).isReg() && - CompareRegs.contains(MI.getOperand(1).getReg())) { - assert(MI.getOperand(0).isReg() && MI.getOperand(1).isReg()); - Register Dst = MI.getOperand(0).getReg(); - CompareRegs.insert(Dst); - PostUpdateArtifacts.push_back(&MI); - } - - // G_ICMPs relating to switches. - if (MI.getOpcode() == TargetOpcode::G_ICMP && MI.getOperand(2).isReg() && - CompareRegs.contains(MI.getOperand(2).getReg())) { - Register Dst = MI.getOperand(0).getReg(); - RelevantInsts.push_back(&MI); - PostUpdateArtifacts.push_back(&MI); - MachineInstr *CBr = MRI.use_begin(Dst)->getParent(); - assert(CBr->getOpcode() == SPIRV::G_BRCOND); - PostUpdateArtifacts.push_back(CBr); - MachineInstr *Br = CBr->getNextNode(); - assert(Br->getOpcode() == SPIRV::G_BR); - PostUpdateArtifacts.push_back(Br); + SmallVector NewOps; + for (unsigned i = 2; i < MI.getNumOperands(); ++i) { + Register Reg = MI.getOperand(i).getReg(); + if (i % 2 == 1) { + MachineInstr *ConstInstr = getDefInstrMaybeConstant(Reg, &MRI); + NewOps.push_back(ConstInstr); + } else { + MachineInstr *BuildMBB = MRI.getVRegDef(Reg); + assert(BuildMBB && + BuildMBB->getOpcode() == TargetOpcode::G_BLOCK_ADDR && + BuildMBB->getOperand(1).isBlockAddress() && + BuildMBB->getOperand(1).getBlockAddress()); + NewOps.push_back(BuildMBB); + } } + Switches.push_back(std::make_pair(&MI, NewOps)); } } - // Update each spv_switch with destination MBBs. - for (auto i = RelevantInsts.begin(); i != RelevantInsts.end(); i++) { - if (!isSpvIntrinsic(**i, Intrinsic::spv_switch)) - continue; - - // Currently considered spv_switch. - MachineInstr *Switch = *i; - // Set the first successor as default MBB to support empty switches. - MachineBasicBlock *DefaultMBB = *Switch->getParent()->succ_begin(); - // Container for mapping values to MMBs. - SmallDenseMap ValuesToMBBs; - - // Walk all G_ICMPs to collect ValuesToMBBs. Start at currently considered - // spv_switch (i) and break at any spv_switch with the same compare - // register (indicating we are back at the same scope). 
- Register CompareReg = Switch->getOperand(1).getReg(); - for (auto j = i + 1; j != RelevantInsts.end(); j++) { - if (isSpvIntrinsic(**j, Intrinsic::spv_switch) && - (*j)->getOperand(1).getReg() == CompareReg) - break; - - if (!((*j)->getOpcode() == TargetOpcode::G_ICMP && - (*j)->getOperand(2).getReg() == CompareReg)) - continue; - - MachineInstr *ICMP = *j; - Register Dst = ICMP->getOperand(0).getReg(); - MachineOperand &PredOp = ICMP->getOperand(1); - const auto CC = static_cast(PredOp.getPredicate()); - (void)CC; - assert((CC == CmpInst::ICMP_EQ || CC == CmpInst::ICMP_ULE) && - MRI.hasOneUse(Dst) && MRI.hasOneDef(CompareReg)); - uint64_t Value = getIConstVal(ICMP->getOperand(3).getReg(), &MRI); - MachineInstr *CBr = MRI.use_begin(Dst)->getParent(); - assert(CBr->getOpcode() == SPIRV::G_BRCOND && CBr->getOperand(1).isMBB()); - MachineBasicBlock *MBB = CBr->getOperand(1).getMBB(); - - // Map switch case Value to target MBB. - ValuesToMBBs[Value] = MBB; - - // Add target MBB as successor to the switch's MBB. - Switch->getParent()->addSuccessor(MBB); - - // The next MI is always G_BR to either the next case or the default. - MachineInstr *NextMI = CBr->getNextNode(); - assert(NextMI->getOpcode() == SPIRV::G_BR && - NextMI->getOperand(0).isMBB()); - MachineBasicBlock *NextMBB = NextMI->getOperand(0).getMBB(); - // Default MBB does not begin with G_ICMP using spv_switch compare - // register. - if (NextMBB->front().getOpcode() != SPIRV::G_ICMP || - (NextMBB->front().getOperand(2).isReg() && - NextMBB->front().getOperand(2).getReg() != CompareReg)) { - // Set default MBB and add it as successor to the switch's MBB. - DefaultMBB = NextMBB; - Switch->getParent()->addSuccessor(DefaultMBB); + SmallPtrSet ToEraseMI; + for (auto &SwIt : Switches) { + MachineInstr &MI = *SwIt.first; + SmallVector &Ins = SwIt.second; + SmallVector NewOps; + for (unsigned i = 0; i < Ins.size(); ++i) { + if (Ins[i]->getOpcode() == TargetOpcode::G_BLOCK_ADDR) { + BasicBlock *CaseBB = + Ins[i]->getOperand(1).getBlockAddress()->getBasicBlock(); + auto It = BB2MBB.find(CaseBB); + if (It == BB2MBB.end()) + report_fatal_error("cannot find a machine basic block by a basic " + "block in a switch statement"); + NewOps.push_back(MachineOperand::CreateMBB(It->second)); + MI.getParent()->addSuccessor(It->second); + ToEraseMI.insert(Ins[i]); + } else { + NewOps.push_back( + MachineOperand::CreateCImm(Ins[i]->getOperand(1).getCImm())); } } - - // Modify considered spv_switch operands using collected Values and - // MBBs. - SmallVector Values; - SmallVector MBBs; - for (unsigned k = 2; k < Switch->getNumExplicitOperands(); k++) { - Register CReg = Switch->getOperand(k).getReg(); - uint64_t Val = getIConstVal(CReg, &MRI); - MachineInstr *ConstInstr = getDefInstrMaybeConstant(CReg, &MRI); - if (!ValuesToMBBs[Val]) - continue; - - Values.push_back(ConstInstr->getOperand(1).getCImm()); - MBBs.push_back(ValuesToMBBs[Val]); - } - - for (unsigned k = Switch->getNumExplicitOperands() - 1; k > 1; k--) - Switch->removeOperand(k); - - Switch->addOperand(MachineOperand::CreateMBB(DefaultMBB)); - for (unsigned k = 0; k < Values.size(); k++) { - Switch->addOperand(MachineOperand::CreateCImm(Values[k])); - Switch->addOperand(MachineOperand::CreateMBB(MBBs[k])); - } - } - - for (MachineInstr *MI : PostUpdateArtifacts) { - MachineBasicBlock *ParentMBB = MI->getParent(); - MI->eraseFromParent(); - // If G_ICMP + G_BRCOND + G_BR were the only MIs in MBB, erase this MBB. 
It - // can be safely assumed, there are no breaks or phis directing into this - // MBB. However, we need to remove this MBB from the CFG graph. MBBs must be - // erased top-down. - if (ParentMBB->empty()) { - while (!ParentMBB->pred_empty()) - (*ParentMBB->pred_begin())->removeSuccessor(ParentMBB); - - while (!ParentMBB->succ_empty()) - ParentMBB->removeSuccessor(ParentMBB->succ_begin()); - - ParentMBB->eraseFromParent(); + for (unsigned i = MI.getNumOperands() - 1; i > 1; --i) + MI.removeOperand(i); + for (auto &MO : NewOps) + MI.addOperand(MO); + if (MachineInstr *Next = MI.getNextNode()) { + if (isSpvIntrinsic(*Next, Intrinsic::spv_track_constant)) { + ToEraseMI.insert(Next); + Next = MI.getNextNode(); + } + if (Next && Next->getOpcode() == TargetOpcode::G_BRINDIRECT) + ToEraseMI.insert(Next); } } + for (MachineInstr *BlockAddrI : ToEraseMI) + BlockAddrI->eraseFromParent(); } static bool isImplicitFallthrough(MachineBasicBlock &MBB) { diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index 215a8ea831904..6855471840e9d 100644 --- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -434,6 +434,50 @@ bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, default: // See if this is a generic print operand return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); + case 'L': // Low order register of a twin word register operand + case 'H': // High order register of a twin word register operand + { + const SparcSubtarget &Subtarget = MF->getSubtarget(); + const MachineOperand &MO = MI->getOperand(OpNo); + const SparcRegisterInfo *RegisterInfo = Subtarget.getRegisterInfo(); + Register MOReg = MO.getReg(); + + Register HiReg, LoReg; + if (!SP::IntPairRegClass.contains(MOReg)) { + // If we aren't given a register pair already, find out which pair it + // belongs to. Note that here, the specified register operand, which + // refers to the high part of the twinword, needs to be an even-numbered + // register. 
+ MOReg = RegisterInfo->getMatchingSuperReg(MOReg, SP::sub_even, + &SP::IntPairRegClass); + if (!MOReg) { + SMLoc Loc; + OutContext.reportError( + Loc, "Hi part of pair should point to an even-numbered register"); + OutContext.reportError( + Loc, "(note that in some cases it might be necessary to manually " + "bind the input/output registers instead of relying on " + "automatic allocation)"); + return true; + } + } + + HiReg = RegisterInfo->getSubReg(MOReg, SP::sub_even); + LoReg = RegisterInfo->getSubReg(MOReg, SP::sub_odd); + + Register Reg; + switch (ExtraCode[0]) { + case 'L': + Reg = LoReg; + break; + case 'H': + Reg = HiReg; + break; + } + + O << '%' << SparcInstPrinter::getRegisterName(Reg); + return false; + } case 'f': case 'r': break; diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 5bdbaf47064d6..17e534f405c08 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -601,12 +601,10 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost( Args, CxtI); } -InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, - VectorType *Tp, - ArrayRef Mask, - TTI::TargetCostKind CostKind, - int Index, VectorType *SubTp, - ArrayRef Args) { +InstructionCost SystemZTTIImpl::getShuffleCost( + TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, + TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, + ArrayRef Args, const Instruction *CxtI) { Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp); if (ST->hasVector()) { unsigned NumVectors = getNumVectorRegs(Tp); diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 2cccdf6d17dac..1d824d353d8fb 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -95,7 +95,8 @@ class SystemZTTIImpl : public BasicTTIImplBase { ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = std::nullopt); + ArrayRef Args = std::nullopt, + const Instruction *CxtI = nullptr); unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy); unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy); unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6f65344215c02..010f9c30ab403 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3992,7 +3992,6 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl &Ops, EVT VT = Src.getValueType(); EVT SubVT = Sub.getValueType(); - // TODO - Handle more general insert_subvector chains. if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2)) { // insert_subvector(undef, x, lo) if (Idx == 0 && Src.isUndef()) { @@ -4005,8 +4004,19 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl &Ops, if (Src.getOpcode() == ISD::INSERT_SUBVECTOR && Src.getOperand(1).getValueType() == SubVT && isNullConstant(Src.getOperand(2))) { - Ops.push_back(Src.getOperand(1)); - Ops.push_back(Sub); + // Attempt to recurse into inner (matching) concats. 
+ SDValue Lo = Src.getOperand(1); + SDValue Hi = Sub; + SmallVector LoOps, HiOps; + if (collectConcatOps(Lo.getNode(), LoOps, DAG) && + collectConcatOps(Hi.getNode(), HiOps, DAG) && + LoOps.size() == HiOps.size()) { + Ops.append(LoOps); + Ops.append(HiOps); + return true; + } + Ops.push_back(Lo); + Ops.push_back(Hi); return true; } // insert_subvector(x, extract_subvector(x, lo), hi) @@ -44710,6 +44720,17 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, } } + // Attempt to fold extract(trunc(x),c) -> trunc(extract(x,c)). + if (CIdx && InputVector.getOpcode() == ISD::TRUNCATE) { + SDValue TruncSrc = InputVector.getOperand(0); + EVT TruncSVT = TruncSrc.getValueType().getScalarType(); + if (DCI.isBeforeLegalize() && TLI.isTypeLegal(TruncSVT)) { + SDValue NewExt = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TruncSVT, TruncSrc, EltIdx); + return DAG.getAnyExtOrTrunc(NewExt, dl, VT); + } + } + return SDValue(); } @@ -54105,7 +54126,8 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform // the optimization here. - if (DAG.SignBitIsZero(Op0)) { + SDNodeFlags Flags = N->getFlags(); + if (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0)) { if (IsStrict) return DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(N), {VT, MVT::Other}, {N->getOperand(0), Op0}); @@ -56147,6 +56169,13 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::VFPEXT, DL, VT, InVec.getOperand(0)); } } + // v4i32 CVTPS2DQ(v4f32). + if (InOpcode == ISD::FP_TO_SINT && VT == MVT::v4i32) { + SDValue Src = InVec.getOperand(0); + if (Src.getValueType().getScalarType() == MVT::f32) + return DAG.getNode(InOpcode, DL, VT, + extractSubVector(Src, IdxVal, DAG, DL, SizeInBits)); + } if (IdxVal == 0 && (ISD::isExtOpcode(InOpcode) || ISD::isExtVecInRegOpcode(InOpcode)) && (SizeInBits == 128 || SizeInBits == 256) && diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index ce3b6af4cab47..270dd32c7235a 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -2161,6 +2161,11 @@ multiclass EFLAGSDefiningPats { def : Pat<(X86sub_flag_nocf GR16:$src, -1), (!cast(INC16r#suffix) GR16:$src)>; def : Pat<(X86sub_flag_nocf GR32:$src, -1), (!cast(INC32r#suffix) GR32:$src)>; def : Pat<(X86sub_flag_nocf GR64:$src, -1), (!cast(INC64r#suffix) GR64:$src)>; + + def : Pat<(or_is_add GR8:$src, 1), (!cast(INC8r#suffix) GR8:$src)>; + def : Pat<(or_is_add GR16:$src, 1), (!cast(INC16r#suffix) GR16:$src)>; + def : Pat<(or_is_add GR32:$src, 1), (!cast(INC32r#suffix) GR32:$src)>; + def : Pat<(or_is_add GR64:$src, 1), (!cast(INC64r#suffix) GR64:$src)>; } } diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index f24334312c116..a5b2e4895eded 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6276,10 +6276,10 @@ static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget, case X86::RCPSSm: case X86::RCPSSr_Int: case X86::RCPSSm_Int: - case X86::ROUNDSDr: - case X86::ROUNDSDm: - case X86::ROUNDSSr: - case X86::ROUNDSSm: + case X86::ROUNDSDri: + case X86::ROUNDSDmi: + case X86::ROUNDSSri: + case X86::ROUNDSSmi: case X86::RSQRTSSr: case X86::RSQRTSSm: case X86::RSQRTSSr_Int: @@ -6778,14 +6778,14 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum, case X86::VRCPSSr_Int: 
case X86::VRCPSSm: case X86::VRCPSSm_Int: - case X86::VROUNDSDr: - case X86::VROUNDSDm: - case X86::VROUNDSDr_Int: - case X86::VROUNDSDm_Int: - case X86::VROUNDSSr: - case X86::VROUNDSSm: - case X86::VROUNDSSr_Int: - case X86::VROUNDSSm_Int: + case X86::VROUNDSDri: + case X86::VROUNDSDmi: + case X86::VROUNDSDri_Int: + case X86::VROUNDSDmi_Int: + case X86::VROUNDSSri: + case X86::VROUNDSSmi: + case X86::VROUNDSSri_Int: + case X86::VROUNDSSmi_Int: case X86::VRSQRTSSr: case X86::VRSQRTSSr_Int: case X86::VRSQRTSSm: @@ -7516,8 +7516,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VRCPSSr_Int: case X86::RSQRTSSr_Int: case X86::VRSQRTSSr_Int: - case X86::ROUNDSSr_Int: - case X86::VROUNDSSr_Int: + case X86::ROUNDSSri_Int: + case X86::VROUNDSSri_Int: case X86::COMISSrr_Int: case X86::VCOMISSrr_Int: case X86::VCOMISSZrr_Int: @@ -7685,8 +7685,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VCVTSD2USI64Zrr_Int: case X86::VCVTTSD2USIZrr_Int: case X86::VCVTTSD2USI64Zrr_Int: - case X86::ROUNDSDr_Int: - case X86::VROUNDSDr_Int: + case X86::ROUNDSDri_Int: + case X86::VROUNDSDri_Int: case X86::COMISDrr_Int: case X86::VCOMISDrr_Int: case X86::VCOMISDZrr_Int: diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 69d45366a1dbc..2b391b60f2c9b 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5475,35 +5475,35 @@ multiclass sse41_fp_unop_p opc, string OpcodeStr, // Intrinsic operation, reg. // Vector intrinsic operation, reg let Uses = [MXCSR], mayRaiseFPException = 1 in { - def r : SS4AIi8, - Sched<[sched]>; + def ri : SS4AIi8, + Sched<[sched]>; // Vector intrinsic operation, mem - def m : SS4AIi8, - Sched<[sched.Folded]>; + def mi : SS4AIi8, + Sched<[sched.Folded]>; } } multiclass avx_fp_unop_rm opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { - def SSr : SS4AIi8, Sched<[sched]>; let mayLoad = 1 in - def SSm : SS4AIi8, Sched<[sched]>; let mayLoad = 1 in - def SDm : SS4AIi8 opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { - def SSr : SS4AIi8, Sched<[sched]>; + def SSri : SS4AIi8, Sched<[sched]>; let mayLoad = 1 in - def SSm : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; + def SSmi : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, hasSideEffects = 0 let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in { - def SDr : SS4AIi8, Sched<[sched]>; + def SDri : SS4AIi8, Sched<[sched]>; let mayLoad = 1 in - def SDm : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; + def SDmi : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedDouble, hasSideEffects = 0 } } -multiclass sse41_fp_binop_s opcss, bits<8> opcsd, - string OpcodeStr, X86FoldableSchedWrite sched, - ValueType VT32, ValueType VT64, - SDNode OpNode, bit Is2Addr = 1> { +multiclass sse41_fp_unop_s_int opcss, bits<8> opcsd, + string OpcodeStr, X86FoldableSchedWrite sched, + ValueType VT32, ValueType VT64, + SDNode OpNode, bit Is2Addr = 1> { let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle in { - def SSr_Int : SS4AIi8, Sched<[sched]>; - def SSm_Int : SS4AIi8, Sched<[sched]>; - def SDm_Int : SS4AIi8, - VEX, VVVV, VEX_LIG, WIG, SIMD_EXC; + defm 
VROUND : sse41_fp_unop_s_int<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl, + v4f32, v2f64, X86RndScales, 0>, + VEX, VVVV, VEX_LIG, WIG, SIMD_EXC; defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>, VEX, VVVV, VEX_LIG, WIG, SIMD_EXC; } let Predicates = [UseAVX] in { def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2), - (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>; + (VROUNDSSri (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>; def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2), - (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>; + (VROUNDSDri (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>; } let Predicates = [UseAVX, OptForSize] in { def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2), - (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; + (VROUNDSSmi (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2), - (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; + (VROUNDSDmi (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; } let ExeDomain = SSEPackedSingle in @@ -5667,21 +5667,21 @@ defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64, defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>; let Constraints = "$src1 = $dst" in -defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl, - v4f32, v2f64, X86RndScales>; +defm ROUND : sse41_fp_unop_s_int<0x0A, 0x0B, "round", SchedWriteFRnd.Scl, + v4f32, v2f64, X86RndScales>; let Predicates = [UseSSE41] in { def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2), - (ROUNDSSr FR32:$src1, timm:$src2)>; + (ROUNDSSri FR32:$src1, timm:$src2)>; def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2), - (ROUNDSDr FR64:$src1, timm:$src2)>; + (ROUNDSDri FR64:$src1, timm:$src2)>; } let Predicates = [UseSSE41, OptForSize] in { def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2), - (ROUNDSSm addr:$src1, timm:$src2)>; + (ROUNDSSmi addr:$src1, timm:$src2)>; def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2), - (ROUNDSDm addr:$src1, timm:$src2)>; + (ROUNDSDmi addr:$src1, timm:$src2)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index e2330ff34c175..e6510be6b9afd 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -980,8 +980,10 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI, SmallString<256> Code; unsigned MinSize = MI.getOperand(0).getImm(); - if (NextMI != MI.getParent()->end()) { + if (NextMI != MI.getParent()->end() && !NextMI->isInlineAsm()) { // Lower the next MachineInstr to find its byte size. + // If the next instruction is inline assembly, we skip lowering it for now, + // and assume we should always generate NOPs. MCInst MCI; MCIL.Lower(&*NextMI, MCI); diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index b3ee7a82b9174..63ac91028ac93 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -329,11 +329,9 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : BWWriteResPair; // Floating point fabs/fchs. -defm : X86WriteRes; // Floating point rounding. -defm : X86WriteRes; // Floating point rounding (YMM/ZMM). +defm : BWWriteResPair; // Floating point rounding. +defm : BWWriteResPair; // Floating point rounding (YMM/ZMM). 
defm : X86WriteResPairUnsupported; -defm : X86WriteRes; -defm : X86WriteRes; defm : BWWriteResPair; // Floating point and/or/xor logicals. defm : BWWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). defm : X86WriteResPairUnsupported; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 6c301a3cd3425..516dc62f1b6d5 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -329,12 +329,9 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : HWWriteResPair; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td index 88bb9ad8f1d74..ff3fe32be1851 100644 --- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td +++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td @@ -2290,8 +2290,8 @@ def SPRWriteResGroup218 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { let Latency = 15; let NumMicroOps = 3; } -def : InstRW<[SPRWriteResGroup218], (instregex "^(V?)ROUNDP(D|S)m$")>; -def : InstRW<[SPRWriteResGroup218, ReadAfterVecXLd], (instregex "^(V?)ROUNDS(D|S)m((_Int)?)$", +def : InstRW<[SPRWriteResGroup218], (instregex "^(V?)ROUNDP(D|S)mi$")>; +def : InstRW<[SPRWriteResGroup218, ReadAfterVecXLd], (instregex "^(V?)ROUNDS(D|S)mi((_Int)?)$", "^VRNDSCALEP(D|S)Z128rm(bi|ik)$", "^VRNDSCALEP(D|S)Z128rmbik(z?)$", "^VRNDSCALEP(D|S)Z128rmi((kz)?)$", @@ -2303,13 +2303,13 @@ def SPRWriteResGroup219 : SchedWriteRes<[SPRPort00_01]> { let Latency = 8; let NumMicroOps = 2; } -def : InstRW<[SPRWriteResGroup219], (instregex "^(V?)ROUND(PD|SS)r$", - "^(V?)ROUND(PS|SD)r$", - "^(V?)ROUNDS(D|S)r_Int$", +def : InstRW<[SPRWriteResGroup219], (instregex "^(V?)ROUND(PD|SS)ri$", + "^(V?)ROUND(PS|SD)ri$", + "^(V?)ROUNDS(D|S)ri_Int$", "^VRNDSCALEP(D|S)Z(128|256)rri((k|kz)?)$", "^VRNDSCALES(D|S)Zr$", "^VRNDSCALES(D|S)Zr(b?)_Int((k|kz)?)$", - "^VROUNDP(D|S)Yr$")>; + "^VROUNDP(D|S)Yri$")>; def SPRWriteResGroup220 : SchedWriteRes<[SPRPort00_06]> { let ReleaseAtCycles = [2]; @@ -3737,7 +3737,7 @@ def SPRWriteResGroup390 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup390], (instregex "^VF(C?)MADDCPHZ(128|256)m(b?)$", - "^VROUNDP(D|S)Ym$")>; + "^VROUNDP(D|S)Ymi$")>; def : InstRW<[SPRWriteResGroup390, ReadAfterVecXLd], (instregex "^VF(C?)MADDCSHZm$", "^VF(C?)MULCPHZ128rm(b?)$", "^VF(C?)MULCSHZrm$", diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td index d90c8bd284eb0..2e87d5262818c 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver3.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td @@ -52,7 +52,7 @@ def Znver3Model : SchedMachineModel { int VecLoadLatency = 7; // Latency of a simple store operation. int StoreLatency = 1; - // FIXME + // FIXME: let HighLatency = 25; // FIXME: any better choice? // AMD SOG 19h, 2.8 Optimizing Branching // The branch misprediction penalty is in the range from 11 to 18 cycles, @@ -193,11 +193,11 @@ def Zn3Int : ProcResGroup<[Zn3ALU0, Zn3AGU0, Zn3BRU0, // scheduler 0 // <...>, and six FPU pipes. 
// Agner, 22.10 Floating point execution pipes // There are six floating point/vector execution pipes, -def Zn3FPP0 : ProcResource<1>; -def Zn3FPP1 : ProcResource<1>; -def Zn3FPP2 : ProcResource<1>; -def Zn3FPP3 : ProcResource<1>; -def Zn3FPP45 : ProcResource<2>; +def Zn3FP0 : ProcResource<1>; +def Zn3FP1 : ProcResource<1>; +def Zn3FP2 : ProcResource<1>; +def Zn3FP3 : ProcResource<1>; +def Zn3FP45 : ProcResource<2>; // // Execution Units @@ -205,63 +205,63 @@ def Zn3FPP45 : ProcResource<2>; // AMD SOG 19h, 2.11.1 Floating Point Execution Resources // (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ) -defvar Zn3FPFMul0 = Zn3FPP0; -defvar Zn3FPFMul1 = Zn3FPP1; +defvar Zn3FPFMul0 = Zn3FP0; +defvar Zn3FPFMul1 = Zn3FP1; // (v)FADD* -defvar Zn3FPFAdd0 = Zn3FPP2; -defvar Zn3FPFAdd1 = Zn3FPP3; +defvar Zn3FPFAdd0 = Zn3FP2; +defvar Zn3FPFAdd1 = Zn3FP3; // All convert operations except pack/unpack -defvar Zn3FPFCvt0 = Zn3FPP2; -defvar Zn3FPFCvt1 = Zn3FPP3; +defvar Zn3FPFCvt0 = Zn3FP2; +defvar Zn3FPFCvt1 = Zn3FP3; // All Divide and Square Root except Reciprocal Approximation // AMD SOG 19h, 2.11.1 Floating Point Execution Resources // FDIV unit can support 2 simultaneous operations in flight // even though it occupies a single pipe. // FIXME: BufferSize=2 ? -defvar Zn3FPFDiv = Zn3FPP1; +defvar Zn3FPFDiv = Zn3FP1; // Moves and Logical operations on Floating Point Data Types -defvar Zn3FPFMisc0 = Zn3FPP0; -defvar Zn3FPFMisc1 = Zn3FPP1; -defvar Zn3FPFMisc2 = Zn3FPP2; -defvar Zn3FPFMisc3 = Zn3FPP3; +defvar Zn3FPFMisc0 = Zn3FP0; +defvar Zn3FPFMisc1 = Zn3FP1; +defvar Zn3FPFMisc2 = Zn3FP2; +defvar Zn3FPFMisc3 = Zn3FP3; // Integer Adds, Subtracts, and Compares // Some complex VADD operations are not available in all pipes. -defvar Zn3FPVAdd0 = Zn3FPP0; -defvar Zn3FPVAdd1 = Zn3FPP1; -defvar Zn3FPVAdd2 = Zn3FPP2; -defvar Zn3FPVAdd3 = Zn3FPP3; +defvar Zn3FPVAdd0 = Zn3FP0; +defvar Zn3FPVAdd1 = Zn3FP1; +defvar Zn3FPVAdd2 = Zn3FP2; +defvar Zn3FPVAdd3 = Zn3FP3; // Integer Multiplies, SAD, Blendvb -defvar Zn3FPVMul0 = Zn3FPP0; -defvar Zn3FPVMul1 = Zn3FPP3; +defvar Zn3FPVMul0 = Zn3FP0; +defvar Zn3FPVMul1 = Zn3FP3; // Data Shuffles, Packs, Unpacks, Permute // Some complex shuffle operations are only available in pipe1. -defvar Zn3FPVShuf = Zn3FPP1; -defvar Zn3FPVShufAux = Zn3FPP2; +defvar Zn3FPVShuf = Zn3FP1; +defvar Zn3FPVShufAux = Zn3FP2; // Bit Shift Left/Right operations -defvar Zn3FPVShift0 = Zn3FPP1; -defvar Zn3FPVShift1 = Zn3FPP2; +defvar Zn3FPVShift0 = Zn3FP1; +defvar Zn3FPVShift1 = Zn3FP2; // Moves and Logical operations on Packed Integer Data Types -defvar Zn3FPVMisc0 = Zn3FPP0; -defvar Zn3FPVMisc1 = Zn3FPP1; -defvar Zn3FPVMisc2 = Zn3FPP2; -defvar Zn3FPVMisc3 = Zn3FPP3; +defvar Zn3FPVMisc0 = Zn3FP0; +defvar Zn3FPVMisc1 = Zn3FP1; +defvar Zn3FPVMisc2 = Zn3FP2; +defvar Zn3FPVMisc3 = Zn3FP3; // *AES* -defvar Zn3FPAES0 = Zn3FPP0; -defvar Zn3FPAES1 = Zn3FPP1; +defvar Zn3FPAES0 = Zn3FP0; +defvar Zn3FPAES1 = Zn3FP1; // *CLM* -defvar Zn3FPCLM0 = Zn3FPP0; -defvar Zn3FPCLM1 = Zn3FPP1; +defvar Zn3FPCLM0 = Zn3FP0; +defvar Zn3FPCLM1 = Zn3FP1; // Execution pipeline grouping //===----------------------------------------------------------------------===// @@ -269,7 +269,7 @@ defvar Zn3FPCLM1 = Zn3FPP1; // AMD SOG 19h, 2.11 Floating-Point Unit // Stores and floating point to general purpose register transfer // have 2 dedicated pipelines (pipe 5 and 6). 
-def Zn3FPU0123 : ProcResGroup<[Zn3FPP0, Zn3FPP1, Zn3FPP2, Zn3FPP3]>; +def Zn3FPU0123 : ProcResGroup<[Zn3FP0, Zn3FP1, Zn3FP2, Zn3FP3]>; // (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ) def Zn3FPFMul01 : ProcResGroup<[Zn3FPFMul0, Zn3FPFMul1]>; @@ -293,12 +293,12 @@ def Zn3FPFMisc12 : ProcResGroup<[Zn3FPFMisc1, Zn3FPFMisc2]>; // AMD SOG 19h, 2.11 Floating-Point Unit // Stores and floating point to general purpose register transfer // have 2 dedicated pipelines (pipe 5 and 6). -defvar Zn3FPLd01 = Zn3FPP45; +defvar Zn3FPLd01 = Zn3FP45; // AMD SOG 19h, 2.11 Floating-Point Unit // Note that FP stores are supported on two pipelines, // but throughput is limited to one per cycle. -let Super = Zn3FPP45 in +let Super = Zn3FP45 in def Zn3FPSt : ProcResource<1>; // Integer Adds, Subtracts, and Compares @@ -345,8 +345,8 @@ def Zn3FpPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 1], [0, 1, 1], // AMD SOG 19h, 2.11 Floating-Point Unit // <...> the scheduler can issue 1 micro op per cycle for each pipe. // FIXME: those are two separate schedulers, not a single big one. -def Zn3FP : ProcResGroup<[Zn3FPP0, Zn3FPP2, /*Zn3FPP4,*/ // scheduler 0 - Zn3FPP1, Zn3FPP3, Zn3FPP45 /*Zn3FPP5*/ // scheduler 1 +def Zn3FP : ProcResGroup<[Zn3FP0, Zn3FP2, /*Zn3FP4,*/ // scheduler 0 + Zn3FP1, Zn3FP3, Zn3FP45 /*Zn3FP5*/ // scheduler 1 ]> { let BufferSize = !mul(2, 32); } @@ -838,9 +838,9 @@ defm : Zn3WriteResInt; defm : Zn3WriteResIntPair; // FIXME: not from llvm-exegesis // Floating point. This covers both scalar and vector operations. -defm : Zn3WriteResInt; -defm : Zn3WriteResInt; -defm : Zn3WriteResInt; +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; defm : Zn3WriteResXMM; defm : Zn3WriteResXMM; defm : Zn3WriteResYMM; diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp index 7c630a2b0da08..0bff1884933d8 100644 --- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -80,13 +80,13 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( uint64_t Val = ValC->getZExtValue() & 255; // If the value is a constant, then we can potentially use larger sets. - if (Alignment > Align(2)) { + if (Alignment >= Align(4)) { // DWORD aligned AVT = MVT::i32; ValReg = X86::EAX; Val = (Val << 8) | Val; Val = (Val << 16) | Val; - if (Subtarget.is64Bit() && Alignment > Align(8)) { // QWORD aligned + if (Subtarget.is64Bit() && Alignment >= Align(8)) { // QWORD aligned AVT = MVT::i64; ValReg = X86::RAX; Val = (Val << 32) | Val; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 2092675a69246..b466624e13348 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1468,12 +1468,10 @@ X86TTIImpl::getAltInstrCost(VectorType *VecTy, unsigned Opcode0, return InstructionCost::getInvalid(); } -InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, - VectorType *BaseTp, - ArrayRef Mask, - TTI::TargetCostKind CostKind, - int Index, VectorType *SubTp, - ArrayRef Args) { +InstructionCost X86TTIImpl::getShuffleCost( + TTI::ShuffleKind Kind, VectorType *BaseTp, ArrayRef Mask, + TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, + ArrayRef Args, const Instruction *CxtI) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are widened to type v4i32. 
std::pair LT = getTypeLegalizationCost(BaseTp); @@ -2664,7 +2662,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, }; static const TypeConversionCostTblEntry AVXConversionTbl[] = { - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, @@ -5698,8 +5696,10 @@ int X86TTIImpl::getScatterOverhead() const { // Return an average cost of Gather / Scatter instruction, maybe improved later. // FIXME: Add TargetCostKind support. -InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, - const Value *Ptr, Align Alignment, +InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, + TTI::TargetCostKind CostKind, + Type *SrcVTy, const Value *Ptr, + Align Alignment, unsigned AddressSpace) { assert(isa(SrcVTy) && "Unexpected type in getGSVectorCost"); @@ -5750,8 +5750,8 @@ InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, // Handle splitting of vector of pointers auto *SplitSrcTy = FixedVectorType::get(SrcVTy->getScalarType(), VF / SplitFactor); - return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment, - AddressSpace); + return SplitFactor * getGSVectorCost(Opcode, CostKind, SplitSrcTy, Ptr, + Alignment, AddressSpace); } // The gather / scatter cost is given by Intel architects. It is a rough @@ -5771,15 +5771,15 @@ InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, /// VariableMask - The mask is non-constant at compile time. /// Alignment - Alignment for one element. /// AddressSpace - pointer[s] address space. -/// -/// FIXME: Add TargetCostKind support. -InstructionCost X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy, - bool VariableMask, Align Alignment, +/// TODO: Remove this and use getCommonMaskedMemoryOpCost directly. 
+InstructionCost X86TTIImpl::getGSScalarCost(unsigned Opcode, + TTI::TargetCostKind CostKind, + Type *SrcVTy, bool VariableMask, + Align Alignment, unsigned AddressSpace) { Type *ScalarTy = SrcVTy->getScalarType(); unsigned VF = cast(SrcVTy)->getNumElements(); APInt DemandedElts = APInt::getAllOnes(VF); - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; InstructionCost MaskUnpackCost = 0; if (VariableMask) { @@ -5848,10 +5848,11 @@ InstructionCost X86TTIImpl::getGatherScatterOpCost( (!isLegalMaskedScatter(SrcVTy, Align(Alignment)) || forceScalarizeMaskedScatter(cast(SrcVTy), Align(Alignment))))) - return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment, + return getGSScalarCost(Opcode, CostKind, SrcVTy, VariableMask, Alignment, AddressSpace); - return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace); + return getGSVectorCost(Opcode, CostKind, SrcVTy, Ptr, Alignment, + AddressSpace); } bool X86TTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 23035f655098a..8ef9b4f86ffd7 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -150,7 +150,8 @@ class X86TTIImpl : public BasicTTIImplBase { ArrayRef Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, - ArrayRef Args = std::nullopt); + ArrayRef Args = std::nullopt, + const Instruction *CxtI = nullptr); InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, @@ -294,12 +295,12 @@ class X86TTIImpl : public BasicTTIImplBase { private: bool supportsGather() const; - InstructionCost getGSScalarCost(unsigned Opcode, Type *DataTy, - bool VariableMask, Align Alignment, - unsigned AddressSpace); - InstructionCost getGSVectorCost(unsigned Opcode, Type *DataTy, - const Value *Ptr, Align Alignment, - unsigned AddressSpace); + InstructionCost getGSScalarCost(unsigned Opcode, TTI::TargetCostKind CostKind, + Type *DataTy, bool VariableMask, + Align Alignment, unsigned AddressSpace); + InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind, + Type *DataTy, const Value *Ptr, + Align Alignment, unsigned AddressSpace); int getGatherOverhead() const; int getScatterOverhead() const; diff --git a/llvm/lib/TextAPI/InterfaceFile.cpp b/llvm/lib/TextAPI/InterfaceFile.cpp index 9979df92674cc..79694c90370f4 100644 --- a/llvm/lib/TextAPI/InterfaceFile.cpp +++ b/llvm/lib/TextAPI/InterfaceFile.cpp @@ -54,7 +54,7 @@ void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) { ParentUmbrellas.emplace(Iter, Target_, std::string(Parent)); } -void InterfaceFile::addRPath(const Target &InputTarget, StringRef RPath) { +void InterfaceFile::addRPath(StringRef RPath, const Target &InputTarget) { if (RPath.empty()) return; using RPathEntryT = const std::pair; @@ -198,9 +198,9 @@ InterfaceFile::merge(const InterfaceFile *O) const { IF->addReexportedLibrary(Lib.getInstallName(), Target); for (const auto &[Target, Path] : rpaths()) - IF->addRPath(Target, Path); + IF->addRPath(Path, Target); for (const auto &[Target, Path] : O->rpaths()) - IF->addRPath(Target, Path); + IF->addRPath(Path, Target); for (const auto *Sym : symbols()) { IF->addSymbol(Sym->getKind(), Sym->getName(), Sym->targets(), @@ -319,7 +319,7 @@ InterfaceFile::extract(Architecture Arch) const { for (const auto &It : rpaths()) if (It.first.Arch == Arch) - IF->addRPath(It.first, It.second); + 
IF->addRPath(It.second, It.first); for (const auto &Lib : allowableClients()) for (const auto &Target : Lib.targets()) diff --git a/llvm/lib/TextAPI/TextStubV5.cpp b/llvm/lib/TextAPI/TextStubV5.cpp index d96981035dddc..b072c0b5d69d0 100644 --- a/llvm/lib/TextAPI/TextStubV5.cpp +++ b/llvm/lib/TextAPI/TextStubV5.cpp @@ -672,7 +672,7 @@ Expected parseToInterfaceFile(const Object *File) { F->addParentUmbrella(Target, Lib); for (auto &[Path, Targets] : RPaths) for (auto Target : Targets) - F->addRPath(Target, Path); + F->addRPath(Path, Target); for (auto &[Targets, Symbols] : Exports) for (auto &Sym : Symbols) F->addSymbol(Sym.Kind, Sym.Name, Targets, Sym.Flags); diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 4e4a499977669..b9d84d583f495 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -526,25 +526,30 @@ class CallsiteContextGraph { /// Create a clone of Edge's callee and move Edge to that new callee node, /// performing the necessary context id and allocation type updates. /// If callee's caller edge iterator is supplied, it is updated when removing - /// the edge from that list. + /// the edge from that list. If ContextIdsToMove is non-empty, only that + /// subset of Edge's ids are moved to an edge to the new callee. ContextNode * moveEdgeToNewCalleeClone(const std::shared_ptr &Edge, - EdgeIter *CallerEdgeI = nullptr); + EdgeIter *CallerEdgeI = nullptr, + DenseSet ContextIdsToMove = {}); /// Change the callee of Edge to existing callee clone NewCallee, performing /// the necessary context id and allocation type updates. /// If callee's caller edge iterator is supplied, it is updated when removing - /// the edge from that list. + /// the edge from that list. If ContextIdsToMove is non-empty, only that + /// subset of Edge's ids are moved to an edge to the new callee. void moveEdgeToExistingCalleeClone(const std::shared_ptr &Edge, ContextNode *NewCallee, EdgeIter *CallerEdgeI = nullptr, - bool NewClone = false); + bool NewClone = false, + DenseSet ContextIdsToMove = {}); /// Recursively perform cloning on the graph for the given Node and its /// callers, in order to uniquely identify the allocation behavior of an - /// allocation given its context. - void identifyClones(ContextNode *Node, - DenseSet &Visited); + /// allocation given its context. The context ids of the allocation being + /// processed are given in AllocContextIds. + void identifyClones(ContextNode *Node, DenseSet &Visited, + const DenseSet &AllocContextIds); /// Map from each context ID to the AllocationType assigned to that context. 
std::map ContextIdToAllocationType; @@ -2358,7 +2363,8 @@ void CallsiteContextGraph::exportToDot( template typename CallsiteContextGraph::ContextNode * CallsiteContextGraph::moveEdgeToNewCalleeClone( - const std::shared_ptr &Edge, EdgeIter *CallerEdgeI) { + const std::shared_ptr &Edge, EdgeIter *CallerEdgeI, + DenseSet ContextIdsToMove) { ContextNode *Node = Edge->Callee; NodeOwner.push_back( std::make_unique(Node->IsAllocation, Node->Call)); @@ -2366,7 +2372,8 @@ CallsiteContextGraph::moveEdgeToNewCalleeClone( Node->addClone(Clone); assert(NodeToCallingFunc.count(Node)); NodeToCallingFunc[Clone] = NodeToCallingFunc[Node]; - moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true); + moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true, + ContextIdsToMove); return Clone; } @@ -2374,23 +2381,81 @@ template void CallsiteContextGraph:: moveEdgeToExistingCalleeClone(const std::shared_ptr &Edge, ContextNode *NewCallee, EdgeIter *CallerEdgeI, - bool NewClone) { + bool NewClone, + DenseSet ContextIdsToMove) { // NewCallee and Edge's current callee must be clones of the same original // node (Edge's current callee may be the original node too). assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode()); - auto &EdgeContextIds = Edge->getContextIds(); + ContextNode *OldCallee = Edge->Callee; - if (CallerEdgeI) - *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI); - else - OldCallee->eraseCallerEdge(Edge.get()); - Edge->Callee = NewCallee; - NewCallee->CallerEdges.push_back(Edge); - // Don't need to update Edge's context ids since we are simply reconnecting - // it. - set_subtract(OldCallee->ContextIds, EdgeContextIds); - NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end()); - NewCallee->AllocTypes |= Edge->AllocTypes; + + // We might already have an edge to the new callee from earlier cloning for a + // different allocation. If one exists we will reuse it. + auto ExistingEdgeToNewCallee = NewCallee->findEdgeFromCaller(Edge->Caller); + + // Callers will pass an empty ContextIdsToMove set when they want to move the + // edge. Copy in Edge's ids for simplicity. + if (ContextIdsToMove.empty()) + ContextIdsToMove = Edge->getContextIds(); + + // If we are moving all of Edge's ids, then just move the whole Edge. + // Otherwise only move the specified subset, to a new edge if needed. + if (Edge->getContextIds().size() == ContextIdsToMove.size()) { + // Moving the whole Edge. + if (CallerEdgeI) + *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI); + else + OldCallee->eraseCallerEdge(Edge.get()); + if (ExistingEdgeToNewCallee) { + // Since we already have an edge to NewCallee, simply move the ids + // onto it, and remove the existing Edge. + ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.begin(), + ContextIdsToMove.end()); + ExistingEdgeToNewCallee->AllocTypes |= Edge->AllocTypes; + assert(Edge->ContextIds == ContextIdsToMove); + Edge->ContextIds.clear(); + Edge->AllocTypes = (uint8_t)AllocationType::None; + Edge->Caller->eraseCalleeEdge(Edge.get()); + } else { + // Otherwise just reconnect Edge to NewCallee. + Edge->Callee = NewCallee; + NewCallee->CallerEdges.push_back(Edge); + // Don't need to update Edge's context ids since we are simply + // reconnecting it. + } + // In either case, need to update the alloc types on NewCallee. + NewCallee->AllocTypes |= Edge->AllocTypes; + } else { + // Only moving a subset of Edge's ids.
+ if (CallerEdgeI) + ++CallerEdgeI; + // Compute the alloc type of the subset of ids being moved. + auto CallerEdgeAllocType = computeAllocType(ContextIdsToMove); + if (ExistingEdgeToNewCallee) { + // Since we already have an edge to NewCallee, simply move the ids + // onto it. + ExistingEdgeToNewCallee->getContextIds().insert(ContextIdsToMove.begin(), + ContextIdsToMove.end()); + ExistingEdgeToNewCallee->AllocTypes |= CallerEdgeAllocType; + } else { + // Otherwise, create a new edge to NewCallee for the ids being moved. + auto NewEdge = std::make_shared( + NewCallee, Edge->Caller, CallerEdgeAllocType, ContextIdsToMove); + Edge->Caller->CalleeEdges.push_back(NewEdge); + NewCallee->CallerEdges.push_back(NewEdge); + } + // In either case, need to update the alloc types on NewCallee, and remove + // those ids and update the alloc type on the original Edge. + NewCallee->AllocTypes |= CallerEdgeAllocType; + set_subtract(Edge->ContextIds, ContextIdsToMove); + Edge->AllocTypes = computeAllocType(Edge->ContextIds); + } + // Now perform some updates that are common to all cases: the NewCallee gets + // the moved ids added, and we need to remove those ids from OldCallee and + // update its alloc type (NewCallee alloc type updates handled above). + NewCallee->ContextIds.insert(ContextIdsToMove.begin(), + ContextIdsToMove.end()); + set_subtract(OldCallee->ContextIds, ContextIdsToMove); OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds); // OldCallee alloc type should be None iff its context id set is now empty. assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) == @@ -2402,7 +2467,7 @@ void CallsiteContextGraph:: // The context ids moving to the new callee are the subset of this edge's // context ids and the context ids on the caller edge being moved. DenseSet EdgeContextIdsToMove = - set_intersection(OldCalleeEdge->getContextIds(), EdgeContextIds); + set_intersection(OldCalleeEdge->getContextIds(), ContextIdsToMove); set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove); OldCalleeEdge->AllocTypes = computeAllocType(OldCalleeEdge->getContextIds()); @@ -2468,8 +2533,10 @@ void CallsiteContextGraph:: template void CallsiteContextGraph::identifyClones() { DenseSet Visited; - for (auto &Entry : AllocationCallToContextNodeMap) - identifyClones(Entry.second, Visited); + for (auto &Entry : AllocationCallToContextNodeMap) { + Visited.clear(); + identifyClones(Entry.second, Visited, Entry.second->ContextIds); + } Visited.clear(); for (auto &Entry : AllocationCallToContextNodeMap) recursivelyRemoveNoneTypeCalleeEdges(Entry.second, Visited); @@ -2487,7 +2554,8 @@ bool checkColdOrNotCold(uint8_t AllocType) { template void CallsiteContextGraph::identifyClones( - ContextNode *Node, DenseSet &Visited) { + ContextNode *Node, DenseSet &Visited, + const DenseSet &AllocContextIds) { if (VerifyNodes) checkNode(Node, /*CheckEdges=*/false); assert(!Node->CloneOf); @@ -2521,7 +2589,7 @@ void CallsiteContextGraph::identifyClones( } // Ignore any caller we previously visited via another edge. if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) { - identifyClones(Edge->Caller, Visited); + identifyClones(Edge->Caller, Visited, AllocContextIds); } } } @@ -2584,13 +2652,23 @@ void CallsiteContextGraph::identifyClones( if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) break; + // Only need to process the ids along this edge pertaining to the given + // allocation. 
+ auto CallerEdgeContextsForAlloc = + set_intersection(CallerEdge->getContextIds(), AllocContextIds); + if (CallerEdgeContextsForAlloc.empty()) { + ++EI; + continue; + } + auto CallerAllocTypeForAlloc = computeAllocType(CallerEdgeContextsForAlloc); + // Compute the node callee edge alloc types corresponding to the context ids // for this caller edge. std::vector CalleeEdgeAllocTypesForCallerEdge; CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size()); for (auto &CalleeEdge : Node->CalleeEdges) CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes( - CalleeEdge->getContextIds(), CallerEdge->getContextIds())); + CalleeEdge->getContextIds(), CallerEdgeContextsForAlloc)); // Don't clone if doing so will not disambiguate any alloc types amongst // caller edges (including the callee edges that would be cloned). @@ -2605,7 +2683,7 @@ void CallsiteContextGraph::identifyClones( // disambiguated by splitting out different context ids. assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None); assert(Node->AllocTypes != (uint8_t)AllocationType::None); - if (allocTypeToUse(CallerEdge->AllocTypes) == + if (allocTypeToUse(CallerAllocTypeForAlloc) == allocTypeToUse(Node->AllocTypes) && allocTypesMatch( CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) { @@ -2618,7 +2696,7 @@ void CallsiteContextGraph::identifyClones( ContextNode *Clone = nullptr; for (auto *CurClone : Node->Clones) { if (allocTypeToUse(CurClone->AllocTypes) != - allocTypeToUse(CallerEdge->AllocTypes)) + allocTypeToUse(CallerAllocTypeForAlloc)) continue; if (!allocTypesMatch( @@ -2630,9 +2708,11 @@ void CallsiteContextGraph::identifyClones( // The edge iterator is adjusted when we move the CallerEdge to the clone. if (Clone) - moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI); + moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI, /*NewClone=*/false, + CallerEdgeContextsForAlloc); else - Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI); + Clone = + moveEdgeToNewCalleeClone(CallerEdge, &EI, CallerEdgeContextsForAlloc); assert(EI == Node->CallerEdges.end() || Node->AllocTypes != (uint8_t)AllocationType::None); diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index b5f45a252c7b4..0b3a6931e779b 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -2059,7 +2059,7 @@ bool SampleProfileLoader::doInitialization(Module &M, // Load pseudo probe descriptors for probe-based function samples. 
if (Reader->profileIsProbeBased()) { - ProbeManager = std::make_unique(M, LTOPhase); + ProbeManager = std::make_unique(M); if (!ProbeManager->moduleIsProbed(M)) { const char *Msg = "Pseudo-probe-based profile requires SampleProfileProbePass"; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index db302d7e52684..9ff1e3aa5502e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" @@ -2049,6 +2050,16 @@ Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp, } Value *OrOp0 = Or->getOperand(0), *OrOp1 = Or->getOperand(1); + + // (icmp eq/ne (or disjoint x, C0), C1) + // -> (icmp eq/ne x, C0^C1) + if (Cmp.isEquality() && match(OrOp1, m_ImmConstant()) && + cast(Or)->isDisjoint()) { + Value *NewC = + Builder.CreateXor(OrOp1, ConstantInt::get(OrOp1->getType(), C)); + return new ICmpInst(Pred, OrOp0, NewC); + } + const APInt *MaskC; if (match(OrOp1, m_APInt(MaskC)) && Cmp.isEquality()) { if (*MaskC == C && (C + 1).isPowerOf2()) { @@ -3453,6 +3464,11 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant( if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(Pred, NegVal, BOp1); if (BO->hasOneUse()) { + // (add nuw A, B) != 0 -> (or A, B) != 0 + if (match(BO, m_NUWAdd(m_Value(), m_Value()))) { + Value *Or = Builder.CreateOr(BOp0, BOp1); + return new ICmpInst(Pred, Or, Constant::getNullValue(BO->getType())); + } Value *Neg = Builder.CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(Pred, BOp0, Neg); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 4d3de76389c28..2d78fcee1152d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3091,6 +3091,13 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { return BinaryOperator::CreateOr(CondVal, FalseVal); } + if (match(CondVal, m_OneUse(m_Select(m_Value(A), m_One(), m_Value(B)))) && + impliesPoison(FalseVal, B)) { + // (A || B) || C --> A || (B | C) + return replaceInstUsesWith( + SI, Builder.CreateLogicalOr(A, Builder.CreateOr(B, FalseVal))); + } + if (auto *LHS = dyn_cast(CondVal)) if (auto *RHS = dyn_cast(FalseVal)) if (Value *V = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ false, @@ -3132,6 +3139,13 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { return BinaryOperator::CreateAnd(CondVal, TrueVal); } + if (match(CondVal, m_OneUse(m_Select(m_Value(A), m_Value(B), m_Zero()))) && + impliesPoison(TrueVal, B)) { + // (A && B) && C --> A && (B & C) + return replaceInstUsesWith( + SI, Builder.CreateLogicalAnd(A, Builder.CreateAnd(B, TrueVal))); + } + if (auto *LHS = dyn_cast(CondVal)) if (auto *RHS = dyn_cast(TrueVal)) if (Value *V = foldLogicOfFCmps(LHS, RHS, /*IsAnd*/ true, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index c691c8b1c55b3..6739b8745d74e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -655,25 +655,33 @@ Value 
*InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } } - // TODO: If we only want bits that already match the signbit then we don't + // If we only want bits that already match the signbit then we don't // need to shift. + uint64_t ShiftAmt = SA->getLimitedValue(BitWidth - 1); + if (DemandedMask.countr_zero() >= ShiftAmt) { + if (I->hasNoSignedWrap()) { + unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero(); + unsigned SignBits = + ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI); + if (SignBits > ShiftAmt && SignBits - ShiftAmt >= NumHiDemandedBits) + return I->getOperand(0); + } - // If we can pre-shift a right-shifted constant to the left without - // losing any high bits amd we don't demand the low bits, then eliminate - // the left-shift: - // (C >> X) << LeftShiftAmtC --> (C << RightShiftAmtC) >> X - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); - Value *X; - Constant *C; - if (DemandedMask.countr_zero() >= ShiftAmt && - match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) { - Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt); - Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::Shl, C, - LeftShiftAmtC, DL); - if (ConstantFoldBinaryOpOperands(Instruction::LShr, NewC, LeftShiftAmtC, - DL) == C) { - Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X); - return InsertNewInstWith(Lshr, I->getIterator()); + // If we can pre-shift a right-shifted constant to the left without + // losing any high bits and we don't demand the low bits, then eliminate + // the left-shift: + // (C >> X) << LeftShiftAmtC --> (C << LeftShiftAmtC) >> X + Value *X; + Constant *C; + if (match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) { + Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt); + Constant *NewC = ConstantFoldBinaryOpOperands(Instruction::Shl, C, + LeftShiftAmtC, DL); + if (ConstantFoldBinaryOpOperands(Instruction::LShr, NewC, + LeftShiftAmtC, DL) == C) { + Instruction *Lshr = BinaryOperator::CreateLShr(NewC, X); + return InsertNewInstWith(Lshr, I->getIterator()); + } } } diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 4bcd5d3880b5f..bc9e35c4f763e 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -13,11 +13,11 @@ add_llvm_component_library(LLVMInstrumentation InstrOrderFile.cpp InstrProfiling.cpp KCFI.cpp + LowerAllowCheckPass.cpp PGOForceFunctionAttrs.cpp PGOInstrumentation.cpp PGOMemOPSizeOpt.cpp PoisonChecking.cpp - RemoveTrapsPass.cpp SanitizerCoverage.cpp SPIRITTAnnotations.cpp SanitizerBinaryMetadata.cpp diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index d0d349c891a37..82f60f4feed45 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -182,20 +182,13 @@ static cl::opt ClWithTls( "platforms that support this"), cl::Hidden, cl::init(true)); -static cl::opt - CSelectiveInstrumentation("hwasan-selective-instrumentation", - cl::desc("Use selective instrumentation"), - cl::Hidden, cl::init(false)); - -static cl::opt ClHotPercentileCutoff( - "hwasan-percentile-cutoff-hot", cl::init(0), - cl::desc("Alternative hot percentile cuttoff."
"By default `-profile-summary-cutoff-hot` is used.")); +static cl::opt ClHotPercentileCutoff("hwasan-percentile-cutoff-hot", + cl::desc("Hot percentile cutoff.")); static cl::opt - ClRandomSkipRate("hwasan-random-skip-rate", cl::init(0), + ClRandomSkipRate("hwasan-random-rate", cl::desc("Probability value in the range [0.0, 1.0] " - "to skip instrumentation of a function.")); + "to keep instrumentation of a function.")); STATISTIC(NumTotalFuncs, "Number of total funcs"); STATISTIC(NumInstrumentedFuncs, "Number of instrumented funcs"); @@ -317,7 +310,7 @@ class HWAddressSanitizer { }; bool selectiveInstrumentationShouldSkip(Function &F, - FunctionAnalysisManager &FAM); + FunctionAnalysisManager &FAM) const; void initializeModule(); void createHwasanCtorComdat(); @@ -1500,28 +1493,22 @@ bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo, } bool HWAddressSanitizer::selectiveInstrumentationShouldSkip( - Function &F, FunctionAnalysisManager &FAM) { + Function &F, FunctionAnalysisManager &FAM) const { if (ClRandomSkipRate.getNumOccurrences()) { std::bernoulli_distribution D(ClRandomSkipRate); - if (D(*Rng)) - return true; - } else { - auto &MAMProxy = FAM.getResult(F); - ProfileSummaryInfo *PSI = - MAMProxy.getCachedResult(*F.getParent()); - if (PSI && PSI->hasProfileSummary()) { - auto &BFI = FAM.getResult(F); - if ((ClHotPercentileCutoff.getNumOccurrences() && - ClHotPercentileCutoff >= 0) - ? PSI->isFunctionHotInCallGraphNthPercentile( - ClHotPercentileCutoff, &F, BFI) - : PSI->isFunctionHotInCallGraph(&F, BFI)) - return true; - } else { - ++NumNoProfileSummaryFuncs; - } + return !D(*Rng); } - return false; + if (!ClHotPercentileCutoff.getNumOccurrences()) + return false; + auto &MAMProxy = FAM.getResult(F); + ProfileSummaryInfo *PSI = + MAMProxy.getCachedResult(*F.getParent()); + if (!PSI || !PSI->hasProfileSummary()) { + ++NumNoProfileSummaryFuncs; + return false; + } + return PSI->isFunctionHotInCallGraphNthPercentile( + ClHotPercentileCutoff, &F, FAM.getResult(F)); } void HWAddressSanitizer::sanitizeFunction(Function &F, @@ -1537,7 +1524,7 @@ void HWAddressSanitizer::sanitizeFunction(Function &F, NumTotalFuncs++; - if (CSelectiveInstrumentation && selectiveInstrumentationShouldSkip(F, FAM)) + if (selectiveInstrumentationShouldSkip(F, FAM)) return; NumInstrumentedFuncs++; diff --git a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp b/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp similarity index 59% rename from llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp rename to llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp index d87f7482a21d2..cdc8318f088c2 100644 --- a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp +++ b/llvm/lib/Transforms/Instrumentation/LowerAllowCheckPass.cpp @@ -1,4 +1,4 @@ -//===- RemoveTrapsPass.cpp --------------------------------------*- C++ -*-===// +//===- LowerAllowCheckPass.cpp ----------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information.
@@ -6,11 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation/RemoveTrapsPass.h" +#include "llvm/Transforms/Instrumentation/LowerAllowCheckPass.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" @@ -20,33 +21,34 @@ using namespace llvm; -#define DEBUG_TYPE "remove-traps" +#define DEBUG_TYPE "lower-allow-check" -static cl::opt HotPercentileCutoff( - "remove-traps-percentile-cutoff-hot", cl::init(0), - cl::desc("Alternative hot percentile cuttoff. By default " - "`-profile-summary-cutoff-hot` is used.")); +static cl::opt + HotPercentileCutoff("lower-allow-check-percentile-cutoff-hot", + cl::desc("Hot percentile cutoff.")); static cl::opt - RandomRate("remove-traps-random-rate", cl::init(0.0), + RandomRate("lower-allow-check-random-rate", cl::desc("Probability value in the range [0.0, 1.0] of " - "unconditional pseudo-random checks removal.")); + "unconditional pseudo-random checks.")); STATISTIC(NumChecksTotal, "Number of checks"); STATISTIC(NumChecksRemoved, "Number of removed checks"); static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, const ProfileSummaryInfo *PSI) { - SmallVector Remove; + SmallVector, 16> ReplaceWithValue; std::unique_ptr Rng; + // TODO: + // https://github.com/llvm/llvm-project/pull/84858#discussion_r1520603139 auto ShouldRemove = [&](bool IsHot) { if (!RandomRate.getNumOccurrences()) return IsHot; if (!Rng) Rng = F.getParent()->createRNG(F.getName()); std::bernoulli_distribution D(RandomRate); - return D(*Rng); + return !D(*Rng); }; for (BasicBlock &BB : F) { @@ -56,26 +58,23 @@ static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, continue; auto ID = II->getIntrinsicID(); switch (ID) { - case Intrinsic::ubsantrap: { + case Intrinsic::allow_ubsan_check: + case Intrinsic::allow_runtime_check: { ++NumChecksTotal; bool IsHot = false; if (PSI) { - uint64_t Count = 0; - for (const auto *PR : predecessors(&BB)) - Count += BFI.getBlockProfileCount(PR).value_or(0); - - IsHot = - HotPercentileCutoff.getNumOccurrences() - ? (HotPercentileCutoff > 0 && - PSI->isHotCountNthPercentile(HotPercentileCutoff, Count)) - : PSI->isHotCount(Count); + uint64_t Count = BFI.getBlockProfileCount(&BB).value_or(0); + IsHot = PSI->isHotCountNthPercentile(HotPercentileCutoff, Count); } - if (ShouldRemove(IsHot)) { - Remove.push_back(II); + bool ToRemove = ShouldRemove(IsHot); + ReplaceWithValue.push_back({ + II, + ToRemove, + }); + if (ToRemove) ++NumChecksRemoved; - } break; } default: @@ -84,14 +83,16 @@ static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, } } - for (IntrinsicInst *I : Remove) + for (auto [I, V] : ReplaceWithValue) { + I->replaceAllUsesWith(ConstantInt::getBool(I->getType(), !V)); I->eraseFromParent(); + } - return !Remove.empty(); + return !ReplaceWithValue.empty(); } -PreservedAnalyses RemoveTrapsPass::run(Function &F, - FunctionAnalysisManager &AM) { +PreservedAnalyses LowerAllowCheckPass::run(Function &F, - FunctionAnalysisManager &AM) { if (F.isDeclaration()) return PreservedAnalyses::all(); auto &MAMProxy = AM.getResult(F); @@ -102,3 +103,8 @@ PreservedAnalyses RemoveTrapsPass::run(Function &F, return removeUbsanTraps(F, BFI, PSI) ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); } + +bool LowerAllowCheckPass::IsRequested() { + return RandomRate.getNumOccurrences() || + HotPercentileCutoff.getNumOccurrences(); +} diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 9caaf720ec913..056be8629b961 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -4019,7 +4019,7 @@ bool NewGVN::eliminateInstructions(Function &F) { // dominated defs as dead. if (Def) { // For anything in this case, what and how we value number - // guarantees that any side-effets that would have occurred (ie + // guarantees that any side-effects that would have occurred (ie // throwing, etc) can be proven to either still occur (because it's // dominated by something that has the same side-effects), or never // occur. Otherwise, we would not have been able to prove it value diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 915cd81661f02..5396038d8b92b 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -784,7 +784,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, // If the successor only has a single pred, split the top of the successor // block. assert(SP == BB && "CFG broken"); - SP = nullptr; + (void)SP; return SplitBlock(Succ, &Succ->front(), DT, LI, MSSAU, BBName, /*Before=*/true); } diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp index 6a2dae5bab68e..ac106e4aa2a39 100644 --- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp +++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp @@ -336,9 +336,22 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint, if (isReachedBefore(&I, &InsertPoint, &DT, PDT)) for (const Use &U : I.uses()) - if (auto *UserInst = dyn_cast(U.getUser())) - if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U)) + if (auto *UserInst = dyn_cast(U.getUser())) { + // If InsertPoint is in a BB that comes after I, then we cannot move if + // I is used in the terminator of the current BB. + if (I.getParent() == InsertPoint.getParent() && + UserInst == I.getParent()->getTerminator()) return false; + if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U)) { + // If UserInst is an instruction that appears later in the same BB as + // I, then it is okay to move since I will still be available when + // UserInst is executed. 
+ if (CheckForEntireBlock && I.getParent() == UserInst->getParent() && + DT.dominates(&I, UserInst)) + continue; + return false; + } + } if (isReachedBefore(&InsertPoint, &I, &DT, PDT)) for (const Value *Op : I.operands()) if (auto *OpInst = dyn_cast(Op)) { diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 73c5d63678229..9d816c5220532 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1034,15 +1034,6 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) { } } -Value *llvm::createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, - RecurKind RK, Value *Left, Value *Right) { - if (auto VTy = dyn_cast(Left->getType())) - StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal); - Value *Cmp = - Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp"); - return Builder.CreateSelect(Cmp, Left, Right, "rdx.select"); -} - Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right) { Type *Ty = Left->getType(); @@ -1151,16 +1142,13 @@ Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src, NewVal = SI->getTrueValue(); } - // Create a splat vector with the new value and compare this to the vector - // we want to reduce. - ElementCount EC = cast(Src->getType())->getElementCount(); - Value *Right = Builder.CreateVectorSplat(EC, InitVal); - Value *Cmp = - Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp"); - // If any predicate is true it means that we want to select the new value. - Cmp = Builder.CreateOrReduce(Cmp); - return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select"); + Value *AnyOf = + Src->getType()->isVectorTy() ? Builder.CreateOrReduce(Src) : Src; + // The compares in the loop may yield poison, which propagates through the + // bitwise ORs. Freeze it here before the condition is used. + AnyOf = Builder.CreateFreeze(AnyOf); + return Builder.CreateSelect(AnyOf, NewVal, InitVal, "rdx.select"); } Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src, diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index ebca2d855a467..ece2a34f180cb 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -68,9 +68,7 @@ class VPBuilder { public: VPBuilder() = default; VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); } - VPBuilder(VPRecipeBase *InsertPt) { - setInsertPoint(InsertPt->getParent(), InsertPt->getIterator()); - } + VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); } /// Clear the insertion point: created instructions will not be inserted into /// a block. 
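// Illustrative sketch, not part of the diff: a scalar model of the new
// createAnyOfTargetReduction lowering above. Instead of comparing the reduced
// vector against a splat of the start value, the predicate vector itself is
// OR-reduced and a single boolean selects between NewVal and InitVal; the
// freeze is needed because a poison compare in any lane would otherwise
// propagate through the ORs. Names below are illustrative only.
#include <vector>

int anyOfReduce(const std::vector<bool> &Preds, int NewVal, int InitVal) {
  bool AnyOf = false;
  for (bool P : Preds) // models Builder.CreateOrReduce(Src)
    AnyOf = AnyOf || P;
  // The IR version freezes AnyOf at this point; plain C++ has no poison, so
  // there is no scalar analogue of Builder.CreateFreeze.
  return AnyOf ? NewVal : InitVal; // models Builder.CreateSelect
}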
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0834865173b2f..5535cc55e9321 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -124,6 +124,7 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/VectorBuilder.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -248,10 +249,12 @@ static cl::opt ForceTailFoldingStyle( clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), - clEnumValN( - TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, - "data-and-control-without-rt-check", - "Similar to data-and-control, but remove the runtime check"))); + clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, - "data-and-control-without-rt-check", + "Similar to data-and-control, but remove the runtime check"), + clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", + "Use predicated EVL instructions for tail folding. If EVL " + "is unsupported, fall back to data-without-lane-mask."))); static cl::opt MaximizeBandwidth( "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, @@ -570,10 +573,6 @@ class InnerLoopVectorizer { /// Fix the non-induction PHIs in \p Plan. void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State); - /// Returns true if the reordering of FP operations is not allowed, but we are - /// able to vectorize with strict in-order reductions for the given RdxDesc. - bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc); - /// Create a new phi node for the induction variable \p OrigPhi to resume /// iteration count in the scalar epilogue, from where the vectorized loop /// left off. \p Step is the SCEV-expanded induction step to use. In cases @@ -1505,29 +1504,62 @@ class LoopVectorizationCostModel { /// Returns the TailFoldingStyle that is best for the current loop. TailFoldingStyle getTailFoldingStyle(bool IVUpdateMayOverflow = true) const { - return IVUpdateMayOverflow ? ChosenTailFoldingStyle.first - : ChosenTailFoldingStyle.second; + if (!ChosenTailFoldingStyle) + return TailFoldingStyle::None; + return IVUpdateMayOverflow ? ChosenTailFoldingStyle->first + : ChosenTailFoldingStyle->second; } /// Selects and saves TailFoldingStyle for 2 options - if IV update may /// overflow or not. - void setTailFoldingStyles() { - assert(ChosenTailFoldingStyle.first == TailFoldingStyle::None && - ChosenTailFoldingStyle.second == TailFoldingStyle::None && - "Tail folding must not be selected yet."); - if (!Legal->prepareToFoldTailByMasking()) + /// \param IsScalableVF true if scalable vector factors are enabled. + /// \param UserIC User-specified interleave count.
+ void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) { + assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet."); + if (!Legal->prepareToFoldTailByMasking()) { + ChosenTailFoldingStyle = + std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); return; + } - if (ForceTailFoldingStyle.getNumOccurrences()) { - ChosenTailFoldingStyle.first = ChosenTailFoldingStyle.second = - ForceTailFoldingStyle; + if (!ForceTailFoldingStyle.getNumOccurrences()) { + ChosenTailFoldingStyle = std::make_pair( + TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true), + TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false)); return; } - ChosenTailFoldingStyle.first = - TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true); - ChosenTailFoldingStyle.second = - TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false); + // Set styles when forced. + ChosenTailFoldingStyle = std::make_pair(ForceTailFoldingStyle.getValue(), + ForceTailFoldingStyle.getValue()); + if (ForceTailFoldingStyle != TailFoldingStyle::DataWithEVL) + return; + // Override forced styles if needed. + // FIXME: use actual opcode/data type for analysis here. + // FIXME: Investigate opportunity for fixed vector factor. + bool EVLIsLegal = + IsScalableVF && UserIC <= 1 && + TTI.hasActiveVectorLength(0, nullptr, Align()) && + !EnableVPlanNativePath && + // FIXME: implement support for max safe dependency distance. + Legal->isSafeForAnyVectorWidth() && + // FIXME: remove this once reductions are supported. + Legal->getReductionVars().empty(); + if (!EVLIsLegal) { + // If for some reason EVL mode is unsupported, fall back to + // DataWithoutLaneMask to try to vectorize the loop with folded tail + // in a generic way. + ChosenTailFoldingStyle = + std::make_pair(TailFoldingStyle::DataWithoutLaneMask, + TailFoldingStyle::DataWithoutLaneMask); + LLVM_DEBUG( + dbgs() + << "LV: Preference for VP intrinsics indicated. Will " "not try to generate VP Intrinsics " + << (UserIC > 1 + ? "since interleave count specified is greater than 1.\n" + : "due to non-interleaving reasons.\n")); + } } /// Returns true if all loop blocks should be masked to fold tail loop. @@ -1544,6 +1576,18 @@ class LoopVectorizationCostModel { return foldTailByMasking() || Legal->blockNeedsPredication(BB); } + /// Returns true if VP intrinsics with explicit vector length support should + /// be generated in the tail folded loop. + bool foldTailWithEVL() const { + return getTailFoldingStyle() == TailFoldingStyle::DataWithEVL && + // FIXME: remove this once vp_reverse is supported. + none_of( + WideningDecisions, + [](const std::pair, + std::pair> + &Data) { return Data.second.first == CM_Widen_Reverse; }); + } + /// Returns true if the Phi is part of an inloop reduction. bool isInLoopReduction(PHINode *Phi) const { return InLoopReductions.contains(Phi); } @@ -1688,8 +1732,8 @@ class LoopVectorizationCostModel { /// Control finally chosen tail folding style. The first element is used if /// the IV update may overflow, the second element - if it does not. - std::pair ChosenTailFoldingStyle = - std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); + std::optional> + ChosenTailFoldingStyle; /// A map holding scalar costs for different vectorization factors. The /// presence of a cost for an instruction in the mapping indicates that the @@ -3007,9 +3051,8 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue( } // Create phi nodes to merge from the backedge-taken check block.
- PHINode *BCResumeVal = - PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val", - LoopScalarPreHeader->getTerminator()->getIterator()); + PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val", + LoopScalarPreHeader->getFirstNonPHI()); // Copy original phi DL over to the new one. BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc()); @@ -3667,11 +3710,6 @@ void InnerLoopVectorizer::fixNonInductionPHIs(VPlan &Plan, } } -bool InnerLoopVectorizer::useOrderedReductions( - const RecurrenceDescriptor &RdxDesc) { - return Cost->useOrderedReductions(RdxDesc); -} - void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) { // We should not collect Scalars more than once per VF. Right now, this // function is called from collectUniformsAndScalars(), which already does @@ -4647,9 +4685,24 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { // found modulo the vectorization factor is not zero, try to fold the tail // by masking. // FIXME: look for a smaller MaxVF that does divide TC rather than masking. - setTailFoldingStyles(); - if (foldTailByMasking()) + setTailFoldingStyles(MaxFactors.ScalableVF.isScalable(), UserIC); + if (foldTailByMasking()) { + if (getTailFoldingStyle() == TailFoldingStyle::DataWithEVL) { + LLVM_DEBUG( + dbgs() + << "LV: tail is folded with EVL, forcing unroll factor to be 1. Will " + "try to generate VP Intrinsics with scalable vector " + "factors only.\n"); + // Tail folded loop using VP intrinsics restricts the VF to be scalable + // for now. + // TODO: extend it for fixed vectors, if required. + assert(MaxFactors.ScalableVF.isScalable() && + "Expected scalable vector factor."); + + MaxFactors.FixedVF = ElementCount::getFixed(1); + } return MaxFactors; + } // If there was a tail-folding hint/switch, but we can't fold the tail by // masking, fallback to a vectorization with a scalar epilogue. @@ -5257,6 +5310,13 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, if (!isScalarEpilogueAllowed()) return 1; + // Do not interleave if EVL is preferred and no User IC is specified. + if (foldTailWithEVL()) { + LLVM_DEBUG(dbgs() << "LV: Preference for VP intrinsics indicated. " + "Unroll factor forced to be 1.\n"); + return 1; + } + // We used the distance for the interleave count. if (!Legal->isSafeForAnyVectorWidth()) return 1; @@ -7390,7 +7450,6 @@ static void createAndCollectMergePhiForReduction( auto *PhiR = cast(RedResult->getOperand(0)); const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - TrackingVH ReductionStartValue = RdxDesc.getRecurrenceStartValue(); Value *FinalValue = State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane())); auto *ResumePhi = @@ -7415,7 +7474,7 @@ static void createAndCollectMergePhiForReduction( BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming), Incoming); else - BCBlockPhi->addIncoming(ReductionStartValue, Incoming); + BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming); } auto *OrigPhi = cast(PhiR->getUnderlyingValue()); @@ -7708,11 +7767,10 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton( // Now, compare the remaining count and if there aren't enough iterations to // execute the vectorized epilogue skip to the scalar part. 
-  BasicBlock *VecEpilogueIterationCountCheck = LoopVectorPreHeader;
-  VecEpilogueIterationCountCheck->setName("vec.epilog.iter.check");
-  LoopVectorPreHeader =
-      SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
-                 LI, nullptr, "vec.epilog.ph");
+  LoopVectorPreHeader->setName("vec.epilog.ph");
+  BasicBlock *VecEpilogueIterationCountCheck =
+      SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->begin(), DT, LI,
+                 nullptr, "vec.epilog.iter.check", true);
 
   emitMinimumVectorEpilogueIterCountCheck(LoopScalarPreHeader,
                                           VecEpilogueIterationCountCheck);
@@ -8163,18 +8221,22 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
   // builder. At this point we generate the predication tree. There may be
   // duplications since this is a simple recursive scan, but future
   // optimizations will clean it up.
+  // TODO: At the moment the first mask is always skipped, but it would be
+  // better to skip the most expensive mask.
   SmallVector<VPValue *, 2> OperandsWithMask;
 
   for (unsigned In = 0; In < NumIncoming; In++) {
     OperandsWithMask.push_back(Operands[In]);
     VPValue *EdgeMask =
-        createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent());
+        getEdgeMask(Phi->getIncomingBlock(In), Phi->getParent());
     if (!EdgeMask) {
       assert(In == 0 && "Both null and non-null edge masks found");
       assert(all_equal(Operands) &&
              "Distinct incoming values with one having a full mask");
       break;
     }
+    if (In == 0)
+      continue;
     OperandsWithMask.push_back(EdgeMask);
   }
   return new VPBlendRecipe(Phi, OperandsWithMask);
@@ -8487,6 +8549,9 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
     VPlanTransforms::truncateToMinimalBitwidths(
         *Plan, CM.getMinimalBitwidths(), PSE.getSE()->getContext());
     VPlanTransforms::optimize(*Plan, *PSE.getSE());
+    // TODO: try to put it close to addActiveLaneMask().
+    if (CM.foldTailWithEVL())
+      VPlanTransforms::addExplicitVectorLength(*Plan);
     assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
     VPlans.push_back(std::move(Plan));
   }
@@ -8828,6 +8893,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
 // A ComputeReductionResult recipe is added to the middle block, also for
 // in-loop reductions which compute their result in-loop, because generating
 // the subsequent bc.merge.rdx phi is driven by ComputeReductionResult recipes.
+//
+// Adjust AnyOf reductions; replace the reduction phi for the selected value
+// with a boolean reduction phi node to check if the condition is true in any
+// iteration. The final value is selected by the final ComputeReductionResult.
 void LoopVectorizationPlanner::adjustRecipesForReductions(
     VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
     ElementCount MinVF) {
@@ -8982,8 +9051,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       if (CM.blockNeedsPredicationForAnyReason(BB))
         CondOp = RecipeBuilder.getBlockInMask(BB);
 
-      VPReductionRecipe *RedRecipe = new VPReductionRecipe(
-          RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp);
+      VPReductionRecipe *RedRecipe =
+          new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp,
+                                CondOp, CM.useOrderedReductions(RdxDesc));
       // Append the recipe to the end of the VPBasicBlock because we need to
       // ensure that it comes after all of its inputs, including CondOp.
// Note that this transformation may leave over dead recipes (including @@ -9001,6 +9071,41 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( continue; const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); + // Adjust AnyOf reductions; replace the reduction phi for the selected value + // with a boolean reduction phi node to check if the condition is true in + // any iteration. The final value is selected by the final + // ComputeReductionResult. + if (RecurrenceDescriptor::isAnyOfRecurrenceKind( + RdxDesc.getRecurrenceKind())) { + auto *Select = cast(*find_if(PhiR->users(), [](VPUser *U) { + return isa(U) || + (isa(U) && + cast(U)->getUnderlyingInstr()->getOpcode() == + Instruction::Select); + })); + VPValue *Cmp = Select->getOperand(0); + // If the compare is checking the reduction PHI node, adjust it to check + // the start value. + if (VPRecipeBase *CmpR = Cmp->getDefiningRecipe()) { + for (unsigned I = 0; I != CmpR->getNumOperands(); ++I) + if (CmpR->getOperand(I) == PhiR) + CmpR->setOperand(I, PhiR->getStartValue()); + } + VPBuilder::InsertPointGuard Guard(Builder); + Builder.setInsertPoint(Select); + + // If the true value of the select is the reduction phi, the new value is + // selected if the negated condition is true in any iteration. + if (Select->getOperand(1) == PhiR) + Cmp = Builder.createNot(Cmp); + VPValue *Or = Builder.createOr(PhiR, Cmp); + Select->getVPSingleValue()->replaceAllUsesWith(Or); + + // Convert the reduction phi to operate on bools. + PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse( + OrigLoop->getHeader()->getContext()))); + } + // If tail is folded by masking, introduce selects between the phi // and the live-out instruction of each reduction, at the beginning of the // dedicated latch block. @@ -9033,7 +9138,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( // then extend the loop exit value to enable InstCombine to evaluate the // entire expression in the smaller type. Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType(); - if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) { + if (MinVF.isVector() && PhiTy != RdxDesc.getRecurrenceType() && + !RecurrenceDescriptor::isAnyOfRecurrenceKind( + RdxDesc.getRecurrenceKind())) { assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!"); Type *RdxTy = RdxDesc.getRecurrenceType(); auto *Trunc = @@ -9179,7 +9286,7 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) { State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags()); Value *Step = State.get(getStepValue(), VPIteration(0, 0)); - Value *CanonicalIV = State.get(getCanonicalIV(), VPIteration(0, 0)); + Value *CanonicalIV = State.get(getOperand(1), VPIteration(0, 0)); Value *DerivedIV = emitTransformedIndex( State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step, Kind, cast_if_present(FPBinOp)); @@ -9196,57 +9303,6 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { NeedsMaskForGaps); } -void VPReductionRecipe::execute(VPTransformState &State) { - assert(!State.Instance && "Reduction being replicated."); - Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true); - RecurKind Kind = RdxDesc.getRecurrenceKind(); - bool IsOrdered = State.ILV->useOrderedReductions(RdxDesc); - // Propagate the fast-math flags carried by the underlying instruction. 
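The AnyOf adjustment above is easiest to see on a scalar equivalent. In this model (function names and the predicate are illustrative), the loop-carried select over the picked value becomes a loop-carried boolean OR, and the select moves after the loop, which is exactly what the final ComputeReductionResult provides:

    // Scalar model of the AnyOf rewrite: carry "did the condition ever hold"
    // instead of the selected value itself.
    int anyof_before(const int *a, int n, int start, int picked) {
      int red = start;
      for (int i = 0; i < n; ++i)
        red = (a[i] > 3) ? picked : red; // select feeding the reduction phi
      return red;
    }

    int anyof_after(const int *a, int n, int start, int picked) {
      bool any = false; // the reduction phi now operates on bools
      for (int i = 0; i < n; ++i)
        any |= (a[i] > 3); // or-reduction, trivially vectorizable
      return any ? picked : start; // final select outside the loop
    }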
-  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
-  State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
-  for (unsigned Part = 0; Part < State.UF; ++Part) {
-    Value *NewVecOp = State.get(getVecOp(), Part);
-    if (VPValue *Cond = getCondOp()) {
-      Value *NewCond = State.get(Cond, Part, State.VF.isScalar());
-      VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
-      Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
-      Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy,
-                                                  RdxDesc.getFastMathFlags());
-      if (State.VF.isVector()) {
-        Iden =
-            State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
-      }
-
-      Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden);
-      NewVecOp = Select;
-    }
-    Value *NewRed;
-    Value *NextInChain;
-    if (IsOrdered) {
-      if (State.VF.isVector())
-        NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp,
-                                        PrevInChain);
-      else
-        NewRed = State.Builder.CreateBinOp(
-            (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain,
-            NewVecOp);
-      PrevInChain = NewRed;
-    } else {
-      PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true);
-      NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp);
-    }
-    if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
-      NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
-                                   NewRed, PrevInChain);
-    } else if (IsOrdered)
-      NextInChain = NewRed;
-    else
-      NextInChain = State.Builder.CreateBinOp(
-          (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain);
-    State.set(this, NextInChain, Part, /*IsScalar*/ true);
-  }
-}
-
 void VPReplicateRecipe::execute(VPTransformState &State) {
   Instruction *UI = getUnderlyingInstr();
   if (State.Instance) { // Generate a single instance.
@@ -9307,6 +9363,52 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
     State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, Lane), State);
 }
 
+/// Creates either vp_store or vp_scatter intrinsic calls to represent
+/// predicated store/scatter.
+static Instruction *
+lowerStoreUsingVectorIntrinsics(IRBuilderBase &Builder, Value *Addr,
+                                Value *StoredVal, bool IsScatter, Value *Mask,
+                                Value *EVL, const Align &Alignment) {
+  CallInst *Call;
+  if (IsScatter) {
+    Call = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
+                                   Intrinsic::vp_scatter,
+                                   {StoredVal, Addr, Mask, EVL});
+  } else {
+    VectorBuilder VBuilder(Builder);
+    VBuilder.setEVL(EVL).setMask(Mask);
+    Call = cast<CallInst>(VBuilder.createVectorInstruction(
+        Instruction::Store, Type::getVoidTy(EVL->getContext()),
+        {StoredVal, Addr}));
+  }
+  Call->addParamAttr(
+      1, Attribute::getWithAlignment(Call->getContext(), Alignment));
+  return Call;
+}
+
+/// Creates either vp_load or vp_gather intrinsic calls to represent
+/// predicated load/gather.
+static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
+                                                   VectorType *DataTy,
+                                                   Value *Addr, bool IsGather,
+                                                   Value *Mask, Value *EVL,
+                                                   const Align &Alignment) {
+  CallInst *Call;
+  if (IsGather) {
+    Call =
+        Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
+                                nullptr, "wide.masked.gather");
+  } else {
+    VectorBuilder VBuilder(Builder);
+    VBuilder.setEVL(EVL).setMask(Mask);
+    Call = cast<CallInst>(VBuilder.createVectorInstruction(
+        Instruction::Load, DataTy, Addr, "vp.op.load"));
+  }
+  Call->addParamAttr(
+      0, Attribute::getWithAlignment(Call->getContext(), Alignment));
+  return Call;
+}
+
 void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
   VPValue *StoredValue = isStore() ?
      getStoredValue() : nullptr;
@@ -9345,7 +9447,25 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
       for (unsigned Part = 0; Part < State.UF; ++Part) {
         Instruction *NewSI = nullptr;
         Value *StoredVal = State.get(StoredValue, Part);
-        if (CreateGatherScatter) {
+        // TODO: split this into several classes for better design.
+        if (State.EVL) {
+          assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+                                  "explicit vector length.");
+          assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
+                     VPInstruction::ExplicitVectorLength &&
+                 "EVL must be VPInstruction::ExplicitVectorLength.");
+          Value *EVL = State.get(State.EVL, VPIteration(0, 0));
+          // If EVL is not nullptr, then EVL must be a valid value set during
+          // plan creation, possibly a default value equal to the whole vector
+          // register length. EVL is created only if TTI prefers predicated
+          // vectorization, thus if EVL is not nullptr it also implies a
+          // preference for predicated vectorization.
+          // FIXME: Support reverse store after vp_reverse is added.
+          Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+          NewSI = lowerStoreUsingVectorIntrinsics(
+              Builder, State.get(getAddr(), Part, !CreateGatherScatter),
+              StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
+        } else if (CreateGatherScatter) {
           Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
           Value *VectorGep = State.get(getAddr(), Part);
           NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
@@ -9375,7 +9495,25 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
   State.setDebugLocFrom(getDebugLoc());
   for (unsigned Part = 0; Part < State.UF; ++Part) {
     Value *NewLI;
-    if (CreateGatherScatter) {
+    // TODO: split this into several classes for better design.
+    if (State.EVL) {
+      assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+                              "explicit vector length.");
+      assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
+                 VPInstruction::ExplicitVectorLength &&
+             "EVL must be VPInstruction::ExplicitVectorLength.");
+      Value *EVL = State.get(State.EVL, VPIteration(0, 0));
+      // If EVL is not nullptr, then EVL must be a valid value set during plan
+      // creation, possibly a default value equal to the whole vector register
+      // length. EVL is created only if TTI prefers predicated vectorization,
+      // thus if EVL is not nullptr it also implies a preference for
+      // predicated vectorization.
+      // FIXME: Support reverse loading after vp_reverse is added.
+      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+      NewLI = lowerLoadUsingVectorIntrinsics(
+          Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
+          CreateGatherScatter, MaskPart, EVL, Alignment);
+    } else if (CreateGatherScatter) {
      Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
      Value *VectorGep = State.get(getAddr(), Part);
      NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
@@ -10043,9 +10181,19 @@ bool LoopVectorizePass::processLoop(Loop *L) {
         Value *ResumeV = nullptr;
         // TODO: Move setting of resume values to prepareToExecute.
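The lowering helpers above emit llvm.vp.* calls; their lane semantics can be summarized with a small scalar model (a sketch of the documented VP intrinsic semantics, not of the intrinsic implementation): a lane takes effect only when it is both below EVL and enabled by the mask. With the all-true mask that addExplicitVectorLength substitutes, EVL alone gates the lanes.

    #include <cstdint>

    // Scalar semantics of llvm.vp.store for one vector register of vlmax
    // lanes: a lane is written only if it is below evl and its mask bit is set.
    void vp_store_model(int32_t *dst, const int32_t *val, const bool *mask,
                        uint32_t evl, uint32_t vlmax) {
      for (uint32_t lane = 0; lane < vlmax; ++lane)
        if (lane < evl && mask[lane])
          dst[lane] = val[lane];
    }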
        if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
-          ResumeV = ReductionResumeValues
-                        .find(&ReductionPhi->getRecurrenceDescriptor())
-                        ->second;
+          const RecurrenceDescriptor &RdxDesc =
+              ReductionPhi->getRecurrenceDescriptor();
+          RecurKind RK = RdxDesc.getRecurrenceKind();
+          ResumeV = ReductionResumeValues.find(&RdxDesc)->second;
+          if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
+            // VPReductionPHIRecipes for AnyOf reductions expect a boolean as
+            // start value; compare the final value from the main vector loop
+            // to the start value.
+            IRBuilder<> Builder(
+                cast<PHINode>(ResumeV)->getParent()->getFirstNonPHI());
+            ResumeV = Builder.CreateICmpNE(ResumeV,
+                                           RdxDesc.getRecurrenceStartValue());
+          }
         } else {
           // Create induction resume values for both widened pointer and
           // integer/fp inductions and update the start value of the induction
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 79d146aec0c56..c3dcf73b0b762 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7056,19 +7056,16 @@ bool BoUpSLP::areAllUsersVectorized(
 
 static std::pair<InstructionCost, InstructionCost>
 getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
-                   TargetTransformInfo *TTI, TargetLibraryInfo *TLI) {
+                   TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
+                   ArrayRef<Type *> ArgTys) {
   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
 
   // Calculate the cost of the scalar and vector calls.
-  SmallVector<Type *, 4> VecTys;
-  for (Use &Arg : CI->args())
-    VecTys.push_back(
-        FixedVectorType::get(Arg->getType(), VecTy->getNumElements()));
   FastMathFlags FMF;
   if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
     FMF = FPCI->getFastMathFlags();
   SmallVector<const Value *> Arguments(CI->args());
-  IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, VecTys, FMF,
+  IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, ArgTys, FMF,
                                     dyn_cast<IntrinsicInst>(CI));
   auto IntrinsicCost =
       TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
@@ -7081,8 +7078,8 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
   if (!CI->isNoBuiltin() && VecFunc) {
     // Calculate the cost of the vector library call.
     // If the corresponding vector call is cheaper, return its cost.
-    LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
-                                    TTI::TCK_RecipThroughput);
+    LibCost =
+        TTI->getCallInstrCost(nullptr, VecTy, ArgTys, TTI::TCK_RecipThroughput);
   }
   return {IntrinsicCost, LibCost};
 }
@@ -8508,6 +8505,30 @@ TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const {
   return TTI::CastContextHint::None;
 }
 
+/// Builds the argument types vector for the given call instruction with the
+/// given \p ID for the specified vector factor.
+static SmallVector<Type *> buildIntrinsicArgTypes(const CallInst *CI,
+                                                  const Intrinsic::ID ID,
+                                                  const unsigned VF,
+                                                  unsigned MinBW) {
+  SmallVector<Type *> ArgTys;
+  for (auto [Idx, Arg] : enumerate(CI->args())) {
+    if (ID != Intrinsic::not_intrinsic) {
+      if (isVectorIntrinsicWithScalarOpAtArg(ID, Idx)) {
+        ArgTys.push_back(Arg->getType());
+        continue;
+      }
+      if (MinBW > 0) {
+        ArgTys.push_back(FixedVectorType::get(
+            IntegerType::get(CI->getContext(), MinBW), VF));
+        continue;
+      }
+    }
+    ArgTys.push_back(FixedVectorType::get(Arg->getType(), VF));
+  }
+  return ArgTys;
+}
+
 InstructionCost
 BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
                       SmallPtrSetImpl<Value *> &CheckedExtracts) {
@@ -9074,7 +9095,11 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
     };
     auto GetVectorCost = [=](InstructionCost CommonCost) {
       auto *CI = cast<CallInst>(VL0);
-      auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
+      Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
+      SmallVector<Type *> ArgTys =
+          buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
+                                 It != MinBWs.end() ? It->second.first : 0);
+      auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
       return std::min(VecCallCosts.first, VecCallCosts.second) + CommonCost;
     };
     return GetCostDiff(GetScalarCost, GetVectorCost);
@@ -9824,11 +9849,13 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
       if (BWIt != MinBWs.end()) {
         Type *DstTy = Root.Scalars.front()->getType();
         unsigned OriginalSz = DL->getTypeSizeInBits(DstTy);
-        if (OriginalSz != BWIt->second.first) {
+        unsigned SrcSz =
+            ReductionBitWidth == 0 ? BWIt->second.first : ReductionBitWidth;
+        if (OriginalSz != SrcSz) {
           unsigned Opcode = Instruction::Trunc;
-          if (OriginalSz < BWIt->second.first)
+          if (OriginalSz > SrcSz)
             Opcode = BWIt->second.second ? Instruction::SExt : Instruction::ZExt;
-          Type *SrcTy = IntegerType::get(DstTy->getContext(), BWIt->second.first);
+          Type *SrcTy = IntegerType::get(DstTy->getContext(), SrcSz);
           Cost += TTI->getCastInstrCost(Opcode, DstTy, SrcTy,
                                         TTI::CastContextHint::None,
                                         TTI::TCK_RecipThroughput);
@@ -11142,7 +11169,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
       VF = std::max(VF, SubVecVF);
     }
     // Adjust SubMask.
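The three cases in buildIntrinsicArgTypes above (scalar-only operand kept scalar, minimum bitwidth applied, plain widening) can be modeled in isolation. The string "types" below are purely illustrative; as in the real function, the first two cases only apply when the call maps to a known vector intrinsic. For example, a smax over i32 values demoted to 16 bits at VF=4 yields {"<4 x i16>", "<4 x i16>"}.

    #include <cstddef>
    #include <string>
    #include <vector>

    // Standalone model of buildIntrinsicArgTypes.
    std::vector<std::string> argTypes(const std::vector<std::string> &ScalarTys,
                                      const std::vector<bool> &IsScalarOperand,
                                      bool KnownIntrinsic, unsigned VF,
                                      unsigned MinBW) {
      std::vector<std::string> Out;
      for (std::size_t I = 0; I < ScalarTys.size(); ++I) {
        if (KnownIntrinsic && IsScalarOperand[I]) {
          Out.push_back(ScalarTys[I]); // e.g. the second operand of abs
          continue;
        }
        std::string Elt = (KnownIntrinsic && MinBW)
                              ? "i" + std::to_string(MinBW)
                              : ScalarTys[I];
        Out.push_back("<" + std::to_string(VF) + " x " + Elt + ">");
      }
      return Out;
    }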
-    for (auto [I, Idx] : enumerate(SubMask))
+    for (int &Idx : SubMask)
       if (Idx != PoisonMaskElem)
         Idx += VF;
     copy(SubMask, std::next(VecMask.begin(), Part * SliceSize));
@@ -11728,8 +11755,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
           VecOp = TE->VectorizedValue;
         if (!Vec1) {
           Vec1 = VecOp;
-        } else if (Vec1 != EI->getVectorOperand()) {
-          assert((!Vec2 || Vec2 == EI->getVectorOperand()) &&
+        } else if (Vec1 != VecOp) {
+          assert((!Vec2 || Vec2 == VecOp) &&
                  "Expected only 1 or 2 vectors shuffle.");
           Vec2 = VecOp;
         }
@@ -11769,8 +11796,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
       VecMask.assign(VecMask.size(), PoisonMaskElem);
       copy(SubMask, std::next(VecMask.begin(), I * SliceSize));
       if (TEs.size() == 1) {
-        IsUsedInExpr &=
-            FindReusedSplat(VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
+        IsUsedInExpr &= FindReusedSplat(
+            VecMask, TEs.front()->getVectorFactor(), I, SliceSize);
         ShuffleBuilder.add(*TEs.front(), VecMask);
         if (TEs.front()->VectorizedValue)
           IsNonPoisoned &=
@@ -12546,7 +12573,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
 
       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
 
-      auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
+      SmallVector<Type *> ArgTys =
+          buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(),
+                                 It != MinBWs.end() ? It->second.first : 0);
+      auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys);
       bool UseIntrinsic = ID != Intrinsic::not_intrinsic &&
                           VecCallCosts.first <= VecCallCosts.second;
 
@@ -12555,8 +12585,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
       SmallVector<Type *, 2> TysForDecl;
       // Add return type if intrinsic is overloaded on it.
       if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
-        TysForDecl.push_back(
-            FixedVectorType::get(CI->getType(), E->Scalars.size()));
+        TysForDecl.push_back(VecTy);
       auto *CEI = cast<CallInst>(VL0);
       for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
         ValueList OpVL;
@@ -12564,7 +12593,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
         // vectorized.
         if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
           ScalarArg = CEI->getArgOperand(I);
-          OpVecs.push_back(CEI->getArgOperand(I));
+          // If we decided to reduce the bitwidth of the abs intrinsic, its
+          // second argument must be set to false (do not return poison if the
+          // value is signed min).
+ if (ID == Intrinsic::abs && It != MinBWs.end() && + It->second.first < DL->getTypeSizeInBits(CEI->getType())) + ScalarArg = Builder.getFalse(); + OpVecs.push_back(ScalarArg); if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) TysForDecl.push_back(ScalarArg->getType()); continue; @@ -12577,10 +12611,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { } ScalarArg = CEI->getArgOperand(I); if (cast(OpVec->getType())->getElementType() != - ScalarArg->getType()) { + ScalarArg->getType() && + It == MinBWs.end()) { auto *CastTy = FixedVectorType::get(ScalarArg->getType(), VecTy->getNumElements()); OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I)); + } else if (It != MinBWs.end()) { + OpVec = Builder.CreateIntCast(OpVec, VecTy, GetOperandSignedness(I)); } LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); @@ -14123,6 +14160,11 @@ bool BoUpSLP::collectValuesToDemote( unsigned BitWidth1 = OrigBitWidth - NumSignBits; if (!isKnownNonNegative(V, SimplifyQuery(*DL))) ++BitWidth1; + if (auto *I = dyn_cast(V)) { + APInt Mask = DB->getDemandedBits(I); + unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero(); + BitWidth1 = std::min(BitWidth1, BitWidth2); + } BitWidth = std::max(BitWidth, BitWidth1); return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2); }; @@ -14141,6 +14183,17 @@ bool BoUpSLP::collectValuesToDemote( })) return FinalAnalysis(); + if (!all_of(I->users(), + [=](User *U) { + return getTreeEntry(U) || + (UserIgnoreList && UserIgnoreList->contains(U)) || + (U->getType()->isSized() && + !U->getType()->isScalableTy() && + DL->getTypeSizeInBits(U->getType()) <= BitWidth); + }) && + !IsPotentiallyTruncated(I, BitWidth)) + return false; + unsigned Start = 0; unsigned End = I->getNumOperands(); @@ -14313,6 +14366,62 @@ bool BoUpSLP::collectValuesToDemote( return TryProcessInstruction(I, *ITE, BitWidth, Ops); } + case Instruction::Call: { + auto *IC = dyn_cast(I); + if (!IC) + break; + Intrinsic::ID ID = getVectorIntrinsicIDForCall(IC, TLI); + if (ID != Intrinsic::abs && ID != Intrinsic::smin && + ID != Intrinsic::smax && ID != Intrinsic::umin && ID != Intrinsic::umax) + break; + SmallVector Operands(1, I->getOperand(0)); + function_ref CallChecker; + auto CompChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { + assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); + if (ID == Intrinsic::umin || ID == Intrinsic::umax) { + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) && + MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)); + } + assert((ID == Intrinsic::smin || ID == Intrinsic::smax) && + "Expected min/max intrinsics only."); + unsigned SignBits = OrigBitWidth - BitWidth; + return SignBits <= ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, + nullptr, DT) && + SignBits <= + ComputeNumSignBits(I->getOperand(1), *DL, 0, AC, nullptr, DT); + }; + End = 1; + if (ID != Intrinsic::abs) { + Operands.push_back(I->getOperand(1)); + End = 2; + CallChecker = CompChecker; + } + InstructionCost BestCost = + std::numeric_limits::max(); + unsigned BestBitWidth = BitWidth; + unsigned VF = ITE->Scalars.size(); + // Choose the best bitwidth based on cost estimations. 
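The cost-driven search that the Checker lambda right after this note implements can be modeled in isolation; pickBestBitWidth and the cost callback below are hypothetical stand-ins for AttemptCheckBitwidth and getVectorCallCosts, and the model assumes at least one candidate width:

    #include <climits>

    // Try each candidate bitwidth, cost the narrowed vector intrinsic call,
    // and keep the cheapest one (ties resolved toward the first candidate).
    unsigned pickBestBitWidth(const unsigned *Candidates, unsigned NumCandidates,
                              unsigned (*VectorCallCost)(unsigned BW)) {
      unsigned BestCost = UINT_MAX, BestBW = Candidates[0];
      for (unsigned I = 0; I < NumCandidates; ++I) {
        unsigned Cost = VectorCallCost(Candidates[I]);
        if (Cost < BestCost) {
          BestCost = Cost;
          BestBW = Candidates[I];
        }
      }
      return BestBW;
    }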
+ auto Checker = [&](unsigned BitWidth, unsigned) { + unsigned MinBW = PowerOf2Ceil(BitWidth); + SmallVector ArgTys = buildIntrinsicArgTypes(IC, ID, VF, MinBW); + auto VecCallCosts = getVectorCallCosts( + IC, + FixedVectorType::get(IntegerType::get(IC->getContext(), MinBW), VF), + TTI, TLI, ArgTys); + InstructionCost Cost = std::min(VecCallCosts.first, VecCallCosts.second); + if (Cost < BestCost) { + BestCost = Cost; + BestBitWidth = BitWidth; + } + return false; + }; + [[maybe_unused]] bool NeedToExit; + (void)AttemptCheckBitwidth(Checker, NeedToExit); + BitWidth = BestBitWidth; + return TryProcessInstruction(I, *ITE, BitWidth, Operands, CallChecker); + } + // Otherwise, conservatively give up. default: break; @@ -14321,6 +14430,8 @@ bool BoUpSLP::collectValuesToDemote( return FinalAnalysis(); } +static RecurKind getRdxKind(Value *V); + void BoUpSLP::computeMinimumValueSizes() { // We only attempt to truncate integer expressions. bool IsStoreOrInsertElt = @@ -14371,7 +14482,7 @@ void BoUpSLP::computeMinimumValueSizes() { auto ComputeMaxBitWidth = [&](ArrayRef TreeRoot, unsigned VF, bool IsTopRoot, bool IsProfitableToDemoteRoot, unsigned Opcode, unsigned Limit, - bool IsTruncRoot) { + bool IsTruncRoot, bool IsSignedCmp) { ToDemote.clear(); auto *TreeRootIT = dyn_cast(TreeRoot[0]->getType()); if (!TreeRootIT || !Opcode) @@ -14394,7 +14505,7 @@ void BoUpSLP::computeMinimumValueSizes() { // True. // Determine if the sign bit of all the roots is known to be zero. If not, // IsKnownPositive is set to False. - bool IsKnownPositive = all_of(TreeRoot, [&](Value *R) { + bool IsKnownPositive = !IsSignedCmp && all_of(TreeRoot, [&](Value *R) { KnownBits Known = computeKnownBits(R, *DL); return Known.isNonNegative(); }); @@ -14481,8 +14592,11 @@ void BoUpSLP::computeMinimumValueSizes() { unsigned BitWidth1 = NumTypeBits - NumSignBits; if (!isKnownNonNegative(V, SimplifyQuery(*DL))) ++BitWidth1; - auto Mask = DB->getDemandedBits(cast(V)); - unsigned BitWidth2 = Mask.getBitWidth() - Mask.countl_zero(); + unsigned BitWidth2 = BitWidth1; + if (!RecurrenceDescriptor::isIntMinMaxRecurrenceKind(::getRdxKind(V))) { + auto Mask = DB->getDemandedBits(cast(V)); + BitWidth2 = Mask.getBitWidth() - Mask.countl_zero(); + } ReductionBitWidth = std::max(std::min(BitWidth1, BitWidth2), ReductionBitWidth); } @@ -14499,6 +14613,7 @@ void BoUpSLP::computeMinimumValueSizes() { ++NodeIdx; IsTruncRoot = true; } + bool IsSignedCmp = false; while (NodeIdx < VectorizableTree.size()) { ArrayRef TreeRoot = VectorizableTree[NodeIdx]->Scalars; unsigned Limit = 2; @@ -14510,7 +14625,7 @@ void BoUpSLP::computeMinimumValueSizes() { Limit = 3; unsigned MaxBitWidth = ComputeMaxBitWidth( TreeRoot, VectorizableTree[NodeIdx]->getVectorFactor(), IsTopRoot, - IsProfitableToDemoteRoot, Opcode, Limit, IsTruncRoot); + IsProfitableToDemoteRoot, Opcode, Limit, IsTruncRoot, IsSignedCmp); if (ReductionBitWidth != 0 && (IsTopRoot || !RootDemotes.empty())) { if (MaxBitWidth != 0 && ReductionBitWidth < MaxBitWidth) ReductionBitWidth = bit_ceil(MaxBitWidth); @@ -14548,6 +14663,16 @@ void BoUpSLP::computeMinimumValueSizes() { EI.UserTE->getOpcode() == Instruction::Trunc && !EI.UserTE->isAltShuffle(); }); + IsSignedCmp = + NodeIdx < VectorizableTree.size() && + any_of(VectorizableTree[NodeIdx]->UserTreeIndices, + [](const EdgeInfo &EI) { + return EI.UserTE->getOpcode() == Instruction::ICmp && + any_of(EI.UserTE->Scalars, [](Value *V) { + auto *IC = dyn_cast(V); + return IC && IC->isSigned(); + }); + }); } // If the maximum bit width we compute is less 
than the width of the roots'
@@ -16588,6 +16713,10 @@ class HorizontalReduction {
 };
 } // end anonymous namespace
 
+/// Gets recurrence kind from the specified value.
+static RecurKind getRdxKind(Value *V) {
+  return HorizontalReduction::getRdxKind(V);
+}
 static std::optional<unsigned> getAggregateSize(Instruction *InsertInst) {
   if (auto *IE = dyn_cast<InsertElementInst>(InsertInst))
     return cast<FixedVectorType>(IE->getType())->getNumElements();
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index f0b7008992d7b..3e1069d82ddad 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -871,13 +871,15 @@ void VPlan::execute(VPTransformState *State) {
     // only a single part is generated, which provides the last part from the
     // previous iteration. For non-ordered reductions all UF parts are
     // generated.
-    bool SinglePartNeeded = isa(PhiR) ||
-                            isa(PhiR) ||
-                            (isa(PhiR) &&
-                             cast(PhiR)->isOrdered());
-    bool NeedsScalar = isa(PhiR) ||
-                       (isa(PhiR) &&
-                        cast(PhiR)->isInLoop());
+    bool SinglePartNeeded =
+        isa(PhiR) ||
+        isa(PhiR) ||
+        (isa(PhiR) &&
+         cast(PhiR)->isOrdered());
+    bool NeedsScalar =
+        isa(PhiR) ||
+        (isa(PhiR) &&
+         cast(PhiR)->isInLoop());
 
     unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
     for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
@@ -1372,6 +1374,8 @@ VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
 }
 
 void VPSlotTracker::assignSlot(const VPValue *V) {
+  if (V->getUnderlyingValue())
+    return;
   assert(!Slots.contains(V) && "VPValue already has a slot!");
   Slots[V] = NextSlot++;
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 813ebda29ffd9..5dc905a3c407b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -242,6 +242,15 @@ struct VPTransformState {
   ElementCount VF;
   unsigned UF;
 
+  /// If EVL (Explicit Vector Length) is not nullptr, then EVL must be a valid
+  /// value set during plan transformation, possibly a default value equal to
+  /// the whole vector register length. EVL is created only if TTI prefers
+  /// predicated vectorization, thus if EVL is not nullptr it also implies a
+  /// preference for predicated vectorization.
+  /// TODO: this is a temporary solution, the EVL must be explicitly used by
+  /// the recipes and must be removed here.
+  VPValue *EVL = nullptr;
+
   /// Hold the indices to generate specific scalar instructions. Null indicates
   /// that all instances are to be generated, using either scalar or vector
   /// instructions.
@@ -1159,6 +1168,7 @@ class VPInstruction : public VPRecipeWithIRFlags {
     SLPLoad,
     SLPStore,
     ActiveLaneMask,
+    ExplicitVectorLength,
     CalculateTripCountMinusVF,
     // Increment the canonical IV separately for each unrolled part.
     CanonicalIVIncrementForPart,
@@ -1922,14 +1932,12 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
 class VPBlendRecipe : public VPSingleDefRecipe {
 public:
   /// The blend operation is a User of the incoming values and of their
-  /// respective masks, ordered [I0, M0, I1, M1, ...]. Note that a single value
-  /// might be incoming with a full mask for which there is no VPValue.
+  /// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first
+  /// incoming value does not have a mask associated.
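The layout just described, and the getIncomingValue/getMask accessors below, can be modeled in isolation; BlendOperands is a hypothetical stand-in using ints for VPValue operands. For the operand list {I0, I1, M1, I2, M2} (five operands), numIncoming() is 3, incomingValue(2) returns I2 at index 3, and mask(2) returns M2 at index 4.

    #include <cassert>
    #include <vector>

    // Model of the new VPBlendRecipe operand layout [I0, I1, M1, I2, M2, ...]:
    // the first incoming value carries no mask; it is the fall-through value.
    struct BlendOperands {
      std::vector<int> Ops; // stand-in for the VPValue *operands

      unsigned numIncoming() const { return (Ops.size() + 1) / 2; }
      int incomingValue(unsigned Idx) const {
        return Idx == 0 ? Ops[0] : Ops[Idx * 2 - 1];
      }
      int mask(unsigned Idx) const {
        assert(Idx > 0 && "the first incoming value has no mask");
        return Ops[Idx * 2];
      }
    };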
   VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands)
       : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
-    assert(Operands.size() > 0 &&
-           ((Operands.size() == 1) || (Operands.size() % 2 == 0)) &&
-           "Expected either a single incoming value or a positive even number "
-           "of operands");
+    assert((Operands.size() + 1) % 2 == 0 &&
+           "Expected an odd number of operands");
   }
 
   VPRecipeBase *clone() override {
@@ -1939,15 +1947,20 @@ class VPBlendRecipe : public VPSingleDefRecipe {
 
   VP_CLASSOF_IMPL(VPDef::VPBlendSC)
 
-  /// Return the number of incoming values, taking into account that a single
+  /// Return the number of incoming values, taking into account that the first
   /// incoming value has no mask.
   unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
 
   /// Return incoming value number \p Idx.
-  VPValue *getIncomingValue(unsigned Idx) const { return getOperand(Idx * 2); }
+  VPValue *getIncomingValue(unsigned Idx) const {
+    return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1);
+  }
 
   /// Return mask number \p Idx.
-  VPValue *getMask(unsigned Idx) const { return getOperand(Idx * 2 + 1); }
+  VPValue *getMask(unsigned Idx) const {
+    assert(Idx > 0 && "First index has no mask associated.");
+    return getOperand(Idx * 2);
+  }
 
   /// Generate the phi/select nodes.
   void execute(VPTransformState &State) override;
@@ -2065,13 +2078,15 @@ class VPInterleaveRecipe : public VPRecipeBase {
 class VPReductionRecipe : public VPSingleDefRecipe {
   /// The recurrence descriptor for the reduction in question.
   const RecurrenceDescriptor &RdxDesc;
+  bool IsOrdered;
 
 public:
   VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
-                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp)
+                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
+                    bool IsOrdered)
       : VPSingleDefRecipe(VPDef::VPReductionSC,
                           ArrayRef<VPValue *>({ChainOp, VecOp}), I),
-        RdxDesc(R) {
+        RdxDesc(R), IsOrdered(IsOrdered) {
     if (CondOp)
       addOperand(CondOp);
   }
@@ -2080,7 +2095,7 @@ class VPReductionRecipe : public VPSingleDefRecipe {
 
   VPRecipeBase *clone() override {
     return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
-                                 getVecOp(), getCondOp());
+                                 getVecOp(), getCondOp(), IsOrdered);
   }
 
   VP_CLASSOF_IMPL(VPDef::VPReductionSC)
@@ -2489,6 +2504,45 @@ class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
 #endif
 };
 
+/// A recipe for generating the phi node for the current index of elements,
+/// adjusted in accordance with the EVL value. It starts at the start value of
+/// the canonical induction and gets incremented by EVL in each iteration of
+/// the vector loop.
+class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
+public:
+  VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
+      : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
+
+  ~VPEVLBasedIVPHIRecipe() override = default;
+
+  VPEVLBasedIVPHIRecipe *clone() override {
+    llvm_unreachable("cloning not implemented yet");
+  }
+
+  VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
+
+  static inline bool classof(const VPHeaderPHIRecipe *D) {
+    return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
+  }
+
+  /// Generate phi for handling IV based on EVL over iterations correctly.
+  /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
+  void execute(VPTransformState &State) override;
+
+  /// Returns true if the recipe only uses the first lane of operand \p Op.
+  bool onlyFirstLaneUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    return true;
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// A Recipe for widening the canonical induction variable of the vector loop.
 class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
 public:
@@ -2522,8 +2576,8 @@ class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
   }
 };
 
-/// A recipe for converting the canonical IV value to the corresponding value of
-/// an IV with different start and step values, using Start + CanonicalIV *
+/// A recipe for converting the input \p IV value to the corresponding value
+/// of an IV with different start and step values, using Start + IV *
 /// Step.
 class VPDerivedIVRecipe : public VPSingleDefRecipe {
   /// Kind of the induction.
@@ -2541,16 +2595,16 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
                           Start, CanonicalIV, Step) {}
 
   VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
-                    const FPMathOperator *FPBinOp, VPValue *Start,
-                    VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
-      : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
-        Kind(Kind), FPBinOp(FPBinOp) {}
+                    const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
+                    VPValue *Step)
+      : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
+        FPBinOp(FPBinOp) {}
 
   ~VPDerivedIVRecipe() override = default;
 
   VPRecipeBase *clone() override {
-    return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(),
-                                 getCanonicalIV(), getStepValue());
+    return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
+                                 getStepValue());
   }
 
   VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
@@ -2570,9 +2624,6 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
   }
 
   VPValue *getStartValue() const { return getOperand(0); }
-  VPCanonicalIVPHIRecipe *getCanonicalIV() const {
-    return cast<VPCanonicalIVPHIRecipe>(getOperand(1));
-  }
   VPValue *getStepValue() const { return getOperand(2); }
 
   /// Returns true if the recipe only uses the first lane of operand \p Op.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 04e30312dc23a..c8ae2ee5a30fe 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -216,14 +216,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
   Type *ResultTy =
       TypeSwitch(V->getDefiningRecipe())
           .Case(
-              [this](const auto *R) {
-                // Handle header phi recipes, except VPWienIntOrFpInduction
-                // which needs special handling due it being possibly truncated.
-                // TODO: consider inferring/caching type of siblings, e.g.,
-                // backedge value, here and in cases below.
-                return inferScalarType(R->getStartValue());
-              })
+              VPReductionPHIRecipe, VPWidenPointerInductionRecipe,
+              VPEVLBasedIVPHIRecipe>([this](const auto *R) {
+                // Handle header phi recipes, except VPWidenIntOrFpInduction
+                // which needs special handling due to it being possibly
+                // truncated.
+                // TODO: consider inferring/caching type of siblings, e.g.,
+                // backedge value, here and in cases below.
+ return inferScalarType(R->getStartValue()); + }) .Case( [](const auto *R) { return R->getScalarType(); }) .CasegetType(), 0); return Builder.CreateSelect(Cmp, Sub, Zero); } + case VPInstruction::ExplicitVectorLength: { + // Compute EVL + auto GetEVL = [=](VPTransformState &State, Value *AVL) { + assert(AVL->getType()->isIntegerTy() && + "Requested vector length should be an integer."); + + // TODO: Add support for MaxSafeDist for correct loop emission. + assert(State.VF.isScalable() && "Expected scalable vector factor."); + Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue()); + + Value *EVL = State.Builder.CreateIntrinsic( + State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length, + {AVL, VFArg, State.Builder.getTrue()}); + return EVL; + }; + // TODO: Restructure this code with an explicit remainder loop, vsetvli can + // be outside of the main loop. + assert(Part == 0 && "No unrolling expected for predicated vectorization."); + // Compute VTC - IV as the AVL (requested vector length). + Value *Index = State.get(getOperand(0), VPIteration(0, 0)); + Value *TripCount = State.get(getOperand(1), VPIteration(0, 0)); + Value *AVL = State.Builder.CreateSub(TripCount, Index); + Value *EVL = GetEVL(State, AVL); + assert(!State.EVL && "multiple EVL recipes"); + State.EVL = this; + return EVL; + } case VPInstruction::CanonicalIVIncrementForPart: { auto *IV = State.get(getOperand(0), VPIteration(0, 0)); if (Part == 0) @@ -473,6 +501,8 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { // Reduce all of the unrolled parts into a single vector. Value *ReducedPartRdx = RdxParts[0]; unsigned Op = RecurrenceDescriptor::getOpcode(RK); + if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) + Op = Instruction::Or; if (PhiR->isOrdered()) { ReducedPartRdx = RdxParts[State.UF - 1]; @@ -485,19 +515,16 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { if (Op != Instruction::ICmp && Op != Instruction::FCmp) ReducedPartRdx = Builder.CreateBinOp( (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); - else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) { - TrackingVH ReductionStartValue = - RdxDesc.getRecurrenceStartValue(); - ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK, - ReducedPartRdx, RdxPart); - } else + else ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); } } // Create the reduction after the loop. Note that inloop reductions create // the target reduction in the loop using a Reduction recipe. - if (State.VF.isVector() && !PhiR->isInLoop()) { + if ((State.VF.isVector() || + RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) && + !PhiR->isInLoop()) { ReducedPartRdx = createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi); // If the reduction can be performed in a smaller type, we need to extend @@ -592,6 +619,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { // TODO: Cover additional opcodes. 
return vputils::onlyFirstLaneUsed(this); case VPInstruction::ActiveLaneMask: + case VPInstruction::ExplicitVectorLength: case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::BranchOnCount: @@ -628,6 +656,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ActiveLaneMask: O << "active lane mask"; break; + case VPInstruction::ExplicitVectorLength: + O << "EXPLICIT-VECTOR-LENGTH"; + break; case VPInstruction::FirstOrderRecurrenceSplice: O << "first-order splice"; break; @@ -1184,7 +1215,7 @@ void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent, O << Indent << "= DERIVED-IV "; getStartValue()->printAsOperand(O, SlotTracker); O << " + "; - getCanonicalIV()->printAsOperand(O, SlotTracker); + getOperand(1)->printAsOperand(O, SlotTracker); O << " * "; getStepValue()->printAsOperand(O, SlotTracker); } @@ -1482,12 +1513,65 @@ void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { O << " "; getIncomingValue(I)->printAsOperand(O, SlotTracker); + if (I == 0) + continue; O << "/"; getMask(I)->printAsOperand(O, SlotTracker); } } } +#endif + +void VPReductionRecipe::execute(VPTransformState &State) { + assert(!State.Instance && "Reduction being replicated."); + Value *PrevInChain = State.get(getChainOp(), 0, /*IsScalar*/ true); + RecurKind Kind = RdxDesc.getRecurrenceKind(); + // Propagate the fast-math flags carried by the underlying instruction. + IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); + State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *NewVecOp = State.get(getVecOp(), Part); + if (VPValue *Cond = getCondOp()) { + Value *NewCond = State.get(Cond, Part, State.VF.isScalar()); + VectorType *VecTy = dyn_cast(NewVecOp->getType()); + Type *ElementTy = VecTy ? 
VecTy->getElementType() : NewVecOp->getType(); + Value *Iden = RdxDesc.getRecurrenceIdentity(Kind, ElementTy, + RdxDesc.getFastMathFlags()); + if (State.VF.isVector()) { + Iden = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden); + } + + Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, Iden); + NewVecOp = Select; + } + Value *NewRed; + Value *NextInChain; + if (IsOrdered) { + if (State.VF.isVector()) + NewRed = createOrderedReduction(State.Builder, RdxDesc, NewVecOp, + PrevInChain); + else + NewRed = State.Builder.CreateBinOp( + (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain, + NewVecOp); + PrevInChain = NewRed; + } else { + PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true); + NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp); + } + if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) { + NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(), + NewRed, PrevInChain); + } else if (IsOrdered) + NextInChain = NewRed; + else + NextInChain = State.Builder.CreateBinOp( + (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain); + State.set(this, NextInChain, Part, /*IsScalar*/ true); + } +} +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "REDUCE "; @@ -1974,3 +2058,25 @@ void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent, printOperands(O, SlotTracker); } #endif + +void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) { + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization."); + Value *Start = State.get(getOperand(0), VPIteration(0, 0)); + PHINode *EntryPart = + State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv"); + EntryPart->addIncoming(Start, VectorPH); + EntryPart->setDebugLoc(getDebugLoc()); + State.set(this, EntryPart, 0, /*IsScalar=*/true); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI "; + + printAsOperand(O, SlotTracker); + O << " = phi "; + printOperands(O, SlotTracker); +} +#endif diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 7d4e54d099455..1256e4d8fda50 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1209,6 +1209,45 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( return LaneMaskPhi; } +/// Replaces (ICMP_ULE, WideCanonicalIV, backedge-taken-count) pattern using +/// the given \p Idiom. +static void +replaceHeaderPredicateWith(VPlan &Plan, VPValue &Idiom, + function_ref Cond = {}) { + auto *FoundWidenCanonicalIVUser = + find_if(Plan.getCanonicalIV()->users(), + [](VPUser *U) { return isa(U); }); + if (FoundWidenCanonicalIVUser == Plan.getCanonicalIV()->users().end()) + return; + auto *WideCanonicalIV = + cast(*FoundWidenCanonicalIVUser); + // Walk users of WideCanonicalIV and replace all compares of the form + // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with + // the given idiom VPValue. 
+  VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
+  for (VPUser *U : SmallVector<VPUser *>(WideCanonicalIV->users())) {
+    auto *CompareToReplace = dyn_cast<VPInstruction>(U);
+    if (!CompareToReplace ||
+        CompareToReplace->getOpcode() != Instruction::ICmp ||
+        CompareToReplace->getPredicate() != CmpInst::ICMP_ULE ||
+        CompareToReplace->getOperand(1) != BTC)
+      continue;
+
+    assert(CompareToReplace->getOperand(0) == WideCanonicalIV &&
+           "WidenCanonicalIV must be the first operand of the compare");
+    if (Cond) {
+      CompareToReplace->replaceUsesWithIf(&Idiom, Cond);
+      if (!CompareToReplace->getNumUsers())
+        CompareToReplace->eraseFromParent();
+    } else {
+      CompareToReplace->replaceAllUsesWith(&Idiom);
+      CompareToReplace->eraseFromParent();
+    }
+  }
+  if (!WideCanonicalIV->getNumUsers())
+    WideCanonicalIV->eraseFromParent();
+}
+
 void VPlanTransforms::addActiveLaneMask(
     VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
     bool DataAndControlFlowWithoutRuntimeCheck) {
@@ -1238,20 +1277,77 @@ void VPlanTransforms::addActiveLaneMask(
   // Walk users of WideCanonicalIV and replace all compares of the form
   // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
   // active-lane-mask.
-  VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
-  for (VPUser *U : SmallVector<VPUser *>(WideCanonicalIV->users())) {
-    auto *CompareToReplace = dyn_cast<VPInstruction>(U);
-    if (!CompareToReplace ||
-        CompareToReplace->getOpcode() != Instruction::ICmp ||
-        CompareToReplace->getPredicate() != CmpInst::ICMP_ULE ||
-        CompareToReplace->getOperand(1) != BTC)
-      continue;
+  replaceHeaderPredicateWith(Plan, *LaneMask);
+}
 
-    assert(CompareToReplace->getOperand(0) == WideCanonicalIV &&
-           "WidenCanonicalIV must be the first operand of the compare");
-    CompareToReplace->replaceAllUsesWith(LaneMask);
-    CompareToReplace->eraseFromParent();
+/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
+/// replaces all uses except the canonical IV increment of
+/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
+/// is used only for counting loop iterations after this transformation.
+///
+/// The function uses the following definitions:
+///  %StartV is the canonical induction start value.
+///
+/// The function adds the following recipes:
+///
+/// vector.ph:
+/// ...
+///
+/// vector.body:
+/// ...
+/// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
+///                                               [ %NextEVLIV, %vector.body ]
+/// %VPEVL = EXPLICIT-VECTOR-LENGTH %EVLPhi, original TC
+/// ...
+/// %NextEVLIV = add IVSize (cast i32 %VPEVL to IVSize), %EVLPhi
+/// ...
+///
+void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) {
+  VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  auto *CanonicalIVPHI = Plan.getCanonicalIV();
+  VPValue *StartV = CanonicalIVPHI->getStartValue();
+
+  // TODO: revisit this and try to remove the mask operand.
+  // Walk VPWidenMemoryInstructionRecipe users of WideCanonicalIV and replace
+  // all compares of the form (ICMP_ULE, WideCanonicalIV, backedge-taken-count),
+  // used as mask in VPWidenMemoryInstructionRecipe, with an all-true-mask.
+  Value *TrueMask =
+      ConstantInt::getTrue(CanonicalIVPHI->getScalarType()->getContext());
+  VPValue *VPTrueMask = Plan.getOrAddLiveIn(TrueMask);
+  replaceHeaderPredicateWith(Plan, *VPTrueMask, [](VPUser &U, unsigned) {
+    return isa<VPWidenMemoryInstructionRecipe>(U);
+  });
+  // Now create the ExplicitVectorLengthPhi recipe in the main loop.
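For reference, the IR this transform ultimately leads to is produced by VPInstruction::ExplicitVectorLength (see generatePerPart in VPlanRecipes.cpp earlier in this patch). A minimal sketch of that codegen follows; emitEVL is a hypothetical helper, and the sketch assumes the IRBuilderBase::CreateIntrinsic overload that takes an explicit return type. The transform then continues by creating the EVL-based IV phi, as the comment above says.

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"

    using namespace llvm;

    // AVL = trip-count - iv, then
    // evl = llvm.experimental.get.vector.length(AVL, VF, /*scalable=*/true).
    Value *emitEVL(IRBuilder<> &B, Value *Index, Value *TripCount, unsigned VF) {
      Value *AVL = B.CreateSub(TripCount, Index, "avl");
      Value *VFArg = B.getInt32(VF);
      return B.CreateIntrinsic(B.getInt32Ty(),
                               Intrinsic::experimental_get_vector_length,
                               {AVL, VFArg, B.getTrue()}, nullptr, "evl");
    }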
+ auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc()); + EVLPhi->insertAfter(CanonicalIVPHI); + auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength, + {EVLPhi, Plan.getTripCount()}); + VPEVL->insertBefore(*Header, Header->getFirstNonPhi()); + + auto *CanonicalIVIncrement = + cast(CanonicalIVPHI->getBackedgeValue()); + VPSingleDefRecipe *OpVPEVL = VPEVL; + if (unsigned IVSize = CanonicalIVPHI->getScalarType()->getScalarSizeInBits(); + IVSize != 32) { + OpVPEVL = new VPScalarCastRecipe(IVSize < 32 ? Instruction::Trunc + : Instruction::ZExt, + OpVPEVL, CanonicalIVPHI->getScalarType()); + OpVPEVL->insertBefore(CanonicalIVIncrement); } + auto *NextEVLIV = + new VPInstruction(Instruction::Add, {OpVPEVL, EVLPhi}, + {CanonicalIVIncrement->hasNoUnsignedWrap(), + CanonicalIVIncrement->hasNoSignedWrap()}, + CanonicalIVIncrement->getDebugLoc(), "index.evl.next"); + NextEVLIV->insertBefore(CanonicalIVIncrement); + EVLPhi->addOperand(NextEVLIV); + + // Replace all uses of VPCanonicalIVPHIRecipe by + // VPEVLBasedIVPHIRecipe except for the canonical IV increment. + CanonicalIVPHI->replaceAllUsesWith(EVLPhi); + CanonicalIVIncrement->setOperand(0, CanonicalIVPHI); + // TODO: support unroll factor > 1. + Plan.setUF(1); } void VPlanTransforms::dropPoisonGeneratingRecipes( @@ -1277,9 +1373,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes( // handled. if (isa(CurRec) || isa(CurRec) || - isa(CurRec) || - isa(CurRec) || - isa(CurRec)) + isa(CurRec) || isa(CurRec)) continue; // This recipe contributes to the address computation of a widen diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index ff83c3f083b09..0cbc70713d9c1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -98,6 +98,13 @@ struct VPlanTransforms { /// VPlan directly. static void dropPoisonGeneratingRecipes( VPlan &Plan, function_ref BlockNeedsPredication); + + /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and + /// replaces all uses except the canonical IV increment of + /// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. + /// VPCanonicalIVPHIRecipe is only used to control the loop after + /// this transformation. + static void addExplicitVectorLength(VPlan &Plan); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 1d2c17e91b7ab..8b221d30e5254 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -368,6 +368,7 @@ class VPDef { // VPHeaderPHIRecipe need to be kept together. VPCanonicalIVPHISC, VPActiveLaneMaskPHISC, + VPEVLBasedIVPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 7ebdb914fb852..12d37fa711db9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -92,7 +92,50 @@ static bool verifyVPBasicBlock(const VPBasicBlock *VPBB, for (const VPRecipeBase &R : *VPBB) RecipeNumbering[&R] = Cnt++; + // Set of recipe types along with VPInstruction Opcodes of all EVL-related + // recipes that must appear at most once in the header block. 
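The rules this verifier change enforces can be stated compactly: the EVL phi and the EVL instruction must live in the header block, each must appear at most once, and the EVL instruction must be defined before any widened memory recipe that consumes it. A standalone model over a string-labeled recipe sequence (labels are illustrative); the DenseSet-based implementation follows right after:

    #include <set>
    #include <string>
    #include <vector>

    // Returns false when an EVL rule is violated in the header sequence.
    bool verifyEVL(const std::vector<std::string> &HeaderRecipes) {
      std::set<std::string> Seen;
      bool SawMemRecipe = false;
      for (const std::string &R : HeaderRecipes) {
        if (R == "EVL-PHI" || R == "EVL") {
          if (!Seen.insert(R).second)
            return false; // inserted more than once
          if (R == "EVL" && SawMemRecipe)
            return false; // used by a memory recipe before its definition
        } else if (R == "WIDEN-MEM") {
          SawMemRecipe = true;
        }
      }
      return true;
    }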
+ DenseSet EVLFound; + const VPRecipeBase *VPWidenMemRecipe = nullptr; + const VPlan *Plan = VPBB->getPlan(); + bool IsHeader = Plan->getEntry()->getNumSuccessors() == 1 && + Plan->getVectorLoopRegion()->getEntry() == VPBB; + auto CheckEVLRecipiesInsts = [&](const VPRecipeBase *R) { + if (isa(R)) { + if (!IsHeader) { + errs() << "EVL PHI recipe not in entry block!\n"; + return false; + } + if (!EVLFound.insert(VPDef::VPEVLBasedIVPHISC).second) { + errs() << "EVL PHI recipe inserted more than once!\n"; + return false; + } + return true; + } + if (const auto *RInst = dyn_cast(R); + RInst && RInst->getOpcode() == VPInstruction::ExplicitVectorLength) { + if (!IsHeader) { + errs() << "EVL instruction not in the header block!\n"; + return false; + } + if (!EVLFound.insert(RInst->getOpcode() + VPDef::VPLastPHISC).second) { + errs() << "EVL instruction inserted more than once!\n"; + return false; + } + if (VPWidenMemRecipe) { + errs() << "Use of EVL instruction by widen memory recipe before " + "definition!\n"; + return false; + } + return true; + } + if (isa(R)) + VPWidenMemRecipe = R; + return true; + }; + for (const VPRecipeBase &R : *VPBB) { + if (!CheckEVLRecipiesInsts(&R)) + return false; for (const VPValue *V : R.definedValues()) { for (const VPUser *U : V->users()) { auto *UI = dyn_cast(U); diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 9e4284474f2ef..a3331e26035b9 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1484,13 +1484,18 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) { TTI::CastContextHint::None, CostKind) + TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy, TTI::CastContextHint::None, CostKind); - OldCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, - CastDstTy, Mask, CostKind); + OldCost += + TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, CastDstTy, Mask, + CostKind, 0, nullptr, std::nullopt, &I); InstructionCost NewCost = TTI.getShuffleCost( TargetTransformInfo::SK_PermuteTwoSrc, CastSrcTy, Mask, CostKind); NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy, TTI::CastContextHint::None, CostKind); + + LLVM_DEBUG(dbgs() << "Found a shuffle feeding two casts: " << I + << "\n OldCost: " << OldCost << " vs NewCost: " << NewCost + << "\n"); if (NewCost > OldCost) return false; diff --git a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll index 652d36c01a77e..f5ca6a22b60ac 100644 --- a/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll +++ b/llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll @@ -5,24 +5,24 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @fixed() { ; CHECK-LABEL: 'fixed' -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef) -; CHECK-NEXT: 
Cost Model: Found an estimated cost of 12 for instruction: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef) +; CHECK-NEXT: Cost Model: 
Found an estimated cost of 56 for instruction: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; entry: diff --git a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll index bf8458ba56e84..521a0900c844e 100644 --- a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll +++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll @@ -190,11 +190,11 @@ declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, define <4 x i8> @gather_load_4xi8_constant_mask(<4 x ptr> %ptrs) { ; CHECK: gather_load_4xi8_constant_mask ; CHECK-NEON-LABEL: 'gather_load_4xi8_constant_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv ; ; CHECK-SVE-128-LABEL: 'gather_load_4xi8_constant_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call 
<4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i8> undef) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv ; ; CHECK-SVE-256-LABEL: 'gather_load_4xi8_constant_mask' @@ -212,11 +212,11 @@ define <4 x i8> @gather_load_4xi8_constant_mask(<4 x ptr> %ptrs) { define <4 x i8> @gather_load_4xi8_variable_mask(<4 x ptr> %ptrs, <4 x i1> %cond) { ; CHECK: gather_load_4xi8_variable_mask ; CHECK-NEON-LABEL: 'gather_load_4xi8_variable_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv ; ; CHECK-SVE-128-LABEL: 'gather_load_4xi8_variable_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv ; ; CHECK-SVE-256-LABEL: 'gather_load_4xi8_variable_mask' @@ -235,11 +235,11 @@ declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32 immarg, <4 define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x ptr> %ptrs) { ; CHECK: scatter_store_4xi8_constant_mask ; CHECK-NEON-LABEL: 'scatter_store_4xi8_constant_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-128-LABEL: 'scatter_store_4xi8_constant_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-256-LABEL: 'scatter_store_4xi8_constant_mask' @@ -257,11 +257,11 @@ define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x ptr> %ptrs) { define void @scatter_store_4xi8_variable_mask(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %cond) { ; CHECK: scatter_store_4xi8_variable_mask ; CHECK-NEON-LABEL: 'scatter_store_4xi8_variable_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void 
; ; CHECK-SVE-128-LABEL: 'scatter_store_4xi8_variable_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-256-LABEL: 'scatter_store_4xi8_variable_mask' @@ -280,11 +280,11 @@ declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32 immarg, <4 x i1> define <4 x i32> @gather_load_4xi32_constant_mask(<4 x ptr> %ptrs) { ; CHECK: gather_load_4xi32_constant_mask ; CHECK-NEON-LABEL: 'gather_load_4xi32_constant_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv ; ; CHECK-SVE-128-LABEL: 'gather_load_4xi32_constant_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> , <4 x i32> undef) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv ; ; CHECK-SVE-256-LABEL: 'gather_load_4xi32_constant_mask' @@ -302,11 +302,11 @@ define <4 x i32> @gather_load_4xi32_constant_mask(<4 x ptr> %ptrs) { define <4 x i32> @gather_load_4xi32_variable_mask(<4 x ptr> %ptrs, <4 x i1> %cond) { ; CHECK: gather_load_4xi32_variable_mask ; CHECK-NEON-LABEL: 'gather_load_4xi32_variable_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv ; ; CHECK-SVE-128-LABEL: 'gather_load_4xi32_variable_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv ; ; CHECK-SVE-256-LABEL: 'gather_load_4xi32_variable_mask' @@ -325,11 +325,11 @@ declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32 immarg, < define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x ptr> %ptrs) { ; CHECK: scatter_store_4xi32_constant_mask ; CHECK-NEON-LABEL: 'scatter_store_4xi32_constant_mask' -; CHECK-NEON-NEXT: Cost Model: Found an 
estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-128-LABEL: 'scatter_store_4xi32_constant_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> ) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-256-LABEL: 'scatter_store_4xi32_constant_mask' @@ -347,11 +347,11 @@ define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x ptr> %ptrs) define void @scatter_store_4xi32_variable_mask(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %cond) { ; CHECK: scatter_store_4xi32_variable_mask ; CHECK-NEON-LABEL: 'scatter_store_4xi32_variable_mask' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-128-LABEL: 'scatter_store_4xi32_variable_mask' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %cond) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-256-LABEL: 'scatter_store_4xi32_variable_mask' @@ -370,11 +370,11 @@ declare <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr>, i32, <256 x define void @sve_gather_vls(<256 x i1> %v256i1mask) { ; CHECK-LABEL: 'sve_scatter_vls' ; CHECK-NEON-LABEL: 'sve_gather_vls' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1792 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2304 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-128-LABEL: 'sve_gather_vls' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 1792 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 2304 for instruction: %res.v256i16 = call <256 x i16> @llvm.masked.gather.v256i16.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x i16> zeroinitializer) ; CHECK-SVE-128-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-256-LABEL: 'sve_gather_vls' @@ -394,11 +394,11 @@ declare <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr>, i32, <256 define void @sve_gather_vls_float(<256 x i1> %v256i1mask) { ; CHECK-LABEL: 'sve_gather_vls_float' ; CHECK-NEON-LABEL: 'sve_gather_vls_float' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1664 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2176 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-128-LABEL: 'sve_gather_vls_float' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 1664 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 2176 for instruction: %res.v256f32 = call <256 x float> @llvm.masked.gather.v256f32.v256p0(<256 x ptr> undef, i32 0, <256 x i1> %v256i1mask, <256 x float> zeroinitializer) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-256-LABEL: 'sve_gather_vls_float' @@ -418,11 +418,11 @@ declare void @llvm.masked.scatter.v256i8.v256p0(<256 x i8>, <256 x ptr>, i32, <2 define void @sve_scatter_vls(<256 x i1> %v256i1mask){ ; CHECK-LABEL: 'sve_scatter_vls' ; CHECK-NEON-LABEL: 'sve_scatter_vls' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 1792 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 2304 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-128-LABEL: 'sve_scatter_vls' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 1792 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 2304 for instruction: call void @llvm.masked.scatter.v256i8.v256p0(<256 x i8> undef, <256 x ptr> undef, i32 0, <256 x i1> %v256i1mask) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-256-LABEL: 'sve_scatter_vls' @@ -442,11 +442,11 @@ declare void @llvm.masked.scatter.v512f16.v512p0(<512 x half>, <512 x ptr>, i32, define void @sve_scatter_vls_float(<512 x i1> %v512i1mask){ ; CHECK-LABEL: 'sve_scatter_vls_float' ; CHECK-NEON-LABEL: 'sve_scatter_vls_float' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 3456 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask) +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4480 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask) ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; 
CHECK-SVE-128-LABEL: 'sve_scatter_vls_float' -; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 3456 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask) +; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 4480 for instruction: call void @llvm.masked.scatter.v512f16.v512p0(<512 x half> undef, <512 x ptr> undef, i32 0, <512 x i1> %v512i1mask) ; CHECK-SVE-128-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-SVE-256-LABEL: 'sve_scatter_vls_float' diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll index 776c80c4bd1e2..d469ce6305932 100644 --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-other.ll @@ -106,7 +106,7 @@ define void @insert_subvec() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2_2 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_2_3 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i16_2_05 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i16_4_0 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_4_0 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_4_1 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_4_2 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_4_3 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> @@ -167,6 +167,197 @@ define void @insert_subvec() { ret void } +define void @zip() { +; CHECK-LABEL: 'zip' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %zipv2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %zip1v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %zip2v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zipv4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %zip1v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %zip2v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zipv8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %zip1v16i8 = 
shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %zip2v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %zipv16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zipv2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zipv4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %zip1v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %zip2v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %zipv8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %zip1v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %zip2v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %zipv16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zipv2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zipv4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zip1v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zip2v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %zipv8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %zip1v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: 
Cost Model: Found an estimated cost of 4 for instruction: %zip2v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zipv16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip1v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zip2v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zipv2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zip1v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %zip2v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %zipv4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %zip1v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %zip2v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zipv8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zip1v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %zip2v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %zipv16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %zip1v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %zip2v2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <2 x i32> + %zipv2i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> + %zip1v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> + %zip2v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> + %zipv4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> + %zip1v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> + %zip2v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> + %zipv8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> + %zip1v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> + %zip2v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> + %zipv16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> + + %zip1v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %zip2v2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <2 x i32> + %zipv2i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> + %zip1v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> + %zip2v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> + %zipv4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> + %zip1v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> + %zip2v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> + %zipv8i16 = 
shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> + %zip1v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> + %zip2v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> + %zipv16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> + + %zip1v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %zip2v2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> + %zipv2i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> + %zip1v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> + %zip2v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> + %zipv4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> + %zip1v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> + %zip2v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> + %zipv8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> + %zip1v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> + %zip2v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> + %zipv16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <32 x i32> + + %zip1v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> + %zip2v2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> + %zipv2i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> + %zip1v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> + %zip2v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> + %zipv4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <8 x i32> + %zip1v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> + %zip2v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> + %zipv8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <16 x i32> + %zip1v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> + %zip2v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> + %zipv16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <32 x i32> + + ret void +} + +define void @uzp() { +; CHECK-LABEL: 'uzp' +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %uzp1v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %uzp2v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %uzpv4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %uzp1v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %uzp2v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %uzpv8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %uzp1v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %uzp2v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %uzpv16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp1v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x 
i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp2v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %uzpv4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %uzp1v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %uzp2v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %uzpv8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %uzp1v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %uzp2v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %uzpv16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp1v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %uzp2v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uzpv4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uzp1v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uzp2v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uzpv8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uzp1v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uzp2v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %uzpv16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uzp1v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %uzp2v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uzpv4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uzp1v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %uzp2v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %uzpv8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %uzp1v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %uzp2v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %uzpv16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %uzp1v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> + %uzp2v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> + %uzpv4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> + %uzp1v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> + %uzp2v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> + %uzpv8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> + %uzp1v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> + %uzp2v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> + %uzpv16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> + + %uzp1v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> + %uzp2v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> + %uzpv4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> + %uzp1v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> + %uzp2v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> + %uzpv8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> + %uzp1v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> + %uzp2v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> + %uzpv16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> + + %uzp1v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> + %uzp2v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> + %uzpv4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> + %uzp1v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> + %uzp2v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> + %uzpv8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> + %uzp1v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> + %uzp2v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> + %uzpv16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <32 x i32> + + %uzp1v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> + %uzp2v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> + %uzpv4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <8 x i32> + %uzp1v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> + %uzp2v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> + %uzpv8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <16 x i32> + %uzp1v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> + %uzp2v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> + %uzpv16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <32 x i32> + + ret void +} + + define void @multipart() { ; CHECK-LABEL: 'multipart' ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v16a = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> @@ -211,3 +402,91 @@ define void @multipart() { ret void } + + +define void @vst3(ptr %p) { +; CHECK-LABEL: 'vst3' +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <6 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> 
undef, <12 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <24 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v64i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <48 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <6 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <12 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <24 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v64i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <48 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <6 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <12 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <24 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v64i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <48 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <6 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <12 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <24 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v64i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <48 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <6 x i32> + %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <12 x i32> + %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <24 x i32> + %v64i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <48 x i32> + + %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <6 x i32> + %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <12 x i32> + %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <24 x i32> + %v64i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <48 x i32> + + %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <6 x i32> + %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <12 x i32> + %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <24 x i32> + %v64i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <48 x i32> + + %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <6 x i32> + %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <12 x i32> + %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <24 x i32> + %v64i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <48 x i32> + + ret void +} + + +define void @vst4(ptr %p) { +; CHECK-LABEL: 'vst4' +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = shufflevector <16 x 
i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> + %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> + %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> + %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> + + %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> + %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> + %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> + %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> + + %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> + %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> + %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> + %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> + + %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> + %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> + %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> + %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> + + ret void +} diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll new file mode 100644 index 0000000000000..12de334574f5c --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-store.ll @@ 
-0,0 +1,324 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @vst2(ptr %p) {
+; CHECK-LABEL: 'vst2'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4i8, ptr %p, align 4
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8i8, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16i8, ptr %p, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> %v32i8, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v4i16, ptr %p, align 8
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v8i16, ptr %p, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %v16i16, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> %v32i16, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v4i32, ptr %p, align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %v8i32, ptr %p, align 32
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %v16i32, ptr %p, align 64
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <32 x i32> 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <32 x i32> %v32i32, ptr %p, align 128
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> 
+; CHECK-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: store <4 x i64> %v4i64, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> %v8i64, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i64> %v16i64, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <32 x i64> %v32i64, ptr %p, align 256 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> + store <4 x i8> %v4i8, ptr %p + %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> + store <8 x i8> %v8i8, ptr %p + %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> + store <16 x i8> %v16i8, ptr %p + %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <32 x i32> + store <32 x i8> %v32i8, ptr %p + + %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> + store <4 x i16> %v4i16, ptr %p + %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> + store <8 x i16> %v8i16, ptr %p + %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> + store <16 x i16> %v16i16, ptr %p + %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <32 x i32> + store <32 x i16> %v32i16, ptr %p + + %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> + store <4 x i32> %v4i32, ptr %p + %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> + store <8 x i32> %v8i32, ptr %p + %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <16 x i32> + store <16 x i32> %v16i32, ptr %p + %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <32 x i32> + store <32 x i32> %v32i32, ptr %p + + %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> + store <4 x i64> %v4i64, ptr %p + %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <8 x i32> + store <8 x i64> %v8i64, ptr %p + %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <16 x i32> + store <16 x i64> %v16i64, ptr %p + %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <32 x i32> + store <32 x i64> %v32i64, ptr %p + + ret void +} + + +define void @vst3(ptr %p) { +; CHECK-LABEL: 'vst3' +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <6 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i8> %v8i8, ptr %p, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <12 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <12 x i8> %v16i8, ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <24 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <24 x i8> %v32i8, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8 = shufflevector <32 x i8> undef, <32 x 
i8> undef, <48 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <48 x i8> %v64i8, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <6 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <6 x i16> %v8i16, ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <12 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <12 x i16> %v16i16, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <24 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <24 x i16> %v32i16, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <48 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <48 x i16> %v64i16, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <6 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <6 x i32> %v8i32, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <12 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <12 x i32> %v16i32, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <24 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <24 x i32> %v32i32, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <48 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <48 x i32> %v64i32, ptr %p, align 256 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <6 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <6 x i64> %v8i64, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <12 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <12 x i64> %v16i64, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <24 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <24 x i64> %v32i64, ptr %p, align 256 +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v64i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <48 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <48 x i64> %v64i64, ptr %p, align 512 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <6 x i32> + store <6 x i8> %v8i8, ptr %p + %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <12 x i32> + store <12 x i8> %v16i8, ptr %p + %v32i8 = shufflevector 
<16 x i8> undef, <16 x i8> undef, <24 x i32> + store <24 x i8> %v32i8, ptr %p + %v64i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <48 x i32> + store <48 x i8> %v64i8, ptr %p + + %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <6 x i32> + store <6 x i16> %v8i16, ptr %p + %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <12 x i32> + store <12 x i16> %v16i16, ptr %p + %v32i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <24 x i32> + store <24 x i16> %v32i16, ptr %p + %v64i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <48 x i32> + store <48 x i16> %v64i16, ptr %p + + %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <6 x i32> + store <6 x i32> %v8i32, ptr %p + %v16i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <12 x i32> + store <12 x i32> %v16i32, ptr %p + %v32i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <24 x i32> + store <24 x i32> %v32i32, ptr %p + %v64i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <48 x i32> + store <48 x i32> %v64i32, ptr %p + + %v8i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <6 x i32> + store <6 x i64> %v8i64, ptr %p + %v16i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <12 x i32> + store <12 x i64> %v16i64, ptr %p + %v32i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <24 x i32> + store <24 x i64> %v32i64, ptr %p + %v64i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <48 x i32> + store <48 x i64> %v64i64, ptr %p + + ret void +} + + +define void @vst4(ptr %p) { +; CHECK-LABEL: 'vst4' +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8i8, ptr %p, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16i8, ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> %v32i8, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> %v64i8, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v8i16, ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %v16i16, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> %v32i16, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <64 x i16> 
%v64i16, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %v8i32, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %v16i32, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <32 x i32> %v32i32, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <64 x i32> %v64i32, ptr %p, align 256 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> %v8i64, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i64> %v16i64, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <32 x i64> %v32i64, ptr %p, align 256 +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <64 x i64> %v64i64, ptr %p, align 512 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> + store <8 x i8> %v8i8, ptr %p + %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> + store <16 x i8> %v16i8, ptr %p + %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> + store <32 x i8> %v32i8, ptr %p + %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> + store <64 x i8> %v64i8, ptr %p + + %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> + store <8 x i16> %v8i16, ptr %p + %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> + store <16 x i16> %v16i16, ptr %p + %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> + store <32 x i16> %v32i16, ptr %p + %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> + store <64 x i16> %v64i16, ptr %p + + %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> + store <8 x i32> %v8i32, ptr %p + %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> + store <16 x i32> %v16i32, ptr %p + %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> + store <32 x i32> %v32i32, ptr %p + %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> + store <64 x i32> %v64i32, ptr %p + + %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> + store <8 x i64> %v8i64, ptr 
%p + %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> + store <16 x i64> %v16i64, ptr %p + %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> + store <32 x i64> %v32i64, ptr %p + %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> + store <64 x i64> %v64i64, ptr %p + + ret void +} + + +define void @splatstore(ptr %p) { +; CHECK-LABEL: 'splatstore' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i8> %v4i8, ptr %p, align 4 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i8> %v8i8, ptr %p, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <16 x i8> %v16i8, ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <32 x i8> %v32i8, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <64 x i8> %v64i8, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %v4i16, ptr %p, align 8 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v8i16, ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %v16i16, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <32 x i16> %v32i16, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <64 x i16> %v64i16, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v4i32, ptr %p, align 16 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %v8i32, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %v16i32, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <32 x i32> %v32i32, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <64 x i32> %v64i32, ptr %p, align 256 +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %v4i64, ptr %p, align 32 +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <8 x i64> %v8i64, ptr %p, align 64 +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <16 x i64> %v16i64, ptr %p, align 128 +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: store <32 x i64> %v32i64, ptr %p, align 256 +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> zeroinitializer +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: store <64 x i64> %v64i64, ptr %p, align 512 +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v4i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <4 x i32> zeroinitializer + store <4 x i8> %v4i8, ptr %p + %v8i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <8 x i32> zeroinitializer + store <8 x i8> %v8i8, ptr %p + %v16i8 = shufflevector <16 x i8> undef, <16 x i8> undef, <16 x i32> zeroinitializer + store <16 x i8> %v16i8, ptr %p + %v32i8 = shufflevector <32 x i8> undef, <32 x i8> undef, <32 x i32> zeroinitializer + store <32 x i8> %v32i8, ptr %p + %v64i8 = shufflevector <64 x i8> undef, <64 x i8> undef, <64 x i32> zeroinitializer + store <64 x i8> %v64i8, ptr %p + + %v4i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <4 x i32> zeroinitializer + store <4 x i16> %v4i16, ptr %p + %v8i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <8 x i32> zeroinitializer + store <8 x i16> %v8i16, ptr %p + %v16i16 = shufflevector <16 x i16> undef, <16 x i16> undef, <16 x i32> zeroinitializer + store <16 x i16> %v16i16, ptr %p + %v32i16 = shufflevector <32 x i16> undef, <32 x i16> undef, <32 x i32> zeroinitializer + store <32 x i16> %v32i16, ptr %p + %v64i16 = shufflevector <64 x i16> undef, <64 x i16> undef, <64 x i32> 
zeroinitializer + store <64 x i16> %v64i16, ptr %p + + %v4i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <4 x i32> zeroinitializer + store <4 x i32> %v4i32, ptr %p + %v8i32 = shufflevector <8 x i32> undef, <8 x i32> undef, <8 x i32> zeroinitializer + store <8 x i32> %v8i32, ptr %p + %v16i32 = shufflevector <16 x i32> undef, <16 x i32> undef, <16 x i32> zeroinitializer + store <16 x i32> %v16i32, ptr %p + %v32i32 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> zeroinitializer + store <32 x i32> %v32i32, ptr %p + %v64i32 = shufflevector <64 x i32> undef, <64 x i32> undef, <64 x i32> zeroinitializer + store <64 x i32> %v64i32, ptr %p + + %v4i64 = shufflevector <4 x i64> undef, <4 x i64> undef, <4 x i32> zeroinitializer + store <4 x i64> %v4i64, ptr %p + %v8i64 = shufflevector <8 x i64> undef, <8 x i64> undef, <8 x i32> zeroinitializer + store <8 x i64> %v8i64, ptr %p + %v16i64 = shufflevector <16 x i64> undef, <16 x i64> undef, <16 x i32> zeroinitializer + store <16 x i64> %v16i64, ptr %p + %v32i64 = shufflevector <32 x i64> undef, <32 x i64> undef, <32 x i32> zeroinitializer + store <32 x i64> %v32i64, ptr %p + %v64i64 = shufflevector <64 x i64> undef, <64 x i64> undef, <64 x i32> zeroinitializer + store <64 x i64> %v64i64, ptr %p + + ret void +} + diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll b/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll index c05339d89d35c..a9c18e20c1f58 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll @@ -107,15 +107,15 @@ define void @masked_gathers_no_vscale_range() #2 { define <2 x i128> @masked_gather_v1i128(<2 x ptr> %ld, <2 x i1> %masks, <2 x i128> %passthru) #3 { ; CHECK-LABEL: 'masked_gather_v1i128' -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <2 x i128> @llvm.masked.gather.v2i128.v2p0(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i128> %passthru) +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <2 x i128> @llvm.masked.gather.v2i128.v2p0(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i128> %passthru) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i128> %res ; ; CHECK-VSCALE-2-LABEL: 'masked_gather_v1i128' -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <2 x i128> @llvm.masked.gather.v2i128.v2p0(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i128> %passthru) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <2 x i128> @llvm.masked.gather.v2i128.v2p0(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i128> %passthru) ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i128> %res ; ; CHECK-VSCALE-1-LABEL: 'masked_gather_v1i128' -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <2 x i128> @llvm.masked.gather.v2i128.v2p0(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i128> %passthru) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <2 x i128> @llvm.masked.gather.v2i128.v2p0(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i128> %passthru) ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i128> %res ; %res = call <2 x i128> @llvm.masked.gather.v2i128.v2p0(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i128> %passthru) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll 
b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index 937c383aedc78..8d5535e2a82f7 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -829,7 +829,7 @@ define @masked_gather_nxv8i32( %ld, @masked_gather_v4i32(<4 x ptr> %ld, <4 x i1> %masks, <4 x i32> %passthru) { ; CHECK-LABEL: 'masked_gather_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru) +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i32> %passthru) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; TYPE_BASED_ONLY-LABEL: 'masked_gather_v4i32' @@ -842,7 +842,7 @@ define <4 x i32> @masked_gather_v4i32(<4 x ptr> %ld, <4 x i1> %masks, <4 x i32> define <1 x i128> @masked_gather_v1i128(<1 x ptr> %ld, <1 x i1> %masks, <1 x i128> %passthru) { ; CHECK-LABEL: 'masked_gather_v1i128' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0(<1 x ptr> %ld, i32 0, <1 x i1> %masks, <1 x i128> %passthru) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <1 x i128> %res ; ; TYPE_BASED_ONLY-LABEL: 'masked_gather_v1i128' @@ -883,7 +883,7 @@ define void @masked_scatter_nxv8i32( %data, define void @masked_scatter_v4i32(<4 x i32> %data, <4 x ptr> %ptrs, <4 x i1> %masks) { ; CHECK-LABEL: 'masked_scatter_v4i32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks) +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %ptrs, i32 0, <4 x i1> %masks) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPE_BASED_ONLY-LABEL: 'masked_scatter_v4i32' @@ -897,7 +897,7 @@ define void @masked_scatter_v4i32(<4 x i32> %data, <4 x ptr> %ptrs, <4 x i1> %ma define void @masked_scatter_v1i128(<1 x i128> %data, <1 x ptr> %ptrs, <1 x i1> %masks) { ; CHECK-LABEL: 'masked_scatter_v1i128' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs, i32 0, <1 x i1> %masks) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPE_BASED_ONLY-LABEL: 'masked_scatter_v1i128' diff --git a/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll b/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll index ba62056f5851b..7ebe14d98b21b 100644 --- a/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll +++ b/llvm/test/Analysis/CostModel/RISCV/active_lane_mask.ll @@ -3,28 +3,28 @@ define void @get_lane_mask() { ; CHECK-LABEL: 'get_lane_mask' -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %mask_nxv16i1_i64 = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 
undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %mask_nxv8i1_i64 = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %mask_nxv4i1_i64 = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask_nxv2i1_i64 = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv1i1_i64 = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %mask_nxv16i1_i32 = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %mask_nxv8i1_i32 = call @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask_nxv4i1_i32 = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv2i1_i32 = call @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv1i1_i32 = call @llvm.get.active.lane.mask.nxv1i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %mask_nxv32i1_i64 = call @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %mask_nxv16i1_i16 = call @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %mask_nxv16i1_i64 = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_nxv8i1_i64 = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_nxv4i1_i64 = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv2i1_i64 = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask_nxv1i1_i64 = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_nxv16i1_i32 = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_nxv8i1_i32 = call @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_nxv4i1_i32 = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask_nxv2i1_i32 = call @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask_nxv1i1_i32 = call @llvm.get.active.lane.mask.nxv1i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %mask_nxv32i1_i64 = call @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_nxv16i1_i16 = call @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %mask_nxv16i1_i64 = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) diff --git 
a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll index 510d33b8d9033..ec7eb81d98bf9 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll @@ -44,33 +44,33 @@ define i32 @masked_gather() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I8 = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64.u = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64.u = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64.u = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64.u = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64.u = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32.u = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 2, <16 x i1> undef, <16 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F32.u = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 2, <8 x i1> undef, <8 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32.u = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 2, <4 x i1> undef, <4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32.u = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32.u = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 2, <2 x i1> undef, <2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32.u = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> undef, i32 2, <1 x i1> undef, <1 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32F16.u = call <32 x half> @llvm.masked.gather.v32f16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F16.u = call <16 x half> @llvm.masked.gather.v16f16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F16.u = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F16.u = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F16.u = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, 
i32 1, <2 x i1> undef, <2 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F16.u = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16.u = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64.u = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 4, <8 x i1> undef, <8 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64.u = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 4, <4 x i1> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64.u = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64.u = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 4, <2 x i1> undef, <2 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64.u = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 4, <1 x i1> undef, <1 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32.u = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32.u = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32.u = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32.u = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32.u = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32.u = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16.u = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16.u = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I16.u = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I16.u = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16.u = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16.u = call <2 x i16> 
@llvm.masked.gather.v2i16.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I16.u = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll index caa5138287f2a..6da9d8d73cbd4 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll @@ -44,33 +44,33 @@ define i32 @masked_scatter() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i8.v1p0(<1 x i8> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 2, <8 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 2, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 2, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 2, <2 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 2, <1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 2, <16 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 2, <8 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 2, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 2, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 2, <2 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f32.v1p0(<1 x float> undef, <1 x ptr> undef, i32 2, <1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32f16.v32p0(<32 x half> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16f16.v16p0(<16 x half> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8f16.v8p0(<8 x half> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated 
cost of 7 for instruction: call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f16.v2p0(<2 x half> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f16.v1p0(<1 x half> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i32.v1p0(<1 x i32> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i16.v1p0(<1 x i16> undef, <1 x ptr> 
undef, i32 1, <1 x i1> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; diff --git a/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll b/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll index 387e6e43bb348..31bbc8b02a192 100644 --- a/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll +++ b/llvm/test/Analysis/CostModel/RISCV/masked_ldst.ll @@ -13,14 +13,14 @@ define void @fixed() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 287 for instruction: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; entry: diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll index f308c535d1588..a23ea00dbaa75 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll @@ -500,10 +500,10 @@ define void @strided_load() { ; CHECK-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %t10.a = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 undef, i64 undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t13.a = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr align 8 undef, i64 undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t15.a = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 undef, i64 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %t8 = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %t10 = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %t13 = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %t15 = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t8 = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t10 = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %t13 = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %t15 = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t17 = call @llvm.experimental.vp.strided.load.nxv2i8.p0.i64(ptr undef, i64 undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t19 = call @llvm.experimental.vp.strided.load.nxv4i8.p0.i64(ptr undef, i64 undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t21 = call @llvm.experimental.vp.strided.load.nxv8i8.p0.i64(ptr undef, i64 undef, undef, i32 undef) @@ -543,10 +543,10 @@ define void @strided_store() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.v4i8.p0.i64(<4 x i8> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v8i8.p0.i64(<8 x i8> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.v16i8.p0.i64(<16 x i8> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.experimental.vp.strided.store.v2i64.p0.i64(<2 x i64> undef, ptr undef, i64 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: 
call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.experimental.vp.strided.store.v8i64.p0.i64(<8 x i64> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: call void @llvm.experimental.vp.strided.store.v16i64.p0.i64(<16 x i64> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v2i64.p0.i64(<2 x i64> undef, ptr undef, i64 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.experimental.vp.strided.store.v8i64.p0.i64(<8 x i64> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 78 for instruction: call void @llvm.experimental.vp.strided.store.v16i64.p0.i64(<16 x i64> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.experimental.vp.strided.store.v2i64.p0.i64(<2 x i64> undef, ptr align 8 undef, i64 undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> undef, ptr align 8 undef, i64 undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v8i64.p0.i64(<8 x i64> undef, ptr align 8 undef, i64 undef, <8 x i1> undef, i32 undef) diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll index 264a74116449a..6dcbc73674aa6 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll @@ -22,14 +22,14 @@ define void @select() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = select i1 undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = select i1 undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %18 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %18 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %19 = select i1 undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %20 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %21 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %22 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %23 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %24 = select undef, undef, undef -; CHECK-NEXT: 
Cost Model: Found an estimated cost of 3 for instruction: %25 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %24 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %25 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = select i1 undef, i8 undef, i8 undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %27 = select i1 undef, <1 x i8> undef, <1 x i8> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %28 = select i1 undef, <2 x i8> undef, <2 x i8> undef @@ -47,14 +47,14 @@ define void @select() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %40 = select i1 undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %41 = select i1 undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %42 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %43 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %44 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %43 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %44 = select i1 undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %46 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %47 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %48 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %49 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %50 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %49 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %50 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %51 = select i1 undef, i16 undef, i16 undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %52 = select i1 undef, <1 x i16> undef, <1 x i16> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %53 = select i1 undef, <2 x i16> undef, <2 x i16> undef @@ -71,15 +71,15 @@ define void @select() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %64 = select i1 undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %65 = select i1 undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %66 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %67 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %68 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %69 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %67 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 
for instruction: %68 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %69 = select i1 undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %70 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %71 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %72 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %73 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %74 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %75 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %73 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %74 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %75 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %76 = select i1 undef, i32 undef, i32 undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %77 = select i1 undef, <1 x i32> undef, <1 x i32> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %78 = select i1 undef, <2 x i32> undef, <2 x i32> undef @@ -95,16 +95,16 @@ define void @select() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %88 = select <32 x i1> undef, <32 x i32> undef, <32 x i32> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %89 = select i1 undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %90 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %91 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %92 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %93 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %94 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %91 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %92 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %93 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %94 = select i1 undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %95 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %96 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %97 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %98 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %99 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %100 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %97 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 
for instruction: %98 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %99 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %100 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %101 = select i1 undef, i64 undef, i64 undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %102 = select i1 undef, <1 x i64> undef, <1 x i64> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %103 = select i1 undef, <2 x i64> undef, <2 x i64> undef @@ -119,17 +119,17 @@ define void @select() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %112 = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %113 = select <32 x i1> undef, <32 x i64> undef, <32 x i64> undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %114 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %115 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %116 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %117 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %118 = select i1 undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %119 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %115 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %116 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %117 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %118 = select i1 undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %119 = select i1 undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %120 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %121 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %122 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %123 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %124 = select undef, undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %125 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %121 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %122 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %123 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %124 = select undef, undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %125 = select undef, undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; select i1 undef, i1 undef, i1 undef diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll 
diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll
index d80cb09fd3908..47487d6adf68a 100644
--- a/llvm/test/Analysis/CostModel/X86/cast.ll
+++ b/llvm/test/Analysis/CostModel/X86/cast.ll
@@ -374,7 +374,7 @@ define i32 @masks4(<4 x i1> %in) {
;
; AVX1-LABEL: 'masks4'
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %Z = zext <4 x i1> %in to <4 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %S = sext <4 x i1> %in to <4 x i64>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %S = sext <4 x i1> %in to <4 x i64>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'masks4'
diff --git a/llvm/test/Analysis/CostModel/X86/extend.ll b/llvm/test/Analysis/CostModel/X86/extend.ll
index 34fa3c4263923..4a2585a9ddf90 100644
--- a/llvm/test/Analysis/CostModel/X86/extend.ll
+++ b/llvm/test/Analysis/CostModel/X86/extend.ll
@@ -1962,7 +1962,7 @@ define i32 @sext_vXi1() "min-legal-vector-width"="256" {
; AVX1-LABEL: 'sext_vXi1'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64>
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>
; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i64 = sext <16 x i1> undef to <16 x i64>
; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i64 = sext <32 x i1> undef to <32 x i64>
@@ -2242,7 +2242,7 @@ define i32 @sext_vXi1() "min-legal-vector-width"="256" {
; BTVER2-LABEL: 'sext_vXi1'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64>
-; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>
+; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i64 = sext <16 x i1> undef to <16 x i64>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i64 = sext <32 x i1> undef to <32 x i64>
diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
index 10786e1f7a4a4..cda9744a8d6b5 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
@@ -314,15 +314,15 @@ define void @maskedgather(<16 x ptr> %va, <16 x i1> %vb, <16 x float> %vc) {
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'maskedgather'
-; LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+; LATE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'maskedgather'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'maskedgather'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
@@ -335,15 +335,15 @@ define void @maskedscatter(<16 x float> %va, <16 x ptr> %vb, <16 x i1> %vc) {
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'maskedscatter'
-; LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
+; LATE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'maskedscatter'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'maskedscatter'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll
new file mode 100644
index 0000000000000..827e503fe7b1b
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll
@@ -0,0 +1,2521 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+;
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=skylake | FileCheck %s --check-prefixes=AVX,SKL
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=knl | FileCheck %s --check-prefixes=AVX512,KNL
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mcpu=skx | FileCheck %s --check-prefixes=AVX512,SKX
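+;
+; The CHECK lines below are generated rather than hand-written. A typical way
+; to refresh them after a cost-model change (paths are illustrative and assume
+; a built opt at build/bin/opt; adjust to your checkout) is:
+;   python3 llvm/utils/update_analyze_test_checks.py \
+;     --opt-binary=build/bin/opt \
+;     llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll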
+
+define i32 @masked_load(<1 x i1> %m1, <2 x i1> %m2, <3 x i1> %m3, <4 x i1> %m4, <5 x i1> %m5, <6 x i1> %m6, <7 x i1> %m7, <8 x i1> %m8, <9 x i1> %m9, <10 x i1> %m10, <11 x i1> %m11, <12 x i1> %m12, <13 x i1> %m13, <14 x i1> %m14, <15 x i1> %m15, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
+; SSE2-LABEL: 'masked_load'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x
float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 101 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for 
instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_load' +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: 
%V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V12I32 = call <12 x 
i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_load' +; AVX-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; AVX-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x 
i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 163 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_load' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> 
%m4, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_load' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> 
@llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr 
undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+ %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+ %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+ %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+ %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+ %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+
+ %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+ %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+ %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+ %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+ %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef)
+ %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef)
+ %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef)
+ %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+ %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef)
+ %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef)
+ %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+ %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+ %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef)
+
+ %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+ %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef)
+ %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef)
+ %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+ %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+
+ %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+ %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef)
+ %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef)
+ %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef)
+ %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef)
+ %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef)
+ %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+ %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+ %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+ %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef)
+ %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+ %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+ %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef)
+
+ %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+
+ %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+
+ ret i32 0
+}
+
+define i32 @masked_store(<1 x i1> %m1, <2 x i1> %m2, <3 x i1> %m3, <4 x i1> %m4, <5 x i1> %m5, <6 x i1> %m6, <7 x i1> %m7, <8 x i1> %m8, <9 x i1> %m9, <10 x i1> %m10, <11 x i1> %m11, <12 x i1> %m12, <13 x i1> %m13, <14 x i1> %m14, <15 x i1> %m15, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
+; SSE2-LABEL: 'masked_store'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 190 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SSE42-LABEL: 'masked_store'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX-LABEL: 'masked_store'
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 163 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; KNL-LABEL: 'masked_store'
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; KNL-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; KNL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 326 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; KNL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SKX-LABEL: 'masked_store'
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+ call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+ call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+ call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+ call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+ call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+ call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+ call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+
+ call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+ call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+ call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+ call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+ call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+ call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+ call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+ call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+ call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+ call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+ call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+ call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+ call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+ call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+ call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+
+ call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+ call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+ call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+ call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+ call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+ call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+ call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+
+ call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+ call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+ call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+ call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+ call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+ call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+ call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+ call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+ call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+ call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+ call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+ call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+ call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+ call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+
+ call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+ call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+
+ call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+ call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+ call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+ call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+
+ ret i32 0
+}
+
+define i32 @masked_gather(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
+; SSE2-LABEL: 'masked_gather'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SSE42-LABEL: 'masked_gather'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8>
undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_gather' +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 
= call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_gather' +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> 
%m1, <1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_gather' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_gather' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; 
KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an 
estimated cost of 347 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_gather' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, 
i32 1, <4 x i1> %m4, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 347 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) + %V2I32 = call <2 x i32> 
@llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_scatter(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { +; SSE2-LABEL: 'masked_scatter' +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x 
i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_scatter' +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void 
@llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_scatter' +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for 
instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 278 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; AVX1-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_scatter' +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for 
instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> 
%m32) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_scatter' +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; 
SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 276 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SKL-NEXT: Cost Model: Found an estimated cost of 138 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKL-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_scatter' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: 
Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 175 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 87 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 347 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; KNL-NEXT: Cost Model: Found an estimated cost of 173 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_scatter' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated 
cost of 10 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 175 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; SKX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 347 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64)
+; SKX-NEXT: Cost Model: Found an estimated cost of 173 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; SKX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+  call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+  call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+  call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+
+  call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+  call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+  call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+  call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+
+  call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+  call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+  call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+  call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+
+  call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+  call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+  call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+
+  call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+  call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+  call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+
+  call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64)
+  call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+  call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+  call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+
+  ret i32 0
+}
+
+define i32 @masked_expandload(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
+; SSE2-LABEL: 'masked_expandload'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SSE42-LABEL: 'masked_expandload'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX1-LABEL: 'masked_expandload'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX2-LABEL: 'masked_expandload'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SKL-LABEL: 'masked_expandload'
+; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX512-LABEL: 'masked_expandload'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+  %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef)
+  %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef)
+  %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef)
+  %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef)
+
+  %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef)
+  %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef)
+  %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef)
+  %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef)
+
+  %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef)
+  %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef)
+  %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef)
+  %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef)
+
+  %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef)
+  %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef)
+  %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef)
+  %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef)
+
+  %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef)
+  %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef)
+  %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef)
+  %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef)
+
+  %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef)
+  %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef)
+  %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef)
+  %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef)
+
+  ret i32 0
+}
+
+define i32 @masked_compressstore(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
+; SSE2-LABEL: 'masked_compressstore'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SSE42-LABEL: 'masked_compressstore'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX1-LABEL: 'masked_compressstore'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX2-LABEL: 'masked_compressstore'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SKL-LABEL: 'masked_compressstore'
+; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX512-LABEL: 'masked_compressstore'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+  call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+  call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+  call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+  call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+
+  call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+  call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+  call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+  call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+
+  call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+  call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+  call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+  call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+
+  call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+  call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+  call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+  call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+
+  call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+  call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+  call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+  call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+
+  call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+  call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+  call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+  call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+
+  ret i32 0
+}
+
+define <2 x double> @test1(<2 x i64> %trigger, ptr %addr, <2 x double> %dst) {
+; SSE2-LABEL: 'test1'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+; SSE42-LABEL: 'test1'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+; AVX-LABEL: 'test1'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+; AVX512-LABEL: 'test1'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+  %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+  %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
+  ret <2 x double> %res
+}
+
+define <4 x i32> @test2(<4 x i32> %trigger, ptr %addr, <4 x i32> %dst) {
+; SSE2-LABEL: 'test2'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; SSE42-LABEL: 'test2'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; AVX-LABEL: 'test2'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; AVX512-LABEL: 'test2'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+  %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
+  ret <4 x i32> %res
+}
+
+define void @test3(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) {
+; SSE2-LABEL: 'test3'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SSE42-LABEL: 'test3'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; AVX-LABEL: 'test3'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; AVX512-LABEL: 'test3'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+  call void @llvm.masked.store.v4i32.p0(<4 x i32>%val, ptr %addr, i32 4, <4 x i1>%mask)
+  ret void
+}
+
+define <8 x float> @test4(<8 x i32> %trigger, ptr %addr, <8 x float> %dst) {
+; SSE2-LABEL: 'test4'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res
+;
+; SSE42-LABEL: 'test4'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res
+;
+; AVX1-LABEL: 'test4'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res
+;
+; AVX2-LABEL: 'test4'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res
+;
+; SKL-LABEL: 'test4'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res
+;
+; AVX512-LABEL: 'test4'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res
+;
+  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+  %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
+  ret <8 x float> %res
+}
+
+define void @test5(<2 x i32> %trigger, ptr %addr,
<2 x float> %val) { +; SSE2-LABEL: 'test5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test5' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test5' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v2f32.p0(<2 x float>%val, ptr %addr, i32 4, <2 x i1>%mask) + ret void +} + +define void @test6(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) { +; SSE2-LABEL: 'test6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test6' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test6' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v2i32.p0(<2 x i32>%val, ptr %addr, i32 4, <2 x i1>%mask) + ret 
void +} + +define <2 x float> @test7(<2 x i32> %trigger, ptr %addr, <2 x float> %dst) { +; SSE2-LABEL: 'test7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; SSE42-LABEL: 'test7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; AVX-LABEL: 'test7' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; AVX512-LABEL: 'test7' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x float>%dst) + ret <2 x float> %res +} + +define <2 x i32> @test8(<2 x i32> %trigger, ptr %addr, <2 x i32> %dst) { +; SSE2-LABEL: 'test8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; SSE42-LABEL: 'test8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; AVX-LABEL: 'test8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; AVX512-LABEL: 'test8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 
4, <2 x i1> %mask, <2 x i32> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst) + ret <2 x i32> %res +} + +define <2 x double> @test_gather_2f64(<2 x ptr> %ptrs, <2 x i1> %mask, <2 x double> %src0) { +; SSE2-LABEL: 'test_gather_2f64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SSE42-LABEL: 'test_gather_2f64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX1-LABEL: 'test_gather_2f64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX2-LABEL: 'test_gather_2f64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SKL-LABEL: 'test_gather_2f64' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX512-LABEL: 'test_gather_2f64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; + %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) + ret <2 x double> %res +} + +define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { +; SSE2-LABEL: 'test_gather_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test_gather_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX1-LABEL: 'test_gather_4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX2-LABEL: 'test_gather_4i32' +; AVX2-NEXT: Cost 
Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKL-LABEL: 'test_gather_4i32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; KNL-LABEL: 'test_gather_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKX-LABEL: 'test_gather_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) + ret <4 x i32> %res +} + +define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { +; SSE2-LABEL: 'test_gather_4i32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test_gather_4i32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX1-LABEL: 'test_gather_4i32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX2-LABEL: 'test_gather_4i32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKL-LABEL: 'test_gather_4i32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; KNL-LABEL: 'test_gather_4i32_const_mask' +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKX-LABEL: 'test_gather_4i32_const_mask' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret <4 x i32> %res +; + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) + ret <4 x i32> %res +} + +define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { +; SSE2-LABEL: 'test_gather_16f32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_const_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_var_mask(ptr %base, <16 x i32> %ind, <16 x i1>%mask) { +; SSE2-LABEL: 'test_gather_16f32_var_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 
'test_gather_16f32_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_ra_var_mask(<16 x ptr> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { +; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_ra_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { +; SSE2-LABEL: 'test_gather_16f32_const_mask2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_const_mask2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_const_mask2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 
x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_const_mask2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_const_mask2' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_const_mask2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, 
i32 0 + %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer + + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + +define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { +; SSE2-LABEL: 'test_scatter_16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_scatter_16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_scatter_16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> 
zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SKL-LABEL: 'test_scatter_16i32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 + %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer + + %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind + %imask = bitcast i16 %mask to <16 x i1> + call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>%val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) + ret void +} + +define void @test_scatter_8i32(<8 x i32>%a1, <8 x ptr> %ptr, <8 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_8i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call 
void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) + ret void +} + +define void @test_scatter_4i32(<4 x i32>%a1, <4 x ptr> %ptr, <4 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_4i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; KNL-LABEL: 'test_scatter_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SKX-LABEL: 'test_scatter_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) + ret void +} + +define <4 x float> @test_gather_4f32(ptr %ptr, <4 x i32> %ind, <4 x i1>%mask) { +; SSE2-LABEL: 'test_gather_4f32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SSE42-LABEL: 'test_gather_4f32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX1-LABEL: 'test_gather_4f32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x 
i32> %ind to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX2-LABEL: 'test_gather_4f32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKL-LABEL: 'test_gather_4f32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; KNL-LABEL: 'test_gather_4f32' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKX-LABEL: 'test_gather_4f32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; + %sext_ind = sext <4 x i32> %ind to <4 x i64> + %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind + + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) + ret <4 x float>%res +} + +define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { +; SSE2-LABEL: 'test_gather_4f32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: ret <4 x float> %res +; +; SSE42-LABEL: 'test_gather_4f32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX1-LABEL: 'test_gather_4f32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX2-LABEL: 'test_gather_4f32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKL-LABEL: 'test_gather_4f32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; KNL-LABEL: 'test_gather_4f32_const_mask' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKX-LABEL: 'test_gather_4f32_const_mask' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; + %sext_ind = sext <4 x i32> %ind to <4 x i64> + %gep.v = getelementptr float, ptr 
+
+  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef)
+  ret <4 x float>%res
+}
+
+declare <8 x double> @llvm.masked.load.v8f64.p0(ptr, i32, <8 x i1>, <8 x double>)
+declare <7 x double> @llvm.masked.load.v7f64.p0(ptr, i32, <7 x i1>, <7 x double>)
+declare <6 x double> @llvm.masked.load.v6f64.p0(ptr, i32, <6 x i1>, <6 x double>)
+declare <5 x double> @llvm.masked.load.v5f64.p0(ptr, i32, <5 x i1>, <5 x double>)
+declare <4 x double> @llvm.masked.load.v4f64.p0(ptr, i32, <4 x i1>, <4 x double>)
+declare <3 x double> @llvm.masked.load.v3f64.p0(ptr, i32, <3 x i1>, <3 x double>)
+declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>)
+declare <1 x double> @llvm.masked.load.v1f64.p0(ptr, i32, <1 x i1>, <1 x double>)
+
+declare <16 x float> @llvm.masked.load.v16f32.p0(ptr, i32, <16 x i1>, <16 x float>)
+declare <15 x float> @llvm.masked.load.v15f32.p0(ptr, i32, <15 x i1>, <15 x float>)
+declare <14 x float> @llvm.masked.load.v14f32.p0(ptr, i32, <14 x i1>, <14 x float>)
+declare <13 x float> @llvm.masked.load.v13f32.p0(ptr, i32, <13 x i1>, <13 x float>)
+declare <12 x float> @llvm.masked.load.v12f32.p0(ptr, i32, <12 x i1>, <12 x float>)
+declare <11 x float> @llvm.masked.load.v11f32.p0(ptr, i32, <11 x i1>, <11 x float>)
+declare <10 x float> @llvm.masked.load.v10f32.p0(ptr, i32, <10 x i1>, <10 x float>)
+declare <9 x float> @llvm.masked.load.v9f32.p0(ptr, i32, <9 x i1>, <9 x float>)
+declare <8 x float> @llvm.masked.load.v8f32.p0(ptr, i32, <8 x i1>, <8 x float>)
+declare <7 x float> @llvm.masked.load.v7f32.p0(ptr, i32, <7 x i1>, <7 x float>)
+declare <6 x float> @llvm.masked.load.v6f32.p0(ptr, i32, <6 x i1>, <6 x float>)
+declare <5 x float> @llvm.masked.load.v5f32.p0(ptr, i32, <5 x i1>, <5 x float>)
+declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)
+declare <3 x float> @llvm.masked.load.v3f32.p0(ptr, i32, <3 x i1>, <3 x float>)
+declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>)
+declare <1 x float> @llvm.masked.load.v1f32.p0(ptr, i32, <1 x i1>, <1 x float>)
+
+declare <8 x i64> @llvm.masked.load.v8i64.p0(ptr, i32, <8 x i1>, <8 x i64>)
+declare <7 x i64> @llvm.masked.load.v7i64.p0(ptr, i32, <7 x i1>, <7 x i64>)
+declare <6 x i64> @llvm.masked.load.v6i64.p0(ptr, i32, <6 x i1>, <6 x i64>)
+declare <5 x i64> @llvm.masked.load.v5i64.p0(ptr, i32, <5 x i1>, <5 x i64>)
+declare <4 x i64> @llvm.masked.load.v4i64.p0(ptr, i32, <4 x i1>, <4 x i64>)
+declare <3 x i64> @llvm.masked.load.v3i64.p0(ptr, i32, <3 x i1>, <3 x i64>)
+declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>)
+declare <1 x i64> @llvm.masked.load.v1i64.p0(ptr, i32, <1 x i1>, <1 x i64>)
+
+declare <16 x i32> @llvm.masked.load.v16i32.p0(ptr, i32, <16 x i1>, <16 x i32>)
+declare <15 x i32> @llvm.masked.load.v15i32.p0(ptr, i32, <15 x i1>, <15 x i32>)
+declare <14 x i32> @llvm.masked.load.v14i32.p0(ptr, i32, <14 x i1>, <14 x i32>)
+declare <13 x i32> @llvm.masked.load.v13i32.p0(ptr, i32, <13 x i1>, <13 x i32>)
+declare <12 x i32> @llvm.masked.load.v12i32.p0(ptr, i32, <12 x i1>, <12 x i32>)
+declare <11 x i32> @llvm.masked.load.v11i32.p0(ptr, i32, <11 x i1>, <11 x i32>)
+declare <10 x i32> @llvm.masked.load.v10i32.p0(ptr, i32, <10 x i1>, <10 x i32>)
+declare <9 x i32> @llvm.masked.load.v9i32.p0(ptr, i32, <9 x i1>, <9 x i32>)
+declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32, <8 x i1>, <8 x i32>)
+declare <7 x i32> @llvm.masked.load.v7i32.p0(ptr, i32, <7 x i1>, <7 x i32>)
+declare <6 x i32> @llvm.masked.load.v6i32.p0(ptr, i32, <6 x i1>, <6 x i32>)
+declare <5 x i32> @llvm.masked.load.v5i32.p0(ptr, i32, <5 x i1>, <5 x i32>)
+declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)
+declare <3 x i32> @llvm.masked.load.v3i32.p0(ptr, i32, <3 x i1>, <3 x i32>)
+declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>)
+declare <1 x i32> @llvm.masked.load.v1i32.p0(ptr, i32, <1 x i1>, <1 x i32>)
+
+declare <32 x i16> @llvm.masked.load.v32i16.p0(ptr, i32, <32 x i1>, <32 x i16>)
+declare <16 x i16> @llvm.masked.load.v16i16.p0(ptr, i32, <16 x i1>, <16 x i16>)
+declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>)
+declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>)
+
+declare <64 x i8> @llvm.masked.load.v64i8.p0(ptr, i32, <64 x i1>, <64 x i8>)
+declare <32 x i8> @llvm.masked.load.v32i8.p0(ptr, i32, <32 x i1>, <32 x i8>)
+declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>)
+declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>)
+
+declare void @llvm.masked.store.v8f64.p0(<8 x double>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v7f64.p0(<7 x double>, ptr, i32, <7 x i1>)
+declare void @llvm.masked.store.v6f64.p0(<6 x double>, ptr, i32, <6 x i1>)
+declare void @llvm.masked.store.v5f64.p0(<5 x double>, ptr, i32, <5 x i1>)
+declare void @llvm.masked.store.v4f64.p0(<4 x double>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v3f64.p0(<3 x double>, ptr, i32, <3 x i1>)
+declare void @llvm.masked.store.v2f64.p0(<2 x double>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v1f64.p0(<1 x double>, ptr, i32, <1 x i1>)
+
+declare void @llvm.masked.store.v16f32.p0(<16 x float>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v15f32.p0(<15 x float>, ptr, i32, <15 x i1>)
+declare void @llvm.masked.store.v14f32.p0(<14 x float>, ptr, i32, <14 x i1>)
+declare void @llvm.masked.store.v13f32.p0(<13 x float>, ptr, i32, <13 x i1>)
+declare void @llvm.masked.store.v12f32.p0(<12 x float>, ptr, i32, <12 x i1>)
+declare void @llvm.masked.store.v11f32.p0(<11 x float>, ptr, i32, <11 x i1>)
+declare void @llvm.masked.store.v10f32.p0(<10 x float>, ptr, i32, <10 x i1>)
+declare void @llvm.masked.store.v9f32.p0(<9 x float>, ptr, i32, <9 x i1>)
+declare void @llvm.masked.store.v8f32.p0(<8 x float>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr, i32, <7 x i1>)
+declare void @llvm.masked.store.v6f32.p0(<6 x float>, ptr, i32, <6 x i1>)
+declare void @llvm.masked.store.v5f32.p0(<5 x float>, ptr, i32, <5 x i1>)
+declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v3f32.p0(<3 x float>, ptr, i32, <3 x i1>)
+declare void @llvm.masked.store.v2f32.p0(<2 x float>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v1f32.p0(<1 x float>, ptr, i32, <1 x i1>)
+
+declare void @llvm.masked.store.v8i64.p0(<8 x i64>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v7i64.p0(<7 x i64>, ptr, i32, <7 x i1>)
+declare void @llvm.masked.store.v6i64.p0(<6 x i64>, ptr, i32, <6 x i1>)
+declare void @llvm.masked.store.v5i64.p0(<5 x i64>, ptr, i32, <5 x i1>)
+declare void @llvm.masked.store.v4i64.p0(<4 x i64>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v3i64.p0(<3 x i64>, ptr, i32, <3 x i1>)
+declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v1i64.p0(<1 x i64>, ptr, i32, <1 x i1>)
+
+declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v15i32.p0(<15 x i32>, ptr, i32, <15 x i1>)
+declare void @llvm.masked.store.v14i32.p0(<14 x i32>, ptr, i32, <14 x i1>)
+declare void @llvm.masked.store.v13i32.p0(<13 x i32>, ptr, i32, <13 x i1>)
+declare void @llvm.masked.store.v12i32.p0(<12 x i32>, ptr, i32, <12 x i1>)
+declare void @llvm.masked.store.v11i32.p0(<11 x i32>, ptr, i32, <11 x i1>)
+declare void @llvm.masked.store.v10i32.p0(<10 x i32>, ptr, i32, <10 x i1>)
+declare void @llvm.masked.store.v9i32.p0(<9 x i32>, ptr, i32, <9 x i1>)
+declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v7i32.p0(<7 x i32>, ptr, i32, <7 x i1>)
+declare void @llvm.masked.store.v6i32.p0(<6 x i32>, ptr, i32, <6 x i1>)
+declare void @llvm.masked.store.v5i32.p0(<5 x i32>, ptr, i32, <5 x i1>)
+declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
+declare void @llvm.masked.store.v3i32.p0(<3 x i32>, ptr, i32, <3 x i1>)
+declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>)
+declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32, <1 x i1>)
+
+declare void @llvm.masked.store.v32i16.p0(<32 x i16>, ptr, i32, <32 x i1>)
+declare void @llvm.masked.store.v16i16.p0(<16 x i16>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>)
+declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>)
+
+declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>)
+declare void @llvm.masked.store.v32i8.p0(<32 x i8>, ptr, i32, <32 x i1>)
+declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>)
+declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>)
+
+declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>)
+declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>)
+declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>)
+
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>)
+declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>)
+declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)
+declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>)
+
+declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
+declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
+declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
+declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>)
+
+declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>)
+declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
+declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
+
+declare <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i16>)
+declare <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i16>)
+declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
+declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
+
+declare <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr>, i32, <64 x i1>, <64 x i8>)
+declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
+declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
+declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
+
+declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>)
+
+declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>)
+
+declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
+declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>)
+
+declare void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>)
+
+declare void @llvm.masked.scatter.v32i16.v32p0(<32 x i16>, <32 x ptr>, i32, <32 x i1>)
+declare void @llvm.masked.scatter.v16i16.v16p0(<16 x i16>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>)
+declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
+
+declare void @llvm.masked.scatter.v64i8.v64p0(<64 x i8>, <64 x ptr>, i32, <64 x i1>)
+declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>)
+declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>)
+
+declare <8 x double> @llvm.masked.expandload.v8f64(ptr, <8 x i1>, <8 x double>)
+declare <4 x double> @llvm.masked.expandload.v4f64(ptr, <4 x i1>, <4 x double>)
+declare <2 x double> @llvm.masked.expandload.v2f64(ptr, <2 x i1>, <2 x double>)
+declare <1 x double> @llvm.masked.expandload.v1f64(ptr, <1 x i1>, <1 x double>)
+
+declare <16 x float> @llvm.masked.expandload.v16f32(ptr, <16 x i1>, <16 x float>)
+declare <8 x float> @llvm.masked.expandload.v8f32(ptr, <8 x i1>, <8 x float>)
+declare <4 x float> @llvm.masked.expandload.v4f32(ptr, <4 x i1>, <4 x float>)
+declare <2 x float> @llvm.masked.expandload.v2f32(ptr, <2 x i1>, <2 x float>)
+
+declare <8 x i64> @llvm.masked.expandload.v8i64(ptr, <8 x i1>, <8 x i64>)
+declare <4 x i64> @llvm.masked.expandload.v4i64(ptr, <4 x i1>, <4 x i64>)
+declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>)
+declare <1 x i64> @llvm.masked.expandload.v1i64(ptr, <1 x i1>, <1 x i64>)
+
+declare <16 x i32> @llvm.masked.expandload.v16i32(ptr, <16 x i1>, <16 x i32>)
+declare <8 x i32> @llvm.masked.expandload.v8i32(ptr, <8 x i1>, <8 x i32>)
+declare
<4 x i32> @llvm.masked.expandload.v4i32(ptr, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.expandload.v2i32(ptr, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.expandload.v4i16(ptr, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.expandload.v8i8(ptr, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.compressstore.v8f64(<8 x double>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4f64(<4 x double>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2f64(<2 x double>, ptr, <2 x i1>) +declare void @llvm.masked.compressstore.v1f64(<1 x double>, ptr, <1 x i1>) + +declare void @llvm.masked.compressstore.v16f32(<16 x float>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8f32(<8 x float>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4f32(<4 x float>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2f32(<2 x float>, ptr, <2 x i1>) + +declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>) +declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>) + +declare void @llvm.masked.compressstore.v16i32(<16 x i32>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>) + +declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>) +declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>) + +declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>) +declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>) +declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>) diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll index 897344d622d0f..b0f9f6d32a56f 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -1,1461 +1,1569 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse2 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=SSE2 -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=SSE42 -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX1 -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx2 -passes="print" 2>&1 -disable-output | FileCheck %s 
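The next file follows the same pattern. For orientation, a minimal cost-model test of this shape looks like the sketch below: one RUN line per configuration invoking the cost-model printer, a function whose mask is a plain argument (so the analysis sees a genuinely unknown mask rather than a constant it could simplify), and a FileCheck assertion on the printed cost. The function name and the standalone layout here are illustrative, not part of the patch; the RUN syntax and the SSE2 cost of 18 for a <4 x float> masked load are taken from the checks in the diff itself.

; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2

define <4 x float> @example_masked_load(ptr %p, <4 x i1> %mask) {
; SSE2: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.load.v4f32.p0
  %res = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 1, <4 x i1> %mask, <4 x float> undef)
  ret <4 x float> %res
}

declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)

The regenerated diff for masked-intrinsic-cost-inseltpoison.ll, which moves every mask from an undef constant to such an argument, follows.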
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
index 897344d622d0f..b0f9f6d32a56f 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
@@ -1,1461 +1,1569 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse2 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=SSE2
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=SSE42
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx2 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
 ;
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,SKL
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,SKX
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mcpu=skylake | FileCheck %s --check-prefixes=AVX,SKL
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mcpu=knl | FileCheck %s --check-prefixes=AVX512,KNL
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mcpu=skx | FileCheck %s --check-prefixes=AVX512,SKX
 
-define i32 @masked_load() {
+define i32 @masked_load(<1 x i1> %m1, <2 x i1> %m2, <3 x i1> %m3, <4 x i1> %m4, <5 x i1> %m5, <6 x i1> %m6, <7 x i1> %m7, <8 x i1> %m8, <9 x i1> %m9, <10 x i1> %m10, <11 x i1> %m11, <12 x i1> %m12, <13 x i1> %m13, <14 x i1> %m14, <15 x i1> %m15, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
 ; SSE2-LABEL: 'masked_load'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SSE42-LABEL: 'masked_load'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX-LABEL: 'masked_load'
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, 
<32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; KNL-LABEL: 'masked_load' -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 
for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call 
<16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr 
undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr 
undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; KNL-NEXT: Cost Model: 
Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_load' -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) -; 
SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 
x i1> undef, <4 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; SKX-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 
= call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 
x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; 
SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; - %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) - %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) - %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) - %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) - %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) - %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) - %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) - %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) - - %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) - %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) - %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) - %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) - %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) - %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) - %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) - %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x 
i1> undef, <9 x float> undef)
-  %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
-  %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
-  %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
-  %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
-  %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
-  %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
-  %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
-  %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
-
-  %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-  %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-  %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-  %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-  %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-  %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-  %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-  %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-
-  %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-  %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-  %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-  %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-  %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-  %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-  %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-  %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-  %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-  %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-  %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-  %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-  %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-  %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-  %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-  %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-
-  %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-  %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-  %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-  %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-
-  %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-  %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-  %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-  %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+  %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+  %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+  %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+  %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+  %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+  %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+  %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+  %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+
+  %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+  %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+  %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+  %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+  %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef)
+  %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef)
+  %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef)
+  %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef)
+  %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+  %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef)
+  %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef)
+  %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef)
+  %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+  %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef)
+  %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+  %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef)
+
+  %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+  %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef)
+  %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef)
+  %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef)
+  %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+  %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef)
+  %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+  %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+
+  %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+  %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef)
+  %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef)
+  %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef)
+  %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef)
+  %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef)
+  %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+  %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+  %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+  %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+  %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef)
+  %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef)
+  %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+  %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef)
+  %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+  %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef)
+
+  %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+  %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+  %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+  %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+
+  %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+  %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+  %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+  %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
   ret i32 0
 }
 
-define i32 @masked_store() {
+define i32 @masked_store(<1 x i1> %m1, <2 x i1> %m2, <3 x i1> %m3, <4 x i1> %m4, <5 x i1> %m5, <6 x i1> %m6, <7 x i1> %m7, <8 x i1> %m8, <9 x i1> %m9, <10 x i1> %m10, <11 x i1> %m11, <12 x i1> %m12, <13 x i1> %m13, <14 x i1> %m14, <15 x i1> %m15, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
 ; SSE2-LABEL: 'masked_store'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SSE42-LABEL: 'masked_store'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX-LABEL: 'masked_store'
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; KNL-LABEL: 'masked_store'
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost
Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) +; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_store' -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x 
float> undef, ptr undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void 
@llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void 
@llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call 
void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x 
i32> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; - call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) - call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) - call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) - call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) - call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) - call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) - call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) - call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) - - call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) - call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) - call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) - 
call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) - call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) - call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) - call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) - call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) - call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) - call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) - call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) - call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) - call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) - call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) - call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) - call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) - - call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) - call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) - call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) - call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) - call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) - call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) - call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) - call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) - - call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) - call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) - call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) - call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) - call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) - call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) - call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) - call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) - call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) - call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) - call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) - call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) - call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) - call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) - call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) - call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) - - call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) - call void 
@llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) - call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) - call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) - - call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) - call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) - call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) - call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) + call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) + call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) + call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) + call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) + + call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) + call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) + call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) + call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) + call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) + call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) + call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) + call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) + call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) + call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) + call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) + call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) + + call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) + call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) + call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) + call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) + call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, 
<1 x i1> %m1) + + call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) + call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) + call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) + call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) + call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) + call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) + call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) + call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) + call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) + call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) + call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) + call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) + + call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) + call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) + + call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) + call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) + call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) ret i32 0 } -define i32 @masked_gather() { +define i32 @masked_gather(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { ; SSE2-LABEL: 'masked_gather' -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; 
SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SSE2-NEXT: 
Cost Model: Found an estimated cost of 384 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SSE42-LABEL: 'masked_gather'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX1-LABEL: 'masked_gather'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX2-LABEL: 'masked_gather'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SKL-LABEL: 'masked_gather'
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; KNL-LABEL: 'masked_gather'
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 347 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SKX-LABEL: 'masked_gather'
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 347 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
- %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
- %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
- %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
- %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+ %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
- %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
- %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
- %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
- %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+ %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
- %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
- %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
- %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
- %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+ %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
- %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
- %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
- %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
- %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+ %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
- %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
- %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
- %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
- %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+ %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
- %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
- %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
- %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
- %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+ %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
 ret i32 0
 }
-define i32 @masked_scatter() {
+define i32 @masked_scatter(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
 ; SSE2-LABEL: 'masked_scatter'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 316 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SSE42-LABEL: 'masked_scatter'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model:
Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; -; AVX-LABEL: 'masked_scatter' -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; 
AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 210 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: call void 
@llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; AVX1-LABEL: 'masked_scatter' +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void 
@llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 278 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; AVX1-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_scatter' +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: 
call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_scatter' +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: 
call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 276 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SKL-NEXT: Cost Model: Found an estimated cost of 138 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKL-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; KNL-LABEL: 'masked_scatter' -; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call 
void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; 
KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 175 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 87 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 347 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; KNL-NEXT: Cost Model: Found an estimated cost of 173 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, 
<16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_scatter' -; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> 
undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void 
@llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 175 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 347 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SKX-NEXT: Cost Model: Found an estimated cost of 173 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; - call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) - call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) - call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) - call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) + call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) - call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) - call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) - call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) - call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) - call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) - call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) - call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) - call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> 
undef, <1 x ptr> undef, i32 1, <1 x i1> undef) + call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) - call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) - call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) - call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) - call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) - call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) - call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) - call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) - call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) + call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) - call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) - call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) - call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) - call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) + call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) + call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) ret i32 0 } -define i32 @masked_expandload() { +define i32 @masked_expandload(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { ; SSE2-LABEL: 'masked_expandload' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x 
double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) -; 
SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> 
@llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SSE42-LABEL: 'masked_expandload' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> 
%m1, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; -; AVX-LABEL: 'masked_expandload' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> 
@llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; AVX1-LABEL: 'masked_expandload' +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> 
@llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_expandload' +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 
for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_expandload' +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = 
call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX512-LABEL: 'masked_expandload' -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated 
cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> 
@llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; - %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) - %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) - %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) - %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) + %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> 
%m1, <1 x double> undef) - %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) - %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) - %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) - %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) + %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) - %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) - %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) - %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) - %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) + %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) - %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) - %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) - %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) - %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) + %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) - %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) - %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) - %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) - %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) + %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) - %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) - %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) - %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, 
<16 x i8> undef) - %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) + %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) ret i32 0 } -define i32 @masked_compressstore() { +define i32 @masked_compressstore(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { ; SSE2-LABEL: 'masked_compressstore' -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call 
void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 
x i32> undef, ptr undef, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SSE42-LABEL: 'masked_compressstore' -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) -; 
SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; SSE42-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX1-LABEL: 'masked_compressstore' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for 
instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX2-LABEL: 'masked_compressstore'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
 ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SKL-LABEL: 'masked_compressstore'
-; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
 ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; AVX512-LABEL: 'masked_compressstore'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
 ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
-  call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef)
-  call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef)
-  call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef)
-  call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef)
-
-  call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef)
-  call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef)
-  call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef)
-  call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef)
-
-  call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef)
-  call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef)
-  call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef)
-  call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef)
-
-  call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef)
-  call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef)
-  call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef)
-  call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef)
-
-  call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef)
-  call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef)
-  call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef)
-  call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef)
-
-  call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef)
-  call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef)
-  call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef)
-  call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef)
+  call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+  call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+  call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+  call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+
+  call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+  call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+  call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+  call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+
+  call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+  call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+  call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+  call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+
+  call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+  call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+  call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+  call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+
+  call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+  call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+  call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+  call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+
+  call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+  call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+  call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+  call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
   ret i32 0
 }
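The masked-intrinsic-cost.ll update that follows applies the same mask-as-argument pattern on a much larger scale. As a minimal sketch of the idea (hypothetical file and function names; assuming the opt/FileCheck lit workflow these tests already use): taking the <N x i1> mask as a function argument keeps it opaque to the analysis, so the reported cost cannot be skewed by special-casing an all-undef mask, while the data and pointer operands stay undef to keep the focus on the masking overhead itself.

; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s

define void @compressstore_arg_mask(ptr %p, <4 x i1> %m4) {
; The mask %m4 is an opaque argument rather than undef, so the printed cost
; reflects a genuinely unknown mask; the exact number is target dependent,
; hence the regex rather than a hard-coded cost.
; CHECK: Cost Model: Found an estimated cost of {{[0-9]+}} for instruction: call void @llvm.masked.compressstore.v4i32
  call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr %p, <4 x i1> %m4)
  ret void
}

declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>)

Rerunning utils/update_analyze_test_checks.py on such a test records the per-subtarget costs against the argument mask, which is what the regenerated CHECK lines in these diffs capture.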
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
index 5f22b2e39f947..46123e9f60574 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -1,1461 +1,1569 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse2 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=SSE2
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=SSE42
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx2 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
 ;
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,SKL
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,SKX
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mcpu=skylake | FileCheck %s --check-prefixes=AVX,SKL
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mcpu=knl | FileCheck %s --check-prefixes=AVX512,KNL
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -mcpu=skx | FileCheck %s --check-prefixes=AVX512,SKX
-define i32 @masked_load() {
+define i32 @masked_load(<1 x i1> %m1, <2 x i1> %m2, <3 x i1> %m3, <4 x i1> %m4, <5 x i1> %m5, <6 x i1> %m6, <7 x i1> %m7, <8 x i1> %m8, <9 x i1> %m9, <10 x i1> %m10, <11 x i1> %m11, <12 x i1> %m12, <13 x i1> %m13, <14 x i1> %m14, <15 x i1> %m15, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
 ; SSE2-LABEL: 'masked_load'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SSE42-LABEL: 'masked_load'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V6I32 = call <6 x i32>
@llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX-LABEL: 'masked_load' -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) -; AVX-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> 
undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 
3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 
= call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr 
undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; AVX-NEXT: Cost Model: Found 
an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; KNL-LABEL: 'masked_load' -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call 
<10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> 
@llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 
x i1> undef, <16 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x 
i1> %m6, <6 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_load' -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) -; SKX-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x 
float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) -; SKX-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x 
double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> 
@llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; SKX-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; - %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) - %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) - %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) - %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) - %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) - %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) - %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) - %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) - - %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) - %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) - %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) - %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) - %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) - %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) - %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) - %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) - %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) - %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) - %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> 
undef) - %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) - %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) - %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) - %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) - %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) - - %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) - %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) - %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) - %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) - %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) - %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) - %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) - %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) - - %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) - %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) - %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) - %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) - %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) - %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) - %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) - %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) - %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) - %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) - %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) - %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) - %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) - %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) - %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) - %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) - - %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) - %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) - %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) - %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) - - %V64I8 = call <64 x i8> 
@llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) - %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) - %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) - %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) + %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) + %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) + %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) + %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) + %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) + %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) + %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) + %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) + %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) + %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) + %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) + %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) + %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) + %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) + %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) + %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) + %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) + %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) + %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) + %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) + %V3I64 = call <3 x i64> 
@llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) + %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) + %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) + %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) + %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) + %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) + %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) + %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) + %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) + %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) + %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) + %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) + %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) ret i32 0 } -define i32 @masked_store() { +define i32 @masked_store(<1 x i1> %m1, <2 x i1> %m2, <3 x i1> %m3, <4 x i1> %m4, <5 x i1> %m5, <6 x i1> %m6, <7 x i1> %m7, <8 x i1> %m8, <9 x i1> %m9, <10 x i1> %m10, <11 x i1> %m11, <12 x i1> %m12, <13 x i1> %m13, <14 x i1> %m14, <15 x i1> %m15, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { ; SSE2-LABEL: 'masked_store' -; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for 
instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, 
<1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x 
i32> undef, ptr undef, i32 1, <3 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) +; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call 
void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for 
instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) +; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) +; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 376 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 
46 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SSE42-LABEL: 'masked_store' -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) -; 
SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void 
@llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) +; SSE42-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) +; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) +; SSE42-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) +; SSE42-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 
x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, 
ptr undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX-LABEL: 'masked_store' -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, 
ptr undef, i32 1, <9 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x 
i32> undef, ptr undef, i32 1, <11 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x 
double> undef, ptr undef, i32 1, <5 x i1> %m5) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void 
@llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> 
undef, ptr undef, i32 1, <1 x i1> %m1) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) +; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) +; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; KNL-LABEL: 'masked_store' -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x 
i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 
x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x 
i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x 
i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) +; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_store' -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void 
@llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call 
void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call 
void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> 
undef, ptr undef, i32 1, <6 x i1> %m6) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; - call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) - call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) - call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) - call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) - call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) - call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) - call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) - call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) - - call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) - call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) - call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) - call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) - call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) - call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) - call void 
@llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
- call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
- call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
- call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
- call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
- call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
- call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
- call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
- call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
- call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
- call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
- call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
- call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
- call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
- call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
- call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
- call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
- call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
- call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
- call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
- call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
- call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
-
- call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
-
- call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
- call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
- call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
+ call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+ call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+ call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+ call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+ call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+ call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+ call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+
+ call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+ call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+ call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+ call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+ call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+ call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+ call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+ call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+ call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+ call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+ call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+ call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+ call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+ call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+ call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+
+ call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+ call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+ call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+ call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+ call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+ call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+ call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+
+ call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+ call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+ call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+ call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+ call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+ call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+ call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+ call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+ call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+ call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+ call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+ call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+ call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+ call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+ call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+
+ call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+ call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+ call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+
+ call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+ call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+ call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+ call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
ret i32 0
}

-define i32 @masked_gather() {
+define i32 @masked_gather(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
; SSE2-LABEL: 'masked_gather'
-; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SSE42-LABEL: 'masked_gather'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX1-LABEL: 'masked_gather'
-; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; AVX2-LABEL: 'masked_gather'
-; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKL-LABEL: 'masked_gather'
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; KNL-LABEL: 'masked_gather'
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 347 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
; SKX-LABEL: 'masked_gather'
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 347 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
;
- %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
- %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
- %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
- %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+ %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)

- %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
- %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
- %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
- %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+ %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+ %V2F32 = call <2 x
float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) - %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) - %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) - %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) - %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) + %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) - %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) - %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) - %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) - %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) + %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) - %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) - %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) - %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) - %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) + %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) - %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) - %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) - %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) - %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) + %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) + %V16I8 
= call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) ret i32 0 } -define i32 @masked_scatter() { +define i32 @masked_scatter(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { ; SSE2-LABEL: 'masked_scatter' -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) 
-; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 316 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 158 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
 ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SSE42-LABEL: 'masked_scatter'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
-; AVX-LABEL: 'masked_scatter'
-; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 210 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+; AVX1-LABEL: 'masked_scatter'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 278 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+;
+; AVX2-LABEL: 'masked_scatter'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
+;
+; SKL-LABEL: 'masked_scatter'
+; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; SKL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 276 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64)
+; SKL-NEXT: Cost Model: Found an estimated cost of 138 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; SKL-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; KNL-LABEL: 'masked_scatter'
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; KNL-NEXT: Cost Model: Found an estimated cost of 175 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; KNL-NEXT: Cost Model: Found an estimated cost of 87 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 347 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64)
+; KNL-NEXT: Cost Model: Found an estimated cost of 173 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; KNL-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
 ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
 ; SKX-LABEL: 'masked_scatter'
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
-; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
+; SKX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 175 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; SKX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 347 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64)
+; SKX-NEXT: Cost Model: Found an estimated cost of 173 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32)
+; SKX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
 ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0
 ;
- call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+ call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+ call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
+ call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1)
- call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
- call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+ call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16)
+ call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+ call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2)
- call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
- call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
- call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
- call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+ call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8)
+ call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4)
+ call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) - call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) - call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) - call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) - call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) - call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) - call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) - call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) - call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) + call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) - call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) - call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) - call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) - call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) + call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) + call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) ret i32 0 } -define i32 @masked_expandload() { +define i32 @masked_expandload(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { ; SSE2-LABEL: 'masked_expandload' -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 
x double> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8F64 = call 
<8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SSE42-LABEL: 'masked_expandload' -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> 
undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = 
call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; -; AVX-LABEL: 'masked_expandload' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 9 for 
instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; AVX1-LABEL: 'masked_expandload' +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for 
instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_expandload' +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = 
call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found 
an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_expandload' +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 
17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX512-LABEL: 'masked_expandload' -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) -; 
AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; - %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) - %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) - %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) - %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) + %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) - %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) - %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) - %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr 
undef, <4 x i1> undef, <4 x float> undef) - %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) + %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) - %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) - %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) - %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) - %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) + %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) - %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) - %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) - %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) - %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) + %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) - %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) - %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) - %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) - %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) + %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) - %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) - %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) - %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) - %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) + %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 
x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) ret i32 0 } -define i32 @masked_compressstore() { +define i32 @masked_compressstore(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { ; SSE2-LABEL: 'masked_compressstore' -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 9 
for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void 
@llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SSE42-LABEL: 'masked_compressstore' -; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> 
undef, ptr undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) -; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr 
undef, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX1-LABEL: 'masked_compressstore' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) -; AVX1-NEXT: Cost Model: Found 
an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 
for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX2-LABEL: 'masked_compressstore' -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, 
ptr undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> 
%m1) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKL-LABEL: 'masked_compressstore' -; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void 
@llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void 
@llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> 
%m32) +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX512-LABEL: 'masked_compressstore' -; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) -; 
AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) 
+; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; - call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) - call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) - call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) - call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) - - call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) - call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) - call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) - call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) - - call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) - call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) - call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) - call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) - - call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) - call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) - call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) - call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) - - call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) - call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) - call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) - call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) - - call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) - call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) - call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) - call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) + call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr 
undef, <4 x i1> %m4) + call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) + call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) + + call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) + call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) + call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) + call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) + + call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) + call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) + call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) + call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) + + call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) + call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) + call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) + call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) + + call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) + call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) + call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) + call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) + + call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) + call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) + call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) + call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) ret i32 0 } diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll new file mode 100644 index 0000000000000..8ca572ada8b7f --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll @@ -0,0 +1,2521 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=skylake | FileCheck %s --check-prefixes=AVX,SKL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=knl | FileCheck %s --check-prefixes=AVX512,KNL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=skx | FileCheck %s --check-prefixes=AVX512,SKX + +define i32 @masked_load(<1 x i1> %m1, <2 x i1> %m2, <3 x i1> %m3, <4 x i1>
%m4, <5 x i1> %m5, <6 x i1> %m6, <7 x i1> %m7, <8 x i1> %m8, <9 x i1> %m9, <10 x i1> %m10, <11 x i1> %m11, <12 x i1> %m12, <13 x i1> %m13, <14 x i1> %m14, <15 x i1> %m15, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { +; SSE2-LABEL: 'masked_load' +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for 
instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V10I32 = call <10 x 
i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SSE42-LABEL: 'masked_load'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX-LABEL: 'masked_load'
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 163 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; KNL-LABEL: 'masked_load'
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SKX-LABEL: 'masked_load'
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+ %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+ %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef)
+ %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef)
+ %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+ %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+
+ %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+ %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef)
+ %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef)
+ %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef)
+ %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef)
+ %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef)
+ %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef)
+ %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+ %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef)
+ %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef)
+ %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+ %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+ %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef)
+
+ %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+ %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef)
+ %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef)
+ %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+ %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+
+ %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+ %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef)
+ %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef)
+ %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef)
+ %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef)
+ %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef)
+ %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef)
+ %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+ %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef)
+ %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef)
+ %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+ %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+ %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef)
+
+ %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+
+ %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+
+ ret i32 0
+}
+
+define i32 @masked_store(<1 x i1> %m1, <2 x i1> %m2, <3 x i1> %m3, <4 x i1> %m4, <5 x i1> %m5, <6 x i1> %m6, <7 x i1> %m7, <8 x i1> %m8, <9 x i1> %m9, <10 x i1> %m10, <11 x i1> %m11, <12 x i1> %m12, <13 x i1> %m13, <14 x i1> %m14, <15 x i1> %m15, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
+; SSE2-LABEL: 'masked_store'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 190 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SSE42-LABEL: 'masked_store'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX-LABEL: 'masked_store'
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0(<6
x i32> undef, ptr undef, i32 1, <6 x i1> %m6) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) +; AVX-NEXT: Cost Model: Found an estimated cost of 163 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_store' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) +; KNL-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 326 for instruction: call void 
@llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) +; KNL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_store' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> 
undef, ptr undef, i32 1, <7 x i1> %m7) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) 
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) + call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) + call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) + call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) + call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) + + call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) + 
call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) + call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) + call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) + call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) + call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) + call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) + call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) + call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) + call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) + call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) + call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) + + call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) + call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) + call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) + call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) + call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) + + call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) + call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) + call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) + call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) + call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) + call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) + call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) + call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) + call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) + call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) + call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) + call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) + + call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) 
+ call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) + + call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) + call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) + call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) + + ret i32 0 +} + +define i32 @masked_gather(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { +; SSE2-LABEL: 'masked_gather' +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call <4 x i32> 
@llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_gather' +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x 
i1> %m8, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_gather' +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 
3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for 
instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_gather' +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 
1, <32 x i1> %m32, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_gather' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_gather' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> 
%m8, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 347 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_gather' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> 
@llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x 
ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 347 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) + %V8I8 = 
call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_scatter(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { +; SSE2-LABEL: 'masked_scatter' +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> 
%m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_scatter' +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> 
undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_scatter' +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x 
i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 278 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; AVX1-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_scatter' +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void 
@llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_scatter' +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void 
@llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 276 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SKL-NEXT: Cost Model: Found an estimated cost of 138 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKL-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an 
estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_scatter' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 175 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 87 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for 
instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 347 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; KNL-NEXT: Cost Model: Found an estimated cost of 173 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_scatter' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void 
@llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 175 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 347 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SKX-NEXT: Cost Model: Found an estimated cost of 173 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) + + call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) + + call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) + + call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) + + call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) + call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> 
undef, i32 1, <4 x i1> %m4) + + call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) + call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) + call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + + ret i32 0 +} + +define i32 @masked_expandload(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { +; SSE2-LABEL: 'masked_expandload' +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_expandload' +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x 
i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_expandload' +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> 
%m8, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_expandload' +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x 
float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_expandload' +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; 
SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX512-LABEL: 'masked_expandload'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+ %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef)
+
+ %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef)
+
+ %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef)
+
+ %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef)
+
+ %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef)
+
+ %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef)
+
+ ret i32 0
+}
+
+define i32 @masked_compressstore(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
+; SSE2-LABEL: 'masked_compressstore'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SSE42-LABEL: 'masked_compressstore'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX1-LABEL: 'masked_compressstore'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX2-LABEL: 'masked_compressstore'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SKL-LABEL: 'masked_compressstore'
+; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX512-LABEL: 'masked_compressstore'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+ call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8)
+ call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4)
+ call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2)
+ call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1)
+
+ call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16)
+ call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8)
+ call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4)
+ call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2)
+
+ call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8)
+ call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4)
+ call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2)
+ call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1)
+
+ call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16)
+ call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8)
+ call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4)
+ call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2)
+
+ call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32)
+ call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16)
+ call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8)
+ call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4)
+
+ call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64)
+ call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32)
+ call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16)
+ call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8)
+
+ ret i32 0
+}
+
+define <2 x double> @test1(<2 x i64> %trigger, ptr %addr, <2 x double> %dst) {
+; SSE2-LABEL: 'test1'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+; SSE42-LABEL: 'test1'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+; AVX-LABEL: 'test1'
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+; AVX512-LABEL: 'test1'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+ %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
+ ret <2 x double> %res
+}
+
+define <4 x i32> @test2(<4 x i32> %trigger, ptr %addr, <4 x i32> %dst) {
+; SSE2-LABEL: 'test2'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; SSE42-LABEL: 'test2'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; AVX-LABEL: 'test2'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; AVX512-LABEL: 'test2'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+ %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+ %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
+ ret <4 x i32> %res
+}
+
+define void @test3(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) {
+; SSE2-LABEL: 'test3'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SSE42-LABEL: 'test3'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; AVX-LABEL: 'test3'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; AVX512-LABEL: 'test3'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+ %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+ call void @llvm.masked.store.v4i32.p0(<4 x i32>%val, ptr %addr, i32 4, <4 x i1>%mask)
+ ret void
+}
+
+define <8 x float> @test4(<8 x i32> %trigger, ptr %addr, <8 x float> %dst) {
+; SSE2-LABEL: 'test4'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res
+;
+; SSE42-LABEL: 'test4'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res
+;
+; AVX1-LABEL: 'test4'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res
+;
+; AVX2-LABEL: 'test4'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res
+;
+; SKL-LABEL: 'test4'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res
+;
+; AVX512-LABEL: 'test4'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res
+;
+ %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+ %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
+ ret <8 x float> %res
+}
+
+define void @test5(<2 x i32> %trigger, ptr %addr, <2 x float> %val) {
+; SSE2-LABEL: 'test5'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SSE42-LABEL: 'test5'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; AVX-LABEL: 'test5'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; AVX512-LABEL: 'test5'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+ %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+ call void @llvm.masked.store.v2f32.p0(<2 x float>%val, ptr %addr, i32 4, <2 x i1>%mask)
+ ret void
+}
+
+define void @test6(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) {
+; SSE2-LABEL: 'test6'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; SSE42-LABEL: 'test6'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; AVX-LABEL: 'test6'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; AVX512-LABEL: 'test6'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+ %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+ call void @llvm.masked.store.v2i32.p0(<2 x i32>%val, ptr %addr, i32 4, <2 x i1>%mask)
+ ret void
+}
+
+define <2 x float> @test7(<2 x i32> %trigger, ptr %addr, <2 x float> %dst) {
+; SSE2-LABEL: 'test7'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res
+;
+; SSE42-LABEL: 'test7'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res
+;
+; AVX-LABEL: 'test7'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res
+;
+; AVX512-LABEL: 'test7'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res
+;
+ %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+ %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
+ ret <2 x float> %res
+}
+
+define <2 x i32> @test8(<2 x i32> %trigger, ptr %addr, <2 x i32> %dst) {
+; SSE2-LABEL: 'test8'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res
+;
+; SSE42-LABEL: 'test8'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res
+;
+; AVX-LABEL: 'test8'
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res
+;
+; AVX512-LABEL: 'test8'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res
+;
+ %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+ %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
+ ret <2 x i32> %res
+}
+
+define <2 x double> @test_gather_2f64(<2 x ptr> %ptrs, <2 x i1> %mask, <2 x double> %src0) {
+; SSE2-LABEL: 'test_gather_2f64'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+; SSE42-LABEL: 'test_gather_2f64'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+; AVX1-LABEL: 'test_gather_2f64'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+; AVX2-LABEL: 'test_gather_2f64'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+; SKL-LABEL: 'test_gather_2f64'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+; AVX512-LABEL: 'test_gather_2f64'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res
+;
+ %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+ ret <2 x double> %res
+}
+
+define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %src0) {
+; SSE2-LABEL: 'test_gather_4i32'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; SSE42-LABEL: 'test_gather_4i32'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; AVX1-LABEL: 'test_gather_4i32'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; AVX2-LABEL: 'test_gather_4i32'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; SKL-LABEL: 'test_gather_4i32'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; KNL-LABEL: 'test_gather_4i32'
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; SKX-LABEL: 'test_gather_4i32'
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) {
+; SSE2-LABEL: 'test_gather_4i32_const_mask'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; SSE42-LABEL: 'test_gather_4i32_const_mask'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; AVX1-LABEL: 'test_gather_4i32_const_mask'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; AVX2-LABEL: 'test_gather_4i32_const_mask'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; SKL-LABEL: 'test_gather_4i32_const_mask'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; KNL-LABEL: 'test_gather_4i32_const_mask'
+; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+; SKX-LABEL: 'test_gather_4i32_const_mask'
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res
+;
+ %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+ ret <4 x i32> %res
+}
+
+define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) {
+; SSE2-LABEL: 'test_gather_16f32_const_mask'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; SSE42-LABEL: 'test_gather_16f32_const_mask'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX1-LABEL: 'test_gather_16f32_const_mask'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX2-LABEL: 'test_gather_16f32_const_mask'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; SKL-LABEL: 'test_gather_16f32_const_mask'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX512-LABEL: 'test_gather_16f32_const_mask'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+define <16 x float> @test_gather_16f32_var_mask(ptr %base, <16 x i32> %ind, <16 x i1>%mask) {
+; SSE2-LABEL: 'test_gather_16f32_var_mask'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; SSE42-LABEL: 'test_gather_16f32_var_mask'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX1-LABEL: 'test_gather_16f32_var_mask'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX2-LABEL: 'test_gather_16f32_var_mask'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; SKL-LABEL: 'test_gather_16f32_var_mask'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX512-LABEL: 'test_gather_16f32_var_mask'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+define <16 x float>
@test_gather_16f32_ra_var_mask(<16 x ptr> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { +; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_ra_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> 
%mask, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
+; SSE2-LABEL: 'test_gather_16f32_const_mask2'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; SSE42-LABEL: 'test_gather_16f32_const_mask2'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX1-LABEL: 'test_gather_16f32_const_mask2'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX2-LABEL: 'test_gather_16f32_const_mask2'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; SKL-LABEL: 'test_gather_16f32_const_mask2'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX512-LABEL: 'test_gather_16f32_const_mask2'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+ %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+ %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+
+ %sext_ind = sext <16 x i32> %ind to <16 x i64>
+ %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+
+ %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+ ret <16 x float>%res
+}
+
+define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
+; SSE2-LABEL: 'test_scatter_16i32'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+;
SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_scatter_16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_scatter_16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SKL-LABEL: 'test_scatter_16i32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> 
undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 + %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer + + %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind + %imask = bitcast i16 %mask to <16 x i1> + call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>%val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) + ret void +} + +define void @test_scatter_8i32(<8 x i32>%a1, <8 x ptr> %ptr, <8 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_8i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) + ret void +} + +define void @test_scatter_4i32(<4 x i32>%a1, <4 x ptr> %ptr, <4 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_4i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; KNL-LABEL: 'test_scatter_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SKX-LABEL: 'test_scatter_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) + ret void +} + +define <4 x float> @test_gather_4f32(ptr %ptr, <4 x i32> %ind, <4 x i1>%mask) { +; SSE2-LABEL: 'test_gather_4f32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SSE42-LABEL: 'test_gather_4f32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX1-LABEL: 'test_gather_4f32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX2-LABEL: 'test_gather_4f32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> 
%gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; SKL-LABEL: 'test_gather_4f32'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; KNL-LABEL: 'test_gather_4f32'
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; SKX-LABEL: 'test_gather_4f32'
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+ %sext_ind = sext <4 x i32> %ind to <4 x i64>
+ %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+
+ %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+ ret <4 x float>%res
+}
+
+define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) {
+; SSE2-LABEL: 'test_gather_4f32_const_mask'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; SSE42-LABEL: 'test_gather_4f32_const_mask'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; AVX1-LABEL: 'test_gather_4f32_const_mask'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; AVX2-LABEL: 'test_gather_4f32_const_mask'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; SKL-LABEL: 'test_gather_4f32_const_mask'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; KNL-LABEL: 'test_gather_4f32_const_mask'
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; SKX-LABEL: 'test_gather_4f32_const_mask'
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+ %sext_ind = sext <4 x i32> %ind to <4 x i64>
+ %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+
+ %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+ ret <4 x float>%res
+}
+
+declare <8 x double> @llvm.masked.load.v8f64.p0(ptr, i32, <8 x i1>, <8 x double>)
+declare <7 x double> @llvm.masked.load.v7f64.p0(ptr, i32, <7 x i1>, <7 x double>)
+declare <6 x double> @llvm.masked.load.v6f64.p0(ptr, i32, <6 x i1>, <6 x double>)
+declare <5 x double> @llvm.masked.load.v5f64.p0(ptr, i32, <5 x i1>, <5 x double>)
+declare <4 x double> @llvm.masked.load.v4f64.p0(ptr, i32, <4 x i1>, <4 x double>)
+declare <3 x double> @llvm.masked.load.v3f64.p0(ptr, i32, <3 x i1>, <3 x double>)
+declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>)
+declare <1 x double> @llvm.masked.load.v1f64.p0(ptr, i32, <1 x i1>, <1 x double>)
+
+declare <16 x float>
@llvm.masked.load.v16f32.p0(ptr, i32, <16 x i1>, <16 x float>) +declare <15 x float> @llvm.masked.load.v15f32.p0(ptr, i32, <15 x i1>, <15 x float>) +declare <14 x float> @llvm.masked.load.v14f32.p0(ptr, i32, <14 x i1>, <14 x float>) +declare <13 x float> @llvm.masked.load.v13f32.p0(ptr, i32, <13 x i1>, <13 x float>) +declare <12 x float> @llvm.masked.load.v12f32.p0(ptr, i32, <12 x i1>, <12 x float>) +declare <11 x float> @llvm.masked.load.v11f32.p0(ptr, i32, <11 x i1>, <11 x float>) +declare <10 x float> @llvm.masked.load.v10f32.p0(ptr, i32, <10 x i1>, <10 x float>) +declare <9 x float> @llvm.masked.load.v9f32.p0(ptr, i32, <9 x i1>, <9 x float>) +declare <8 x float> @llvm.masked.load.v8f32.p0(ptr, i32, <8 x i1>, <8 x float>) +declare <7 x float> @llvm.masked.load.v7f32.p0(ptr, i32, <7 x i1>, <7 x float>) +declare <6 x float> @llvm.masked.load.v6f32.p0(ptr, i32, <6 x i1>, <6 x float>) +declare <5 x float> @llvm.masked.load.v5f32.p0(ptr, i32, <5 x i1>, <5 x float>) +declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>) +declare <3 x float> @llvm.masked.load.v3f32.p0(ptr, i32, <3 x i1>, <3 x float>) +declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>) +declare <1 x float> @llvm.masked.load.v1f32.p0(ptr, i32, <1 x i1>, <1 x float>) + +declare <8 x i64> @llvm.masked.load.v8i64.p0(ptr, i32, <8 x i1>, <8 x i64>) +declare <7 x i64> @llvm.masked.load.v7i64.p0(ptr, i32, <7 x i1>, <7 x i64>) +declare <6 x i64> @llvm.masked.load.v6i64.p0(ptr, i32, <6 x i1>, <6 x i64>) +declare <5 x i64> @llvm.masked.load.v5i64.p0(ptr, i32, <5 x i1>, <5 x i64>) +declare <4 x i64> @llvm.masked.load.v4i64.p0(ptr, i32, <4 x i1>, <4 x i64>) +declare <3 x i64> @llvm.masked.load.v3i64.p0(ptr, i32, <3 x i1>, <3 x i64>) +declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.load.v1i64.p0(ptr, i32, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.load.v16i32.p0(ptr, i32, <16 x i1>, <16 x i32>) +declare <15 x i32> @llvm.masked.load.v15i32.p0(ptr, i32, <15 x i1>, <15 x i32>) +declare <14 x i32> @llvm.masked.load.v14i32.p0(ptr, i32, <14 x i1>, <14 x i32>) +declare <13 x i32> @llvm.masked.load.v13i32.p0(ptr, i32, <13 x i1>, <13 x i32>) +declare <12 x i32> @llvm.masked.load.v12i32.p0(ptr, i32, <12 x i1>, <12 x i32>) +declare <11 x i32> @llvm.masked.load.v11i32.p0(ptr, i32, <11 x i1>, <11 x i32>) +declare <10 x i32> @llvm.masked.load.v10i32.p0(ptr, i32, <10 x i1>, <10 x i32>) +declare <9 x i32> @llvm.masked.load.v9i32.p0(ptr, i32, <9 x i1>, <9 x i32>) +declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32, <8 x i1>, <8 x i32>) +declare <7 x i32> @llvm.masked.load.v7i32.p0(ptr, i32, <7 x i1>, <7 x i32>) +declare <6 x i32> @llvm.masked.load.v6i32.p0(ptr, i32, <6 x i1>, <6 x i32>) +declare <5 x i32> @llvm.masked.load.v5i32.p0(ptr, i32, <5 x i1>, <5 x i32>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>) +declare <3 x i32> @llvm.masked.load.v3i32.p0(ptr, i32, <3 x i1>, <3 x i32>) +declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>) +declare <1 x i32> @llvm.masked.load.v1i32.p0(ptr, i32, <1 x i1>, <1 x i32>) + +declare <32 x i16> @llvm.masked.load.v32i16.p0(ptr, i32, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.load.v16i16.p0(ptr, i32, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.load.v64i8.p0(ptr, 
i32, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.load.v32i8.p0(ptr, i32, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.store.v8f64.p0(<8 x double>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7f64.p0(<7 x double>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6f64.p0(<6 x double>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5f64.p0(<5 x double>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4f64.p0(<4 x double>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3f64.p0(<3 x double>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2f64.p0(<2 x double>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1f64.p0(<1 x double>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v16f32.p0(<16 x float>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v15f32.p0(<15 x float>, ptr, i32, <15 x i1>) +declare void @llvm.masked.store.v14f32.p0(<14 x float>, ptr, i32, <14 x i1>) +declare void @llvm.masked.store.v13f32.p0(<13 x float>, ptr, i32, <13 x i1>) +declare void @llvm.masked.store.v12f32.p0(<12 x float>, ptr, i32, <12 x i1>) +declare void @llvm.masked.store.v11f32.p0(<11 x float>, ptr, i32, <11 x i1>) +declare void @llvm.masked.store.v10f32.p0(<10 x float>, ptr, i32, <10 x i1>) +declare void @llvm.masked.store.v9f32.p0(<9 x float>, ptr, i32, <9 x i1>) +declare void @llvm.masked.store.v8f32.p0(<8 x float>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6f32.p0(<6 x float>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5f32.p0(<5 x float>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3f32.p0(<3 x float>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2f32.p0(<2 x float>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1f32.p0(<1 x float>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v8i64.p0(<8 x i64>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7i64.p0(<7 x i64>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6i64.p0(<6 x i64>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5i64.p0(<5 x i64>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4i64.p0(<4 x i64>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3i64.p0(<3 x i64>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1i64.p0(<1 x i64>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v15i32.p0(<15 x i32>, ptr, i32, <15 x i1>) +declare void @llvm.masked.store.v14i32.p0(<14 x i32>, ptr, i32, <14 x i1>) +declare void @llvm.masked.store.v13i32.p0(<13 x i32>, ptr, i32, <13 x i1>) +declare void @llvm.masked.store.v12i32.p0(<12 x i32>, ptr, i32, <12 x i1>) +declare void @llvm.masked.store.v11i32.p0(<11 x i32>, ptr, i32, <11 x i1>) +declare void @llvm.masked.store.v10i32.p0(<10 x i32>, ptr, i32, <10 x i1>) +declare void @llvm.masked.store.v9i32.p0(<9 x i32>, ptr, i32, <9 x i1>) +declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7i32.p0(<7 x i32>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6i32.p0(<6 x i32>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5i32.p0(<5 x i32>, 
ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3i32.p0(<3 x i32>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v32i16.p0(<32 x i16>, ptr, i32, <32 x i1>) +declare void @llvm.masked.store.v16i16.p0(<16 x i16>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>) + +declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>) +declare void @llvm.masked.store.v32i8.p0(<32 x i8>, ptr, i32, <32 x i1>) +declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>) + +declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>) +declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>) + +declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>) +declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr>, i32, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>) + +declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>) +declare void 
@llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>) + +declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>) + +declare void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>) + +declare void @llvm.masked.scatter.v32i16.v32p0(<32 x i16>, <32 x ptr>, i32, <32 x i1>) +declare void @llvm.masked.scatter.v16i16.v16p0(<16 x i16>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>) + +declare void @llvm.masked.scatter.v64i8.v64p0(<64 x i8>, <64 x ptr>, i32, <64 x i1>) +declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>) +declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>) + +declare <8 x double> @llvm.masked.expandload.v8f64(ptr, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.expandload.v4f64(ptr, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.expandload.v2f64(ptr, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.expandload.v1f64(ptr, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.expandload.v16f32(ptr, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.expandload.v8f32(ptr, <8 x i1>, <8 x float>) +declare <4 x float> @llvm.masked.expandload.v4f32(ptr, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.expandload.v2f32(ptr, <2 x i1>, <2 x float>) + +declare <8 x i64> @llvm.masked.expandload.v8i64(ptr, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.expandload.v4i64(ptr, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.expandload.v1i64(ptr, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.expandload.v16i32(ptr, <16 x i1>, <16 x i32>) +declare <8 x i32> @llvm.masked.expandload.v8i32(ptr, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.expandload.v4i32(ptr, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.expandload.v2i32(ptr, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.expandload.v4i16(ptr, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.expandload.v8i8(ptr, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.compressstore.v8f64(<8 x double>, ptr, <8 x i1>) +declare 
void @llvm.masked.compressstore.v4f64(<4 x double>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2f64(<2 x double>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v1f64(<1 x double>, ptr, <1 x i1>)
+
+declare void @llvm.masked.compressstore.v16f32(<16 x float>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8f32(<8 x float>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4f32(<4 x float>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2f32(<2 x float>, ptr, <2 x i1>)
+
+declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>)
+
+declare void @llvm.masked.compressstore.v16i32(<16 x i32>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>)
+
+declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>)
+
+declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>)
+declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>)
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll
new file mode 100644
index 0000000000000..07583d268c8ae
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll
@@ -0,0 +1,2521 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
+;
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=skylake | FileCheck %s --check-prefixes=AVX,SKL
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=knl | FileCheck %s --check-prefixes=AVX512,KNL
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=skx | FileCheck %s --check-prefixes=AVX512,SKX
+
+define i32 @masked_load(<1 x i1> %m1, <2 x i1> %m2, <3 x i1> %m3, <4 x i1> %m4, <5 x i1> %m5, <6 x i1> %m6, <7 x i1> %m7, <8 x i1> %m8, <9 x i1> %m9, <10 x i1> %m10, <11 x i1> %m11, <12 x i1> %m12, <13 x i1> %m13, <14 x i1> %m14, <15 x i1> %m15, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
+; SSE2-LABEL:
'masked_load' +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5F32 = call <5 x float> 
@llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr 
undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_load' +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for 
instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; SSE42-NEXT: Cost Model: Found 
an estimated cost of 25 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_load' +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr 
undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x 
i1> %m6, <6 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 163 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_load' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> 
%m14, <14 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found 
an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> 
@llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_load' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x 
float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, 
<8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) + %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) + %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) + %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) + %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) + %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) + %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) + %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) + %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) + %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) + %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) + %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) + %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) + %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) + %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) + %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) + %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) + %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) + %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) + %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) + %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) + + %V16I32 = call <16 x i32> 
@llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) + %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) + %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) + %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) + %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) + %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) + %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) + %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) + %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) + %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) + %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) + %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) + %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_store(<1 x i1> %m1, <2 x i1> %m2, <3 x i1> %m3, <4 x i1> %m4, <5 x i1> %m5, <6 x i1> %m6, <7 x i1> %m7, <8 x i1> %m8, <9 x i1> %m9, <10 x i1> %m10, <11 x i1> %m11, <12 x i1> %m12, <13 x i1> %m13, <14 x i1> %m14, <15 x i1> %m15, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { +; SSE2-LABEL: 'masked_store' +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: 
call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) +; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) +; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 38 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) +; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) +; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 190 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SSE42-LABEL: 'masked_store'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX-LABEL: 'masked_store'
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; AVX-NEXT: Cost Model: Found an estimated cost of 163 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; KNL-LABEL: 'masked_store'
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; KNL-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; KNL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; KNL-NEXT: Cost Model: Found an estimated cost of 326 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; KNL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SKX-LABEL: 'masked_store'
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+  call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8)
+  call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7)
+  call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6)
+  call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5)
+  call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4)
+  call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3)
+  call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2)
+  call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1)
+
+  call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16)
+  call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15)
+  call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14)
+  call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13)
+  call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12)
+  call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11)
+  call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10)
+  call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9)
+  call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8)
+  call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7)
+  call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6)
+  call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5)
+  call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4)
+  call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3)
+  call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2)
+  call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1)
+
+  call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8)
+  call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7)
+  call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6)
+  call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5)
+  call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4)
+  call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3)
+  call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2)
+  call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1)
+
+  call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16)
+  call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15)
+  call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14)
+  call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13)
+  call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12)
+  call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11)
+  call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10)
+  call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9)
+  call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8)
+  call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7)
+  call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6)
+  call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5)
+  call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4)
+  call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3)
+  call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2)
+  call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1)
+
+  call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32)
+  call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16)
+  call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8)
+  call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4)
+
+  call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64)
+  call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32)
+  call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16)
+  call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8)
+
+  ret i32 0
+}
+
+define i32 @masked_gather(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) {
+; SSE2-LABEL: 'masked_gather'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SSE42-LABEL: 'masked_gather'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX1-LABEL: 'masked_gather'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; AVX2-LABEL: 'masked_gather'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SKL-LABEL: 'masked_gather'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; KNL-LABEL: 'masked_gather'
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 347 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+; SKX-LABEL: 'masked_gather'
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 347 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr>
undef, i32 1, <64 x i1> %m64, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 173 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_scatter(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x 
i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { +; SSE2-LABEL: 'masked_scatter' +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 384 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_scatter' +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) 
+; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SSE42-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_scatter' +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 
1, <1 x i1> %m1) +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 278 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; AVX1-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_scatter' +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x 
ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_scatter' +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> 
undef, i32 1, <1 x i1> %m1) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 139 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKL-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 276 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SKL-NEXT: Cost Model: Found an estimated cost of 138 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKL-NEXT: Cost Model: Found an estimated cost of 69 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 
'masked_scatter' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; KNL-NEXT: Cost Model: Found an estimated cost of 175 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 87 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; KNL-NEXT: Cost Model: Found an estimated cost of 347 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x 
i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; KNL-NEXT: Cost Model: Found an estimated cost of 173 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; KNL-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; KNL-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_scatter' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) +; SKX-NEXT: Cost Model: Found an estimated cost of 175 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> 
undef, i32 1, <32 x i1> %m32) +; SKX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) +; SKX-NEXT: Cost Model: Found an estimated cost of 347 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) +; SKX-NEXT: Cost Model: Found an estimated cost of 173 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) +; SKX-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) +; SKX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) + + call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) + + call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) + call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) + + call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) + + call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) + call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) + + call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) + call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) 
+ call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) + call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) + + ret i32 0 +} + +define i32 @masked_expandload(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { +; SSE2-LABEL: 'masked_expandload' +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for 
instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_expandload' +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 
9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_expandload' +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated 
cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_expandload' +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated 
cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_expandload' +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an 
estimated cost of 17 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX512-LABEL: 'masked_expandload' +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> 
undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_compressstore(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { +; SSE2-LABEL: 
'masked_compressstore' +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_compressstore' +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void 
@llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_compressstore' +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; 
AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_compressstore' +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void 
@llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_compressstore' +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; SKL-NEXT: Cost 
Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX512-LABEL: 'masked_compressstore' +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void 
@llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) +; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) +; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) +; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) +; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) +; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) + call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) + call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) + call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) + + call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) + call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) + call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) + call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) + + call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) + call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) + call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) + call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) + + call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, 
ptr undef, <16 x i1> %m16) + call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) + call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) + call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) + + call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) + call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) + call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) + call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) + + call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) + call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) + call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) + call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) + + ret i32 0 +} + +define <2 x double> @test1(<2 x i64> %trigger, ptr %addr, <2 x double> %dst) { +; SSE2-LABEL: 'test1' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SSE42-LABEL: 'test1' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX-LABEL: 'test1' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX512-LABEL: 'test1' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; + %mask = icmp eq <2 x i64> %trigger, zeroinitializer + %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x double>%dst) + ret <2 x double> %res +} + +define <4 x i32> @test2(<4 x i32> %trigger, ptr %addr, <4 x i32> %dst) { +; SSE2-LABEL: 'test2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX-LABEL: 'test2' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX512-LABEL: 'test2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; + %mask = icmp eq <4 x i32> %trigger, zeroinitializer + %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst) + ret <4 x i32> %res +} + +define void @test3(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) { +; SSE2-LABEL: 'test3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test3' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <4 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v4i32.p0(<4 x i32>%val, ptr %addr, i32 4, <4 x i1>%mask) + ret void +} + +define <8 x float> @test4(<8 x i32> %trigger, ptr %addr, <8 x float> %dst) { +; SSE2-LABEL: 'test4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; SSE42-LABEL: 'test4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; AVX1-LABEL: 'test4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; AVX2-LABEL: 'test4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; SKL-LABEL: 'test4' +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; AVX512-LABEL: 'test4' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; + %mask = icmp eq <8 x i32> %trigger, zeroinitializer + %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1>%mask, <8 x float>%dst) + ret <8 x float> %res +} + +define void @test5(<2 x i32> %trigger, ptr %addr, <2 x float> %val) { +; SSE2-LABEL: 'test5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test5' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; 
AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test5' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v2f32.p0(<2 x float>%val, ptr %addr, i32 4, <2 x i1>%mask) + ret void +} + +define void @test6(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) { +; SSE2-LABEL: 'test6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test6' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test6' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v2i32.p0(<2 x i32>%val, ptr %addr, i32 4, <2 x i1>%mask) + ret void +} + +define <2 x float> @test7(<2 x i32> %trigger, ptr %addr, <2 x float> %dst) { +; SSE2-LABEL: 'test7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; SSE42-LABEL: 'test7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; AVX-LABEL: 'test7' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost 
of 3 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; AVX512-LABEL: 'test7' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x float>%dst) + ret <2 x float> %res +} + +define <2 x i32> @test8(<2 x i32> %trigger, ptr %addr, <2 x i32> %dst) { +; SSE2-LABEL: 'test8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; SSE42-LABEL: 'test8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; AVX-LABEL: 'test8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; AVX512-LABEL: 'test8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst) + ret <2 x i32> %res +} + +define <2 x double> @test_gather_2f64(<2 x ptr> %ptrs, <2 x i1> %mask, <2 x double> %src0) { +; SSE2-LABEL: 'test_gather_2f64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SSE42-LABEL: 'test_gather_2f64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX1-LABEL: 'test_gather_2f64' +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX2-LABEL: 'test_gather_2f64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SKL-LABEL: 'test_gather_2f64' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX512-LABEL: 'test_gather_2f64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; + %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) + ret <2 x double> %res +} + +define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { +; SSE2-LABEL: 'test_gather_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test_gather_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX1-LABEL: 'test_gather_4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX2-LABEL: 'test_gather_4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKL-LABEL: 'test_gather_4i32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; KNL-LABEL: 'test_gather_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKX-LABEL: 'test_gather_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret <4 x i32> %res +; + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) + ret <4 x i32> %res +} + +define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { +; SSE2-LABEL: 'test_gather_4i32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test_gather_4i32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX1-LABEL: 'test_gather_4i32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX2-LABEL: 'test_gather_4i32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKL-LABEL: 'test_gather_4i32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; KNL-LABEL: 'test_gather_4i32_const_mask' +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKX-LABEL: 'test_gather_4i32_const_mask' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) + ret <4 x i32> %res +} + +define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { +; SSE2-LABEL: 'test_gather_16f32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr 
+define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) {
+; SSE2-LABEL: 'test_gather_16f32_const_mask'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; SSE42-LABEL: 'test_gather_16f32_const_mask'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX1-LABEL: 'test_gather_16f32_const_mask'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX2-LABEL: 'test_gather_16f32_const_mask'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; SKL-LABEL: 'test_gather_16f32_const_mask'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX512-LABEL: 'test_gather_16f32_const_mask'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+  %sext_ind = sext <16 x i32> %ind to <16 x i64>
+  %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  ret <16 x float>%res
+}
+
+define <16 x float> @test_gather_16f32_var_mask(ptr %base, <16 x i32> %ind, <16 x i1>%mask) {
+; SSE2-LABEL: 'test_gather_16f32_var_mask'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_ra_var_mask(<16 x ptr> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { +; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_ra_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind + + 
%res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+  ret <16 x float>%res
+}
+
+define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) {
+; SSE2-LABEL: 'test_gather_16f32_const_mask2'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; SSE42-LABEL: 'test_gather_16f32_const_mask2'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX1-LABEL: 'test_gather_16f32_const_mask2'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX2-LABEL: 'test_gather_16f32_const_mask2'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; SKL-LABEL: 'test_gather_16f32_const_mask2'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+; AVX512-LABEL: 'test_gather_16f32_const_mask2'
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64>
+; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res
+;
+  %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+  %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+
+  %sext_ind = sext <16 x i32> %ind to <16 x i64>
+  %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind
+
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  ret <16 x float>%res
+}
+
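The const_mask2 variant differs from test_gather_16f32_const_mask only in how the pointer vector is formed: the scalar base is splatted across all lanes and then offset by vector indices. The checks show that the splat (insertelement plus shufflevector) costs at most 1-2 and the vector GEP costs 0, so the gather call carries essentially the entire cost. The same splat idiom in a self-contained sketch, using poison placeholders as current IR style prefers (the test itself keeps the older undef spelling):

define <16 x ptr> @splat_base_gep_sketch(ptr %base, <16 x i64> %idx) {
  ; broadcast the scalar base pointer across all 16 lanes
  %ins = insertelement <16 x ptr> poison, ptr %base, i64 0
  %splat = shufflevector <16 x ptr> %ins, <16 x ptr> poison, <16 x i32> zeroinitializer
  ; per-lane float offsets; vector GEPs like this are modeled as free (cost 0)
  %gep = getelementptr float, <16 x ptr> %splat, <16 x i64> %idx
  ret <16 x ptr> %gep
}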
+define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
+; SSE2-LABEL: 'test_scatter_16i32'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to 
<16 x i1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_scatter_16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_scatter_16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SKL-LABEL: 'test_scatter_16i32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %imask = bitcast i16 %mask to <16 x i1> +; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 + %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer + + %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind + %imask = bitcast i16 %mask to <16 x i1> + call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>%val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) + ret void +} + +define void @test_scatter_8i32(<8 x i32>%a1, <8 x ptr> %ptr, <8 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_8i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) + ret void +} + +define void @test_scatter_4i32(<4 x i32>%a1, <4 x ptr> %ptr, <4 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, 
<4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_4i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; KNL-LABEL: 'test_scatter_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SKX-LABEL: 'test_scatter_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) + ret void +} + +define <4 x float> @test_gather_4f32(ptr %ptr, <4 x i32> %ind, <4 x i1>%mask) { +; SSE2-LABEL: 'test_gather_4f32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SSE42-LABEL: 'test_gather_4f32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX1-LABEL: 'test_gather_4f32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX2-LABEL: 'test_gather_4f32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKL-LABEL: 'test_gather_4f32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%sext_ind = sext <4 x i32> %ind to <4 x i64>
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; KNL-LABEL: 'test_gather_4f32'
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; SKX-LABEL: 'test_gather_4f32'
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+  %sext_ind = sext <4 x i32> %ind to <4 x i64>
+  %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+
+  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+  ret <4 x float>%res
+}
+
+define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) {
+; SSE2-LABEL: 'test_gather_4f32_const_mask'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; SSE42-LABEL: 'test_gather_4f32_const_mask'
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; AVX1-LABEL: 'test_gather_4f32_const_mask'
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; AVX2-LABEL: 'test_gather_4f32_const_mask'
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; SKL-LABEL: 'test_gather_4f32_const_mask'
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; KNL-LABEL: 'test_gather_4f32_const_mask'
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+; SKX-LABEL: 'test_gather_4f32_const_mask'
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64>
+; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res
+;
+  %sext_ind = sext <4 x i32> %ind to <4 x i64>
+  %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind
+
+  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+  ret <4 x float>%res
+}
+
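Across these gather tests the SKL and SKX rows report cost 1 because those subtargets have usable hardware gathers (vgatherdps and friends), while KNL keeps a scalarization-level cost for the 128-bit forms it handles poorly. To probe such numbers outside the checked-in test, a query can be driven through the cost-model printer pass; the triple, attribute, and function below are illustrative choices, not taken from this patch:

; RUN: opt < %s -passes="print<cost-model>" -disable-output \
; RUN:   -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 2>&1 | FileCheck %s
declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)
define <4 x float> @probe_gather_cost(<4 x ptr> %p, <4 x i1> %m) {
; CHECK: Cost Model: {{.*}} %r = call <4 x float> @llvm.masked.gather
  %r = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %p, i32 4, <4 x i1> %m, <4 x float> poison)
  ret <4 x float> %r
}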
+declare <8 x double> @llvm.masked.load.v8f64.p0(ptr, i32, <8 x i1>, <8 x double>)
+declare <7 x double> @llvm.masked.load.v7f64.p0(ptr, i32, <7 x i1>, <7 x double>)
+declare <6 x double> @llvm.masked.load.v6f64.p0(ptr, i32, <6 x i1>, <6 x double>)
+declare <5 x double> @llvm.masked.load.v5f64.p0(ptr, i32, <5 x i1>, <5 x double>)
+declare <4 x double> @llvm.masked.load.v4f64.p0(ptr, i32, <4 x i1>, <4 x double>)
+declare <3 x double> @llvm.masked.load.v3f64.p0(ptr, i32, <3 x i1>, <3 x double>)
+declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>)
+declare <1 x double> @llvm.masked.load.v1f64.p0(ptr, i32, <1 x i1>, <1 x double>)
+
+declare <16 x float> @llvm.masked.load.v16f32.p0(ptr, i32, <16 x i1>, <16 x float>)
+declare <15 x float> @llvm.masked.load.v15f32.p0(ptr, i32, <15 x i1>, <15 x float>)
+declare <14 x float> @llvm.masked.load.v14f32.p0(ptr, i32, <14 x i1>, <14 x float>)
+declare <13 x float> 
@llvm.masked.load.v13f32.p0(ptr, i32, <13 x i1>, <13 x float>) +declare <12 x float> @llvm.masked.load.v12f32.p0(ptr, i32, <12 x i1>, <12 x float>) +declare <11 x float> @llvm.masked.load.v11f32.p0(ptr, i32, <11 x i1>, <11 x float>) +declare <10 x float> @llvm.masked.load.v10f32.p0(ptr, i32, <10 x i1>, <10 x float>) +declare <9 x float> @llvm.masked.load.v9f32.p0(ptr, i32, <9 x i1>, <9 x float>) +declare <8 x float> @llvm.masked.load.v8f32.p0(ptr, i32, <8 x i1>, <8 x float>) +declare <7 x float> @llvm.masked.load.v7f32.p0(ptr, i32, <7 x i1>, <7 x float>) +declare <6 x float> @llvm.masked.load.v6f32.p0(ptr, i32, <6 x i1>, <6 x float>) +declare <5 x float> @llvm.masked.load.v5f32.p0(ptr, i32, <5 x i1>, <5 x float>) +declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>) +declare <3 x float> @llvm.masked.load.v3f32.p0(ptr, i32, <3 x i1>, <3 x float>) +declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>) +declare <1 x float> @llvm.masked.load.v1f32.p0(ptr, i32, <1 x i1>, <1 x float>) + +declare <8 x i64> @llvm.masked.load.v8i64.p0(ptr, i32, <8 x i1>, <8 x i64>) +declare <7 x i64> @llvm.masked.load.v7i64.p0(ptr, i32, <7 x i1>, <7 x i64>) +declare <6 x i64> @llvm.masked.load.v6i64.p0(ptr, i32, <6 x i1>, <6 x i64>) +declare <5 x i64> @llvm.masked.load.v5i64.p0(ptr, i32, <5 x i1>, <5 x i64>) +declare <4 x i64> @llvm.masked.load.v4i64.p0(ptr, i32, <4 x i1>, <4 x i64>) +declare <3 x i64> @llvm.masked.load.v3i64.p0(ptr, i32, <3 x i1>, <3 x i64>) +declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.load.v1i64.p0(ptr, i32, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.load.v16i32.p0(ptr, i32, <16 x i1>, <16 x i32>) +declare <15 x i32> @llvm.masked.load.v15i32.p0(ptr, i32, <15 x i1>, <15 x i32>) +declare <14 x i32> @llvm.masked.load.v14i32.p0(ptr, i32, <14 x i1>, <14 x i32>) +declare <13 x i32> @llvm.masked.load.v13i32.p0(ptr, i32, <13 x i1>, <13 x i32>) +declare <12 x i32> @llvm.masked.load.v12i32.p0(ptr, i32, <12 x i1>, <12 x i32>) +declare <11 x i32> @llvm.masked.load.v11i32.p0(ptr, i32, <11 x i1>, <11 x i32>) +declare <10 x i32> @llvm.masked.load.v10i32.p0(ptr, i32, <10 x i1>, <10 x i32>) +declare <9 x i32> @llvm.masked.load.v9i32.p0(ptr, i32, <9 x i1>, <9 x i32>) +declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32, <8 x i1>, <8 x i32>) +declare <7 x i32> @llvm.masked.load.v7i32.p0(ptr, i32, <7 x i1>, <7 x i32>) +declare <6 x i32> @llvm.masked.load.v6i32.p0(ptr, i32, <6 x i1>, <6 x i32>) +declare <5 x i32> @llvm.masked.load.v5i32.p0(ptr, i32, <5 x i1>, <5 x i32>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>) +declare <3 x i32> @llvm.masked.load.v3i32.p0(ptr, i32, <3 x i1>, <3 x i32>) +declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>) +declare <1 x i32> @llvm.masked.load.v1i32.p0(ptr, i32, <1 x i1>, <1 x i32>) + +declare <32 x i16> @llvm.masked.load.v32i16.p0(ptr, i32, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.load.v16i16.p0(ptr, i32, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.load.v64i8.p0(ptr, i32, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.load.v32i8.p0(ptr, i32, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>) 
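These declaration lists are as long as they are because the masked intrinsics are overloaded: every distinct element type and vector length the tests exercise needs its own declaration, with the mangled suffix spelling out the overload. A minimal self-contained illustration of the naming rule (hypothetical module, not part of the patch):

declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)
define <4 x i32> @masked_load_demo(ptr %p, <4 x i1> %m) {
  ; suffix v4i32.p0 = a <4 x i32> result loaded through a plain ptr
  %v = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %p, i32 4, <4 x i1> %m, <4 x i32> zeroinitializer)
  ret <4 x i32> %v
}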
+ +declare void @llvm.masked.store.v8f64.p0(<8 x double>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7f64.p0(<7 x double>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6f64.p0(<6 x double>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5f64.p0(<5 x double>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4f64.p0(<4 x double>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3f64.p0(<3 x double>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2f64.p0(<2 x double>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1f64.p0(<1 x double>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v16f32.p0(<16 x float>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v15f32.p0(<15 x float>, ptr, i32, <15 x i1>) +declare void @llvm.masked.store.v14f32.p0(<14 x float>, ptr, i32, <14 x i1>) +declare void @llvm.masked.store.v13f32.p0(<13 x float>, ptr, i32, <13 x i1>) +declare void @llvm.masked.store.v12f32.p0(<12 x float>, ptr, i32, <12 x i1>) +declare void @llvm.masked.store.v11f32.p0(<11 x float>, ptr, i32, <11 x i1>) +declare void @llvm.masked.store.v10f32.p0(<10 x float>, ptr, i32, <10 x i1>) +declare void @llvm.masked.store.v9f32.p0(<9 x float>, ptr, i32, <9 x i1>) +declare void @llvm.masked.store.v8f32.p0(<8 x float>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6f32.p0(<6 x float>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5f32.p0(<5 x float>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3f32.p0(<3 x float>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2f32.p0(<2 x float>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1f32.p0(<1 x float>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v8i64.p0(<8 x i64>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7i64.p0(<7 x i64>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6i64.p0(<6 x i64>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5i64.p0(<5 x i64>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4i64.p0(<4 x i64>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3i64.p0(<3 x i64>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1i64.p0(<1 x i64>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v15i32.p0(<15 x i32>, ptr, i32, <15 x i1>) +declare void @llvm.masked.store.v14i32.p0(<14 x i32>, ptr, i32, <14 x i1>) +declare void @llvm.masked.store.v13i32.p0(<13 x i32>, ptr, i32, <13 x i1>) +declare void @llvm.masked.store.v12i32.p0(<12 x i32>, ptr, i32, <12 x i1>) +declare void @llvm.masked.store.v11i32.p0(<11 x i32>, ptr, i32, <11 x i1>) +declare void @llvm.masked.store.v10i32.p0(<10 x i32>, ptr, i32, <10 x i1>) +declare void @llvm.masked.store.v9i32.p0(<9 x i32>, ptr, i32, <9 x i1>) +declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7i32.p0(<7 x i32>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6i32.p0(<6 x i32>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5i32.p0(<5 x i32>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3i32.p0(<3 x i32>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>) +declare void 
@llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v32i16.p0(<32 x i16>, ptr, i32, <32 x i1>) +declare void @llvm.masked.store.v16i16.p0(<16 x i16>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>) + +declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>) +declare void @llvm.masked.store.v32i8.p0(<32 x i8>, ptr, i32, <32 x i1>) +declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>) + +declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>) +declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>) + +declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>) +declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr>, i32, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>) + +declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>) + +declare void 
@llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>) + +declare void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>) + +declare void @llvm.masked.scatter.v32i16.v32p0(<32 x i16>, <32 x ptr>, i32, <32 x i1>) +declare void @llvm.masked.scatter.v16i16.v16p0(<16 x i16>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>) + +declare void @llvm.masked.scatter.v64i8.v64p0(<64 x i8>, <64 x ptr>, i32, <64 x i1>) +declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>) +declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>) + +declare <8 x double> @llvm.masked.expandload.v8f64(ptr, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.expandload.v4f64(ptr, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.expandload.v2f64(ptr, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.expandload.v1f64(ptr, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.expandload.v16f32(ptr, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.expandload.v8f32(ptr, <8 x i1>, <8 x float>) +declare <4 x float> @llvm.masked.expandload.v4f32(ptr, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.expandload.v2f32(ptr, <2 x i1>, <2 x float>) + +declare <8 x i64> @llvm.masked.expandload.v8i64(ptr, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.expandload.v4i64(ptr, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.expandload.v1i64(ptr, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.expandload.v16i32(ptr, <16 x i1>, <16 x i32>) +declare <8 x i32> @llvm.masked.expandload.v8i32(ptr, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.expandload.v4i32(ptr, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.expandload.v2i32(ptr, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.expandload.v4i16(ptr, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.expandload.v8i8(ptr, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.compressstore.v8f64(<8 x double>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4f64(<4 x double>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2f64(<2 x double>, ptr, <2 x i1>) +declare void @llvm.masked.compressstore.v1f64(<1 x double>, ptr, <1 x i1>) + +declare void 
@llvm.masked.compressstore.v16f32(<16 x float>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8f32(<8 x float>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4f32(<4 x float>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2f32(<2 x float>, ptr, <2 x i1>)
+
+declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>)
+declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>)
+
+declare void @llvm.masked.compressstore.v16i32(<16 x i32>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>)
+declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>)
+
+declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>)
+declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>)
+
+declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>)
+declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>)
+declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>)
+declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>)
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll
new file mode 100644
index 0000000000000..416742a94e0d3
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-backward.ll
@@ -0,0 +1,369 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+declare void @llvm.assume(i1)
+
+define void @different_non_constant_strides_known_backward(ptr %A) {
+; CHECK-LABEL: 'different_non_constant_strides_known_backward'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: %l = load i32, ptr %gep, align 4 ->
+; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + %l = load i32, ptr %gep, align 4 + %add = add nsw i32 %l, 5 + %iv.mul.2 = shl nuw nsw i64 %iv, 1 + %gep.mul.2 = getelementptr inbounds i32, ptr %A, i64 %iv.mul.2 + store i32 %add, ptr %gep.mul.2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @different_non_constant_strides_known_backward_distance_larger_than_trip_count(ptr %A) { +; CHECK-LABEL: 'different_non_constant_strides_known_backward_distance_larger_than_trip_count' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %l = load i32, ptr %gep, align 4 -> +; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %A.1024 = getelementptr inbounds i8, ptr %A, i64 1024 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + %l = load i32, ptr %gep, align 4 + %add = add nsw i32 %l, 5 + %iv.mul.2 = shl nuw nsw i64 %iv, 1 + %gep.mul.2 = getelementptr inbounds i32, ptr %A.1024, i64 %iv.mul.2 + store i32 %add, ptr %gep.mul.2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @different_non_constant_strides_known_backward_min_distance_16(ptr %A) { +; CHECK-LABEL: 'different_non_constant_strides_known_backward_min_distance_16' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %l = load i32, ptr %gep, align 4 -> +; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %A.16 = getelementptr inbounds i8, ptr %A, i64 16 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + %l = load i32, ptr %gep, align 4 + %add = add nsw i32 %l, 5 + %iv.mul.2 = shl nuw nsw i64 %iv, 1 + %gep.mul.2 = getelementptr inbounds i32, ptr %A.16, i64 %iv.mul.2 + store i32 %add, ptr %gep.mul.2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @different_non_constant_strides_known_backward_min_distance_15(ptr %A) { +; CHECK-LABEL: 'different_non_constant_strides_known_backward_min_distance_15' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %l = load i32, ptr %gep, align 4 -> +; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %A.15 = getelementptr inbounds i8, ptr %A, i64 15 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + %l = load i32, ptr %gep, align 4 + %add = add nsw i32 %l, 5 + %iv.mul.2 = shl nuw nsw i64 %iv, 1 + %gep.mul.2 = getelementptr inbounds i32, ptr %A.15, i64 %iv.mul.2 + store i32 %add, ptr %gep.mul.2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @different_non_constant_strides_known_backward_min_distance_8(ptr %A) { +; CHECK-LABEL: 'different_non_constant_strides_known_backward_min_distance_8' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %l = load i32, ptr %gep, align 4 -> +; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %A.8 = getelementptr inbounds i8, ptr %A, i64 8 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + %l = load i32, ptr %gep, align 4 + %add = add nsw i32 %l, 5 + %iv.mul.2 = shl nuw nsw i64 %iv, 1 + %gep.mul.2 = getelementptr inbounds i32, ptr %A.8, i64 %iv.mul.2 + store i32 %add, ptr %gep.mul.2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @different_non_constant_strides_known_backward_min_distance_3(ptr %A) { +; CHECK-LABEL: 'different_non_constant_strides_known_backward_min_distance_3' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %l = load i32, ptr %gep, align 4 -> +; CHECK-NEXT: store i32 %add, ptr %gep.mul.2, align 4 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %A.3 = getelementptr inbounds i8, ptr %A, i64 3 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + %l = load i32, ptr %gep, align 4 + %add = add nsw i32 %l, 5 + %iv.mul.2 = shl nuw nsw i64 %iv, 1 + %gep.mul.2 = getelementptr inbounds i32, ptr %A.3, i64 %iv.mul.2 + store i32 %add, ptr %gep.mul.2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @different_non_constant_strides_known_backward_via_assume(ptr %A, i64 %scale) { +; CHECK-LABEL: 'different_non_constant_strides_known_backward_via_assume' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %c = icmp sgt i64 %scale, 0 + call void @llvm.assume(i1 %c) + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + %l = load i32, ptr %gep, align 4 + %add = add nsw i32 %l, 5 + %iv.mul.2 = shl nuw nsw i64 %iv, %scale + %gep.mul.2 = getelementptr inbounds i32, ptr %A, i64 %iv.mul.2 + store i32 %add, ptr %gep.mul.2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @different_non_constant_strides_known_backward_via_assume_distance_larger_than_trip_count(ptr %A, i64 %scale) { +; CHECK-LABEL: 'different_non_constant_strides_known_backward_via_assume_distance_larger_than_trip_count' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %A.1024 = getelementptr inbounds i8, ptr %A, i64 1024 + %c = icmp sgt i64 %scale, 0 + call void @llvm.assume(i1 %c) + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + %l = load i32, ptr %gep, align 4 + %add = add nsw i32 %l, 5 + %iv.mul.2 = shl nuw nsw i64 %iv, %scale + %gep.mul.2 = getelementptr inbounds i32, ptr %A.1024, i64 %iv.mul.2 + store i32 %add, ptr %gep.mul.2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @different_non_constant_strides_known_backward_via_assume_min_distance_3(ptr %A, i64 %scale) { +; CHECK-LABEL: 'different_non_constant_strides_known_backward_via_assume_min_distance_3' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %A.3 = getelementptr inbounds i8, ptr %A, i64 3 + %c = icmp sgt i64 %scale, 0 + call void @llvm.assume(i1 %c) + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + %l = load i32, ptr %gep, align 4 + %add = add nsw i32 %l, 5 + %iv.mul.2 = shl nuw nsw i64 %iv, %scale + %gep.mul.2 = getelementptr inbounds i32, ptr %A.3, i64 %iv.mul.2 + store i32 %add, ptr %gep.mul.2, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @different_non_constant_strides_not_known_backward(ptr %A, i64 %scale) { +; CHECK-LABEL: 'different_non_constant_strides_not_known_backward' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep = getelementptr inbounds i32, ptr %A, i64 %iv
+  %l = load i32, ptr %gep, align 4
+  %add = add nsw i32 %l, 5
+  %iv.mul.2 = shl nuw nsw i64 %iv, %scale
+  %gep.mul.2 = getelementptr inbounds i32, ptr %A, i64 %iv.mul.2
+  store i32 %add, ptr %gep.mul.2, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 256
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll
new file mode 100644
index 0000000000000..aa22a2143352d
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll
@@ -0,0 +1,182 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+declare void @llvm.assume(i1)
+
+define void @different_non_constant_strides_known_forward(ptr %A) {
+; CHECK-LABEL: 'different_non_constant_strides_known_forward'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 ->
+; CHECK-NEXT: store i32 %add, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.mul.2 = shl nuw nsw i64 %iv, 1
+  %gep.mul.2 = getelementptr inbounds i32, ptr %A, i64 %iv.mul.2
+  %l = load i32, ptr %gep.mul.2, align 4
+  %add = add nsw i32 %l, 5
+  %gep = getelementptr inbounds i32, ptr %A, i64 %iv
+  store i32 %add, ptr %gep, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 256
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @different_non_constant_strides_known_forward_min_distance_3(ptr %A) {
+; CHECK-LABEL: 'different_non_constant_strides_known_forward_min_distance_3'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 ->
+; CHECK-NEXT: store i32 %add, ptr %gep, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %A.3 = getelementptr inbounds i8, ptr %A, i64 3 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %iv.mul.2 = shl nuw nsw i64 %iv, 1 + %gep.mul.2 = getelementptr inbounds i32, ptr %A.3, i64 %iv.mul.2 + %l = load i32, ptr %gep.mul.2, align 4 + %add = add nsw i32 %l, 5 + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + store i32 %add, ptr %gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @different_non_constant_strides_known_forward_via_assume(ptr %A, i64 %scale) { +; CHECK-LABEL: 'different_non_constant_strides_known_forward_via_assume' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %c = icmp sgt i64 %scale, 0 + call void @llvm.assume(i1 %c) + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %iv.mul.2 = shl nuw nsw i64 %iv, %scale + %gep.mul.2 = getelementptr inbounds i32, ptr %A, i64 %iv.mul.2 + %l = load i32, ptr %gep.mul.2, align 4 + %add = add nsw i32 %l, 5 + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + store i32 %add, ptr %gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @different_non_constant_strides_known_forward_via_assume_min_distance_3(ptr %A, i64 %scale) { +; CHECK-LABEL: 'different_non_constant_strides_known_forward_via_assume_min_distance_3' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %A.3 = getelementptr inbounds i8, ptr %A, i64 3 + %c = icmp sgt i64 %scale, 0 + call void @llvm.assume(i1 %c) + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %iv.mul.2 = shl nuw nsw i64 %iv, %scale + %gep.mul.2 = getelementptr inbounds i32, ptr %A.3, i64 %iv.mul.2 + %l = load i32, ptr %gep.mul.2, align 4 + %add = add nsw i32 %l, 5 + %gep = getelementptr inbounds i32, ptr %A, i64 %iv + store i32 %add, ptr %gep, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 256 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @different_non_constant_strides_not_known_forward(ptr %A, i64 %scale) { +; CHECK-LABEL: 'different_non_constant_strides_not_known_forward' +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.mul.2 = shl nuw nsw i64 %iv, %scale
+  %gep.mul.2 = getelementptr inbounds i32, ptr %A, i64 %iv.mul.2
+  %l = load i32, ptr %gep.mul.2, align 4
+  %add = add nsw i32 %l, 5
+  %gep = getelementptr inbounds i32, ptr %A, i64 %iv
+  store i32 %add, ptr %gep, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 256
+  br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll b/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll
new file mode 100644
index 0000000000000..08e0bae7f05ba
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/positive-dependence-distance-different-access-sizes.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; TODO: No runtime checks should be needed, as the distance between the
+; accesses is larger than the range accessed over the trip count: the store
+; range [400 + %A, 804 + %A) never overlaps the load range [%A, 101 + %A).
+define void @test_distance_positive_independent_via_trip_count(ptr %A) {
+; CHECK-LABEL: 'test_distance_positive_independent_via_trip_count'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[GRP1:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.A.400 = getelementptr inbounds i32, ptr %A.400, i64 %iv
+; CHECK-NEXT: Against group ([[GRP2:0x[0-9a-f]+]]):
+; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[GRP1]]:
+; CHECK-NEXT: (Low: (400 + %A) High: (804 + %A))
+; CHECK-NEXT: Member: {(400 + %A),+,4}<%loop>
+; CHECK-NEXT: Group [[GRP2]]:
+; CHECK-NEXT: (Low: %A High: (101 + %A))
+; CHECK-NEXT: Member: {%A,+,1}<%loop>
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+  %A.400 = getelementptr inbounds i8, ptr %A, i64 400
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.A.400 = getelementptr inbounds i32, ptr %A.400, i64 %iv
+  %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
+  %l = load i8, ptr %gep.A, align 1
+  %ext = zext i8 %l to i32
+  store i32 %ext, ptr %gep.A.400, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv, 100
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; TODO: Currently this is considered vectorizable with runtime checks, but the
+; runtime checks are never true.
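+; (Worked arithmetic, inferred from the Low/High bounds in the CHECK lines
+; below: the store group spans [1 + %A, 405 + %A) and the load group spans
+; [%A, 101 + %A), so the two ranges overlap on every run and the generated
+; no-overlap check can never succeed.)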
+define void @test_distance_positive_backwards(ptr %A) { +; CHECK-LABEL: 'test_distance_positive_backwards' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[GRP3:0x[0-9a-f]+]]): +; CHECK-NEXT: %gep.A.400 = getelementptr inbounds i32, ptr %A.1, i64 %iv +; CHECK-NEXT: Against group ([[GRP4:0x[0-9a-f]+]]): +; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[GRP3]]: +; CHECK-NEXT: (Low: (1 + %A) High: (405 + %A)) +; CHECK-NEXT: Member: {(1 + %A),+,4}<%loop> +; CHECK-NEXT: Group [[GRP4]]: +; CHECK-NEXT: (Low: %A High: (101 + %A)) +; CHECK-NEXT: Member: {%A,+,1}<%loop> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: +; +entry: + %A.1 = getelementptr inbounds i8, ptr %A, i64 1 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.A.400 = getelementptr inbounds i32, ptr %A.1, i64 %iv + %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv + %l = load i8, ptr %gep.A, align 1 + %ext = zext i8 %l to i32 + store i32 %ext, ptr %gep.A.400, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @test_distance_positive_via_assume(ptr %A, i64 %off) { +; CHECK-LABEL: 'test_distance_positive_via_assume' +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group ([[GRP5:0x[0-9a-f]+]]): +; CHECK-NEXT: %gep.A.400 = getelementptr inbounds i32, ptr %A.off, i64 %iv +; CHECK-NEXT: Against group ([[GRP6:0x[0-9a-f]+]]): +; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group [[GRP5]]: +; CHECK-NEXT: (Low: (%off + %A) High: (404 + %off + %A)) +; CHECK-NEXT: Member: {(%off + %A),+,4}<%loop> +; CHECK-NEXT: Group [[GRP6]]: +; CHECK-NEXT: (Low: %A High: (101 + %A)) +; CHECK-NEXT: Member: {%A,+,1}<%loop> +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+  %c = icmp sgt i64 %off, 0
+  call void @llvm.assume(i1 %c)
+  %A.off = getelementptr inbounds i8, ptr %A, i64 %off
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.A.400 = getelementptr inbounds i32, ptr %A.off, i64 %iv
+  %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
+  %l = load i8, ptr %gep.A, align 1
+  %ext = zext i8 %l to i32
+  store i32 %ext, ptr %gep.A.400, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv, 100
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+declare void @llvm.assume(i1)
diff --git a/llvm/test/Assembler/atomic.ll b/llvm/test/Assembler/atomic.ll
index e967407af14bd..32fe82ef2268c 100644
--- a/llvm/test/Assembler/atomic.ll
+++ b/llvm/test/Assembler/atomic.ll
@@ -72,3 +72,19 @@ define void @fp_atomics(ptr %x) {
   ret void
 }
+
+define void @fp_vector_atomicrmw(ptr %x, <2 x half> %val) {
+  ; CHECK: %atomic.fadd = atomicrmw fadd ptr %x, <2 x half> %val seq_cst
+  %atomic.fadd = atomicrmw fadd ptr %x, <2 x half> %val seq_cst
+
+  ; CHECK: %atomic.fsub = atomicrmw fsub ptr %x, <2 x half> %val seq_cst
+  %atomic.fsub = atomicrmw fsub ptr %x, <2 x half> %val seq_cst
+
+  ; CHECK: %atomic.fmax = atomicrmw fmax ptr %x, <2 x half> %val seq_cst
+  %atomic.fmax = atomicrmw fmax ptr %x, <2 x half> %val seq_cst
+
+  ; CHECK: %atomic.fmin = atomicrmw fmin ptr %x, <2 x half> %val seq_cst
+  %atomic.fmin = atomicrmw fmin ptr %x, <2 x half> %val seq_cst
+
+  ret void
+}
diff --git a/llvm/test/Assembler/flags.ll b/llvm/test/Assembler/flags.ll
index d75b0cb0ea824..e0ad8bf000be1 100644
--- a/llvm/test/Assembler/flags.ll
+++ b/llvm/test/Assembler/flags.ll
@@ -256,6 +256,13 @@ define i64 @test_zext(i32 %a) {
   ret i64 %res
 }
 
+define float @test_uitofp(i32 %a) {
+; CHECK: %res = uitofp nneg i32 %a to float
+  %res = uitofp nneg i32 %a to float
+  ret float %res
+}
+
+
 define i64 @test_or(i64 %a, i64 %b) {
 ; CHECK: %res = or disjoint i64 %a, %b
   %res = or disjoint i64 %a, %b
diff --git a/llvm/test/Assembler/invalid-atomicrmw-scalable.ll b/llvm/test/Assembler/invalid-atomicrmw-scalable.ll
new file mode 100644
index 0000000000000..e474134715704
--- /dev/null
+++ b/llvm/test/Assembler/invalid-atomicrmw-scalable.ll
@@ -0,0 +1,41 @@
+; RUN: split-file %s %t --leading-lines
+; RUN: not llvm-as < %t/scalable_fp_vector_atomicrmw_xchg.ll 2>&1 | FileCheck -check-prefix=ERR0 %s
+; RUN: not llvm-as < %t/scalable_int_vector_atomicrmw_xchg.ll 2>&1 | FileCheck -check-prefix=ERR1 %s
+; RUN: not llvm-as < %t/scalable_ptr_vector_atomicrmw_xchg.ll 2>&1 | FileCheck -check-prefix=ERR2 %s
+; RUN: not llvm-as < %t/scalable_fp_vector_atomicrmw_fadd.ll 2>&1 | FileCheck -check-prefix=ERR3 %s
+; RUN: not llvm-as < %t/scalable_int_vector_atomicrmw_add.ll 2>&1 | FileCheck -check-prefix=ERR4 %s
+
+;--- scalable_fp_vector_atomicrmw_xchg.ll
+define <vscale x 2 x half> @scalable_fp_vector_atomicrmw_xchg(ptr %x, <vscale x 2 x half> %val) {
+; ERR0: :41: error: atomicrmw operand may not be scalable
+  %atomic.xchg = atomicrmw xchg ptr %x, <vscale x 2 x half> %val seq_cst
+  ret <vscale x 2 x half> %atomic.xchg
+}
+
+;--- scalable_int_vector_atomicrmw_xchg.ll
+define <vscale x 2 x i16> @scalable_int_vector_atomicrmw_xchg(ptr %x, <vscale x 2 x i16> %val) {
+; ERR1: :41: error: atomicrmw operand may not be scalable
+  %atomic.xchg = atomicrmw xchg ptr %x, <vscale x 2 x i16> %val seq_cst
+  ret <vscale x 2 x i16> %atomic.xchg
+}
+
+;--- scalable_ptr_vector_atomicrmw_xchg.ll
+define <vscale x 2 x ptr> @scalable_ptr_vector_atomicrmw_xchg(ptr %x, <vscale x 2 x ptr> %val) {
+; ERR2: :41: error: atomicrmw operand may not be scalable
+  %atomic.xchg = atomicrmw xchg ptr %x, <vscale x 2 x ptr> %val seq_cst
+  ret <vscale x 2 x ptr> %atomic.xchg
+}
+
+;--- scalable_fp_vector_atomicrmw_fadd.ll
+define <vscale x 2 x half> @scalable_fp_vector_atomicrmw_fadd(ptr %x, <vscale x 2 x half> %val) {
+; ERR3: :41: error: atomicrmw operand may not be scalable
+  %atomic.fadd = atomicrmw fadd ptr %x, <vscale x 2 x half> %val seq_cst
+  ret <vscale x 2 x half> %atomic.fadd
+}
+
+;--- scalable_int_vector_atomicrmw_add.ll
+define <vscale x 2 x i16> @scalable_int_vector_atomicrmw_add(ptr %x, <vscale x 2 x i16> %val) {
+; ERR4: :39: error: atomicrmw operand may not be scalable
+  %atomic.add = atomicrmw add ptr %x, <vscale x 2 x i16> %val seq_cst
+  ret <vscale x 2 x i16> %atomic.add
+}
diff --git a/llvm/test/Assembler/invalid-atomicrmw-xchg-fp-vector.ll b/llvm/test/Assembler/invalid-atomicrmw-xchg-fp-vector.ll
new file mode 100644
index 0000000000000..ea523255ee774
--- /dev/null
+++ b/llvm/test/Assembler/invalid-atomicrmw-xchg-fp-vector.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: error: atomicrmw xchg operand must be an integer, floating point, or pointer type
+define <2 x half> @fp_vector_atomicrmw(ptr %x, <2 x half> %val) {
+  %atomic.xchg = atomicrmw xchg ptr %x, <2 x half> %val seq_cst
+  ret <2 x half> %atomic.xchg
+}
diff --git a/llvm/test/Bitcode/dbg-record-roundtrip.ll b/llvm/test/Bitcode/dbg-record-roundtrip.ll
index bd347cac72067..cc83fdd4fa538 100644
--- a/llvm/test/Bitcode/dbg-record-roundtrip.ll
+++ b/llvm/test/Bitcode/dbg-record-roundtrip.ll
@@ -15,6 +15,16 @@
 ; RUN: | llvm-dis --load-bitcode-into-experimental-debuginfo-iterators=true --write-experimental-debuginfo=true \
 ; RUN: | FileCheck %s --check-prefixes=RECORDS
 
+;; When preserving, we should output the format the bitcode was written in
+;; regardless of the value of the write flag.
+; RUN: llvm-as --write-experimental-debuginfo-iterators-to-bitcode=true %s -o - \
+; RUN: | llvm-dis --preserve-input-debuginfo-format=true --write-experimental-debuginfo=false \
+; RUN: | FileCheck %s --check-prefixes=RECORDS
+
+; RUN: llvm-as --write-experimental-debuginfo-iterators-to-bitcode=false %s -o - \
+; RUN: | llvm-dis --preserve-input-debuginfo-format=true --write-experimental-debuginfo=true \
+; RUN: | FileCheck %s
+
 ;; Check that verify-uselistorder passes regardless of input format.
; RUN: llvm-as %s --write-experimental-debuginfo-iterators-to-bitcode=true -o - | verify-uselistorder ; RUN: verify-uselistorder %s diff --git a/llvm/test/Bitcode/flags.ll b/llvm/test/Bitcode/flags.ll index 96995ec570c93..fd56694ccceb2 100644 --- a/llvm/test/Bitcode/flags.ll +++ b/llvm/test/Bitcode/flags.ll @@ -18,6 +18,8 @@ second: ; preds = %first %z = add i32 %a, 0 ; [#uses=0] %hh = zext nneg i32 %a to i64 %ll = zext i32 %s to i64 + %ff = uitofp nneg i32 %a to float + %bb = uitofp i32 %s to float %jj = or disjoint i32 %a, 0 %oo = or i32 %a, 0 %tu = trunc nuw i32 %a to i16 @@ -39,6 +41,8 @@ first: ; preds = %entry %zz = add i32 %a, 0 ; [#uses=0] %kk = zext nneg i32 %a to i64 %rr = zext i32 %ss to i64 + %ww = uitofp nneg i32 %a to float + %xx = uitofp i32 %ss to float %mm = or disjoint i32 %a, 0 %nn = or i32 %a, 0 %tuu = trunc nuw i32 %a to i16 diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll index b0507e9d075fa..9687ba683fb7e 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc_immo.ll @@ -35,16 +35,24 @@ define i8 @load_atomic_i8_aligned_monotonic_const(ptr readonly %ptr) { } define i8 @load_atomic_i8_aligned_acquire(ptr %ptr) { -; CHECK-LABEL: load_atomic_i8_aligned_acquire: -; CHECK: ldapurb w0, [x0, #4] +; GISEL-LABEL: load_atomic_i8_aligned_acquire: +; GISEL: add x8, x0, #4 +; GISEL: ldaprb w0, [x8] +; +; SDAG-LABEL: load_atomic_i8_aligned_acquire: +; SDAG: ldapurb w0, [x0, #4] %gep = getelementptr inbounds i8, ptr %ptr, i32 4 %r = load atomic i8, ptr %gep acquire, align 1 ret i8 %r } define i8 @load_atomic_i8_aligned_acquire_const(ptr readonly %ptr) { -; CHECK-LABEL: load_atomic_i8_aligned_acquire_const: -; CHECK: ldapurb w0, [x0, #4] +; GISEL-LABEL: load_atomic_i8_aligned_acquire_const: +; GISEL: add x8, x0, #4 +; GISEL: ldaprb w0, [x8] +; +; SDAG-LABEL: load_atomic_i8_aligned_acquire_const: +; SDAG: ldapurb w0, [x0, #4] %gep = getelementptr inbounds i8, ptr %ptr, i32 4 %r = load atomic i8, ptr %gep acquire, align 1 ret i8 %r @@ -101,16 +109,24 @@ define i16 @load_atomic_i16_aligned_monotonic_const(ptr readonly %ptr) { } define i16 @load_atomic_i16_aligned_acquire(ptr %ptr) { -; CHECK-LABEL: load_atomic_i16_aligned_acquire: -; CHECK: ldapurh w0, [x0, #8] +; GISEL-LABEL: load_atomic_i16_aligned_acquire: +; GISEL: add x8, x0, #8 +; GISEL: ldaprh w0, [x8] +; +; SDAG-LABEL: load_atomic_i16_aligned_acquire: +; SDAG: ldapurh w0, [x0, #8] %gep = getelementptr inbounds i16, ptr %ptr, i32 4 %r = load atomic i16, ptr %gep acquire, align 2 ret i16 %r } define i16 @load_atomic_i16_aligned_acquire_const(ptr readonly %ptr) { -; CHECK-LABEL: load_atomic_i16_aligned_acquire_const: -; CHECK: ldapurh w0, [x0, #8] +; GISEL-LABEL: load_atomic_i16_aligned_acquire_const: +; GISEL: add x8, x0, #8 +; GISEL: ldaprh w0, [x8] +; +; SDAG-LABEL: load_atomic_i16_aligned_acquire_const: +; SDAG: ldapurh w0, [x0, #8] %gep = getelementptr inbounds i16, ptr %ptr, i32 4 %r = load atomic i16, ptr %gep acquire, align 2 ret i16 %r @@ -367,16 +383,24 @@ define i8 @load_atomic_i8_unaligned_monotonic_const(ptr readonly %ptr) { } define i8 @load_atomic_i8_unaligned_acquire(ptr %ptr) { -; CHECK-LABEL: load_atomic_i8_unaligned_acquire: -; CHECK: ldapurb w0, [x0, #4] +; GISEL-LABEL: load_atomic_i8_unaligned_acquire: +; GISEL: add x8, x0, #4 +; GISEL: ldaprb w0, [x8] +; +; SDAG-LABEL: load_atomic_i8_unaligned_acquire: +; SDAG: ldapurb w0, 
[x0, #4] %gep = getelementptr inbounds i8, ptr %ptr, i32 4 %r = load atomic i8, ptr %gep acquire, align 1 ret i8 %r } define i8 @load_atomic_i8_unaligned_acquire_const(ptr readonly %ptr) { -; CHECK-LABEL: load_atomic_i8_unaligned_acquire_const: -; CHECK: ldapurb w0, [x0, #4] +; GISEL-LABEL: load_atomic_i8_unaligned_acquire_const: +; GISEL: add x8, x0, #4 +; GISEL: ldaprb w0, [x8] +; +; SDAG-LABEL: load_atomic_i8_unaligned_acquire_const: +; SDAG: ldapurb w0, [x0, #4] %gep = getelementptr inbounds i8, ptr %ptr, i32 4 %r = load atomic i8, ptr %gep acquire, align 1 ret i8 %r @@ -819,7 +843,8 @@ define i128 @load_atomic_i128_unaligned_seq_cst_const(ptr readonly %ptr) { define i8 @load_atomic_i8_from_gep() { ; GISEL-LABEL: load_atomic_i8_from_gep: ; GISEL: bl init -; GISEL: ldapurb w0, [x8, #1] +; GISEL: add x8, x8, #1 +; GISEL: ldaprb w0, [x8] ; ; SDAG-LABEL: load_atomic_i8_from_gep: ; SDAG: bl init @@ -834,7 +859,8 @@ define i8 @load_atomic_i8_from_gep() { define i16 @load_atomic_i16_from_gep() { ; GISEL-LABEL: load_atomic_i16_from_gep: ; GISEL: bl init -; GISEL: ldapurh w0, [x8, #2] +; GISEL: add x8, x8, #2 +; GISEL: ldaprh w0, [x8] ; ; SDAG-LABEL: load_atomic_i16_from_gep: ; SDAG: bl init @@ -884,7 +910,6 @@ define i128 @load_atomic_i128_from_gep() { ; ; SDAG-LABEL: load_atomic_i128_from_gep: ; SDAG: bl init -; SDAG: ldp x0, x1, [sp, #16] ; SDAG: dmb ishld %a = alloca [3 x i128] call void @init(ptr %a) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll index 7163da0dc0243..b619aac709d98 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -993,24 +993,24 @@ define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) #0 { ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: ldrb w8, [x0, #4095] ; CHECK-NOLSE-O1-NEXT: ldrb w9, [x0, w1, sxtw] -; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 ; CHECK-NOLSE-O1-NEXT: ldurb w10, [x0, #-256] -; CHECK-NOLSE-O1-NEXT: add w8, w8, w9 -; CHECK-NOLSE-O1-NEXT: ldrb w9, [x11] -; CHECK-NOLSE-O1-NEXT: add w8, w8, w10 -; CHECK-NOLSE-O1-NEXT: add w0, w8, w9 +; CHECK-NOLSE-O1-NEXT: add w8, w9, w8, uxtb +; CHECK-NOLSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: ldrb w9, [x9] +; CHECK-NOLSE-O1-NEXT: add w8, w8, w10, uxtb +; CHECK-NOLSE-O1-NEXT: add w0, w8, w9, uxtb ; CHECK-NOLSE-O1-NEXT: ret ; ; CHECK-OUTLINE-O1-LABEL: atomic_load_relaxed_8: ; CHECK-OUTLINE-O1: ; %bb.0: ; CHECK-OUTLINE-O1-NEXT: ldrb w8, [x0, #4095] ; CHECK-OUTLINE-O1-NEXT: ldrb w9, [x0, w1, sxtw] -; CHECK-OUTLINE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 ; CHECK-OUTLINE-O1-NEXT: ldurb w10, [x0, #-256] -; CHECK-OUTLINE-O1-NEXT: add w8, w8, w9 -; CHECK-OUTLINE-O1-NEXT: ldrb w9, [x11] -; CHECK-OUTLINE-O1-NEXT: add w8, w8, w10 -; CHECK-OUTLINE-O1-NEXT: add w0, w8, w9 +; CHECK-OUTLINE-O1-NEXT: add w8, w9, w8, uxtb +; CHECK-OUTLINE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-OUTLINE-O1-NEXT: ldrb w9, [x9] +; CHECK-OUTLINE-O1-NEXT: add w8, w8, w10, uxtb +; CHECK-OUTLINE-O1-NEXT: add w0, w8, w9, uxtb ; CHECK-OUTLINE-O1-NEXT: ret ; ; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_8: @@ -1045,12 +1045,12 @@ define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) #0 { ; CHECK-LSE-O1: ; %bb.0: ; CHECK-LSE-O1-NEXT: ldrb w8, [x0, #4095] ; CHECK-LSE-O1-NEXT: ldrb w9, [x0, w1, sxtw] -; CHECK-LSE-O1-NEXT: ldurb w10, [x0, #-256] -; CHECK-LSE-O1-NEXT: add w8, w8, w10 -; CHECK-LSE-O1-NEXT: add w8, w8, w9 +; CHECK-LSE-O1-NEXT: add w8, w9, 
w8, uxtb +; CHECK-LSE-O1-NEXT: ldurb w9, [x0, #-256] +; CHECK-LSE-O1-NEXT: add w8, w8, w9, uxtb ; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936 ; CHECK-LSE-O1-NEXT: ldrb w9, [x9] -; CHECK-LSE-O1-NEXT: add w0, w8, w9 +; CHECK-LSE-O1-NEXT: add w0, w8, w9, uxtb ; CHECK-LSE-O1-NEXT: ret ; ; CHECK-LSE-O0-LABEL: atomic_load_relaxed_8: @@ -1089,24 +1089,24 @@ define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) #0 { ; CHECK-NOLSE-O1: ; %bb.0: ; CHECK-NOLSE-O1-NEXT: ldrh w8, [x0, #8190] ; CHECK-NOLSE-O1-NEXT: ldrh w9, [x0, w1, sxtw #1] -; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 ; CHECK-NOLSE-O1-NEXT: ldurh w10, [x0, #-256] -; CHECK-NOLSE-O1-NEXT: add w8, w8, w9 -; CHECK-NOLSE-O1-NEXT: ldrh w9, [x11] -; CHECK-NOLSE-O1-NEXT: add w8, w8, w10 -; CHECK-NOLSE-O1-NEXT: add w0, w8, w9 +; CHECK-NOLSE-O1-NEXT: add w8, w9, w8, uxth +; CHECK-NOLSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-NOLSE-O1-NEXT: ldrh w9, [x9] +; CHECK-NOLSE-O1-NEXT: add w8, w8, w10, uxth +; CHECK-NOLSE-O1-NEXT: add w0, w8, w9, uxth ; CHECK-NOLSE-O1-NEXT: ret ; ; CHECK-OUTLINE-O1-LABEL: atomic_load_relaxed_16: ; CHECK-OUTLINE-O1: ; %bb.0: ; CHECK-OUTLINE-O1-NEXT: ldrh w8, [x0, #8190] ; CHECK-OUTLINE-O1-NEXT: ldrh w9, [x0, w1, sxtw #1] -; CHECK-OUTLINE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936 ; CHECK-OUTLINE-O1-NEXT: ldurh w10, [x0, #-256] -; CHECK-OUTLINE-O1-NEXT: add w8, w8, w9 -; CHECK-OUTLINE-O1-NEXT: ldrh w9, [x11] -; CHECK-OUTLINE-O1-NEXT: add w8, w8, w10 -; CHECK-OUTLINE-O1-NEXT: add w0, w8, w9 +; CHECK-OUTLINE-O1-NEXT: add w8, w9, w8, uxth +; CHECK-OUTLINE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936 +; CHECK-OUTLINE-O1-NEXT: ldrh w9, [x9] +; CHECK-OUTLINE-O1-NEXT: add w8, w8, w10, uxth +; CHECK-OUTLINE-O1-NEXT: add w0, w8, w9, uxth ; CHECK-OUTLINE-O1-NEXT: ret ; ; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_16: @@ -1141,12 +1141,12 @@ define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) #0 { ; CHECK-LSE-O1: ; %bb.0: ; CHECK-LSE-O1-NEXT: ldrh w8, [x0, #8190] ; CHECK-LSE-O1-NEXT: ldrh w9, [x0, w1, sxtw #1] -; CHECK-LSE-O1-NEXT: ldurh w10, [x0, #-256] -; CHECK-LSE-O1-NEXT: add w8, w8, w10 -; CHECK-LSE-O1-NEXT: add w8, w8, w9 +; CHECK-LSE-O1-NEXT: add w8, w9, w8, uxth +; CHECK-LSE-O1-NEXT: ldurh w9, [x0, #-256] +; CHECK-LSE-O1-NEXT: add w8, w8, w9, uxth ; CHECK-LSE-O1-NEXT: add x9, x0, #291, lsl #12 ; =1191936 ; CHECK-LSE-O1-NEXT: ldrh w9, [x9] -; CHECK-LSE-O1-NEXT: add w0, w8, w9 +; CHECK-LSE-O1-NEXT: add w0, w8, w9, uxth ; CHECK-LSE-O1-NEXT: ret ; ; CHECK-LSE-O0-LABEL: atomic_load_relaxed_16: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll index 0a2d695acb4e0..29c320da6c0a7 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -27,22 +27,6 @@ define void @test_write_register_intrin() { @_ZTIi = external global ptr declare i32 @__gxx_personality_v0(...) 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %2:_(<2 x p0>) = G_INSERT_VECTOR_ELT %0:_, %{{[0-9]+}}:_(p0), %{{[0-9]+}}:_(s32) (in function: vector_of_pointers_insertelement)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for vector_of_pointers_insertelement
-; FALLBACK-WITH-REPORT-OUT-LABEL: vector_of_pointers_insertelement:
-define void @vector_of_pointers_insertelement() {
-  br label %end
-
-block:
-  %dummy = insertelement <2 x ptr> %vec, ptr null, i32 0
-  store <2 x ptr> %dummy, ptr undef
-  ret void
-
-end:
-  %vec = load <2 x ptr>, ptr undef
-  br label %block
-}
-
 ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: RET_ReallyLR implicit $x0 (in function: strict_align_feature)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for strict_align_feature
 ; FALLBACK-WITH-REPORT-OUT-LABEL: strict_align_feature
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index 92ddc6309546f..a131f35e66d03 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -1538,7 +1538,8 @@ define <2 x i32> @test_insertelement(<2 x i32> %vec, i32 %elt, i32 %idx){
 ; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY $d0
 ; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY $w0
 ; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY $w1
-; CHECK: [[RES:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[VEC]], [[ELT]](s32), [[IDX]](s32)
+; CHECK: [[IDX2:%[0-9]+]]:_(s64) = G_ZEXT [[IDX]]
+; CHECK: [[RES:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[VEC]], [[ELT]](s32), [[IDX2]](s64)
 ; CHECK: $d0 = COPY [[RES]](<2 x s32>)
   %res = insertelement <2 x i32> %vec, i32 %elt, i32 %idx
   ret <2 x i32> %res
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index c7f3bcf640e38..c8d313cf31afd 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -377,13 +377,13 @@ define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) {
 ; CHECK-NEXT: liveins: $w1, $x0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: renamable $w8 = LDRBBui renamable $x0, 4095, pcsections !0 :: (load monotonic (s8) from %ir.ptr_unsigned)
-; CHECK-NEXT: renamable $w9 = LDRBBroW renamable $x0, killed renamable $w1, 1, 0, pcsections !0 :: (load unordered (s8) from %ir.ptr_regoff)
-; CHECK-NEXT: renamable $w10 = LDURBBi renamable $x0, -256, pcsections !0 :: (load monotonic (s8) from %ir.ptr_unscaled)
-; CHECK-NEXT: renamable $x11 = ADDXri killed renamable $x0, 291, 12
-; CHECK-NEXT: $w8 = ADDWrs killed renamable $w8, killed renamable $w9, 0, pcsections !0
-; CHECK-NEXT: renamable $w9 = LDRBBui killed renamable $x11, 0, pcsections !0 :: (load unordered (s8) from %ir.ptr_random)
-; CHECK-NEXT: $w8 = ADDWrs killed renamable $w8, killed renamable $w10, 0, pcsections !0
-; CHECK-NEXT: $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0, pcsections !0
+; CHECK-NEXT: renamable $w9 = LDRBBroW renamable $x0, killed renamable $w1, 1, 0 :: (load unordered (s8) from %ir.ptr_regoff)
+; CHECK-NEXT: renamable $w10 = LDURBBi renamable $x0, -256 :: (load monotonic (s8) from %ir.ptr_unscaled)
+; CHECK-NEXT: renamable $w8 = ADDWrx killed renamable $w9, killed renamable $w8, 0, pcsections !0
+; CHECK-NEXT: renamable $x9 = ADDXri killed renamable $x0, 291, 12
+; CHECK-NEXT: renamable $w8 = ADDWrx killed renamable $w8,
killed renamable $w10, 0, pcsections !0 + ; CHECK-NEXT: renamable $w9 = LDRBBui killed renamable $x9, 0, pcsections !0 :: (load unordered (s8) from %ir.ptr_random) + ; CHECK-NEXT: renamable $w0 = ADDWrx killed renamable $w8, killed renamable $w9, 0, pcsections !0 ; CHECK-NEXT: RET undef $lr, implicit $w0 %ptr_unsigned = getelementptr i8, ptr %p, i32 4095 %val_unsigned = load atomic i8, ptr %ptr_unsigned monotonic, align 1, !pcsections !0 @@ -409,13 +409,13 @@ define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) { ; CHECK-NEXT: liveins: $w1, $x0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $w8 = LDRHHui renamable $x0, 4095, pcsections !0 :: (load monotonic (s16) from %ir.ptr_unsigned) - ; CHECK-NEXT: renamable $w9 = LDRHHroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s16) from %ir.ptr_regoff) - ; CHECK-NEXT: renamable $w10 = LDURHHi renamable $x0, -256, pcsections !0 :: (load monotonic (s16) from %ir.ptr_unscaled) - ; CHECK-NEXT: renamable $x11 = ADDXri killed renamable $x0, 291, 12 - ; CHECK-NEXT: $w8 = ADDWrs killed renamable $w8, killed renamable $w9, 0, pcsections !0 - ; CHECK-NEXT: renamable $w9 = LDRHHui killed renamable $x11, 0, pcsections !0 :: (load unordered (s16) from %ir.ptr_random) - ; CHECK-NEXT: $w8 = ADDWrs killed renamable $w8, killed renamable $w10, 0, pcsections !0 - ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0, pcsections !0 + ; CHECK-NEXT: renamable $w9 = LDRHHroW renamable $x0, killed renamable $w1, 1, 1 :: (load unordered (s16) from %ir.ptr_regoff) + ; CHECK-NEXT: renamable $w10 = LDURHHi renamable $x0, -256 :: (load monotonic (s16) from %ir.ptr_unscaled) + ; CHECK-NEXT: renamable $w8 = ADDWrx killed renamable $w9, killed renamable $w8, 8, pcsections !0 + ; CHECK-NEXT: renamable $x9 = ADDXri killed renamable $x0, 291, 12 + ; CHECK-NEXT: renamable $w8 = ADDWrx killed renamable $w8, killed renamable $w10, 8, pcsections !0 + ; CHECK-NEXT: renamable $w9 = LDRHHui killed renamable $x9, 0, pcsections !0 :: (load unordered (s16) from %ir.ptr_random) + ; CHECK-NEXT: renamable $w0 = ADDWrx killed renamable $w8, killed renamable $w9, 8, pcsections !0 ; CHECK-NEXT: RET undef $lr, implicit $w0 %ptr_unsigned = getelementptr i16, ptr %p, i32 4095 %val_unsigned = load atomic i16, ptr %ptr_unsigned monotonic, align 2, !pcsections !0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/atomic-anyextending-load-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/atomic-anyextending-load-crash.ll new file mode 100644 index 0000000000000..4bb4e4882410d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/atomic-anyextending-load-crash.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -global-isel -global-isel-abort=1 -O0 -o - %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64e-apple-macosx14.0.0" + +define void @test(ptr %0) { +; CHECK-LABEL: test: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #144 +; CHECK-NEXT: stp x29, x30, [sp, #128] ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 144 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ldar w8, [x0] +; CHECK-NEXT: str w8, [sp, #116] ; 4-byte Folded Spill +; CHECK-NEXT: mov x8, #0 ; =0x0 +; CHECK-NEXT: str x8, [sp, #120] ; 8-byte Folded Spill +; CHECK-NEXT: blr x8 +; CHECK-NEXT: ldr w11, [sp, #116] ; 4-byte Folded Reload +; CHECK-NEXT: ldr x8, [sp, #120] ; 8-byte Folded Reload +; CHECK-NEXT: 
mov x9, sp +; CHECK-NEXT: str xzr, [x9] +; CHECK-NEXT: str xzr, [x9, #8] +; CHECK-NEXT: str xzr, [x9, #16] +; CHECK-NEXT: str xzr, [x9, #24] +; CHECK-NEXT: str xzr, [x9, #32] +; CHECK-NEXT: str xzr, [x9, #40] +; CHECK-NEXT: ; implicit-def: $x10 +; CHECK-NEXT: mov x10, x11 +; CHECK-NEXT: str x10, [x9, #48] +; CHECK-NEXT: str xzr, [x9, #56] +; CHECK-NEXT: str xzr, [x9, #64] +; CHECK-NEXT: str xzr, [x9, #72] +; CHECK-NEXT: str xzr, [x9, #80] +; CHECK-NEXT: str xzr, [x9, #88] +; CHECK-NEXT: str xzr, [x9, #96] +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: blr x8 +; CHECK-NEXT: ldp x29, x30, [sp, #128] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #144 +; CHECK-NEXT: ret +entry: + %atomic-load = load atomic i32, ptr %0 seq_cst, align 4 + %call10 = call ptr null() + call void (ptr, ...) null(ptr null, ptr null, i32 0, ptr null, ptr null, i32 0, i32 0, i32 %atomic-load, i32 0, i32 0, i32 0, i32 0, i64 0, ptr null) + ret void +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll index ecd7c3ca71be5..0ff6ae28279f8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll @@ -8,10 +8,11 @@ declare void @test_explicit_sret(ptr sret(i64)) define void @can_tail_call_forwarded_explicit_sret_ptr(ptr sret(i64) %arg) { ; CHECK-LABEL: name: can_tail_call_forwarded_explicit_sret_ptr ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $x8 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 - ; CHECK: $x8 = COPY [[COPY]](p0) - ; CHECK: TCRETURNdi @test_explicit_sret, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x8 + ; CHECK-NEXT: liveins: $x8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 + ; CHECK-NEXT: $x8 = COPY [[COPY]](p0) + ; CHECK-NEXT: TCRETURNdi @test_explicit_sret, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x8 tail call void @test_explicit_sret(ptr %arg) ret void } @@ -20,13 +21,14 @@ define void @can_tail_call_forwarded_explicit_sret_ptr(ptr sret(i64) %arg) { define void @test_call_explicit_sret(ptr sret(i64) %arg) { ; CHECK-LABEL: name: test_call_explicit_sret ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $x8 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $x8 = COPY [[COPY]](p0) - ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: liveins: $x8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $x8 = COPY [[COPY]](p0) + ; CHECK-NEXT: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: RET_ReallyLR call void @test_explicit_sret(ptr %arg) ret void } @@ -34,12 +36,12 @@ define void @test_call_explicit_sret(ptr sret(i64) %arg) { define void @dont_tail_call_explicit_sret_alloca_unused() { ; CHECK-LABEL: name: dont_tail_call_explicit_sret_alloca_unused ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $x8 = COPY [[FRAME_INDEX]](p0) - ; CHECK: BL @test_explicit_sret, 
csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $x8 = COPY [[FRAME_INDEX]](p0) + ; CHECK-NEXT: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: RET_ReallyLR %l = alloca i64, align 8 tail call void @test_explicit_sret(ptr %l) ret void @@ -48,16 +50,17 @@ define void @dont_tail_call_explicit_sret_alloca_unused() { define void @dont_tail_call_explicit_sret_alloca_dummyusers(ptr %ptr) { ; CHECK-LABEL: name: dont_tail_call_explicit_sret_alloca_dummyusers ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.ptr) - ; CHECK: G_STORE [[LOAD]](s64), [[FRAME_INDEX]](p0) :: (store (s64) into %ir.l) - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $x8 = COPY [[FRAME_INDEX]](p0) - ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.ptr) + ; CHECK-NEXT: G_STORE [[LOAD]](s64), [[FRAME_INDEX]](p0) :: (store (s64) into %ir.l) + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $x8 = COPY [[FRAME_INDEX]](p0) + ; CHECK-NEXT: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: RET_ReallyLR %l = alloca i64, align 8 %r = load i64, ptr %ptr, align 8 store i64 %r, ptr %l, align 8 @@ -68,15 +71,16 @@ define void @dont_tail_call_explicit_sret_alloca_dummyusers(ptr %ptr) { define void @dont_tail_call_tailcall_explicit_sret_gep(ptr %ptr) { ; CHECK-LABEL: name: dont_tail_call_tailcall_explicit_sret_gep ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $x8 = COPY [[PTR_ADD]](p0) - ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $x8 = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: RET_ReallyLR %ptr2 = 
getelementptr i64, ptr %ptr, i32 1 tail call void @test_explicit_sret(ptr %ptr2) ret void @@ -85,14 +89,14 @@ define void @dont_tail_call_tailcall_explicit_sret_gep(ptr %ptr) { define i64 @dont_tail_call_sret_alloca_returned() { ; CHECK-LABEL: name: dont_tail_call_sret_alloca_returned ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $x8 = COPY [[FRAME_INDEX]](p0) - ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.l) - ; CHECK: $x0 = COPY [[LOAD]](s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $x8 = COPY [[FRAME_INDEX]](p0) + ; CHECK-NEXT: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.l) + ; CHECK-NEXT: $x0 = COPY [[LOAD]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %l = alloca i64, align 8 tail call void @test_explicit_sret(ptr %l) %r = load i64, ptr %l, align 8 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir index 2d36fb3df0338..93f6051c3bd3b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-build-vector.mir @@ -25,11 +25,11 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 - %zero:_(s32) = G_CONSTANT i32 0 - %one:_(s32) = G_CONSTANT i32 1 + %zero:_(s64) = G_CONSTANT i64 0 + %one:_(s64) = G_CONSTANT i64 1 %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32) - %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64) + %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s64) $x0 = COPY %extract(s64) $x1 = COPY %extract2(s64) RET_ReallyLR implicit $x0 @@ -55,22 +55,22 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %arg1:_(s64) = COPY $x0 ; CHECK-NEXT: %arg2:_(s64) = COPY $x1 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32) - ; CHECK-NEXT: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32) + ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64) + ; CHECK-NEXT: %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s64) ; CHECK-NEXT: $x0 = COPY %extract(s64) ; CHECK-NEXT: $x1 = COPY %extract2(s64) ; CHECK-NEXT: $q0 = COPY %bv(<2 x s64>) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 - %zero:_(s32) = G_CONSTANT i32 0 - %one:_(s32) = G_CONSTANT i32 1 + %zero:_(s64) = G_CONSTANT i64 0 + %one:_(s64) = G_CONSTANT i64 1 %bv:_(<2 x s64>) = 
G_BUILD_VECTOR %arg1(s64), %arg2(s64) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32) - %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64) + %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s64) $x0 = COPY %extract(s64) $x1 = COPY %extract2(s64) $q0 = COPY %bv(<2 x s64>) @@ -103,12 +103,12 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 - %zero:_(s32) = G_CONSTANT i32 0 - %one:_(s32) = G_CONSTANT i32 1 + %zero:_(s64) = G_CONSTANT i64 0 + %one:_(s64) = G_CONSTANT i64 1 %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32) - %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32) - %extract3:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64) + %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s64) + %extract3:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s64) $x0 = COPY %extract(s64) $x1 = COPY %extract2(s64) $x2 = COPY %extract3(s64) @@ -140,12 +140,12 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 - %zero:_(s32) = G_CONSTANT i32 0 - %one:_(s32) = G_CONSTANT i32 1 - %two:_(s32) = G_CONSTANT i32 2 + %zero:_(s64) = G_CONSTANT i64 0 + %one:_(s64) = G_CONSTANT i64 1 + %two:_(s64) = G_CONSTANT i64 2 %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32) - %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %two(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64) + %extract2:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %two(s64) $x0 = COPY %extract(s64) $x1 = COPY %extract2(s64) RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-const-infinite-loop.mir similarity index 97% rename from llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir rename to llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-const-infinite-loop.mir index b145a6d3fd39d..6efdc5e5e8c36 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-const-infinite-loop.mir @@ -1,6 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 # RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner %s -o - \ # RUN: --aarch64prelegalizercombiner-disable-rule=constant_fold_binop | FileCheck %s +# REQUIRES: asserts # `constant_fold_binop` is disabled to trigger the infinite loop in `commute_int_constant_to_rhs`. 
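+# (Sketch of the failure mode this guards against, hedged since it is inferred
+# from the test name rather than stated in the patch: with `constant_fold_binop`
+# disabled, a binop such as a G_ADD whose operands are both G_CONSTANTs is
+# never folded away, and commuting it still leaves a constant on the LHS, so
+# `commute_int_constant_to_rhs` could keep firing indefinitely. The
+# `REQUIRES: asserts` line is needed because the
+# `--aarch64prelegalizercombiner-disable-rule` option is only available in
+# asserts builds.)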
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir index c2a38e26676cf..587d53c300f83 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir @@ -23,9 +23,9 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 - %zero:_(s32) = G_CONSTANT i32 0 + %zero:_(s64) = G_CONSTANT i64 0 %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 @@ -55,10 +55,10 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 - %zero:_(s32) = G_CONSTANT i32 0 + %zero:_(s64) = G_CONSTANT i64 0 %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) %truncbv:_(<2 x s32>) = G_TRUNC %bv - %extract:_(s32) = G_EXTRACT_VECTOR_ELT %truncbv(<2 x s32>), %zero(s32) + %extract:_(s32) = G_EXTRACT_VECTOR_ELT %truncbv(<2 x s32>), %zero(s64) %zext:_(s64) = G_ZEXT %extract $x0 = COPY %zext(s64) RET_ReallyLR implicit $x0 @@ -87,9 +87,9 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 - %one:_(s32) = G_CONSTANT i32 1 + %one:_(s64) = G_CONSTANT i64 1 %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %one(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 @@ -117,9 +117,9 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 - %idx:_(s32) = G_CONSTANT i32 4 + %idx:_(s64) = G_CONSTANT i64 4 %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 @@ -148,9 +148,9 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $w0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 - %zero:_(s32) = G_CONSTANT i32 0 + %zero:_(s64) = G_CONSTANT i64 0 %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64) - %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %zero(s32) + %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %zero(s64) $w0 = COPY %extract(s32) RET_ReallyLR implicit $w0 @@ -175,17 +175,17 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %arg1:_(s64) = COPY $x0 ; CHECK-NEXT: %arg2:_(s64) = COPY $x1 - ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32) + ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64) ; CHECK-NEXT: $x0 = COPY %extract(s64) ; CHECK-NEXT: $q0 = COPY %bv(<2 x s64>) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 - %zero:_(s32) = G_CONSTANT i32 0 + %zero:_(s64) = G_CONSTANT i64 0 %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %zero(s64) $x0 = COPY %extract(s64) $q0 = COPY %bv(<2 x s64>) RET_ReallyLR implicit $x0 @@ -212,14 +212,45 @@ body: | %arg1:_(s64) = COPY $x0 
%arg2:_(s64) = COPY $x1 %undef:_(<2 x s64>) = G_IMPLICIT_DEF - %zero:_(s32) = G_CONSTANT i32 0 - %one:_(s32) = G_CONSTANT i32 1 - %ins1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %arg1(s64), %zero(s32) - %ins2:_(<2 x s64>) = G_INSERT_VECTOR_ELT %ins1, %arg2(s64), %one(s32) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %ins2(<2 x s64>), %zero(s32) + %zero:_(s64) = G_CONSTANT i64 0 + %one:_(s64) = G_CONSTANT i64 1 + %ins1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %arg1(s64), %zero(s64) + %ins2:_(<2 x s64>) = G_INSERT_VECTOR_ELT %ins1, %arg2(s64), %one(s64) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %ins2(<2 x s64>), %zero(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 +... +--- +# This test checks that this combine runs after the insertvec->build_vector combine +name: extract_from_insert2 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $q0, $x0, $x1 + ; CHECK-LABEL: name: extract_from_insert2 + ; CHECK: liveins: $q0, $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %arg1:_(s64) = COPY $x0 + ; CHECK-NEXT: %arg2:_(s64) = COPY $x1 + ; CHECK-NEXT: %ins2:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) + ; CHECK-NEXT: $q0 = COPY %ins2(<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 + %arg0:_(<2 x s64>) = COPY $q0 + %arg1:_(s64) = COPY $x0 + %arg2:_(s64) = COPY $x1 + %zero:_(s64) = G_CONSTANT i64 0 + %one:_(s64) = G_CONSTANT i64 1 + %ins1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %arg0, %arg1(s64), %zero(s64) + %ins2:_(<2 x s64>) = G_INSERT_VECTOR_ELT %ins1, %arg2(s64), %one(s64) + $q0 = COPY %ins2(<2 x s64>) + RET_ReallyLR implicit $q0 + ... --- name: extract_from_idx_negative @@ -239,8 +270,8 @@ body: | ; CHECK-NEXT: $x0 = COPY %extract(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %vec:_(<2 x s64>) = COPY $q0 - %idx:_(s32) = G_CONSTANT i32 -2 - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32) + %idx:_(s64) = G_CONSTANT i64 -2 + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 @@ -263,8 +294,8 @@ body: | ; CHECK-NEXT: $x0 = COPY %extract(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %vec:_(<2 x s64>) = G_IMPLICIT_DEF - %idx:_(s32) = G_CONSTANT i32 -2 - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32) + %idx:_(s64) = G_CONSTANT i64 -2 + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 @@ -284,8 +315,8 @@ body: | ; CHECK-NEXT: $x0 = COPY %extract(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %vec:_(<2 x s64>) = COPY $q0 - %idx:_(s32) = G_IMPLICIT_DEF - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32) + %idx:_(s64) = G_IMPLICIT_DEF + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 @@ -308,8 +339,8 @@ body: | ; CHECK-NEXT: $x0 = COPY %extract(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %vec:_(<2 x s64>) = COPY $q0 - %idx:_(s32) = G_CONSTANT i32 3000 - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32) + %idx:_(s64) = G_CONSTANT i64 3000 + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 @@ -329,15 +360,15 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0 - ; CHECK-NEXT: %idx:_(s32) = COPY $w1 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32) + ; CHECK-NEXT: %idx:_(s64) = COPY $x1 + ; CHECK-NEXT: 
[[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s64) ; CHECK-NEXT: %extract:_(s64) = G_FREEZE [[EVEC]] ; CHECK-NEXT: $x0 = COPY %extract(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %vec:_(<2 x s64>) = COPY $q0 - %idx:_(s32) = COPY $w1 + %idx:_(s64) = COPY $x1 %fvec:_(<2 x s64>) = G_FREEZE %vec - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %fvec(<2 x s64>), %idx(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %fvec(<2 x s64>), %idx(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 @@ -360,10 +391,10 @@ body: | ; CHECK-NEXT: $x0 = COPY %element(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %vec:_(<2 x s64>) = COPY $q0 - %idx:_(s32) = COPY $w1 + %idx:_(s64) = COPY $x1 %element:_(s64) = COPY $x1 - %invec:_(<2 x s64>) = G_INSERT_VECTOR_ELT %vec(<2 x s64>), %element(s64), %idx(s32) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %invec(<2 x s64>), %idx(s32) + %invec:_(<2 x s64>) = G_INSERT_VECTOR_ELT %vec(<2 x s64>), %element(s64), %idx(s64) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %invec(<2 x s64>), %idx(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 @@ -383,16 +414,16 @@ body: | ; CHECK: liveins: $x0, $x1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0 - ; CHECK-NEXT: %idx2:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx2(s32) + ; CHECK-NEXT: %idx2:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx2(s64) ; CHECK-NEXT: $x0 = COPY %extract(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %vec:_(<2 x s64>) = COPY $q0 - %idx:_(s32) = G_CONSTANT i32 0 - %idx2:_(s32) = G_CONSTANT i32 1 + %idx:_(s64) = G_CONSTANT i64 0 + %idx2:_(s64) = G_CONSTANT i64 1 %element:_(s64) = COPY $x1 - %invec:_(<2 x s64>) = G_INSERT_VECTOR_ELT %vec(<2 x s64>), %element(s64), %idx(s32) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %invec(<2 x s64>), %idx2(s32) + %invec:_(<2 x s64>) = G_INSERT_VECTOR_ELT %vec(<2 x s64>), %element(s64), %idx(s64) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %invec(<2 x s64>), %idx2(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 @@ -411,19 +442,19 @@ body: | ; CHECK-LABEL: name: extract_from_build_vector_non_const ; CHECK: liveins: $x0, $x1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %idx:_(s32) = COPY $w0 + ; CHECK-NEXT: %idx:_(s64) = COPY $x0 ; CHECK-NEXT: %arg1:_(s64) = COPY $x0 ; CHECK-NEXT: %arg2:_(s64) = COPY $x1 ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32) + ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s64) ; CHECK-NEXT: $x0 = COPY %extract(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %vec:_(<2 x s64>) = COPY $q0 - %idx:_(s32) = COPY $w0 + %idx:_(s64) = COPY $x0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 @@ -446,11 +477,11 @@ body: | ; CHECK-NEXT: $x0 = COPY %arg1(s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %vec:_(<2 x s64>) = COPY $q0 - %idx:_(s32) = G_CONSTANT i32 0 + %idx:_(s64) = G_CONSTANT i64 0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s64) $x0 = COPY %extract(s64) RET_ReallyLR 
implicit $x0 @@ -478,9 +509,9 @@ body: | %arg2:_(s64) = COPY $x1 %arg3:_(s64) = COPY $x0 %arg4:_(s64) = COPY $x1 - %idx:_(s32) = G_CONSTANT i32 0 + %idx:_(s64) = G_CONSTANT i64 0 %bv:_(<4 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64), %arg3(s64), %arg4(s64) - %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), %idx(s32) + %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), %idx(s64) $w0 = COPY %extract(s32) RET_ReallyLR implicit $x0 ... @@ -500,16 +531,16 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %arg1:_(s64) = COPY $x0 ; CHECK-NEXT: %arg2:_(s64) = COPY $x1 - ; CHECK-NEXT: %idx:_(s32) = COPY $w0 + ; CHECK-NEXT: %idx:_(s64) = COPY $x0 ; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64) - ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx(s32) + ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx(s64) ; CHECK-NEXT: $w0 = COPY %extract(s32) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s64) = COPY $x0 %arg2:_(s64) = COPY $x1 - %idx:_(s32) = COPY $w0 + %idx:_(s64) = COPY $x0 %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64) - %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx(s32) + %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx(s64) $w0 = COPY %extract(s32) RET_ReallyLR implicit $x0 ... @@ -533,9 +564,9 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $x0 %arg1:_(s128) = COPY $q0 %arg2:_(s128) = COPY $q1 - %idx:_(s32) = G_CONSTANT i32 0 + %idx:_(s64) = G_CONSTANT i64 0 %bv:_(<2 x s64>) = G_BUILD_VECTOR_TRUNC %arg1(s128), %arg2(s128) - %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s64) $x0 = COPY %extract(s64) RET_ReallyLR implicit $x0 ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-icmp-to-lhs-known-bits.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-icmp-to-lhs-known-bits.mir index fb072fbe97c19..63343dd8ad935 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-icmp-to-lhs-known-bits.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-icmp-to-lhs-known-bits.mir @@ -129,25 +129,28 @@ body: | ; CHECK-LABEL: name: dont_apply_vector ; CHECK: liveins: $x0 - ; CHECK: %x:_(<2 x s32>) = COPY $x0 - ; CHECK: %one:_(s32) = G_CONSTANT i32 1 - ; CHECK: %one_vec:_(<2 x s32>) = G_BUILD_VECTOR %one(s32), %one(s32) - ; CHECK: %vec_and:_(<2 x s32>) = G_AND %x, %one_vec - ; CHECK: %zero:_(s32) = G_CONSTANT i32 0 - ; CHECK: %zero_vec:_(<2 x s32>) = G_BUILD_VECTOR %zero(s32), %zero(s32) - ; CHECK: %cmp:_(<2 x s1>) = G_ICMP intpred(ne), %vec_and(<2 x s32>), %zero_vec - ; CHECK: %elt:_(s1) = G_EXTRACT_VECTOR_ELT %cmp(<2 x s1>), %zero(s32) - ; CHECK: %ext:_(s32) = G_ZEXT %elt(s1) - ; CHECK: $w0 = COPY %ext(s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(<2 x s32>) = COPY $x0 + ; CHECK-NEXT: %one:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %one_vec:_(<2 x s32>) = G_BUILD_VECTOR %one(s32), %one(s32) + ; CHECK-NEXT: %vec_and:_(<2 x s32>) = G_AND %x, %one_vec + ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: %zero64:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: %zero_vec:_(<2 x s32>) = G_BUILD_VECTOR %zero(s32), %zero(s32) + ; CHECK-NEXT: %cmp:_(<2 x s1>) = G_ICMP intpred(ne), %vec_and(<2 x s32>), %zero_vec + ; CHECK-NEXT: %elt:_(s1) = G_EXTRACT_VECTOR_ELT %cmp(<2 x s1>), %zero64(s64) + ; CHECK-NEXT: %ext:_(s32) = G_ZEXT %elt(s1) + ; CHECK-NEXT: $w0 = COPY %ext(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %x:_(<2 x s32>) = COPY $x0 %one:_(s32) = 
G_CONSTANT i32 1 %one_vec:_(<2 x s32>) = G_BUILD_VECTOR %one, %one %vec_and:_(<2 x s32>) = G_AND %x, %one_vec %zero:_(s32) = G_CONSTANT i32 0 + %zero64:_(s64) = G_CONSTANT i64 0 %zero_vec:_(<2 x s32>) = G_BUILD_VECTOR %zero, %zero %cmp:_(<2 x s1>) = G_ICMP intpred(ne), %vec_and(<2 x s32>), %zero_vec - %elt:_(s1) = G_EXTRACT_VECTOR_ELT %cmp, %zero + %elt:_(s1) = G_EXTRACT_VECTOR_ELT %cmp, %zero64 %ext:_(s32) = G_ZEXT %elt(s1) $w0 = COPY %ext(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir index 254467192fb39..06fb2ce161c20 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir @@ -16,10 +16,10 @@ body: | %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 %2:_(<2 x s32>) = G_IMPLICIT_DEF - %7:_(s32) = G_CONSTANT i32 0 - %8:_(s32) = G_CONSTANT i32 1 - %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s32) - %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %3, %1(s32), %8(s32) + %7:_(s64) = G_CONSTANT i64 0 + %8:_(s64) = G_CONSTANT i64 1 + %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s64) + %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %3, %1(s32), %8(s64) $x0 = COPY %4 ... --- @@ -38,10 +38,10 @@ body: | %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 %2:_(<2 x s32>) = G_IMPLICIT_DEF - %7:_(s32) = G_CONSTANT i32 1 - %8:_(s32) = G_CONSTANT i32 0 - %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s32) - %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %3, %1(s32), %8(s32) + %7:_(s64) = G_CONSTANT i64 1 + %8:_(s64) = G_CONSTANT i64 0 + %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s64) + %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %3, %1(s32), %8(s64) $x0 = COPY %4 ... --- @@ -63,8 +63,8 @@ body: | %6:_(s32) = COPY $w2 %7:_(s32) = COPY $w3 %2:_(<4 x s32>) = G_BUILD_VECTOR %0, %1, %6, %7 - %3:_(s32) = G_CONSTANT i32 1 - %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %3:_(s64) = G_CONSTANT i64 1 + %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64) $q0 = COPY %4 ... --- @@ -83,33 +83,33 @@ body: | %6:_(s32) = COPY $w2 %7:_(s32) = COPY $w3 %2:_(<4 x s32>) = G_BUILD_VECTOR %0, %1, %6, %7 - %3:_(s32) = G_CONSTANT i32 4 - %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %3:_(s64) = G_CONSTANT i64 4 + %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64) $q0 = COPY %4 ... 
--- name: test_combine_insert_vec_build_vec_variable body: | bb.1: - liveins: $w0, $w1, $w2, $w3 + liveins: $x0, $w1, $w2, $w3 ; CHECK-LABEL: name: test_combine_insert_vec_build_vec_variable - ; CHECK: liveins: $w0, $w1, $w2, $w3 + ; CHECK: liveins: $x0, $w1, $w2, $w3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $w3 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[COPY]](s32), [[COPY]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[COPY1]](s32), [[COPY]](s64) ; CHECK-NEXT: $q0 = COPY [[IVEC]](<4 x s32>) - %0:_(s32) = COPY $w0 + %0:_(s64) = COPY $x0 %1:_(s32) = COPY $w1 %6:_(s32) = COPY $w2 %7:_(s32) = COPY $w3 - %2:_(<4 x s32>) = G_BUILD_VECTOR %0, %1, %6, %7 - %3:_(s32) = COPY %0 - %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %2:_(<4 x s32>) = G_BUILD_VECTOR %1, %1, %6, %7 + %3:_(s64) = COPY %0 + %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %1(s32), %3(s64) $q0 = COPY %4 ... --- @@ -128,11 +128,11 @@ body: | %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 %2:_(<2 x s32>) = G_IMPLICIT_DEF - %7:_(s32) = G_CONSTANT i32 0 - %8:_(s32) = G_CONSTANT i32 1 - %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s32) - %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %3, %1(s32), %8(s32) - %5:_(<2 x s32>) = G_INSERT_VECTOR_ELT %4, %1(s32), %8(s32) + %7:_(s64) = G_CONSTANT i64 0 + %8:_(s64) = G_CONSTANT i64 1 + %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s64) + %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %3, %1(s32), %8(s64) + %5:_(<2 x s32>) = G_INSERT_VECTOR_ELT %4, %1(s32), %8(s64) $x0 = COPY %5 ... --- @@ -150,11 +150,11 @@ body: | %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 %2:_(<2 x s32>) = G_IMPLICIT_DEF - %7:_(s32) = G_CONSTANT i32 0 - %8:_(s32) = G_CONSTANT i32 1 - %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s32) - %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %3, %1(s32), %7(s32) - %5:_(<2 x s32>) = G_INSERT_VECTOR_ELT %4, %1(s32), %8(s32) + %7:_(s64) = G_CONSTANT i64 0 + %8:_(s64) = G_CONSTANT i64 1 + %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s64) + %4:_(<2 x s32>) = G_INSERT_VECTOR_ELT %3, %1(s32), %7(s64) + %5:_(<2 x s32>) = G_INSERT_VECTOR_ELT %4, %1(s32), %8(s64) $x0 = COPY %5 ... --- @@ -174,12 +174,12 @@ body: | %0:_(s32) = COPY $w0 %1:_(s32) = COPY $w1 %2:_(<4 x s32>) = G_IMPLICIT_DEF - %7:_(s32) = G_CONSTANT i32 0 - %8:_(s32) = G_CONSTANT i32 2 - %9:_(s32) = G_CONSTANT i32 3 - %10:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s32) - %11:_(<4 x s32>) = G_INSERT_VECTOR_ELT %10, %1(s32), %8(s32) - %12:_(<4 x s32>) = G_INSERT_VECTOR_ELT %11, %0(s32), %9(s32) + %7:_(s64) = G_CONSTANT i64 0 + %8:_(s64) = G_CONSTANT i64 2 + %9:_(s64) = G_CONSTANT i64 3 + %10:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %7(s64) + %11:_(<4 x s32>) = G_INSERT_VECTOR_ELT %10, %1(s32), %8(s64) + %12:_(<4 x s32>) = G_INSERT_VECTOR_ELT %11, %0(s32), %9(s64) $q0 = COPY %12 ... 
--- diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll b/llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll index 75865695ea200..5e667eba741aa 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/constant-dbg-loc.ll @@ -10,24 +10,29 @@ target triple = "arm64-apple-ios5.0.0" define i32 @main() #0 !dbg !14 { ; CHECK-LABEL: name: main ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval - ; CHECK: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %ir.retval) - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0), debug-location !17 :: (dereferenceable load (s32) from @var1) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C1]], debug-location !19 - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.2, debug-location !20 - ; CHECK: G_BR %bb.3, debug-location !20 - ; CHECK: bb.2.if.then: - ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: G_STORE [[C2]](s32), [[GV1]](p0), debug-location !21 :: (store (s32) into @var2) - ; CHECK: bb.3.if.end: - ; CHECK: $w0 = COPY [[C]](s32), debug-location !24 - ; CHECK: RET_ReallyLR implicit $w0, debug-location !24 + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.retval + ; CHECK-NEXT: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %ir.retval) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV]](p0), debug-location !17 :: (dereferenceable load (s32) from @var1) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C1]], debug-location !19 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.2, debug-location !20 + ; CHECK-NEXT: G_BR %bb.3, debug-location !20 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.then: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: G_STORE [[C2]](s32), [[GV1]](p0), debug-location !21 :: (store (s32) into @var2) + ; CHECK-NEXT: G_BR %bb.3, debug-location !23 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.end: + ; CHECK-NEXT: $w0 = COPY [[C]](s32), debug-location !24 + ; CHECK-NEXT: RET_ReallyLR implicit $w0, debug-location !24 entry: %retval = alloca i32, align 4 store i32 0, ptr %retval, align 4 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll index f9f92b9e21900..baed1263008da 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-store-metadata.ll @@ -4,11 +4,12 @@ define void @store_nontemporal(ptr dereferenceable(4) %ptr) { ; CHECK-LABEL: name: store_nontemporal ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: G_STORE 
[[C]](s32), [[COPY]](p0) :: (non-temporal store (s32) into %ir.ptr) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (non-temporal store (s32) into %ir.ptr) + ; CHECK-NEXT: RET_ReallyLR store i32 0, ptr %ptr, align 4, !nontemporal !0 ret void } @@ -16,11 +17,12 @@ define void @store_nontemporal(ptr dereferenceable(4) %ptr) { define void @store_dereferenceable(ptr dereferenceable(4) %ptr) { ; CHECK-LABEL: name: store_dereferenceable ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.ptr) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.ptr) + ; CHECK-NEXT: RET_ReallyLR store i32 0, ptr %ptr, align 4 ret void } @@ -28,11 +30,12 @@ define void @store_dereferenceable(ptr dereferenceable(4) %ptr) { define void @store_volatile_dereferenceable(ptr dereferenceable(4) %ptr) { ; CHECK-LABEL: name: store_volatile_dereferenceable ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: (volatile store (s32) into %ir.ptr) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (volatile store (s32) into %ir.ptr) + ; CHECK-NEXT: RET_ReallyLR store volatile i32 0, ptr %ptr, align 4 ret void } @@ -40,11 +43,12 @@ define void @store_volatile_dereferenceable(ptr dereferenceable(4) %ptr) { define void @store_falkor_strided_access(ptr %ptr) { ; CHECK-LABEL: name: store_falkor_strided_access ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: G_STORE [[C]](s32), [[COPY]](p0) :: ("aarch64-strided-access" store (s32) into %ir.ptr) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: ("aarch64-strided-access" store (s32) into %ir.ptr) + ; CHECK-NEXT: RET_ReallyLR store i32 0, ptr %ptr, align 4, !falkor.strided.access !0 ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir index 542cf018a6c00..5883da137a240 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir @@ -352,8 +352,8 @@ body: | %rhs:_(<3 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32) %lhs:_(<3 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32) %cmp:_(<3 x s32>) = G_ICMP intpred(eq), %lhs(<3 x s32>), %rhs - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s32>), %1(s32) + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s32) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s32>), %1(s64) $w0 = COPY %2(s32) RET_ReallyLR ... 
@@ -386,8 +386,8 @@ body: | %rhs:_(<3 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16) %lhs:_(<3 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16) %cmp:_(<3 x s16>) = G_ICMP intpred(eq), %lhs(<3 x s16>), %rhs - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s16) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s16>), %1(s32) + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s16) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s16>), %1(s64) %zext:_(s32) = G_ZEXT %2(s16) $w0 = COPY %zext(s32) RET_ReallyLR @@ -422,8 +422,8 @@ body: | %rhs:_(<3 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8) %lhs:_(<3 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8) %cmp:_(<3 x s8>) = G_ICMP intpred(eq), %lhs(<3 x s8>), %rhs - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s8) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s8>), %1(s32) + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s8) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s8>), %1(s64) %zext:_(s32) = G_ZEXT %2(s8) $w0 = COPY %zext(s32) RET_ReallyLR @@ -449,8 +449,8 @@ body: | %rhs:_(<3 x s64>) = G_BUILD_VECTOR %const(s64), %const(s64), %const(s64) %lhs:_(<3 x s64>) = G_BUILD_VECTOR %const(s64), %const(s64), %const(s64) %cmp:_(<3 x s64>) = G_ICMP intpred(eq), %lhs(<3 x s64>), %rhs - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s64) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s64>), %1(s32) + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s64) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s64>), %1(s64) $x0 = COPY %2(s64) RET_ReallyLR ... @@ -475,8 +475,8 @@ body: | %rhs:_(<5 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32), %const(s32) %lhs:_(<5 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32), %const(s32) %cmp:_(<5 x s32>) = G_ICMP intpred(eq), %lhs(<5 x s32>), %rhs - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %cmp(<5 x s32>), %1(s32) + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s32) = G_EXTRACT_VECTOR_ELT %cmp(<5 x s32>), %1(s64) $w0 = COPY %2(s32) RET_ReallyLR ... 
@@ -502,8 +502,8 @@ body: | %rhs:_(<7 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16) %lhs:_(<7 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16) %cmp:_(<7 x s16>) = G_ICMP intpred(eq), %lhs(<7 x s16>), %rhs - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s16) = G_EXTRACT_VECTOR_ELT %cmp(<7 x s16>), %1(s32) + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s16) = G_EXTRACT_VECTOR_ELT %cmp(<7 x s16>), %1(s64) %zext:_(s32) = G_ZEXT %2(s16) $w0 = COPY %zext(s32) RET_ReallyLR @@ -530,8 +530,8 @@ body: | %rhs:_(<9 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8) %lhs:_(<9 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8) %cmp:_(<9 x s8>) = G_ICMP intpred(eq), %lhs(<9 x s8>), %rhs - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s8) = G_EXTRACT_VECTOR_ELT %cmp(<9 x s8>), %1(s32) + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s8) = G_EXTRACT_VECTOR_ELT %cmp(<9 x s8>), %1(s64) %zext:_(s32) = G_ZEXT %2(s8) $w0 = COPY %zext(s32) RET_ReallyLR diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir index 2209287284b71..c03f51a89dfbd 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir @@ -13,8 +13,8 @@ body: | ; CHECK-NEXT: $x0 = COPY [[EVEC]](s64) ; CHECK-NEXT: RET_ReallyLR %0:_(<2 x s64>) = COPY $q0 - %1:_(s32) = G_CONSTANT i32 1 - %2:_(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %1(s32) + %1:_(s64) = G_CONSTANT i64 1 + %2:_(s64) = G_EXTRACT_VECTOR_ELT %0(<2 x s64>), %1(s64) $x0 = COPY %2(s64) RET_ReallyLR ... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir index d3db2432e84cb..a74bf9a5438b6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir @@ -9,16 +9,16 @@ body: | ; CHECK: liveins: $d0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[C1]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[C1]](s32), [[C]](s64) ; CHECK-NEXT: $d0 = COPY [[IVEC]](<2 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 %1:_(<2 x s32>) = COPY $d0 %0:_(<2 x s16>) = G_TRUNC %1(<2 x s32>) - %4:_(s32) = G_CONSTANT i32 0 + %4:_(s64) = G_CONSTANT i64 0 %3:_(s16) = G_CONSTANT i16 1 - %2:_(<2 x s16>) = G_INSERT_VECTOR_ELT %0, %3(s16), %4(s32) + %2:_(<2 x s16>) = G_INSERT_VECTOR_ELT %0, %3(s16), %4(s64) %5:_(<2 x s32>) = G_ANYEXT %2(<2 x s16>) $d0 = COPY %5(<2 x s32>) RET_ReallyLR implicit $d0 @@ -32,16 +32,16 @@ body: | ; CHECK: liveins: $d0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[C1]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[C1]](s32), [[C]](s64) ; CHECK-NEXT: $d0 = COPY [[IVEC]](<2 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 %1:_(<2 x s32>) = COPY $d0 %0:_(<2 x s8>) = G_TRUNC %1(<2 x s32>) - %4:_(s32) = G_CONSTANT i32 0 + %4:_(s64) = G_CONSTANT i64 0 %3:_(s8) = G_CONSTANT i8 1 - %2:_(<2 x s8>) = G_INSERT_VECTOR_ELT %0, %3(s8), %4(s32) + %2:_(<2 x s8>) = G_INSERT_VECTOR_ELT %0, %3(s8), %4(s64) %5:_(<2 x s32>) = G_ANYEXT %2(<2 x s8>) $d0 = COPY %5(<2 x s32>) RET_ReallyLR implicit $d0 @@ -55,16 +55,16 @@ body: | ; CHECK: liveins: $d0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[COPY]], [[C1]](s16), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[COPY]], [[C1]](s16), [[C]](s64) ; CHECK-NEXT: $d0 = COPY [[IVEC]](<4 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 %1:_(<4 x s16>) = COPY $d0 %0:_(<4 x s8>) = G_TRUNC %1(<4 x s16>) - %4:_(s32) = G_CONSTANT i32 0 + %4:_(s64) = G_CONSTANT i64 0 %3:_(s8) = G_CONSTANT i8 1 - %2:_(<4 x s8>) = G_INSERT_VECTOR_ELT %0, %3(s8), %4(s32) + %2:_(<4 x s8>) = G_INSERT_VECTOR_ELT %0, %3(s8), %4(s64) %5:_(<4 x s16>) = G_ANYEXT %2(<4 x s8>) $d0 = COPY %5(<4 x s16>) RET_ReallyLR implicit $d0 @@ -78,15 +78,15 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: %val:_(s8) = G_CONSTANT i8 42 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<8 x s8>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s8), 
[[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<8 x s8>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s8), [[C]](s64) ; CHECK-NEXT: $d0 = COPY [[IVEC]](<8 x s8>) ; CHECK-NEXT: RET_ReallyLR %0:_(<8 x s8>) = COPY $d0 - %1:_(s32) = G_CONSTANT i32 1 + %1:_(s64) = G_CONSTANT i64 1 %val:_(s8) = G_CONSTANT i8 42 - %2:_(<8 x s8>) = G_INSERT_VECTOR_ELT %0(<8 x s8>), %val(s8), %1(s32) + %2:_(<8 x s8>) = G_INSERT_VECTOR_ELT %0(<8 x s8>), %val(s8), %1(s64) $d0 = COPY %2(<8 x s8>) RET_ReallyLR ... @@ -99,15 +99,15 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: %val:_(s8) = G_CONSTANT i8 42 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<16 x s8>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s8), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<16 x s8>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s8), [[C]](s64) ; CHECK-NEXT: $q0 = COPY [[IVEC]](<16 x s8>) ; CHECK-NEXT: RET_ReallyLR %0:_(<16 x s8>) = COPY $q0 - %1:_(s32) = G_CONSTANT i32 1 + %1:_(s64) = G_CONSTANT i64 1 %val:_(s8) = G_CONSTANT i8 42 - %2:_(<16 x s8>) = G_INSERT_VECTOR_ELT %0(<16 x s8>), %val(s8), %1(s32) + %2:_(<16 x s8>) = G_INSERT_VECTOR_ELT %0(<16 x s8>), %val(s8), %1(s64) $q0 = COPY %2(<16 x s8>) RET_ReallyLR ... @@ -120,15 +120,15 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: %val:_(s16) = G_CONSTANT i16 42 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s16), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s16), [[C]](s64) ; CHECK-NEXT: $d0 = COPY [[IVEC]](<4 x s16>) ; CHECK-NEXT: RET_ReallyLR %0:_(<4 x s16>) = COPY $d0 - %1:_(s32) = G_CONSTANT i32 1 + %1:_(s64) = G_CONSTANT i64 1 %val:_(s16) = G_CONSTANT i16 42 - %2:_(<4 x s16>) = G_INSERT_VECTOR_ELT %0(<4 x s16>), %val(s16), %1(s32) + %2:_(<4 x s16>) = G_INSERT_VECTOR_ELT %0(<4 x s16>), %val(s16), %1(s64) $d0 = COPY %2(<4 x s16>) RET_ReallyLR ... @@ -141,15 +141,15 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: %val:_(s16) = G_CONSTANT i16 42 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<8 x s16>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s16), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<8 x s16>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s16), [[C]](s64) ; CHECK-NEXT: $q0 = COPY [[IVEC]](<8 x s16>) ; CHECK-NEXT: RET_ReallyLR %0:_(<8 x s16>) = COPY $q0 - %1:_(s32) = G_CONSTANT i32 1 + %1:_(s64) = G_CONSTANT i64 1 %val:_(s16) = G_CONSTANT i16 42 - %2:_(<8 x s16>) = G_INSERT_VECTOR_ELT %0(<8 x s16>), %val(s16), %1(s32) + %2:_(<8 x s16>) = G_INSERT_VECTOR_ELT %0(<8 x s16>), %val(s16), %1(s64) $q0 = COPY %2(<8 x s16>) RET_ReallyLR ... 
@@ -162,15 +162,15 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: %val:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s32), [[C]](s64) ; CHECK-NEXT: $d0 = COPY [[IVEC]](<2 x s32>) ; CHECK-NEXT: RET_ReallyLR %0:_(<2 x s32>) = COPY $d0 - %1:_(s32) = G_CONSTANT i32 1 + %1:_(s64) = G_CONSTANT i64 1 %val:_(s32) = G_CONSTANT i32 42 - %2:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0(<2 x s32>), %val(s32), %1(s32) + %2:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0(<2 x s32>), %val(s32), %1(s64) $d0 = COPY %2(<2 x s32>) RET_ReallyLR ... @@ -183,15 +183,15 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: %val:_(s32) = G_CONSTANT i32 42 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s32), [[C]](s64) ; CHECK-NEXT: $q0 = COPY [[IVEC]](<4 x s32>) ; CHECK-NEXT: RET_ReallyLR %0:_(<4 x s32>) = COPY $q0 - %1:_(s32) = G_CONSTANT i32 1 + %1:_(s64) = G_CONSTANT i64 1 %val:_(s32) = G_CONSTANT i32 42 - %2:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0(<4 x s32>), %val(s32), %1(s32) + %2:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0(<4 x s32>), %val(s32), %1(s64) $q0 = COPY %2(<4 x s32>) RET_ReallyLR ... @@ -204,15 +204,15 @@ body: | ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: %val:_(s64) = G_CONSTANT i64 42 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s64), [[C]](s32) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[COPY]], %val(s64), [[C]](s64) ; CHECK-NEXT: $q0 = COPY [[IVEC]](<2 x s64>) ; CHECK-NEXT: RET_ReallyLR %0:_(<2 x s64>) = COPY $q0 - %1:_(s32) = G_CONSTANT i32 1 + %1:_(s64) = G_CONSTANT i64 1 %val:_(s64) = G_CONSTANT i64 42 - %2:_(<2 x s64>) = G_INSERT_VECTOR_ELT %0(<2 x s64>), %val(s64), %1(s32) + %2:_(<2 x s64>) = G_INSERT_VECTOR_ELT %0(<2 x s64>), %val(s64), %1(s64) $q0 = COPY %2(<2 x s64>) RET_ReallyLR ... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir index 8803da265aa11..8bd62c5922541 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir @@ -782,8 +782,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s32>) = G_PHI [[BUILD_VECTOR1]](<4 x s32>), %bb.1, [[BUILD_VECTOR]](<4 x s32>), %bb.0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT [[PHI]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT [[PHI]](<4 x s32>), %one(s64) ; CHECK-NEXT: $w0 = COPY %extract(s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.0: @@ -797,8 +797,8 @@ body: | %val_2:_(<8 x s32>) = G_IMPLICIT_DEF bb.2: %phi:_(<8 x s32>) = G_PHI %val_2(<8 x s32>), %bb.1, %val_1(<8 x s32>), %bb.0 - %one:_(s32) = G_CONSTANT i32 1 - %extract:_(s32) = G_EXTRACT_VECTOR_ELT %phi(<8 x s32>), %one(s32) + %one:_(s64) = G_CONSTANT i64 1 + %extract:_(s32) = G_EXTRACT_VECTOR_ELT %phi(<8 x s32>), %one(s64) $w0 = COPY %extract RET_ReallyLR implicit $w0 ... @@ -828,8 +828,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<8 x s16>) = G_PHI [[BUILD_VECTOR1]](<8 x s16>), %bb.1, [[BUILD_VECTOR]](<8 x s16>), %bb.0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: %extract:_(s16) = G_EXTRACT_VECTOR_ELT [[PHI]](<8 x s16>), [[C1]](s64) + ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: %extract:_(s16) = G_EXTRACT_VECTOR_ELT [[PHI]](<8 x s16>), %one(s64) ; CHECK-NEXT: $h0 = COPY %extract(s16) ; CHECK-NEXT: RET_ReallyLR implicit $h0 bb.0: @@ -843,8 +843,8 @@ body: | %val_2:_(<16 x s16>) = G_IMPLICIT_DEF bb.2: %phi:_(<16 x s16>) = G_PHI %val_2(<16 x s16>), %bb.1, %val_1(<16 x s16>), %bb.0 - %one:_(s16) = G_CONSTANT i16 1 - %extract:_(s16) = G_EXTRACT_VECTOR_ELT %phi(<16 x s16>), %one(s16) + %one:_(s64) = G_CONSTANT i64 1 + %extract:_(s16) = G_EXTRACT_VECTOR_ELT %phi(<16 x s16>), %one(s64) $h0 = COPY %extract RET_ReallyLR implicit $h0 ... @@ -874,8 +874,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s8>) = G_PHI [[BUILD_VECTOR1]](<16 x s8>), %bb.1, [[BUILD_VECTOR]](<16 x s8>), %bb.0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: %extract:_(s8) = G_EXTRACT_VECTOR_ELT [[PHI]](<16 x s8>), [[C1]](s64) + ; CHECK-NEXT: %one:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: %extract:_(s8) = G_EXTRACT_VECTOR_ELT [[PHI]](<16 x s8>), %one(s64) ; CHECK-NEXT: $b0 = COPY %extract(s8) ; CHECK-NEXT: RET_ReallyLR implicit $b0 bb.0: @@ -889,8 +889,8 @@ body: | %val_2:_(<32 x s8>) = G_IMPLICIT_DEF bb.2: %phi:_(<32 x s8>) = G_PHI %val_2(<32 x s8>), %bb.1, %val_1(<32 x s8>), %bb.0 - %one:_(s8) = G_CONSTANT i8 1 - %extract:_(s8) = G_EXTRACT_VECTOR_ELT %phi(<32 x s8>), %one(s8) + %one:_(s64) = G_CONSTANT i64 1 + %extract:_(s8) = G_EXTRACT_VECTOR_ELT %phi(<32 x s8>), %one(s64) $b0 = COPY %extract RET_ReallyLR implicit $b0 ... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir index 499c08fa4966f..7921de6ce2362 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir @@ -15,7 +15,7 @@ define void @mul_wrong_pow_2(ptr %addr) { ret void } define void @more_than_one_use_shl_1(ptr %addr) { ret void } define void @more_than_one_use_shl_2(ptr %addr) { ret void } - define void @more_than_one_use_shl_lsl_fast(ptr %addr) #1 { ret void } + define void @more_than_one_use_shl_lsl_fast(ptr %addr) { ret void } define void @more_than_one_use_shl_lsl_slow(ptr %addr) { ret void } define void @more_than_one_use_shl_minsize(ptr %addr) #0 { ret void } define void @ldrwrox(ptr %addr) { ret void } @@ -24,7 +24,6 @@ define void @ldbbrox(ptr %addr) { ret void } define void @ldrqrox(ptr %addr) { ret void } attributes #0 = { optsize } - attributes #1 = { "target-features"="+addr-lsl-fast" } ... --- @@ -478,11 +477,10 @@ body: | ; CHECK: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK-NEXT: [[ADDXrs:%[0-9]+]]:gpr64common = ADDXrs [[COPY1]], [[COPY]], 3 - ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr) - ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr) - ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRXui1]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[LDRXroX1:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[LDRXroX1]] ; CHECK-NEXT: $x2 = COPY [[ADDXrr]] ; CHECK-NEXT: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir index f4374feadcdf3..9d12c3c32c7f8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir @@ -19,8 +19,8 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $w0 %2:_(<4 x s32>) = G_IMPLICIT_DEF - %3:_(s32) = G_CONSTANT i32 0 - %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64) %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0) $q0 = COPY %4(<4 x s32>) RET_ReallyLR implicit $q0 @@ -44,8 +44,8 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $x0 %2:_(<2 x s64>) = G_IMPLICIT_DEF - %3:_(s32) = G_CONSTANT i32 0 - %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32) + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64) %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0) $q0 = COPY %4(<2 x s64>) RET_ReallyLR implicit $q0 @@ -69,8 +69,8 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(s32) = COPY $w0 %2:_(<2 x s32>) = G_IMPLICIT_DEF - %3:_(s32) = G_CONSTANT i32 0 - %1:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64) 
%4:_(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0) $d0 = COPY %4(<2 x s32>) RET_ReallyLR implicit $d0 @@ -94,8 +94,8 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $s0 %2:_(<4 x s32>) = G_IMPLICIT_DEF - %3:_(s32) = G_CONSTANT i32 0 - %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64) %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, 0, 0, 0) $q0 = COPY %4(<4 x s32>) RET_ReallyLR implicit $q0 @@ -119,8 +119,8 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $d0 %2:_(<2 x s64>) = G_IMPLICIT_DEF - %3:_(s32) = G_CONSTANT i32 0 - %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32) + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64) %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 0) $q0 = COPY %4(<2 x s64>) RET_ReallyLR implicit $q0 @@ -144,8 +144,8 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(s32) = COPY $s0 %2:_(<2 x s32>) = G_IMPLICIT_DEF - %3:_(s32) = G_CONSTANT i32 0 - %1:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<2 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64) %4:_(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2, shufflemask(0, 0) $d0 = COPY %4(<2 x s32>) RET_ReallyLR implicit $d0 @@ -172,8 +172,8 @@ body: | %0:_(s64) = COPY $d0 %2:_(<2 x s64>) = G_IMPLICIT_DEF %6:_(<2 x s64>) = COPY %2 - %3:_(s32) = G_CONSTANT i32 0 - %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %6, %0(s64), %3(s32) + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %6, %0(s64), %3(s64) %7:_(<2 x s64>) = COPY %1 %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %7(<2 x s64>), %2, shufflemask(0, 0) $q0 = COPY %4(<2 x s64>) @@ -194,15 +194,15 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY [[IVEC]](<2 x s64>) ; CHECK-NEXT: $q0 = COPY [[COPY1]](<2 x s64>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $x0 %2:_(<2 x s64>) = G_IMPLICIT_DEF - %3:_(s32) = G_CONSTANT i32 0 - %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32) + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64) %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(0, 1) $q0 = COPY %4(<2 x s64>) RET_ReallyLR implicit $q0 @@ -230,8 +230,8 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $x0 %2:_(<2 x s64>) = G_IMPLICIT_DEF - %3:_(s32) = G_CONSTANT i32 0 - %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s32) + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<2 x s64>) = G_INSERT_VECTOR_ELT %2, %0(s64), %3(s64) %4:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %2, shufflemask(-1, -1) $q0 = COPY %4(<2 x s64>) RET_ReallyLR implicit $q0 @@ -258,8 +258,8 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $s0 %2:_(<4 x s32>) = G_IMPLICIT_DEF - %3:_(s32) = G_CONSTANT i32 0 - %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), 
%3(s64) %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(0, -1, 0, 0) $q0 = COPY %4(<4 x s32>) RET_ReallyLR implicit $q0 @@ -273,22 +273,20 @@ tracksRegLiveness: true body: | bb.1.entry: liveins: $s0 - ; Check a non-splat mask with an undef value. We shouldn't get a G_DUP here. - ; ; CHECK-LABEL: name: not_all_zeros_with_undefs ; CHECK: liveins: $s0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s64) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 3) ; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $s0 %2:_(<4 x s32>) = G_IMPLICIT_DEF - %3:_(s32) = G_CONSTANT i32 0 - %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s32) + %3:_(s64) = G_CONSTANT i64 0 + %1:_(<4 x s32>) = G_INSERT_VECTOR_ELT %2, %0(s32), %3(s64) %4:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2, shufflemask(-1, 0, 0, 3) $q0 = COPY %4(<4 x s32>) RET_ReallyLR implicit $q0 @@ -311,8 +309,8 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $d0 %copy:_(s16) = COPY $h0 %undef:_(<4 x s16>) = G_IMPLICIT_DEF - %cst:_(s32) = G_CONSTANT i32 0 - %ins:_(<4 x s16>) = G_INSERT_VECTOR_ELT %undef, %copy(s16), %cst(s32) + %cst:_(s64) = G_CONSTANT i64 0 + %ins:_(<4 x s16>) = G_INSERT_VECTOR_ELT %undef, %copy(s16), %cst(s64) %splat:_(<4 x s16>) = G_SHUFFLE_VECTOR %ins(<4 x s16>), %undef, shufflemask(0, 0, 0, 0) $d0 = COPY %splat(<4 x s16>) RET_ReallyLR implicit $d0 @@ -335,8 +333,8 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit $d0 %copy:_(s32) = COPY $w0 %undef:_(<8 x s8>) = G_IMPLICIT_DEF - %cst:_(s32) = G_CONSTANT i32 0 - %ins:_(<8 x s8>) = G_INSERT_VECTOR_ELT %undef, %copy(s32), %cst(s32) + %cst:_(s64) = G_CONSTANT i64 0 + %ins:_(<8 x s8>) = G_INSERT_VECTOR_ELT %undef, %copy(s32), %cst(s64) %splat:_(<8 x s8>) = G_SHUFFLE_VECTOR %ins(<8 x s8>), %undef, shufflemask(0, 0, 0, 0, 0, 0, 0, 0) $d0 = COPY %splat(<8 x s8>) RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-icmp-to-true-false-known-bits.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-icmp-to-true-false-known-bits.mir index 7666b2fb8368f..8adf5b2d26bfa 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-icmp-to-true-false-known-bits.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-icmp-to-true-false-known-bits.mir @@ -534,18 +534,20 @@ body: | liveins: $x0 ; CHECK-LABEL: name: vector_true ; CHECK: liveins: $x0 - ; CHECK: %cst:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK: %cmp:_(<2 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1) - ; CHECK: %extract:_(s1) = G_EXTRACT_VECTOR_ELT %cmp(<2 x s1>), %cst(s32) - ; CHECK: %extract_ext:_(s32) = G_ZEXT %extract(s1) - ; CHECK: $w0 = COPY %extract_ext(s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst64:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: %cmp:_(<2 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1) + ; CHECK-NEXT: %extract:_(s1) = G_EXTRACT_VECTOR_ELT %cmp(<2 x s1>), %cst64(s64) + ; 
CHECK-NEXT: %extract_ext:_(s32) = G_ZEXT %extract(s1) + ; CHECK-NEXT: $w0 = COPY %extract_ext(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 1 + %cst64:_(s64) = G_CONSTANT i64 1 %bv:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst %cmp:_(<2 x s1>) = G_ICMP intpred(eq), %bv(<2 x s32>), %bv - %extract:_(s1) = G_EXTRACT_VECTOR_ELT %cmp(<2 x s1>), %cst(s32) + %extract:_(s1) = G_EXTRACT_VECTOR_ELT %cmp(<2 x s1>), %cst64(s64) %extract_ext:_(s32) = G_ZEXT %extract(s1) $w0 = COPY %extract_ext(s32) RET_ReallyLR implicit $w0 @@ -559,18 +561,20 @@ body: | liveins: $x0 ; CHECK-LABEL: name: vector_false ; CHECK: liveins: $x0 - ; CHECK: %cst:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; CHECK: %cmp:_(<2 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1) - ; CHECK: %extract:_(s1) = G_EXTRACT_VECTOR_ELT %cmp(<2 x s1>), %cst(s32) - ; CHECK: %extract_ext:_(s32) = G_ZEXT %extract(s1) - ; CHECK: $w0 = COPY %extract_ext(s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst64:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; CHECK-NEXT: %cmp:_(<2 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1) + ; CHECK-NEXT: %extract:_(s1) = G_EXTRACT_VECTOR_ELT %cmp(<2 x s1>), %cst64(s64) + ; CHECK-NEXT: %extract_ext:_(s32) = G_ZEXT %extract(s1) + ; CHECK-NEXT: $w0 = COPY %extract_ext(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %ptr:_(p0) = COPY $x0 %cst:_(s32) = G_CONSTANT i32 1 + %cst64:_(s64) = G_CONSTANT i64 1 %bv:_(<2 x s32>) = G_BUILD_VECTOR %cst, %cst %cmp:_(<2 x s1>) = G_ICMP intpred(ne), %bv(<2 x s32>), %bv - %extract:_(s1) = G_EXTRACT_VECTOR_ELT %cmp(<2 x s1>), %cst(s32) + %extract:_(s1) = G_EXTRACT_VECTOR_ELT %cmp(<2 x s1>), %cst64(s64) %extract_ext:_(s32) = G_ZEXT %extract(s1) $w0 = COPY %extract_ext(s32) RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir index e77fac19e0a78..2ffb785680685 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir @@ -57,11 +57,11 @@ tracksRegLiveness: true body: | bb.1: ; CHECK-LABEL: name: inlineasm_virt_reg_output - ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:PPR2_with_psub_in_PNR_3b_and_PPR2_with_psub1_in_PPR_p8to15 */, def %0 + ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1769482 /* regdef:GPR32common */, def %0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 - INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:GPR32common */, def %0:gpr32common + INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1769482 /* regdef:GPR32common */, def %0:gpr32common %1:_(s32) = COPY %0 $w0 = COPY %1(s32) RET_ReallyLR implicit $w0 @@ -75,12 +75,12 @@ tracksRegLiveness: true body: | bb.1: ; CHECK-LABEL: name: inlineasm_virt_mixed_types - ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1310730 /* regdef:PPR2_with_psub_in_PNR_3b_and_PPR2_with_psub1_in_PPR_p8to15 */, def %0, 2162698 /* regdef:WSeqPairsClass */, def %1 + ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1769482 /* regdef:GPR32common */, def %0, {{[0-9]+}} /* regdef:FPR64 */, def %1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(s64) = COPY %1 ; CHECK-NEXT: $d0 = COPY [[COPY1]](s64) ; CHECK-NEXT: 
RET_ReallyLR implicit $d0 - INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1310730 /* regdef:GPR32common */, def %0:gpr32common, 2162698 /* regdef:FPR64 */, def %1:fpr64 + INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1769482 /* regdef:GPR32common */, def %0:gpr32common, 2621450 /* regdef:FPR64 */, def %1:fpr64 %3:_(s32) = COPY %0 %4:_(s64) = COPY %1 $d0 = COPY %4(s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-insert-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-insert-vector-elt.mir index eb539aacc4bf4..b0620a8f81dce 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-insert-vector-elt.mir @@ -24,14 +24,14 @@ body: | ; CHECK: liveins: $q1, $s0 ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 ; CHECK: [[COPY1:%[0-9]+]]:fpr(<4 x s32>) = COPY $q1 - ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 - ; CHECK: [[IVEC:%[0-9]+]]:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s32), [[C]](s32) + ; CHECK: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 1 + ; CHECK: [[IVEC:%[0-9]+]]:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s32), [[C]](s64) ; CHECK: $q0 = COPY [[IVEC]](<4 x s32>) ; CHECK: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $s0 %1:_(<4 x s32>) = COPY $q1 - %3:_(s32) = G_CONSTANT i32 1 - %2:_(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32) + %3:_(s64) = G_CONSTANT i64 1 + %2:_(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s64) $q0 = COPY %2(<4 x s32>) RET_ReallyLR implicit $q0 @@ -47,16 +47,17 @@ body: | ; CHECK-LABEL: name: v4s32_gpr ; CHECK: liveins: $q0, $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0 - ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 - ; CHECK: [[IVEC:%[0-9]+]]:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s32), [[C]](s32) - ; CHECK: $q0 = COPY [[IVEC]](<4 x s32>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s32), [[C]](s64) + ; CHECK-NEXT: $q0 = COPY [[IVEC]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $w0 %1:_(<4 x s32>) = COPY $q0 - %3:_(s32) = G_CONSTANT i32 1 - %2:_(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32) + %3:_(s64) = G_CONSTANT i64 1 + %2:_(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s64) $q0 = COPY %2(<4 x s32>) RET_ReallyLR implicit $q0 @@ -72,16 +73,17 @@ body: | ; CHECK-LABEL: name: v2s64_fpr ; CHECK: liveins: $d0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:fpr(<2 x s64>) = COPY $q1 - ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 - ; CHECK: [[IVEC:%[0-9]+]]:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s64), [[C]](s32) - ; CHECK: $q0 = COPY [[IVEC]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(<2 x s64>) = COPY $q1 + ; CHECK-NEXT: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s64), [[C]](s64) + ; CHECK-NEXT: $q0 = COPY [[IVEC]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $d0 %1:_(<2 x s64>) = COPY $q1 - %3:_(s32) = G_CONSTANT i32 1 - %2:_(<2 x 
+ %3:_(s64) = G_CONSTANT i64 1
+ %2:_(<2 x s64>) = G_INSERT_VECTOR_ELT %1, %0(s64), %3(s64)
 $q0 = COPY %2(<2 x s64>)
 RET_ReallyLR implicit $q0
@@ -97,16 +99,17 @@ body: |
 ; CHECK-LABEL: name: v2s64_gpr
 ; CHECK: liveins: $q0, $x0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:fpr(<2 x s64>) = COPY $q0
- ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0
- ; CHECK: [[IVEC:%[0-9]+]]:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s64), [[C]](s32)
- ; CHECK: $q0 = COPY [[IVEC]](<2 x s64>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(<2 x s64>) = COPY $q0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[IVEC:%[0-9]+]]:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s64), [[C]](s64)
+ ; CHECK-NEXT: $q0 = COPY [[IVEC]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
 %0:_(s64) = COPY $x0
 %1:_(<2 x s64>) = COPY $q0
- %3:_(s32) = G_CONSTANT i32 0
- %2:_(<2 x s64>) = G_INSERT_VECTOR_ELT %1, %0(s64), %3(s32)
+ %3:_(s64) = G_CONSTANT i64 0
+ %2:_(<2 x s64>) = G_INSERT_VECTOR_ELT %1, %0(s64), %3(s64)
 $q0 = COPY %2(<2 x s64>)
 RET_ReallyLR implicit $q0
@@ -122,16 +125,17 @@ body: |
 ; CHECK-LABEL: name: v2s32_fpr
 ; CHECK: liveins: $d1, $s0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
- ; CHECK: [[COPY1:%[0-9]+]]:fpr(<2 x s32>) = COPY $d1
- ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
- ; CHECK: [[IVEC:%[0-9]+]]:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s32), [[C]](s32)
- ; CHECK: $d0 = COPY [[IVEC]](<2 x s32>)
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(<2 x s32>) = COPY $d1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[IVEC:%[0-9]+]]:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s32), [[C]](s64)
+ ; CHECK-NEXT: $d0 = COPY [[IVEC]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
 %0:_(s32) = COPY $s0
 %1:_(<2 x s32>) = COPY $d1
- %3:_(s32) = G_CONSTANT i32 1
- %2:_(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+ %3:_(s64) = G_CONSTANT i64 1
+ %2:_(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s64)
 $d0 = COPY %2(<2 x s32>)
 RET_ReallyLR implicit $d0
@@ -147,16 +151,17 @@ body: |
 ; CHECK-LABEL: name: v2s32_gpr
 ; CHECK: liveins: $d0, $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
- ; CHECK: [[COPY1:%[0-9]+]]:fpr(<2 x s32>) = COPY $d0
- ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1
- ; CHECK: [[IVEC:%[0-9]+]]:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s32), [[C]](s32)
- ; CHECK: $d0 = COPY [[IVEC]](<2 x s32>)
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[IVEC:%[0-9]+]]:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY1]], [[COPY]](s32), [[C]](s64)
+ ; CHECK-NEXT: $d0 = COPY [[IVEC]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
 %0:_(s32) = COPY $w0
 %1:_(<2 x s32>) = COPY $d0
- %3:_(s32) = G_CONSTANT i32 1
- %2:_(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+ %3:_(s64) = G_CONSTANT i64 1
+ %2:_(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s64)
 $d0 = COPY %2(<2 x s32>)
 RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-insert-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-insert-vector-elt.mir
index d6618d440f42a..33f7e58804f14 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-insert-vector-elt.mir
@@ -21,8 +21,9 @@ body: |
 %0:gpr(s32) = COPY $w0
 %trunc:gpr(s8) = G_TRUNC %0
 %1:fpr(<16 x s8>) = COPY $q1
- %3:gpr(s32) = G_CONSTANT i32 1
- %2:fpr(<16 x s8>) = G_INSERT_VECTOR_ELT %1, %trunc:gpr(s8), %3:gpr(s32)
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %4:gpr(s32) = G_ANYEXT %trunc
+ %2:fpr(<16 x s8>) = G_INSERT_VECTOR_ELT %1, %4:gpr(s32), %3:gpr(s64)
 $q0 = COPY %2(<16 x s8>)
 RET_ReallyLR implicit $q0
@@ -51,8 +52,9 @@ body: |
 %0:gpr(s32) = COPY $w0
 %trunc:gpr(s8) = G_TRUNC %0
 %1:fpr(<8 x s8>) = COPY $d0
- %3:gpr(s32) = G_CONSTANT i32 1
- %2:fpr(<8 x s8>) = G_INSERT_VECTOR_ELT %1, %trunc(s8), %3(s32)
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %4:gpr(s32) = G_ANYEXT %trunc
+ %2:fpr(<8 x s8>) = G_INSERT_VECTOR_ELT %1, %4(s32), %3(s64)
 $d0 = COPY %2(<8 x s8>)
 RET_ReallyLR implicit $d0
@@ -78,8 +80,9 @@ body: |
 %0:gpr(s32) = COPY $w0
 %trunc:gpr(s16) = G_TRUNC %0
 %1:fpr(<8 x s16>) = COPY $q1
- %3:gpr(s32) = G_CONSTANT i32 1
- %2:fpr(<8 x s16>) = G_INSERT_VECTOR_ELT %1, %trunc:gpr(s16), %3:gpr(s32)
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %4:gpr(s32) = G_ANYEXT %trunc
+ %2:fpr(<8 x s16>) = G_INSERT_VECTOR_ELT %1, %4:gpr(s32), %3:gpr(s64)
 $q0 = COPY %2(<8 x s16>)
 RET_ReallyLR implicit $q0
@@ -106,8 +109,8 @@ body: |
 ; CHECK-NEXT: RET_ReallyLR implicit $q0
 %0:fpr(s16) = COPY $h0
 %1:fpr(<8 x s16>) = COPY $q1
- %3:gpr(s32) = G_CONSTANT i32 1
- %2:fpr(<8 x s16>) = G_INSERT_VECTOR_ELT %1, %0(s16), %3(s32)
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %2:fpr(<8 x s16>) = G_INSERT_VECTOR_ELT %1, %0(s16), %3(s64)
 $q0 = COPY %2(<8 x s16>)
 RET_ReallyLR implicit $q0
@@ -134,8 +137,8 @@ body: |
 ; CHECK-NEXT: RET_ReallyLR implicit $q0
 %0:fpr(s32) = COPY $s0
 %1:fpr(<4 x s32>) = COPY $q1
- %3:gpr(s32) = G_CONSTANT i32 1
- %2:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %2:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s64)
 $q0 = COPY %2(<4 x s32>)
 RET_ReallyLR implicit $q0
@@ -160,8 +163,8 @@ body: |
 ; CHECK-NEXT: RET_ReallyLR implicit $q0
 %0:gpr(s32) = COPY $w0
 %1:fpr(<4 x s32>) = COPY $q0
- %3:gpr(s32) = G_CONSTANT i32 1
- %2:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %2:fpr(<4 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s64)
 $q0 = COPY %2(<4 x s32>)
 RET_ReallyLR implicit $q0
@@ -190,8 +193,9 @@ body: |
 %0:gpr(s32) = COPY $w0
 %trunc:gpr(s16) = G_TRUNC %0
 %1:fpr(<4 x s16>) = COPY $d0
- %3:gpr(s32) = G_CONSTANT i32 1
- %2:fpr(<4 x s16>) = G_INSERT_VECTOR_ELT %1, %trunc(s16), %3(s32)
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %4:gpr(s32) = G_ANYEXT %trunc
+ %2:fpr(<4 x s16>) = G_INSERT_VECTOR_ELT %1, %4(s32), %3(s64)
 $d0 = COPY %2(<4 x s16>)
 RET_ReallyLR implicit $d0
@@ -218,8 +222,8 @@ body: |
 ; CHECK-NEXT: RET_ReallyLR implicit $q0
 %0:fpr(s64) = COPY $d0
 %1:fpr(<2 x s64>) = COPY $q1
- %3:gpr(s32) = G_CONSTANT i32 1
- %2:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %1, %0(s64), %3(s32)
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %2:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %1, %0(s64), %3(s64)
 $q0 = COPY %2(<2 x s64>)
 RET_ReallyLR implicit $q0
@@ -244,8 +248,8 @@ body: |
 ; CHECK-NEXT: RET_ReallyLR implicit $q0
 %0:gpr(s64) = COPY $x0
 %1:fpr(<2 x s64>) = COPY $q0
- %3:gpr(s32) = G_CONSTANT i32 0
- %2:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %1, %0(s64), %3(s32)
+ %3:gpr(s64) = G_CONSTANT i64 0
+ %2:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %1, %0(s64), %3(s64)
 $q0 = COPY %2(<2 x s64>)
 RET_ReallyLR implicit $q0
@@ -266,17 +270,17 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.dsub
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub
 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
- ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY]], %subreg.ssub
- ; CHECK-NEXT: [[INSvi32lane:%[0-9]+]]:fpr128 = INSvi32lane [[INSERT_SUBREG]], 1, [[INSERT_SUBREG1]], 0
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.dsub
+ ; CHECK-NEXT: [[INSvi32lane:%[0-9]+]]:fpr128 = INSvi32lane [[INSERT_SUBREG1]], 1, [[INSERT_SUBREG]], 0
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[INSvi32lane]].dsub
 ; CHECK-NEXT: $d0 = COPY [[COPY2]]
 ; CHECK-NEXT: RET_ReallyLR implicit $d0
 %0:fpr(s32) = COPY $s0
 %1:fpr(<2 x s32>) = COPY $d1
- %3:gpr(s32) = G_CONSTANT i32 1
- %2:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %2:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s64)
 $d0 = COPY %2(<2 x s32>)
 RET_ReallyLR implicit $d0
@@ -304,8 +308,8 @@ body: |
 ; CHECK-NEXT: RET_ReallyLR implicit $d0
 %0:gpr(s32) = COPY $w0
 %1:fpr(<2 x s32>) = COPY $d0
- %3:gpr(s32) = G_CONSTANT i32 1
- %2:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s32)
+ %3:gpr(s64) = G_CONSTANT i64 1
+ %2:fpr(<2 x s32>) = G_INSERT_VECTOR_ELT %1, %0(s32), %3(s64)
 $d0 = COPY %2(<2 x s32>)
 RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
index 5c006508d284f..3a17a95ed71da 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for test_bit_sink_operand
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 ; BIT Bitwise Insert if True
 ;
@@ -200,34 +198,63 @@ define <16 x i8> @test_bit_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
 }
 define <4 x i32> @test_bit_sink_operand(<4 x i32> %src, <4 x i32> %dst, <4 x i32> %mask, i32 %scratch) {
-; CHECK-LABEL: test_bit_sink_operand:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sub sp, sp, #32
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: mov w9, wzr
-; CHECK-NEXT: cinc w8, w0, lt
-; CHECK-NEXT: asr w8, w8, #1
-; CHECK-NEXT: .LBB11_1: // %do.body
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: bit v1.16b, v0.16b, v2.16b
-; CHECK-NEXT: add x10, sp, #16
-; CHECK-NEXT: mov x11, sp
-; CHECK-NEXT: bfi x10, x9, #2, #2
-; CHECK-NEXT: bfi x11, x9, #2, #2
-; CHECK-NEXT: add w9, w9, #1
-; CHECK-NEXT: cmp w9, #5
-; CHECK-NEXT: str q1, [sp, #16]
-; CHECK-NEXT: str w0, [x10]
-; CHECK-NEXT: ldr q1, [sp, #16]
-; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: str w8, [x11]
-; CHECK-NEXT: ldr q0, [sp]
-; CHECK-NEXT: b.ne .LBB11_1
-; CHECK-NEXT: // %bb.2: // %do.end
-; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_bit_sink_operand:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: sub sp, sp, #32
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: mov w9, wzr
+; CHECK-SD-NEXT: cinc w8, w0, lt
+; CHECK-SD-NEXT: asr w8, w8, #1
+; CHECK-SD-NEXT: .LBB11_1: // %do.body
+; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-SD-NEXT: bit v1.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: add x10, sp, #16
+; CHECK-SD-NEXT: mov x11, sp
+; CHECK-SD-NEXT: bfi x10, x9, #2, #2
+; CHECK-SD-NEXT: bfi x11, x9, #2, #2
+; CHECK-SD-NEXT: add w9, w9, #1
+; CHECK-SD-NEXT: cmp w9, #5
+; CHECK-SD-NEXT: str q1, [sp, #16]
+; CHECK-SD-NEXT: str w0, [x10]
+; CHECK-SD-NEXT: ldr q1, [sp, #16]
+; CHECK-SD-NEXT: str q0, [sp]
+; CHECK-SD-NEXT: str w8, [x11]
+; CHECK-SD-NEXT: ldr q0, [sp]
+; CHECK-SD-NEXT: b.ne .LBB11_1
+; CHECK-SD-NEXT: // %bb.2: // %do.end
+; CHECK-SD-NEXT: mov v0.16b, v1.16b
+; CHECK-SD-NEXT: add sp, sp, #32
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_bit_sink_operand:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub sp, sp, #32
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 32
+; CHECK-GI-NEXT: asr w9, w0, #31
+; CHECK-GI-NEXT: mov w8, wzr
+; CHECK-GI-NEXT: add x10, sp, #16
+; CHECK-GI-NEXT: mov x11, sp
+; CHECK-GI-NEXT: add w9, w0, w9, lsr #31
+; CHECK-GI-NEXT: asr w9, w9, #1
+; CHECK-GI-NEXT: .LBB11_1: // %do.body
+; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-GI-NEXT: bit v1.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: mov w12, w8
+; CHECK-GI-NEXT: add w8, w8, #1
+; CHECK-GI-NEXT: and x12, x12, #0x3
+; CHECK-GI-NEXT: cmp w8, #5
+; CHECK-GI-NEXT: str q1, [sp, #16]
+; CHECK-GI-NEXT: str w0, [x10, x12, lsl #2]
+; CHECK-GI-NEXT: ldr q1, [sp, #16]
+; CHECK-GI-NEXT: str q0, [sp]
+; CHECK-GI-NEXT: str w9, [x11, x12, lsl #2]
+; CHECK-GI-NEXT: ldr q0, [sp]
+; CHECK-GI-NEXT: b.ne .LBB11_1
+; CHECK-GI-NEXT: // %bb.2: // %do.end
+; CHECK-GI-NEXT: mov v0.16b, v1.16b
+; CHECK-GI-NEXT: add sp, sp, #32
+; CHECK-GI-NEXT: ret
 entry:
 %0 = xor <4 x i32> %mask,
diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
index 59cd87f58ab08..022aaea9ef0cc 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK0
-; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+addr-lsl-fast | FileCheck %s --check-prefixes=CHECK,CHECK3
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+addr-lsl-slow-14 | FileCheck %s --check-prefixes=CHECK,CHECK0
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK3
 %struct.a = type [256 x i16]
 %struct.b = type [256 x i32]
@@ -49,36 +49,20 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
 }
 define i32 @word(ptr %ctx, i32 %xor72) nounwind {
-; CHECK0-LABEL: word:
-; CHECK0: // %bb.0:
-; CHECK0-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK0-NEXT: ubfx x8, x1, #9, #8
-; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK0-NEXT: mov x19, x0
-; CHECK0-NEXT: lsl x21, x8, #2
-; CHECK0-NEXT: ldr w20, [x0, x21]
-; CHECK0-NEXT: bl foo
-; CHECK0-NEXT: mov w0, w20
-; CHECK0-NEXT: str w20, [x19, x21]
-; CHECK0-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK0-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
-; CHECK0-NEXT: ret
-;
-; CHECK3-LABEL: word:
-; CHECK3: // %bb.0:
-; CHECK3-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK3-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK3-NEXT: ubfx x21, x1, #9, #8
-; CHECK3-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK3-NEXT: mov x19, x0
-; CHECK3-NEXT: ldr w20, [x0, x21, lsl #2]
-; CHECK3-NEXT: bl foo
-; CHECK3-NEXT: mov w0, w20
-; CHECK3-NEXT: str w20, [x19, x21, lsl #2]
-; CHECK3-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK3-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
-; CHECK3-NEXT: ret
+; CHECK-LABEL: word:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ubfx x21, x1, #9, #8
+; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: ldr w20, [x0, x21, lsl #2]
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: mov w0, w20
+; CHECK-NEXT: str w20, [x19, x21, lsl #2]
+; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ret
 %shr81 = lshr i32 %xor72, 9
 %conv82 = zext i32 %shr81 to i64
 %idxprom83 = and i64 %conv82, 255
@@ -90,36 +74,20 @@ define i32 @word(ptr %ctx, i32 %xor72) nounwind {
 }
 define i64 @doubleword(ptr %ctx, i32 %xor72) nounwind {
-; CHECK0-LABEL: doubleword:
-; CHECK0: // %bb.0:
-; CHECK0-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK0-NEXT: ubfx x8, x1, #9, #8
-; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK0-NEXT: mov x19, x0
-; CHECK0-NEXT: lsl x21, x8, #3
-; CHECK0-NEXT: ldr x20, [x0, x21]
-; CHECK0-NEXT: bl foo
-; CHECK0-NEXT: mov x0, x20
-; CHECK0-NEXT: str x20, [x19, x21]
-; CHECK0-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK0-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
-; CHECK0-NEXT: ret
-;
-; CHECK3-LABEL: doubleword:
-; CHECK3: // %bb.0:
-; CHECK3-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
-; CHECK3-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK3-NEXT: ubfx x21, x1, #9, #8
-; CHECK3-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK3-NEXT: mov x19, x0
-; CHECK3-NEXT: ldr x20, [x0, x21, lsl #3]
-; CHECK3-NEXT: bl foo
-; CHECK3-NEXT: mov x0, x20
-; CHECK3-NEXT: str x20, [x19, x21, lsl #3]
-; CHECK3-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK3-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
-; CHECK3-NEXT: ret
+; CHECK-LABEL: doubleword:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ubfx x21, x1, #9, #8
+; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: ldr x20, [x0, x21, lsl #3]
+; CHECK-NEXT: bl foo
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: str x20, [x19, x21, lsl #3]
+; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT: ret
 %shr81 = lshr i32 %xor72, 9
 %conv82 = zext i32 %shr81 to i64
 %idxprom83 = and i64 %conv82, 255
@@ -163,20 +131,12 @@ endbb:
 }
 define i64 @gep3(ptr %p, i64 %b) {
-; CHECK0-LABEL: gep3:
-; CHECK0: // %bb.0:
-; CHECK0-NEXT: lsl x9, x1, #3
-; CHECK0-NEXT: mov x8, x0
-; CHECK0-NEXT: ldr x0, [x0, x9]
-; CHECK0-NEXT: str x1, [x8, x9]
-; CHECK0-NEXT: ret
-;
-; CHECK3-LABEL: gep3:
-; CHECK3: // %bb.0:
-; CHECK3-NEXT: mov x8, x0
-; CHECK3-NEXT: ldr x0, [x0, x1, lsl #3]
-; CHECK3-NEXT: str x1, [x8, x1, lsl #3]
-; CHECK3-NEXT: ret
+; CHECK-LABEL: gep3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: ldr x0, [x0, x1, lsl #3]
+; CHECK-NEXT: str x1, [x8, x1, lsl #3]
+; CHECK-NEXT: ret
 %g = getelementptr inbounds i64, ptr %p, i64 %b
 %l = load i64, ptr %g
 store i64 %b, ptr %g
diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
index 017f382774892..f36b8440fe4bf 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll
@@ -12,7 +12,6 @@ define <8 x i16> @haddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0
 ; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
 ; CHECK-NEXT: ret
 %x0 = zext <8 x i8> %a0 to <8 x i16>
 %x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -27,7 +26,6 @@ define <8 x i16> @rhaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0
 ; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
 ; CHECK-NEXT: ret
 %x0 = zext <8 x i8> %a0 to <8 x i16>
 %x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -42,7 +40,6 @@ define <8 x i16> @hadds_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0
 ; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
 ; CHECK-NEXT: ret
 %x0 = zext <8 x i8> %a0 to <8 x i16>
 %x1 = zext <8 x i8> %a1 to <8 x i16>
@@ -57,7 +54,6 @@ define <8 x i16> @shaddu_zext(<8 x i8> %a0, <8 x i8> %a1) {
 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0
 ; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: bic v0.8h, #254, lsl #8
 ; CHECK-NEXT: ret
 %x0 = zext <8 x i8> %a0 to <8 x i16>
 %x1 = zext <8 x i8> %a1 to <8 x i16>
diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index 76790d128d066..f7aa57a068a4c 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -1,14 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
-; CHECK-GI: warning: Instruction selection used fallback path for sminv_v3i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smaxv_v3i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uminv_v3i64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umaxv_v3i64
-
 declare i8 @llvm.vector.reduce.smin.v2i8(<2 x i8>)
 declare i8 @llvm.vector.reduce.smin.v3i8(<3 x i8>)
 declare i8 @llvm.vector.reduce.smin.v4i8(<4 x i8>)
@@ -713,21 +708,39 @@ entry:
 }
 define i64 @sminv_v3i64(<3 x i64> %a) {
-; CHECK-LABEL: sminv_v3i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: mov v2.d[1], x8
-; CHECK-NEXT: cmgt v1.2d, v2.2d, v0.2d
-; CHECK-NEXT: bif v0.16b, v2.16b, v1.16b
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: cmgt d2, d1, d0
-; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sminv_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: mov v2.d[1], x8
+; CHECK-SD-NEXT: cmgt v1.2d, v2.2d, v0.2d
+; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: cmgt d2, d1, d0
+; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sminv_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: mov v2.d[1], x8
+; CHECK-GI-NEXT: cmgt v1.2d, v2.2d, v0.2d
+; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: cmp x8, x9
+; CHECK-GI-NEXT: fcsel d0, d0, d1, lt
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: ret
 entry:
 %arg1 = call i64 @llvm.vector.reduce.smin.v3i64(<3 x i64> %a)
 ret i64 %arg1
@@ -1056,21 +1069,39 @@ entry:
 }
 define i64 @smaxv_v3i64(<3 x i64> %a) {
-; CHECK-LABEL: smaxv_v3i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: mov v2.d[1], x8
-; CHECK-NEXT: cmgt v1.2d, v0.2d, v2.2d
-; CHECK-NEXT: bif v0.16b, v2.16b, v1.16b
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: cmgt d2, d0, d1
-; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: smaxv_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: mov v2.d[1], x8
+; CHECK-SD-NEXT: cmgt v1.2d, v0.2d, v2.2d
+; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: cmgt d2, d0, d1
+; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: smaxv_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: mov v2.d[1], x8
+; CHECK-GI-NEXT: cmgt v1.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: cmp x8, x9
+; CHECK-GI-NEXT: fcsel d0, d0, d1, gt
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: ret
 entry:
 %arg1 = call i64 @llvm.vector.reduce.smax.v3i64(<3 x i64> %a)
 ret i64 %arg1
@@ -1397,21 +1428,39 @@ entry:
 }
 define i64 @uminv_v3i64(<3 x i64> %a) {
-; CHECK-LABEL: uminv_v3i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: mov v2.d[1], x8
-; CHECK-NEXT: cmhi v1.2d, v2.2d, v0.2d
-; CHECK-NEXT: bif v0.16b, v2.16b, v1.16b
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: cmhi d2, d1, d0
-; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: uminv_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: mov v2.d[1], x8
+; CHECK-SD-NEXT: cmhi v1.2d, v2.2d, v0.2d
+; CHECK-SD-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: cmhi d2, d1, d0
+; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: uminv_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: mov v2.d[1], x8
+; CHECK-GI-NEXT: cmhi v1.2d, v2.2d, v0.2d
+; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: cmp x8, x9
+; CHECK-GI-NEXT: fcsel d0, d0, d1, lo
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: ret
 entry:
 %arg1 = call i64 @llvm.vector.reduce.umin.v3i64(<3 x i64> %a)
 ret i64 %arg1
@@ -1736,22 +1785,39 @@ entry:
 }
 define i64 @umaxv_v3i64(<3 x i64> %a) {
-; CHECK-LABEL: umaxv_v3i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: mov v3.16b, v2.16b
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: mov v3.d[1], xzr
-; CHECK-NEXT: cmhi v3.2d, v0.2d, v3.2d
-; CHECK-NEXT: ext v4.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT: bif v0.16b, v2.16b, v3.16b
-; CHECK-NEXT: and v1.8b, v1.8b, v4.8b
-; CHECK-NEXT: cmhi d2, d0, d1
-; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: umaxv_v3i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v3.16b, v2.16b
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: mov v3.d[1], xzr
+; CHECK-SD-NEXT: cmhi v3.2d, v0.2d, v3.2d
+; CHECK-SD-NEXT: ext v4.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT: bif v0.16b, v2.16b, v3.16b
+; CHECK-SD-NEXT: and v1.8b, v1.8b, v4.8b
+; CHECK-SD-NEXT: cmhi d2, d0, d1
+; CHECK-SD-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: umaxv_v3i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: mov v2.d[1], xzr
+; CHECK-GI-NEXT: cmhi v1.2d, v0.2d, v2.2d
+; CHECK-GI-NEXT: bif v0.16b, v2.16b, v1.16b
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: fmov x8, d0
+; CHECK-GI-NEXT: fmov x9, d1
+; CHECK-GI-NEXT: cmp x8, x9
+; CHECK-GI-NEXT: fcsel d0, d0, d1, hi
+; CHECK-GI-NEXT: fmov x0, d0
+; CHECK-GI-NEXT: ret
 entry:
 %arg1 = call i64 @llvm.vector.reduce.umax.v3i64(<3 x i64> %a)
 ret i64 %arg1
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
index 573f921e638cf..e31c9a072dc4b 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
@@ -134,9 +134,8 @@ define void @test8(i64 %a, ptr noalias %src, ptr noalias %dst, i64 %n) {
 ; CHECK-NEXT: b.hs .LBB7_1
 ; CHECK-NEXT: // %bb.3: // %if.then
 ; CHECK-NEXT: // in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT: lsl x10, x8, #3
-; CHECK-NEXT: ldr x11, [x1, x10]
-; CHECK-NEXT: str x11, [x2, x10]
+; CHECK-NEXT: ldr x10, [x1, x8, lsl #3]
+; CHECK-NEXT: str x10, [x2, x8, lsl #3]
 ; CHECK-NEXT: b .LBB7_1
 ; CHECK-NEXT: .LBB7_4: // %exit
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll b/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
index d593272be1aa2..6bcd2f04849b2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
@@ -125,7 +125,7 @@ return: ; preds = %if.end23, %if.then3
 }
 ; CHECK: @test
-; CHECK-NOT: , uxtw #2]
+; CHECK: , uxtw #2]
 define i32 @test(ptr %array, i8 zeroext %c, i32 %arg) {
 entry:
 %conv = zext i8 %c to i32
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index d282bee81827f..749d6071c98d7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -4,9 +4,6 @@
 ; CHECK-GI: warning: Instruction selection used fallback path for test_bitcastv2f32tov1f64
 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_bitcastv1f64tov2f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_extracts_inserts_varidx_insert
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_concat_v1i32_undef
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_concat_diff_v1i32_v1i32
 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
 ; CHECK-LABEL: ins16bw:
@@ -96,36 +93,22 @@ define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
 }
 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
-; CHECK-SD-LABEL: ins8h8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov v1.h[7], v0.h[2]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins8h8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov v0.16b, v1.16b
-; CHECK-GI-NEXT: mov v0.h[7], v2.h[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins8h8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.h[7], v0.h[2]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
 ret <8 x i16> %tmp4
 }
 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
-; CHECK-SD-LABEL: ins4s4:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov v1.s[1], v0.s[2]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins4s4:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.16b, v1.16b
-; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins4s4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.s[1], v0.s[2]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
 ret <4 x i32> %tmp4
@@ -143,18 +126,11 @@ define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
 }
 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
-; CHECK-SD-LABEL: ins4f4:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: mov v1.s[1], v0.s[2]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins4f4:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: mov v0.16b, v1.16b
-; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins4f4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov v1.s[1], v0.s[2]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
 %tmp3 = extractelement <4 x float> %tmp1, i32 2
 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
 ret <4 x float> %tmp4
@@ -192,40 +168,24 @@ define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
 }
 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
-; CHECK-SD-LABEL: ins4h8:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov v1.h[7], v0.h[2]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins4h8:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: mov v0.16b, v1.16b
-; CHECK-GI-NEXT: mov v0.h[7], v2.h[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins4h8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v1.h[7], v0.h[2]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
 ret <8 x i16> %tmp4
 }
 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
-; CHECK-SD-LABEL: ins2s4:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins2s4:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov s2, v0.s[1]
-; CHECK-GI-NEXT: mov v0.16b, v1.16b
-; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins2s4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v1.s[1], v0.s[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
 %tmp3 = extractelement <2 x i32> %tmp1, i32 1
 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
 ret <4 x i32> %tmp4
@@ -244,20 +204,12 @@ define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
 }
 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
-; CHECK-SD-LABEL: ins2f4:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov v1.s[1], v0.s[1]
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins2f4:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov s2, v0.s[1]
-; CHECK-GI-NEXT: mov v0.16b, v1.16b
-; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins2f4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v1.s[1], v0.s[1]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ret
 %tmp3 = extractelement <2 x float> %tmp1, i32 1
 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
 ret <4 x float> %tmp4
@@ -307,40 +259,24 @@ define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
 }
 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
-; CHECK-SD-LABEL: ins8h4:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v1.h[3], v0.h[2]
-; CHECK-SD-NEXT: fmov d0, d1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins8h4:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov d0, d1
-; CHECK-GI-NEXT: mov v0.h[3], v2.h[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins8h4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov v1.h[3], v0.h[2]
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
 %tmp3 = extractelement <8 x i16> %tmp1, i32 2
 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
 ret <4 x i16> %tmp4
 }
 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
-; CHECK-SD-LABEL: ins4s2:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v1.s[1], v0.s[2]
-; CHECK-SD-NEXT: fmov d0, d1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins4s2:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: fmov d0, d1
-; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins4s2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov v1.s[1], v0.s[2]
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
 %tmp3 = extractelement <4 x i32> %tmp1, i32 2
 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
 ret <2 x i32> %tmp4
@@ -357,20 +293,12 @@ define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
 }
 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
-; CHECK-SD-LABEL: ins4f2:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: mov v1.s[1], v0.s[2]
-; CHECK-SD-NEXT: fmov d0, d1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins4f2:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: mov s2, v0.s[2]
-; CHECK-GI-NEXT: fmov d0, d1
-; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins4f2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov v1.s[1], v0.s[2]
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
 %tmp3 = extractelement <4 x float> %tmp1, i32 2
 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
 ret <2 x float> %tmp4
@@ -415,22 +343,13 @@ define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
 }
 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
-; CHECK-SD-LABEL: ins4h4:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: mov v1.h[3], v0.h[2]
-; CHECK-SD-NEXT: fmov d0, d1
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: ins4h4:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-NEXT: mov h2, v0.h[2]
-; CHECK-GI-NEXT: fmov d0, d1
-; CHECK-GI-NEXT: mov v0.h[3], v2.h[0]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: ins4h4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v1.h[3], v0.h[2]
+; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: ret
 %tmp3 = extractelement <4 x i16> %tmp1, i32 2
 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
 ret <4 x i16> %tmp4
@@ -1516,21 +1435,38 @@ define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
 }
 define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
-; CHECK-LABEL: test_extracts_inserts_varidx_insert:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: add x8, sp, #8
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: bfi x8, x0, #1, #2
-; CHECK-NEXT: str h0, [x8]
-; CHECK-NEXT: ldr d1, [sp, #8]
-; CHECK-NEXT: mov v1.h[1], v0.h[1]
-; CHECK-NEXT: mov v1.h[2], v0.h[2]
-; CHECK-NEXT: mov v1.h[3], v0.h[3]
-; CHECK-NEXT: fmov d0, d1
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_extracts_inserts_varidx_insert:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #16
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: add x8, sp, #8
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT: bfi x8, x0, #1, #2
+; CHECK-SD-NEXT: str h0, [x8]
+; CHECK-SD-NEXT: ldr d1, [sp, #8]
+; CHECK-SD-NEXT: mov v1.h[1], v0.h[1]
+; CHECK-SD-NEXT: mov v1.h[2], v0.h[2]
+; CHECK-SD-NEXT: mov v1.h[3], v0.h[3]
+; CHECK-SD-NEXT: fmov d0, d1
+; CHECK-SD-NEXT: add sp, sp, #16
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_extracts_inserts_varidx_insert:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #16
+; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
+; CHECK-GI-NEXT: mov w8, w0
+; CHECK-GI-NEXT: add x9, sp, #8
+; CHECK-GI-NEXT: str d0, [sp, #8]
+; CHECK-GI-NEXT: and x8, x8, #0x3
+; CHECK-GI-NEXT: str h0, [x9, x8, lsl #1]
+; CHECK-GI-NEXT: ldr d1, [sp, #8]
+; CHECK-GI-NEXT: mov v1.h[1], v0.h[1]
+; CHECK-GI-NEXT: mov v1.h[2], v0.h[2]
+; CHECK-GI-NEXT: mov v1.h[3], v0.h[3]
+; CHECK-GI-NEXT: fmov d0, d1
+; CHECK-GI-NEXT: add sp, sp, #16
+; CHECK-GI-NEXT: ret
 %tmp = extractelement <8 x i16> %x, i32 0
 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
 %tmp3 = extractelement <8 x i16> %x, i32 1
diff --git a/llvm/test/CodeGen/AArch64/arm64-platform-reg.ll b/llvm/test/CodeGen/AArch64/arm64-platform-reg.ll
index c598306c2de30..3df2ef7aa59fc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-platform-reg.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-platform-reg.ll
@@ -34,7 +34,6 @@
 ; RUN: llc -mtriple=arm64-linux-gnu -mattr=+reserve-x26 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVE,CHECK-RESERVE-X26
 ; RUN: llc -mtriple=arm64-linux-gnu -mattr=+reserve-x27 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVE,CHECK-RESERVE-X27
 ; RUN: llc -mtriple=arm64-linux-gnu -mattr=+reserve-x28 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVE,CHECK-RESERVE-X28
-; RUN: llc -mtriple=arm64-linux-gnu -mattr=+reserve-x30 -o - %s | FileCheck %s --check-prefixes=CHECK-RESERVE,CHECK-RESERVE-X30
 ; Test multiple of reserve-x# options together.
 ; RUN: llc -mtriple=arm64-linux-gnu \
@@ -73,7 +72,6 @@
 ; RUN: -mattr=+reserve-x26 \
 ; RUN: -mattr=+reserve-x27 \
 ; RUN: -mattr=+reserve-x28 \
-; RUN: -mattr=+reserve-x30 \
 ; RUN: -reserve-regs-for-regalloc=X8,X16,X17,X19 \
 ; RUN: -o - %s | FileCheck %s \
 ; RUN: --check-prefix=CHECK-RESERVE \
@@ -104,8 +102,7 @@
 ; RUN: --check-prefix=CHECK-RESERVE-X25 \
 ; RUN: --check-prefix=CHECK-RESERVE-X26 \
 ; RUN: --check-prefix=CHECK-RESERVE-X27 \
-; RUN: --check-prefix=CHECK-RESERVE-X28 \
-; RUN: --check-prefix=CHECK-RESERVE-X30
+; RUN: --check-prefix=CHECK-RESERVE-X28
 ; x18 is reserved as a platform register on Darwin but not on other
 ; systems. Create loads of register pressure and make sure this is respected.
@@ -152,7 +149,6 @@ define void @keep_live() {
 ; CHECK-RESERVE-X26-NOT: ldr x26
 ; CHECK-RESERVE-X27-NOT: ldr x27
 ; CHECK-RESERVE-X28-NOT: ldr x28
-; CHECK-RESERVE-X30-NOT: ldr x30
 ; CHECK-RESERVE: Spill
 ; CHECK-RESERVE-NOT: ldr fp
 ; CHECK-RESERVE-X1-NOT: ldr x1,
@@ -182,7 +178,6 @@ define void @keep_live() {
 ; CHECK-RESERVE-X26-NOT: ldr x26
 ; CHECK-RESERVE-X27-NOT: ldr x27
 ; CHECK-RESERVE-X28-NOT: ldr x28
-; CHECK-RESERVE-X30-NOT: ldr x30
 ; CHECK-RESERVE: ret
 ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
index 3542b26b53539..5b055a4eb37a4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
@@ -201,11 +201,10 @@ define void @fct1_64x1(ptr nocapture %array, i64 %offset) nounwind ssp {
 ; CHECK-LABEL: fct1_64x1:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: adrp x8, :got:globalArray64x1
-; CHECK-NEXT: lsl x9, x1, #3
 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x1]
-; CHECK-NEXT: ldr d0, [x0, x9]
+; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
 ; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: str d0, [x8, x9]
+; CHECK-NEXT: str d0, [x8, x1, lsl #3]
 ; CHECK-NEXT: ret
 entry:
 %arrayidx = getelementptr inbounds <1 x i64>, ptr %array, i64 %offset
@@ -238,11 +237,10 @@ define void @fct1_32x2(ptr nocapture %array, i64 %offset) nounwind ssp {
 ; CHECK-LABEL: fct1_32x2:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: adrp x8, :got:globalArray32x2
-; CHECK-NEXT: lsl x9, x1, #3
 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x2]
-; CHECK-NEXT: ldr d0, [x0, x9]
+; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
 ; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: str d0, [x8, x9]
+; CHECK-NEXT: str d0, [x8, x1, lsl #3]
 ; CHECK-NEXT: ret
 entry:
 %arrayidx = getelementptr inbounds <2 x i32>, ptr %array, i64 %offset
@@ -275,11 +273,10 @@ define void @fct1_16x4(ptr nocapture %array, i64 %offset) nounwind ssp {
 ; CHECK-LABEL: fct1_16x4:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: adrp x8, :got:globalArray16x4
-; CHECK-NEXT: lsl x9, x1, #3
 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x4]
-; CHECK-NEXT: ldr d0, [x0, x9]
+; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
 ; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: str d0, [x8, x9]
+; CHECK-NEXT: str d0, [x8, x1, lsl #3]
 ; CHECK-NEXT: ret
 entry:
 %arrayidx = getelementptr inbounds <4 x i16>, ptr %array, i64 %offset
@@ -312,11 +309,10 @@ define void @fct1_8x8(ptr nocapture %array, i64 %offset) nounwind ssp {
 ; CHECK-LABEL: fct1_8x8:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: adrp x8, :got:globalArray8x8
-; CHECK-NEXT: lsl x9, x1, #3
 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray8x8]
-; CHECK-NEXT: ldr d0, [x0, x9]
+; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
 ; CHECK-NEXT: ldr x8, [x8]
-; CHECK-NEXT: str d0, [x8, x9]
+; CHECK-NEXT: str d0, [x8, x1, lsl #3]
 ; CHECK-NEXT: ret
 entry:
 %arrayidx = getelementptr inbounds <8 x i8>, ptr %array, i64 %offset
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll
new file mode 100644
index 0000000000000..a7539ac3cce80
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd-fp-vector.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefixes=CHECK,NOLSE %s
+; RUN: llc -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck -check-prefixes=CHECK,LSE %s
+
+define <2 x half> @test_atomicrmw_fadd_v2f16_align4(ptr addrspace(1) %ptr, <2 x half> %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_v2f16_align4:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: fcvtl v1.4s, v0.4h
+; NOLSE-NEXT: ldr s0, [x0]
+; NOLSE-NEXT: b .LBB0_2
+; NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1
+; NOLSE-NEXT: fmov s0, w10
+; NOLSE-NEXT: cmp w10, w9
+; NOLSE-NEXT: b.eq .LBB0_5
+; NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Loop Header: Depth=1
+; NOLSE-NEXT: // Child Loop BB0_3 Depth 2
+; NOLSE-NEXT: fcvtl v2.4s, v0.4h
+; NOLSE-NEXT: fmov w9, s0
+; NOLSE-NEXT: fadd v2.4s, v2.4s, v1.4s
+; NOLSE-NEXT: fcvtn v2.4h, v2.4s
+; NOLSE-NEXT: fmov w8, s2
+; NOLSE-NEXT: .LBB0_3: // %atomicrmw.start
+; NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
+; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT: ldaxr w10, [x0]
+; NOLSE-NEXT: cmp w10, w9
+; NOLSE-NEXT: b.ne .LBB0_1
+; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
+; NOLSE-NEXT: stlxr wzr, w8, [x0]
+; NOLSE-NEXT: cbnz wzr, .LBB0_3
+; NOLSE-NEXT: b .LBB0_1
+; NOLSE-NEXT: .LBB0_5: // %atomicrmw.end
+; NOLSE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NOLSE-NEXT: ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_v2f16_align4:
+; LSE: // %bb.0:
+; LSE-NEXT: fcvtl v1.4s, v0.4h
+; LSE-NEXT: ldr s0, [x0]
+; LSE-NEXT: .LBB0_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: fcvtl v2.4s, v0.4h
+; LSE-NEXT: fmov w8, s0
+; LSE-NEXT: mov w10, w8
+; LSE-NEXT: fadd v2.4s, v2.4s, v1.4s
+; LSE-NEXT: fcvtn v2.4h, v2.4s
+; LSE-NEXT: fmov w9, s2
+; LSE-NEXT: casal w10, w9, [x0]
+; LSE-NEXT: fmov s0, w10
+; LSE-NEXT: cmp w10, w8
+; LSE-NEXT: b.ne .LBB0_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; LSE-NEXT: ret
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value seq_cst, align 4
+ ret <2 x half> %res
+}
+
+define <2 x float> @test_atomicrmw_fadd_v2f32_align8(ptr addrspace(1) %ptr, <2 x float> %value) #0 {
+; NOLSE-LABEL: test_atomicrmw_fadd_v2f32_align8:
+; NOLSE: // %bb.0:
+; NOLSE-NEXT: ldr d1, [x0]
+; NOLSE-NEXT: b .LBB1_2
+; NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1
+; NOLSE-NEXT: fmov d1, x10
+; NOLSE-NEXT: cmp x10, x9
+; NOLSE-NEXT: b.eq .LBB1_5
+; NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
+; NOLSE-NEXT: // =>This Loop Header: Depth=1
+; NOLSE-NEXT: // Child Loop BB1_3 Depth 2
+; NOLSE-NEXT: fadd v2.2s, v1.2s, v0.2s
+; NOLSE-NEXT: fmov x9, d1
+; NOLSE-NEXT: fmov x8, d2
+; NOLSE-NEXT: .LBB1_3: // %atomicrmw.start
+; NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
+; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
+; NOLSE-NEXT: ldaxr x10, [x0]
+; NOLSE-NEXT: cmp x10, x9
+; NOLSE-NEXT: b.ne .LBB1_1
+; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
+; NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
+; NOLSE-NEXT: stlxr wzr, x8, [x0]
+; NOLSE-NEXT: cbnz wzr, .LBB1_3
+; NOLSE-NEXT: b .LBB1_1
+; NOLSE-NEXT: .LBB1_5: // %atomicrmw.end
+; NOLSE-NEXT: fmov d0, d1
+; NOLSE-NEXT: ret
+;
+; LSE-LABEL: test_atomicrmw_fadd_v2f32_align8:
+; LSE: // %bb.0:
+; LSE-NEXT: ldr d1, [x0]
+; LSE-NEXT: .LBB1_1: // %atomicrmw.start
+; LSE-NEXT: // =>This Inner Loop Header: Depth=1
+; LSE-NEXT: fadd v2.2s, v1.2s, v0.2s
+; LSE-NEXT: fmov x8, d1
+; LSE-NEXT: mov x10, x8
+; LSE-NEXT: fmov x9, d2
+; LSE-NEXT: casal x10, x9, [x0]
+; LSE-NEXT: fmov d1, x10
+; LSE-NEXT: cmp x10, x8
+; LSE-NEXT: b.ne .LBB1_1
+; LSE-NEXT: // %bb.2: // %atomicrmw.end
+; LSE-NEXT: fmov d0, d1
+; LSE-NEXT: ret
+ %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x float> %value seq_cst, align 8
+ ret <2 x float> %res
+}
+
+attributes #0 = { nounwind }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll b/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll
index 8f195531927e0..634d1b90ff903 100644
--- a/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll
+++ b/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll
@@ -82,13 +82,12 @@ define void @avoid_promotion_2_and(ptr nocapture noundef %arg) {
 ; CHECK-NEXT: eor w10, w10, w11
 ; CHECK-NEXT: ldur w11, [x8, #-24]
 ; CHECK-NEXT: and w10, w10, w14
-; CHECK-NEXT: ldp x15, x14, [x8, #-16]
-; CHECK-NEXT: ubfiz x13, x10, #1, #32
+; CHECK-NEXT: ldp x14, x13, [x8, #-16]
 ; CHECK-NEXT: str w10, [x8]
-; CHECK-NEXT: and w10, w11, w12
-; CHECK-NEXT: ldrh w11, [x14, x13]
-; CHECK-NEXT: strh w11, [x15, w10, uxtw #1]
-; CHECK-NEXT: strh w12, [x14, x13]
+; CHECK-NEXT: and w11, w11, w12
+; CHECK-NEXT: ldrh w15, [x13, w10, uxtw #1]
+; CHECK-NEXT: strh w15, [x14, w11, uxtw #1]
+; CHECK-NEXT: strh w12, [x13, w10, uxtw #1]
 ; CHECK-NEXT: b LBB1_1
 ; CHECK-NEXT: LBB1_4: ; %exit
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll b/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll
index b5c21044266c4..50c70c5676c4a 100644
--- a/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll
+++ b/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll
@@ -7,7 +7,7 @@ target triple = "aarch64-unknown-linux"
 define void @f0(ptr %a, i64 %n) {
 ; CHECK-LABEL: f0:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
 ; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT: .cfi_def_cfa_offset 48
@@ -15,7 +15,6 @@ define void @f0(ptr %a, i64 %n) {
 ; CHECK-NEXT: .cfi_offset w20, -16
 ; CHECK-NEXT: .cfi_offset w21, -24
 ; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w23, -40
 ; CHECK-NEXT: .cfi_offset w30, -48
 ; CHECK-NEXT: mov x21, #1 // =0x1
 ; CHECK-NEXT: mov x19, x1
@@ -27,18 +26,17 @@ define void @f0(ptr %a, i64 %n) {
 ; CHECK-NEXT: b.ge .LBB0_2
 ; CHECK-NEXT: .LBB0_1: // %loop.body
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lsl x23, x22, #2
+; CHECK-NEXT: ldr w0, [x20, x22, lsl #2]
 ; CHECK-NEXT: mov x1, x21
-; CHECK-NEXT: ldr w0, [x20, x23]
 ; CHECK-NEXT: bl g
-; CHECK-NEXT: str w0, [x20, x23]
+; CHECK-NEXT: str w0, [x20, x22, lsl #2]
 ; CHECK-NEXT: add x22, x22, #1
 ; CHECK-NEXT: cmp x22, x19
 ; CHECK-NEXT: b.lt .LBB0_1
 ; CHECK-NEXT: .LBB0_2: // %exit
 ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 entry:
 br label %loop
@@ -64,15 +62,13 @@ exit:
 define void @f1(ptr %a, i64 %n) {
 ; CHECK-LABEL: f1:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
 ; CHECK-NEXT: .cfi_offset w19, -8
 ; CHECK-NEXT: .cfi_offset w20, -16
 ; CHECK-NEXT: .cfi_offset w21, -24
-; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w30, -48
+; CHECK-NEXT: .cfi_offset w30, -32
 ; CHECK-NEXT: mov x19, x1
 ; CHECK-NEXT: mov x20, x0
 ; CHECK-NEXT: mov x21, xzr
@@ -80,19 +76,17 @@ define void @f1(ptr %a, i64 %n) {
 ; CHECK-NEXT: b.ge .LBB1_2
 ; CHECK-NEXT: .LBB1_1: // %loop.body
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lsl x22, x21, #2
+; CHECK-NEXT: ldr w0, [x20, x21, lsl #2]
 ; CHECK-NEXT: mov x1, #1450704896 // =0x56780000
 ; CHECK-NEXT: movk x1, #4660, lsl #48
-; CHECK-NEXT: ldr w0, [x20, x22]
 ; CHECK-NEXT: bl g
-; CHECK-NEXT: str w0, [x20, x22]
+; CHECK-NEXT: str w0, [x20, x21, lsl #2]
 ; CHECK-NEXT: add x21, x21, #1
 ; CHECK-NEXT: cmp x21, x19
 ; CHECK-NEXT: b.lt .LBB1_1
 ; CHECK-NEXT: .LBB1_2: // %exit
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; CHECK-NEXT: ret
 entry:
 br label %loop
diff --git a/llvm/test/CodeGen/AArch64/elf-globals-static.ll b/llvm/test/CodeGen/AArch64/elf-globals-static.ll
index 86b7c401b9a2e..855312f8927c7 100644
--- a/llvm/test/CodeGen/AArch64/elf-globals-static.ll
+++ b/llvm/test/CodeGen/AArch64/elf-globals-static.ll
@@ -15,11 +15,6 @@ define i8 @test_i8(i8 %new) {
 ; CHECK: ldrb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8]
 ; CHECK: strb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8]
-; CHECK-PIC-LABEL: test_i8:
-; CHECK-PIC: adrp x[[HIREG:[0-9]+]], :got:var8
-; CHECK-PIC: ldr x[[VAR_ADDR:[0-9]+]], [x[[HIREG]], :got_lo12:var8]
-; CHECK-PIC: ldrb {{w[0-9]+}}, [x[[VAR_ADDR]]]
-
 ; CHECK-FAST-LABEL: test_i8:
 ; CHECK-FAST: adrp x[[HIREG:[0-9]+]], var8
 ; CHECK-FAST: ldrb {{w[0-9]+}}, [x[[HIREG]], :lo12:var8]
diff --git a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir
index 92fb053b0db72..2be7aba2a3df8 100644
--- a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir
+++ b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir
@@ -91,10 +91,10 @@ body: |
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[LOADgot:%[0-9]+]]:gpr64common = LOADgot target-flags(aarch64-got) @c
 ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c)
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2621450 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3)
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY %2
 ; CHECK-NEXT: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c)
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2621450 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3)
 ; CHECK-NEXT: [[FNEGDr:%[0-9]+]]:fpr64 = FNEGDr %2
 ; CHECK-NEXT: nofpexcept FCMPDrr %4, killed [[FNEGDr]], implicit-def $nzcv, implicit $fpcr
 ; CHECK-NEXT: Bcc 1, %bb.2, implicit $nzcv
@@ -111,10 +111,10 @@ body: |
 %6:gpr64common = LOADgot target-flags(aarch64-got) @c
 %3:fpr64 = LDRDui %6, 0 :: (dereferenceable load (s64) from @c)
- INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, %3(tied-def 3)
+ INLINEASM &"", 1 /* sideeffect attdialect */, 2621450 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, %3(tied-def 3)
 %0:fpr64 = COPY %2
 %5:fpr64 = LDRDui %6, 0 :: (dereferenceable load (s64) from @c)
- INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, %5(tied-def 3)
+ INLINEASM &"", 1 /* sideeffect attdialect */, 2621450 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, %5(tied-def 3)
 %7:fpr64 = FNEGDr %2
 nofpexcept FCMPDrr %4, killed %7, implicit-def $nzcv, implicit $fpcr
 Bcc 1, %bb.2, implicit $nzcv
diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll
index d4ea143a3d847..b87157a183835 100644
--- a/llvm/test/CodeGen/AArch64/extract-bits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-bits.ll
@@ -972,10 +972,9 @@ define void @pr38938(ptr %a0, ptr %a1) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr x8, [x1]
 ; CHECK-NEXT: ubfx x8, x8, #21, #10
-; CHECK-NEXT: lsl x8, x8, #2
-; CHECK-NEXT: ldr w9, [x0, x8]
+; CHECK-NEXT: ldr w9, [x0, x8, lsl #2]
 ; CHECK-NEXT: add w9, w9, #1
-; CHECK-NEXT: str w9, [x0, x8]
+; CHECK-NEXT: str w9, [x0, x8, lsl #2]
 ; CHECK-NEXT: ret
 %tmp = load i64, ptr %a1, align 8
 %tmp1 = lshr i64 %tmp, 21
diff --git a/llvm/test/CodeGen/AArch64/func-sanitizer.ll b/llvm/test/CodeGen/AArch64/func-sanitizer.ll
index 89f23e7ed80e8..de83d70a5784a 100644
--- a/llvm/test/CodeGen/AArch64/func-sanitizer.ll
a/llvm/test/CodeGen/AArch64/func-sanitizer.ll +++ b/llvm/test/CodeGen/AArch64/func-sanitizer.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=MACHO ; CHECK-LABEL: .type _Z3funv,@function ; CHECK-NEXT: .word 3238382334 // 0xc105cafe @@ -7,6 +8,14 @@ ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ret +; MACHO: ltmp0: +; MACHO-NEXT: .long 3238382334 ; 0xc105cafe +; MACHO-NEXT: .long 42 ; 0x2a +; MACHO-NEXT: .alt_entry __Z3funv +; MACHO-NEXT: __Z3funv: +; MACHO-NEXT: ; %bb.0: +; MACHO-NEXT: ret + define dso_local void @_Z3funv() nounwind !func_sanitize !0 { ret void } diff --git a/llvm/test/CodeGen/AArch64/insertextract.ll b/llvm/test/CodeGen/AArch64/insertextract.ll index 5c2dd761bdc0d..c6b2d07231bf8 100644 --- a/llvm/test/CodeGen/AArch64/insertextract.ll +++ b/llvm/test/CodeGen/AArch64/insertextract.ll @@ -1,44 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for insert_v2f64_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v3f64_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4f64_0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4f64_2 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4f64_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v2f32_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v3f32_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4f32_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v8f32_0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v8f32_2 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v8f32_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4f16_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v8f16_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v16f16_0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v16f16_2 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v16f16_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v8i8_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v16i8_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v32i8_0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v32i8_2 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v32i8_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4i16_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v8i16_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v16i16_0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v16i16_2 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v16i16_c -; 
CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v2i32_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v3i32_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4i32_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v8i32_0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v8i32_2 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v8i32_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v2i64_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v3i64_c -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4i64_0 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4i64_2 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for insert_v4i64_c +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI define <2 x double> @insert_v2f64_0(<2 x double> %a, double %b, i32 %c) { ; CHECK-LABEL: insert_v2f64_0: @@ -63,17 +25,29 @@ entry: } define <2 x double> @insert_v2f64_c(<2 x double> %a, double %b, i32 %c) { -; CHECK-LABEL: insert_v2f64_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: bfi x8, x0, #3, #1 -; CHECK-NEXT: str d1, [x8] -; CHECK-NEXT: ldr q0, [sp], #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v2f64_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SD-NEXT: str q0, [sp] +; CHECK-SD-NEXT: bfi x8, x0, #3, #1 +; CHECK-SD-NEXT: str d1, [x8] +; CHECK-SD-NEXT: ldr q0, [sp], #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v2f64_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w0 +; CHECK-GI-NEXT: mov x8, sp +; CHECK-GI-NEXT: str q0, [sp] +; CHECK-GI-NEXT: and x9, x9, #0x1 +; CHECK-GI-NEXT: str d1, [x8, x9, lsl #3] +; CHECK-GI-NEXT: ldr q0, [sp], #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <2 x double> %a, double %b, i32 %c ret <2 x double> %d @@ -111,25 +85,51 @@ entry: } define <3 x double> @insert_v3f64_c(<3 x double> %a, double %b, i32 %c) { -; CHECK-LABEL: insert_v3f64_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: stp q0, q2, [sp, #-32]! 
-; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: and x9, x0, #0x3 -; CHECK-NEXT: str d3, [x8, x9, lsl #3] -; CHECK-NEXT: ldr q0, [sp] -; CHECK-NEXT: ldr d2, [sp, #16] -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: add sp, sp, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v3f64_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: stp q0, q2, [sp, #-32]! +; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: and x9, x0, #0x3 +; CHECK-SD-NEXT: str d3, [x8, x9, lsl #3] +; CHECK-SD-NEXT: ldr q0, [sp] +; CHECK-SD-NEXT: ldr d2, [sp, #16] +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: add sp, sp, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v3f64_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-GI-NEXT: sub x9, sp, #48 +; CHECK-GI-NEXT: mov x29, sp +; CHECK-GI-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-GI-NEXT: .cfi_def_cfa w29, 16 +; CHECK-GI-NEXT: .cfi_offset w30, -8 +; CHECK-GI-NEXT: .cfi_offset w29, -16 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov w8, w0 +; CHECK-GI-NEXT: mov x9, sp +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: and x8, x8, #0x3 +; CHECK-GI-NEXT: stp q0, q2, [sp] +; CHECK-GI-NEXT: str d3, [x9, x8, lsl #3] +; CHECK-GI-NEXT: ldp q0, q2, [sp] +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: mov sp, x29 +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %d = insertelement <3 x double> %a, double %b, i32 %c ret <3 x double> %d @@ -158,16 +158,35 @@ entry: } define <4 x double> @insert_v4f64_c(<4 x double> %a, double %b, i32 %c) { -; CHECK-LABEL: insert_v4f64_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: stp q0, q1, [sp, #-32]! -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: and x8, x0, #0x3 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: str d2, [x9, x8, lsl #3] -; CHECK-NEXT: ldp q0, q1, [sp], #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v4f64_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]! +; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SD-NEXT: and x8, x0, #0x3 +; CHECK-SD-NEXT: mov x9, sp +; CHECK-SD-NEXT: str d2, [x9, x8, lsl #3] +; CHECK-SD-NEXT: ldp q0, q1, [sp], #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v4f64_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-GI-NEXT: sub x9, sp, #48 +; CHECK-GI-NEXT: mov x29, sp +; CHECK-GI-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-GI-NEXT: .cfi_def_cfa w29, 16 +; CHECK-GI-NEXT: .cfi_offset w30, -8 +; CHECK-GI-NEXT: .cfi_offset w29, -16 +; CHECK-GI-NEXT: mov w8, w0 +; CHECK-GI-NEXT: mov x9, sp +; CHECK-GI-NEXT: stp q0, q1, [sp] +; CHECK-GI-NEXT: and x8, x8, #0x3 +; CHECK-GI-NEXT: str d2, [x9, x8, lsl #3] +; CHECK-GI-NEXT: ldp q0, q1, [sp] +; CHECK-GI-NEXT: mov sp, x29 +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %d = insertelement <4 x double> %a, double %b, i32 %c ret <4 x double> %d @@ -200,18 +219,31 @@ entry: } define <2 x float> @insert_v2f32_c(<2 x float> %a, float %b, i32 %c) { -; CHECK-LABEL: insert_v2f32_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: add x8, sp, #8 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: str d0, [sp, #8] -; CHECK-NEXT: bfi x8, x0, #2, #1 -; CHECK-NEXT: str s1, [x8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v2f32_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: add x8, sp, #8 +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SD-NEXT: str d0, [sp, #8] +; CHECK-SD-NEXT: bfi x8, x0, #2, #1 +; CHECK-SD-NEXT: str s1, [x8] +; CHECK-SD-NEXT: ldr d0, [sp, #8] +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v2f32_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w0 +; CHECK-GI-NEXT: add x8, sp, #8 +; CHECK-GI-NEXT: str d0, [sp, #8] +; CHECK-GI-NEXT: and x9, x9, #0x1 +; CHECK-GI-NEXT: str s1, [x8, x9, lsl #2] +; CHECK-GI-NEXT: ldr d0, [sp, #8] +; CHECK-GI-NEXT: add sp, sp, #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <2 x float> %a, float %b, i32 %c ret <2 x float> %d @@ -260,17 +292,29 @@ entry: } define <3 x float> @insert_v3f32_c(<3 x float> %a, float %b, i32 %c) { -; CHECK-LABEL: insert_v3f32_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: bfi x8, x0, #2, #2 -; CHECK-NEXT: str s1, [x8] -; CHECK-NEXT: ldr q0, [sp], #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v3f32_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SD-NEXT: str q0, [sp] +; CHECK-SD-NEXT: bfi x8, x0, #2, #2 +; CHECK-SD-NEXT: str s1, [x8] +; CHECK-SD-NEXT: ldr q0, [sp], #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v3f32_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w0 +; CHECK-GI-NEXT: mov x8, sp +; CHECK-GI-NEXT: str q0, [sp] +; CHECK-GI-NEXT: and x9, x9, #0x3 +; CHECK-GI-NEXT: str s1, [x8, x9, lsl #2] +; CHECK-GI-NEXT: ldr q0, [sp], #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <3 x float> %a, float %b, i32 %c ret <3 x float> %d @@ -299,17 +343,29 @@ entry: } define <4 x float> @insert_v4f32_c(<4 x float> %a, float %b, i32 %c) { -; CHECK-LABEL: insert_v4f32_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; 
CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: bfi x8, x0, #2, #2 -; CHECK-NEXT: str s1, [x8] -; CHECK-NEXT: ldr q0, [sp], #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v4f32_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SD-NEXT: str q0, [sp] +; CHECK-SD-NEXT: bfi x8, x0, #2, #2 +; CHECK-SD-NEXT: str s1, [x8] +; CHECK-SD-NEXT: ldr q0, [sp], #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v4f32_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w0 +; CHECK-GI-NEXT: mov x8, sp +; CHECK-GI-NEXT: str q0, [sp] +; CHECK-GI-NEXT: and x9, x9, #0x3 +; CHECK-GI-NEXT: str s1, [x8, x9, lsl #2] +; CHECK-GI-NEXT: ldr q0, [sp], #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <4 x float> %a, float %b, i32 %c ret <4 x float> %d @@ -338,16 +394,35 @@ entry: } define <8 x float> @insert_v8f32_c(<8 x float> %a, float %b, i32 %c) { -; CHECK-LABEL: insert_v8f32_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: stp q0, q1, [sp, #-32]! -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: and x8, x0, #0x7 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: str s2, [x9, x8, lsl #2] -; CHECK-NEXT: ldp q0, q1, [sp], #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v8f32_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]! +; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SD-NEXT: and x8, x0, #0x7 +; CHECK-SD-NEXT: mov x9, sp +; CHECK-SD-NEXT: str s2, [x9, x8, lsl #2] +; CHECK-SD-NEXT: ldp q0, q1, [sp], #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v8f32_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-GI-NEXT: sub x9, sp, #48 +; CHECK-GI-NEXT: mov x29, sp +; CHECK-GI-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-GI-NEXT: .cfi_def_cfa w29, 16 +; CHECK-GI-NEXT: .cfi_offset w30, -8 +; CHECK-GI-NEXT: .cfi_offset w29, -16 +; CHECK-GI-NEXT: mov w8, w0 +; CHECK-GI-NEXT: mov x9, sp +; CHECK-GI-NEXT: stp q0, q1, [sp] +; CHECK-GI-NEXT: and x8, x8, #0x7 +; CHECK-GI-NEXT: str s2, [x9, x8, lsl #2] +; CHECK-GI-NEXT: ldp q0, q1, [sp] +; CHECK-GI-NEXT: mov sp, x29 +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %d = insertelement <8 x float> %a, float %b, i32 %c ret <8 x float> %d @@ -380,18 +455,31 @@ entry: } define <4 x half> @insert_v4f16_c(<4 x half> %a, half %b, i32 %c) { -; CHECK-LABEL: insert_v4f16_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: add x8, sp, #8 -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: str d0, [sp, #8] -; CHECK-NEXT: bfi x8, x0, #1, #2 -; CHECK-NEXT: str h1, [x8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v4f16_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: add x8, sp, #8 +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SD-NEXT: str d0, [sp, #8] +; CHECK-SD-NEXT: bfi x8, x0, #1, #2 +; CHECK-SD-NEXT: str h1, [x8] +; CHECK-SD-NEXT: ldr d0, [sp, #8] +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v4f16_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w0 +; CHECK-GI-NEXT: add x8, sp, #8 +; CHECK-GI-NEXT: str d0, [sp, #8] +; CHECK-GI-NEXT: and x9, x9, #0x3 +; CHECK-GI-NEXT: str h1, [x8, x9, lsl #1] +; CHECK-GI-NEXT: ldr d0, [sp, #8] +; CHECK-GI-NEXT: add sp, sp, #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <4 x half> %a, half %b, i32 %c ret <4 x half> %d @@ -420,17 +508,29 @@ entry: } define <8 x half> @insert_v8f16_c(<8 x half> %a, half %b, i32 %c) { -; CHECK-LABEL: insert_v8f16_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: bfi x8, x0, #1, #3 -; CHECK-NEXT: str h1, [x8] -; CHECK-NEXT: ldr q0, [sp], #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v8f16_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SD-NEXT: str q0, [sp] +; CHECK-SD-NEXT: bfi x8, x0, #1, #3 +; CHECK-SD-NEXT: str h1, [x8] +; CHECK-SD-NEXT: ldr q0, [sp], #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v8f16_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w0 +; CHECK-GI-NEXT: mov x8, sp +; CHECK-GI-NEXT: str q0, [sp] +; CHECK-GI-NEXT: and x9, x9, #0x7 +; CHECK-GI-NEXT: str h1, [x8, x9, lsl #1] +; CHECK-GI-NEXT: ldr q0, [sp], #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <8 x half> %a, half %b, i32 %c ret <8 x half> %d @@ -459,16 +559,35 @@ entry: } define <16 x half> @insert_v16f16_c(<16 x half> %a, half %b, i32 %c) { -; CHECK-LABEL: insert_v16f16_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; 
CHECK-NEXT: stp q0, q1, [sp, #-32]! -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: and x8, x0, #0xf -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: str h2, [x9, x8, lsl #1] -; CHECK-NEXT: ldp q0, q1, [sp], #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v16f16_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]! +; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SD-NEXT: and x8, x0, #0xf +; CHECK-SD-NEXT: mov x9, sp +; CHECK-SD-NEXT: str h2, [x9, x8, lsl #1] +; CHECK-SD-NEXT: ldp q0, q1, [sp], #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v16f16_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-GI-NEXT: sub x9, sp, #48 +; CHECK-GI-NEXT: mov x29, sp +; CHECK-GI-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-GI-NEXT: .cfi_def_cfa w29, 16 +; CHECK-GI-NEXT: .cfi_offset w30, -8 +; CHECK-GI-NEXT: .cfi_offset w29, -16 +; CHECK-GI-NEXT: mov w8, w0 +; CHECK-GI-NEXT: mov x9, sp +; CHECK-GI-NEXT: stp q0, q1, [sp] +; CHECK-GI-NEXT: and x8, x8, #0xf +; CHECK-GI-NEXT: str h2, [x9, x8, lsl #1] +; CHECK-GI-NEXT: ldp q0, q1, [sp] +; CHECK-GI-NEXT: mov sp, x29 +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %d = insertelement <16 x half> %a, half %b, i32 %c ret <16 x half> %d @@ -499,18 +618,33 @@ entry: } define <8 x i8> @insert_v8i8_c(<8 x i8> %a, i8 %b, i32 %c) { -; CHECK-LABEL: insert_v8i8_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: add x8, sp, #8 -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: str d0, [sp, #8] -; CHECK-NEXT: bfxil x8, x1, #0, #3 -; CHECK-NEXT: strb w0, [x8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v8i8_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: add x8, sp, #8 +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: str d0, [sp, #8] +; CHECK-SD-NEXT: bfxil x8, x1, #0, #3 +; CHECK-SD-NEXT: strb w0, [x8] +; CHECK-SD-NEXT: ldr d0, [sp, #8] +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v8i8_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w1 +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: str d0, [sp, #8] +; CHECK-GI-NEXT: and x9, x9, #0x7 +; CHECK-GI-NEXT: mul x8, x9, x8 +; CHECK-GI-NEXT: add x9, sp, #8 +; CHECK-GI-NEXT: strb w0, [x9, x8] +; CHECK-GI-NEXT: ldr d0, [sp, #8] +; CHECK-GI-NEXT: add sp, sp, #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <8 x i8> %a, i8 %b, i32 %c ret <8 x i8> %d @@ -537,17 +671,31 @@ entry: } define <16 x i8> @insert_v16i8_c(<16 x i8> %a, i8 %b, i32 %c) { -; CHECK-LABEL: insert_v16i8_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: bfxil x8, x1, #0, #4 -; CHECK-NEXT: strb w0, [x8] -; CHECK-NEXT: ldr q0, [sp], #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v16i8_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: str q0, [sp] +; CHECK-SD-NEXT: bfxil 
x8, x1, #0, #4 +; CHECK-SD-NEXT: strb w0, [x8] +; CHECK-SD-NEXT: ldr q0, [sp], #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v16i8_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w1 +; CHECK-GI-NEXT: mov w8, #1 // =0x1 +; CHECK-GI-NEXT: str q0, [sp] +; CHECK-GI-NEXT: and x9, x9, #0xf +; CHECK-GI-NEXT: mul x8, x9, x8 +; CHECK-GI-NEXT: mov x9, sp +; CHECK-GI-NEXT: strb w0, [x9, x8] +; CHECK-GI-NEXT: ldr q0, [sp], #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <16 x i8> %a, i8 %b, i32 %c ret <16 x i8> %d @@ -574,16 +722,37 @@ entry: } define <32 x i8> @insert_v32i8_c(<32 x i8> %a, i8 %b, i32 %c) { -; CHECK-LABEL: insert_v32i8_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: stp q0, q1, [sp, #-32]! -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: and x8, x1, #0x1f -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: strb w0, [x9, x8] -; CHECK-NEXT: ldp q0, q1, [sp], #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v32i8_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]! +; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SD-NEXT: and x8, x1, #0x1f +; CHECK-SD-NEXT: mov x9, sp +; CHECK-SD-NEXT: strb w0, [x9, x8] +; CHECK-SD-NEXT: ldp q0, q1, [sp], #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v32i8_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-GI-NEXT: sub x9, sp, #48 +; CHECK-GI-NEXT: mov x29, sp +; CHECK-GI-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-GI-NEXT: .cfi_def_cfa w29, 16 +; CHECK-GI-NEXT: .cfi_offset w30, -8 +; CHECK-GI-NEXT: .cfi_offset w29, -16 +; CHECK-GI-NEXT: mov w8, w1 +; CHECK-GI-NEXT: mov x10, sp +; CHECK-GI-NEXT: stp q0, q1, [sp] +; CHECK-GI-NEXT: and x8, x8, #0x1f +; CHECK-GI-NEXT: lsl x9, x8, #1 +; CHECK-GI-NEXT: sub x8, x9, x8 +; CHECK-GI-NEXT: strb w0, [x10, x8] +; CHECK-GI-NEXT: ldp q0, q1, [sp] +; CHECK-GI-NEXT: mov sp, x29 +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %d = insertelement <32 x i8> %a, i8 %b, i32 %c ret <32 x i8> %d @@ -614,18 +783,31 @@ entry: } define <4 x i16> @insert_v4i16_c(<4 x i16> %a, i16 %b, i32 %c) { -; CHECK-LABEL: insert_v4i16_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: add x8, sp, #8 -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: str d0, [sp, #8] -; CHECK-NEXT: bfi x8, x1, #1, #2 -; CHECK-NEXT: strh w0, [x8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v4i16_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: add x8, sp, #8 +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: str d0, [sp, #8] +; CHECK-SD-NEXT: bfi x8, x1, #1, #2 +; CHECK-SD-NEXT: strh w0, [x8] +; CHECK-SD-NEXT: ldr d0, [sp, #8] +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v4i16_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w1 +; CHECK-GI-NEXT: add x8, sp, #8 +; CHECK-GI-NEXT: str d0, [sp, #8] +; CHECK-GI-NEXT: and x9, x9, #0x3 +; CHECK-GI-NEXT: strh w0, [x8, x9, lsl #1] +; CHECK-GI-NEXT: ldr d0, [sp, #8] +; CHECK-GI-NEXT: add sp, sp, #16 +; 
CHECK-GI-NEXT: ret entry: %d = insertelement <4 x i16> %a, i16 %b, i32 %c ret <4 x i16> %d @@ -652,17 +834,29 @@ entry: } define <8 x i16> @insert_v8i16_c(<8 x i16> %a, i16 %b, i32 %c) { -; CHECK-LABEL: insert_v8i16_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: bfi x8, x1, #1, #3 -; CHECK-NEXT: strh w0, [x8] -; CHECK-NEXT: ldr q0, [sp], #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v8i16_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: str q0, [sp] +; CHECK-SD-NEXT: bfi x8, x1, #1, #3 +; CHECK-SD-NEXT: strh w0, [x8] +; CHECK-SD-NEXT: ldr q0, [sp], #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v8i16_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w1 +; CHECK-GI-NEXT: mov x8, sp +; CHECK-GI-NEXT: str q0, [sp] +; CHECK-GI-NEXT: and x9, x9, #0x7 +; CHECK-GI-NEXT: strh w0, [x8, x9, lsl #1] +; CHECK-GI-NEXT: ldr q0, [sp], #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <8 x i16> %a, i16 %b, i32 %c ret <8 x i16> %d @@ -689,16 +883,35 @@ entry: } define <16 x i16> @insert_v16i16_c(<16 x i16> %a, i16 %b, i32 %c) { -; CHECK-LABEL: insert_v16i16_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: stp q0, q1, [sp, #-32]! -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: and x8, x1, #0xf -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: strh w0, [x9, x8, lsl #1] -; CHECK-NEXT: ldp q0, q1, [sp], #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v16i16_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]! +; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SD-NEXT: and x8, x1, #0xf +; CHECK-SD-NEXT: mov x9, sp +; CHECK-SD-NEXT: strh w0, [x9, x8, lsl #1] +; CHECK-SD-NEXT: ldp q0, q1, [sp], #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v16i16_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-GI-NEXT: sub x9, sp, #48 +; CHECK-GI-NEXT: mov x29, sp +; CHECK-GI-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-GI-NEXT: .cfi_def_cfa w29, 16 +; CHECK-GI-NEXT: .cfi_offset w30, -8 +; CHECK-GI-NEXT: .cfi_offset w29, -16 +; CHECK-GI-NEXT: mov w8, w1 +; CHECK-GI-NEXT: mov x9, sp +; CHECK-GI-NEXT: stp q0, q1, [sp] +; CHECK-GI-NEXT: and x8, x8, #0xf +; CHECK-GI-NEXT: strh w0, [x9, x8, lsl #1] +; CHECK-GI-NEXT: ldp q0, q1, [sp] +; CHECK-GI-NEXT: mov sp, x29 +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %d = insertelement <16 x i16> %a, i16 %b, i32 %c ret <16 x i16> %d @@ -729,18 +942,31 @@ entry: } define <2 x i32> @insert_v2i32_c(<2 x i32> %a, i32 %b, i32 %c) { -; CHECK-LABEL: insert_v2i32_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: add x8, sp, #8 -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: str d0, [sp, #8] -; CHECK-NEXT: bfi x8, x1, #2, #1 -; CHECK-NEXT: str w0, [x8] -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v2i32_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: add x8, sp, #8 +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: str d0, [sp, #8] +; CHECK-SD-NEXT: bfi x8, x1, #2, #1 +; CHECK-SD-NEXT: str w0, [x8] +; CHECK-SD-NEXT: ldr d0, [sp, #8] +; CHECK-SD-NEXT: add sp, sp, #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v2i32_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w1 +; CHECK-GI-NEXT: add x8, sp, #8 +; CHECK-GI-NEXT: str d0, [sp, #8] +; CHECK-GI-NEXT: and x9, x9, #0x1 +; CHECK-GI-NEXT: str w0, [x8, x9, lsl #2] +; CHECK-GI-NEXT: ldr d0, [sp, #8] +; CHECK-GI-NEXT: add sp, sp, #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <2 x i32> %a, i32 %b, i32 %c ret <2 x i32> %d @@ -789,17 +1015,29 @@ entry: } define <3 x i32> @insert_v3i32_c(<3 x i32> %a, i32 %b, i32 %c) { -; CHECK-LABEL: insert_v3i32_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: bfi x8, x1, #2, #2 -; CHECK-NEXT: str w0, [x8] -; CHECK-NEXT: ldr q0, [sp], #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v3i32_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: str q0, [sp] +; CHECK-SD-NEXT: bfi x8, x1, #2, #2 +; CHECK-SD-NEXT: str w0, [x8] +; CHECK-SD-NEXT: ldr q0, [sp], #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v3i32_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w1 +; CHECK-GI-NEXT: mov x8, sp +; CHECK-GI-NEXT: str q0, [sp] +; CHECK-GI-NEXT: and x9, x9, #0x3 +; CHECK-GI-NEXT: str w0, [x8, x9, lsl #2] +; CHECK-GI-NEXT: ldr q0, [sp], #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <3 x i32> %a, i32 %b, i32 %c ret <3 x i32> %d @@ -826,17 +1064,29 @@ entry: } define <4 x i32> @insert_v4i32_c(<4 x i32> %a, i32 %b, i32 %c) { -; CHECK-LABEL: insert_v4i32_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; 
CHECK-NEXT: mov x8, sp -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: bfi x8, x1, #2, #2 -; CHECK-NEXT: str w0, [x8] -; CHECK-NEXT: ldr q0, [sp], #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v4i32_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: str q0, [sp] +; CHECK-SD-NEXT: bfi x8, x1, #2, #2 +; CHECK-SD-NEXT: str w0, [x8] +; CHECK-SD-NEXT: ldr q0, [sp], #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v4i32_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w1 +; CHECK-GI-NEXT: mov x8, sp +; CHECK-GI-NEXT: str q0, [sp] +; CHECK-GI-NEXT: and x9, x9, #0x3 +; CHECK-GI-NEXT: str w0, [x8, x9, lsl #2] +; CHECK-GI-NEXT: ldr q0, [sp], #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <4 x i32> %a, i32 %b, i32 %c ret <4 x i32> %d @@ -863,16 +1113,35 @@ entry: } define <8 x i32> @insert_v8i32_c(<8 x i32> %a, i32 %b, i32 %c) { -; CHECK-LABEL: insert_v8i32_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: stp q0, q1, [sp, #-32]! -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: and x8, x1, #0x7 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: str w0, [x9, x8, lsl #2] -; CHECK-NEXT: ldp q0, q1, [sp], #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v8i32_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]! +; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SD-NEXT: and x8, x1, #0x7 +; CHECK-SD-NEXT: mov x9, sp +; CHECK-SD-NEXT: str w0, [x9, x8, lsl #2] +; CHECK-SD-NEXT: ldp q0, q1, [sp], #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v8i32_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-GI-NEXT: sub x9, sp, #48 +; CHECK-GI-NEXT: mov x29, sp +; CHECK-GI-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-GI-NEXT: .cfi_def_cfa w29, 16 +; CHECK-GI-NEXT: .cfi_offset w30, -8 +; CHECK-GI-NEXT: .cfi_offset w29, -16 +; CHECK-GI-NEXT: mov w8, w1 +; CHECK-GI-NEXT: mov x9, sp +; CHECK-GI-NEXT: stp q0, q1, [sp] +; CHECK-GI-NEXT: and x8, x8, #0x7 +; CHECK-GI-NEXT: str w0, [x9, x8, lsl #2] +; CHECK-GI-NEXT: ldp q0, q1, [sp] +; CHECK-GI-NEXT: mov sp, x29 +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %d = insertelement <8 x i32> %a, i32 %b, i32 %c ret <8 x i32> %d @@ -899,17 +1168,29 @@ entry: } define <2 x i64> @insert_v2i64_c(<2 x i64> %a, i64 %b, i32 %c) { -; CHECK-LABEL: insert_v2i64_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: str q0, [sp] -; CHECK-NEXT: bfi x8, x1, #3, #1 -; CHECK-NEXT: str x0, [x8] -; CHECK-NEXT: ldr q0, [sp], #16 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v2i64_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sub sp, sp, #16 +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: str q0, [sp] +; CHECK-SD-NEXT: bfi x8, x1, #3, #1 +; CHECK-SD-NEXT: str x0, [x8] +; CHECK-SD-NEXT: ldr q0, [sp], #16 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v2i64_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub sp, sp, #16 +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: mov w9, w1 +; CHECK-GI-NEXT: mov x8, sp +; CHECK-GI-NEXT: str q0, [sp] +; CHECK-GI-NEXT: and x9, x9, #0x1 +; CHECK-GI-NEXT: str x0, [x8, x9, lsl #3] +; CHECK-GI-NEXT: ldr q0, [sp], #16 +; CHECK-GI-NEXT: ret entry: %d = insertelement <2 x i64> %a, i64 %b, i32 %c ret <2 x i64> %d @@ -946,25 +1227,51 @@ entry: } define <3 x i64> @insert_v3i64_c(<3 x i64> %a, i64 %b, i32 %c) { -; CHECK-LABEL: insert_v3i64_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: stp q0, q2, [sp, #-32]! -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: and x9, x1, #0x3 -; CHECK-NEXT: str x0, [x8, x9, lsl #3] -; CHECK-NEXT: ldr q0, [sp] -; CHECK-NEXT: ldr d2, [sp, #16] -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 -; CHECK-NEXT: add sp, sp, #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v3i64_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: stp q0, q2, [sp, #-32]! 
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SD-NEXT: mov x8, sp +; CHECK-SD-NEXT: and x9, x1, #0x3 +; CHECK-SD-NEXT: str x0, [x8, x9, lsl #3] +; CHECK-SD-NEXT: ldr q0, [sp] +; CHECK-SD-NEXT: ldr d2, [sp, #16] +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: add sp, sp, #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v3i64_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-GI-NEXT: sub x9, sp, #48 +; CHECK-GI-NEXT: mov x29, sp +; CHECK-GI-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-GI-NEXT: .cfi_def_cfa w29, 16 +; CHECK-GI-NEXT: .cfi_offset w30, -8 +; CHECK-GI-NEXT: .cfi_offset w29, -16 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov w8, w1 +; CHECK-GI-NEXT: mov x9, sp +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] +; CHECK-GI-NEXT: and x8, x8, #0x3 +; CHECK-GI-NEXT: stp q0, q2, [sp] +; CHECK-GI-NEXT: str x0, [x9, x8, lsl #3] +; CHECK-GI-NEXT: ldp q0, q2, [sp] +; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: mov sp, x29 +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %d = insertelement <3 x i64> %a, i64 %b, i32 %c ret <3 x i64> %d @@ -991,16 +1298,35 @@ entry: } define <4 x i64> @insert_v4i64_c(<4 x i64> %a, i64 %b, i32 %c) { -; CHECK-LABEL: insert_v4i64_c: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: stp q0, q1, [sp, #-32]! -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: and x8, x1, #0x3 -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: str x0, [x9, x8, lsl #3] -; CHECK-NEXT: ldp q0, q1, [sp], #32 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: insert_v4i64_c: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]! +; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 +; CHECK-SD-NEXT: and x8, x1, #0x3 +; CHECK-SD-NEXT: mov x9, sp +; CHECK-SD-NEXT: str x0, [x9, x8, lsl #3] +; CHECK-SD-NEXT: ldp q0, q1, [sp], #32 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: insert_v4i64_c: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-GI-NEXT: sub x9, sp, #48 +; CHECK-GI-NEXT: mov x29, sp +; CHECK-GI-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-GI-NEXT: .cfi_def_cfa w29, 16 +; CHECK-GI-NEXT: .cfi_offset w30, -8 +; CHECK-GI-NEXT: .cfi_offset w29, -16 +; CHECK-GI-NEXT: mov w8, w1 +; CHECK-GI-NEXT: mov x9, sp +; CHECK-GI-NEXT: stp q0, q1, [sp] +; CHECK-GI-NEXT: and x8, x8, #0x3 +; CHECK-GI-NEXT: str x0, [x9, x8, lsl #3] +; CHECK-GI-NEXT: ldp q0, q1, [sp] +; CHECK-GI-NEXT: mov sp, x29 +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret entry: %d = insertelement <4 x i64> %a, i64 %b, i32 %c ret <4 x i64> %d diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll index 30123a31cebbe..e8dafd5e8fbab 100644 --- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll +++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll @@ -223,10 +223,9 @@ define i64 @three_dimensional_middle(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) { ; CHECK-NEXT: // Parent Loop BB3_1 Depth=1 ; CHECK-NEXT: // => This Loop Header: Depth=2 ; CHECK-NEXT: // Child Loop BB3_3 Depth 3 -; CHECK-NEXT: lsl x12, x11, #3 +; CHECK-NEXT: ldr x13, [x1, x11, lsl #3] +; CHECK-NEXT: ldr x12, [x10, x11, lsl #3] ; CHECK-NEXT: mov x14, x4 -; CHECK-NEXT: ldr x13, [x1, x12] -; CHECK-NEXT: ldr x12, [x10, x12] ; CHECK-NEXT: ldr w13, [x13] ; CHECK-NEXT: .LBB3_3: // %for.body8 ; CHECK-NEXT: // Parent Loop BB3_1 Depth=1 diff --git a/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll b/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll new file mode 100644 index 0000000000000..728cffeba02a2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll @@ -0,0 +1,50 @@ +; RUN: rm -rf %t && split-file %s %t && cd %t + +;--- ok.ll + +; RUN: llc -mtriple=aarch64-linux ok.ll -o - | \ +; RUN: FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=aarch64-linux ok.ll -filetype=obj -o - | \ +; RUN: llvm-readelf --notes - | FileCheck %s --check-prefix=OBJ + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 268435458} +!1 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 85} + +; ASM: .section .note.gnu.property,"a",@note +; ASM-NEXT: .p2align 3, 0x0 +; ASM-NEXT: .word 4 +; ASM-NEXT: .word 24 +; ASM-NEXT: .word 5 +; ASM-NEXT: .asciz "GNU" +; 3221225473 = 0xc0000001 = GNU_PROPERTY_AARCH64_FEATURE_PAUTH +; ASM-NEXT: .word 3221225473 +; ASM-NEXT: .word 16 +; ASM-NEXT: .xword 268435458 +; ASM-NEXT: .xword 85 + +; OBJ: Displaying notes found in: .note.gnu.property +; OBJ-NEXT: Owner Data size Description +; OBJ-NEXT: GNU 0x00000018 NT_GNU_PROPERTY_TYPE_0 (property note) +; OBJ-NEXT: AArch64 PAuth ABI core info: platform 0x10000002 (llvm_linux), version 0x55 (PointerAuthIntrinsics, !PointerAuthCalls, PointerAuthReturns, !PointerAuthAuthTraps, PointerAuthVTPtrAddressDiscrimination, !PointerAuthVTPtrTypeDiscrimination, PointerAuthInitFini) + +; ERR: either both or no 'aarch64-elf-pauthabi-platform' and 'aarch64-elf-pauthabi-version' module flags must be present + +;--- err1.ll + +; RUN: not llc -mtriple=aarch64-linux err1.ll 2>&1 -o - | \ +; RUN: FileCheck %s --check-prefix=ERR + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 2} + +;--- err2.ll + +; RUN: not llc -mtriple=aarch64-linux err2.ll 2>&1 -o - | \ +; RUN: FileCheck %s --check-prefix=ERR + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 31} diff --git 
a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir index 65148344096cd..5dd29cf39c0ea 100644 --- a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir +++ b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir @@ -487,7 +487,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[DEF]] - ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, {{[0-9]+}} /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %1, 262158 /* mem:m */, killed [[COPY1]] + ; CHECK-NEXT: INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 2621450 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed [[COPY1]] ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF @@ -505,7 +505,7 @@ body: | %0:gpr64common = COPY $x0 %2:gpr64all = IMPLICIT_DEF %3:gpr64sp = COPY %2 - INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 2359306 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed %3 + INLINEASM &"ldr ${0:s}, $1", 8 /* mayload attdialect */, 2621450 /* regdef:FPR64 */, def %1, 262158 /* mem:m */, killed %3 %4:fpr128 = MOVIv2d_ns 0 %5:fpr64 = COPY %4.dsub %7:fpr128 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll index 52007221e12a7..f65a08ae2acea 100644 --- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll +++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll @@ -100,7 +100,7 @@ exit: } ; Address calculation cheap enough on some cores. -define i32 @f3(i1 %c1, ptr %p, i64 %i) nounwind "target-features"="+alu-lsl-fast,+addr-lsl-fast" { +define i32 @f3(i1 %c1, ptr %p, i64 %i) nounwind "target-features"="+alu-lsl-fast" { ; CHECK-LABEL: f3: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: tbz w0, #0, .LBB3_2 @@ -130,7 +130,7 @@ exit: ret i32 %v } -define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast,+addr-lsl-fast" { +define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast" { ; CHECK-LABEL: f4: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp x1, #1 diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir index ef7d55a1c2395..11cf388e38531 100644 --- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir +++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir @@ -1,5 +1,5 @@ # RUN: llc -mtriple=aarch64-linux-gnu -run-pass=greedy %s -o - | FileCheck %s -# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo %s -o - | FileCheck %s --check-prefix=EXPAND +# RUN: llc -mtriple=aarch64-linux-gnu -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND --- | ; ModuleID = '<stdin>' source_filename = "<stdin>" @@ -173,8 +173,8 @@ body: | ; CHECK-NEXT: stack-id: scalable-vector, callee-saved-register: '' ; EXPAND-LABEL: name: spills_fills_stack_id_pnr - ; EXPAND: STR_PXI $p0, $sp, 7 - ; EXPAND: $p0 = LDR_PXI $sp, 7, implicit-def $pn0 + ; EXPAND: STR_PXI $pn0, $sp, 7 + ; EXPAND: $pn0 = LDR_PXI $sp, 7, implicit-def $pn0 %0:pnr = COPY $pn0 @@ -213,11 +213,9 @@ body: | ; EXPAND-LABEL: name: spills_fills_stack_id_virtreg_pnr ; EXPAND: renamable $pn8 = WHILEGE_CXX_B - ; EXPAND: $p0 = ORR_PPzPP $p8, $p8, killed $p8 - ; EXPAND: STR_PXI killed renamable $p0, $sp, 7 + ; EXPAND: STR_PXI killed renamable $pn8, $sp, 7 ; - ;
EXPAND: renamable $p0 = LDR_PXI $sp, 7 - ; EXPAND: $p8 = ORR_PPzPP $p0, $p0, killed $p0, implicit-def $pn8 + ; EXPAND: renamable $pn8 = LDR_PXI $sp, 7 ; EXPAND: $p0 = PEXT_PCI_B killed renamable $pn8, 0 diff --git a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll new file mode 100644 index 0000000000000..c39894c27d9d4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll @@ -0,0 +1,289 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs -aarch64-lower-to-sme-routines=false < %s | FileCheck %s -check-prefixes=CHECK-NO-SME-ROUTINES +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -mattr=+mops -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK-MOPS + +@dst = global [512 x i8] zeroinitializer, align 1 +@src = global [512 x i8] zeroinitializer, align 1 + +define void @se_memcpy(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind { +; CHECK-LABEL: se_memcpy: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: adrp x0, :got:dst +; CHECK-NEXT: adrp x1, :got:src +; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst] +; CHECK-NEXT: ldr x1, [x1, :got_lo12:src] +; CHECK-NEXT: bl __arm_sc_memcpy +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-NO-SME-ROUTINES-LABEL: se_memcpy: +; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry +; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0 +; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst +; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src +; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst] +; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src] +; CHECK-NO-SME-ROUTINES-NEXT: smstop sm +; CHECK-NO-SME-ROUTINES-NEXT: bl memcpy +; CHECK-NO-SME-ROUTINES-NEXT: smstart sm +; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ret +; +; CHECK-MOPS-LABEL: se_memcpy: +; CHECK-MOPS: // %bb.0: // %entry +; CHECK-MOPS-NEXT: adrp x8, :got:src +; CHECK-MOPS-NEXT: adrp x9, :got:dst +; CHECK-MOPS-NEXT: ldr x8, [x8, :got_lo12:src] +; CHECK-MOPS-NEXT: ldr x9, [x9, :got_lo12:dst] +; CHECK-MOPS-NEXT: cpyfp [x9]!, [x8]!, x0! +; CHECK-MOPS-NEXT: cpyfm [x9]!, [x8]!, x0! +; CHECK-MOPS-NEXT: cpyfe [x9]!, [x8]!, x0! 
+; CHECK-MOPS-NEXT: ret +entry: + tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false) + ret void +} + +define void @se_memset(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind { +; CHECK-LABEL: se_memset: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: adrp x0, :got:dst +; CHECK-NEXT: mov w1, #2 // =0x2 +; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst] +; CHECK-NEXT: bl __arm_sc_memset +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-NO-SME-ROUTINES-LABEL: se_memset: +; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry +; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0 +; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst +; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst] +; CHECK-NO-SME-ROUTINES-NEXT: smstop sm +; CHECK-NO-SME-ROUTINES-NEXT: mov w1, #2 // =0x2 +; CHECK-NO-SME-ROUTINES-NEXT: bl memset +; CHECK-NO-SME-ROUTINES-NEXT: smstart sm +; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ret +; +; CHECK-MOPS-LABEL: se_memset: +; CHECK-MOPS: // %bb.0: // %entry +; CHECK-MOPS-NEXT: adrp x8, :got:dst +; CHECK-MOPS-NEXT: mov w9, #2 // =0x2 +; CHECK-MOPS-NEXT: ldr x8, [x8, :got_lo12:dst] +; CHECK-MOPS-NEXT: setp [x8]!, x0!, x9 +; CHECK-MOPS-NEXT: setm [x8]!, x0!, x9 +; CHECK-MOPS-NEXT: sete [x8]!, x0!, x9 +; CHECK-MOPS-NEXT: ret +entry: + tail call void @llvm.memset.p0.i64(ptr align 1 @dst, i8 2, i64 %n, i1 false) + ret void +} + +define void @se_memmove(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind { +; CHECK-LABEL: se_memmove: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: adrp x0, :got:dst +; CHECK-NEXT: adrp x1, :got:src +; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst] +; CHECK-NEXT: ldr x1, [x1, :got_lo12:src] +; CHECK-NEXT: bl __arm_sc_memmove +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-NO-SME-ROUTINES-LABEL: se_memmove: +; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry +; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0 +; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst +; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src +; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst] +; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src] +; CHECK-NO-SME-ROUTINES-NEXT: smstop sm +; CHECK-NO-SME-ROUTINES-NEXT: bl memmove +; CHECK-NO-SME-ROUTINES-NEXT: smstart sm +; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ret +; +; CHECK-MOPS-LABEL: se_memmove: +; CHECK-MOPS: // %bb.0: // %entry +; CHECK-MOPS-NEXT: adrp x8, :got:src +; CHECK-MOPS-NEXT: adrp x9, :got:dst +; CHECK-MOPS-NEXT: ldr x8, [x8, :got_lo12:src] +; CHECK-MOPS-NEXT: ldr x9, [x9, :got_lo12:dst] +; CHECK-MOPS-NEXT: cpyp [x9]!, [x8]!, x0! +; CHECK-MOPS-NEXT: cpym [x9]!, [x8]!, x0! +; CHECK-MOPS-NEXT: cpye [x9]!, [x8]!, x0! +; CHECK-MOPS-NEXT: ret +entry: + tail call void @llvm.memmove.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false) + ret void +} + +define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" nounwind { +; CHECK-LABEL: sc_memcpy: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: adrp x0, :got:dst +; CHECK-NEXT: adrp x1, :got:src +; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst] +; CHECK-NEXT: ldr x1, [x1, :got_lo12:src] +; CHECK-NEXT: bl __arm_sc_memcpy +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-NO-SME-ROUTINES-LABEL: sc_memcpy: +; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry +; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0 +; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: bl __arm_sme_state +; CHECK-NO-SME-ROUTINES-NEXT: adrp x8, :got:dst +; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src +; CHECK-NO-SME-ROUTINES-NEXT: and x19, x0, #0x1 +; CHECK-NO-SME-ROUTINES-NEXT: ldr x8, [x8, :got_lo12:dst] +; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src] +; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB3_2 +; CHECK-NO-SME-ROUTINES-NEXT: // %bb.1: // %entry +; CHECK-NO-SME-ROUTINES-NEXT: smstop sm +; CHECK-NO-SME-ROUTINES-NEXT: .LBB3_2: // %entry +; CHECK-NO-SME-ROUTINES-NEXT: mov x0, x8 +; CHECK-NO-SME-ROUTINES-NEXT: bl memcpy +; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB3_4 +; CHECK-NO-SME-ROUTINES-NEXT: // %bb.3: // %entry +; CHECK-NO-SME-ROUTINES-NEXT: smstart sm +; CHECK-NO-SME-ROUTINES-NEXT: .LBB3_4: // %entry +; CHECK-NO-SME-ROUTINES-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ret +; +; CHECK-MOPS-LABEL: sc_memcpy: +; CHECK-MOPS: // %bb.0: // %entry +; CHECK-MOPS-NEXT: adrp x8, :got:src +; CHECK-MOPS-NEXT: adrp x9, :got:dst +; CHECK-MOPS-NEXT: ldr x8, [x8, :got_lo12:src] +; CHECK-MOPS-NEXT: ldr x9, [x9, :got_lo12:dst] +; CHECK-MOPS-NEXT: cpyfp [x9]!, [x8]!, x0! +; CHECK-MOPS-NEXT: cpyfm [x9]!, [x8]!, x0! +; CHECK-MOPS-NEXT: cpyfe [x9]!, [x8]!, x0! +; CHECK-MOPS-NEXT: ret +entry: + tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false) + ret void +} + +define void @sb_memcpy(i64 noundef %n) "aarch64_pstate_sm_body" nounwind { +; CHECK-LABEL: sb_memcpy: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: smstart sm +; CHECK-NEXT: adrp x0, :got:dst +; CHECK-NEXT: adrp x1, :got:src +; CHECK-NEXT: ldr x0, [x0, :got_lo12:dst] +; CHECK-NEXT: ldr x1, [x1, :got_lo12:src] +; CHECK-NEXT: bl __arm_sc_memcpy +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-NO-SME-ROUTINES-LABEL: sb_memcpy: +; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry +; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0 +; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NO-SME-ROUTINES-NEXT: smstart sm +; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst +; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src +; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst] +; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src] +; CHECK-NO-SME-ROUTINES-NEXT: smstop sm +; CHECK-NO-SME-ROUTINES-NEXT: bl memcpy +; CHECK-NO-SME-ROUTINES-NEXT: smstart sm +; CHECK-NO-SME-ROUTINES-NEXT: smstop sm +; CHECK-NO-SME-ROUTINES-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NO-SME-ROUTINES-NEXT: ret +; +; CHECK-MOPS-LABEL: sb_memcpy: +; CHECK-MOPS: // %bb.0: // %entry +; CHECK-MOPS-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill +; CHECK-MOPS-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-MOPS-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-MOPS-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-MOPS-NEXT: smstart sm +; CHECK-MOPS-NEXT: adrp x8, :got:src +; CHECK-MOPS-NEXT: adrp x9, :got:dst +; CHECK-MOPS-NEXT: ldr x8, [x8, :got_lo12:src] +; CHECK-MOPS-NEXT: ldr x9, [x9, :got_lo12:dst] +; CHECK-MOPS-NEXT: cpyfp [x9]!, [x8]!, x0! +; CHECK-MOPS-NEXT: cpyfm [x9]!, [x8]!, x0! +; CHECK-MOPS-NEXT: cpyfe [x9]!, [x8]!, x0! 
+; CHECK-MOPS-NEXT: smstop sm +; CHECK-MOPS-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-MOPS-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-MOPS-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-MOPS-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-MOPS-NEXT: ret +entry: + tail call void @llvm.memcpy.p0.p0.i64(ptr align 1 @dst, ptr nonnull align 1 @src, i64 %n, i1 false) + ret void +} + +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) +declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) +declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) diff --git a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll index bcfc7b336f3e6..bcb878ad744bb 100644 --- a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll +++ b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll @@ -9,7 +9,7 @@ define <4 x i32> @masked_load_v4i32(ptr %a, <4 x i1> %mask) nounwind { ; CHECK-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ldnt1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %load = call <4 x i32> @llvm.masked.load.v4i32(ptr %a, i32 1, <4 x i1> %mask, <4 x i32> undef), !nontemporal !0 @@ -25,7 +25,7 @@ define void @masked_store_v4i32(<4 x i32> %x, ptr %a, <4 x i1> %mask) nounwind { ; CHECK-NEXT: shl v1.4s, v1.4s, #31 ; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 -; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: stnt1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %a, i32 1, <4 x i1> %mask), !nontemporal !0 ret void @@ -43,7 +43,8 @@ define <4 x i32> @load_v4i32(ptr %a) nounwind { define void @store_v4i32(<4 x i32> %x, ptr %a) nounwind { ; CHECK-LABEL: store_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: stnp d0, d1, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %a, i32 1, <4 x i1> ), !nontemporal !0 ret void @@ -52,7 +53,7 @@ define void @store_v4i32(<4 x i32> %x, ptr %a) nounwind { define @masked_load_nxv4i32(ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_load_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ldnt1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ret %load = call @llvm.masked.load.nxv4i32(ptr %a, i32 1, %mask, undef), !nontemporal !0 ret %load @@ -61,7 +62,7 @@ define @masked_load_nxv4i32(ptr %a, %mask) define void @masked_store_nxv4i32( %x, ptr %a, %mask) nounwind { ; CHECK-LABEL: masked_store_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: stnt1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.nxv4i32.p0( %x, ptr %a, i32 1, %mask), !nontemporal !0 ret void diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll new file mode 100644 index 0000000000000..11f67634a3fb2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK + +define @bsl( %a, %b) { +; CHECK-LABEL: bsl: +; CHECK: // %bb.0: +; CHECK-NEXT: mov 
z2.s, #0x7fffffff +; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d +; CHECK-NEXT: ret + %1 = and %a, splat(i32 2147483647) + %2 = and %b, splat(i32 -2147483648) + %c = or %1, %2 + ret %c +} + +; We are not expecting a bsl instruction here: the constants are not complementary, so the pattern does not fold to bsl. +define @no_bsl_fold( %a, %b) { +; CHECK-LABEL: no_bsl_fold: +; CHECK: // %bb.0: +; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff +; CHECK-NEXT: and z1.s, z1.s, #0x7ffffffe +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %1 = and %a, splat(i32 2147483647) + %2 = and %b, splat(i32 2147483646) + %c = or %1, %2 + ret %c +} diff --git a/llvm/test/CodeGen/AArch64/tagged-globals-pic.ll b/llvm/test/CodeGen/AArch64/tagged-globals-pic.ll index 6ee0dd193c3dc..3c4274abdd563 100644 --- a/llvm/test/CodeGen/AArch64/tagged-globals-pic.ll +++ b/llvm/test/CodeGen/AArch64/tagged-globals-pic.ll @@ -31,18 +31,6 @@ define ptr @global_addr() #0 { } define i32 @global_load() #0 { - ; CHECK-SELECTIONDAGISEL: global_load: - ; CHECK-SELECTIONDAGISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global - ; CHECK-SELECTIONDAGISEL: ldr w0, [[[REG]], :lo12:global] - ; CHECK-SELECTIONDAGISEL: ret - - ; CHECK-GLOBALISEL: global_load: - ; CHECK-GLOBALISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global - ; CHECK-GLOBALISEL: movk [[REG]], #:prel_g3:global+4294967296 - ; CHECK-GLOBALISEL: add [[REG]], [[REG]], :lo12:global - ; CHECK-GLOBALISEL: ldr w0, [[[REG]]] - ; CHECK-GLOBALISEL: ret - ; CHECK-PIC: global_load: ; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global ; CHECK-PIC: ldr [[REG]], [[[REG]], :got_lo12:global] @@ -54,18 +42,6 @@ define i32 @global_load() #0 { } define void @global_store() #0 { - ; CHECK-SELECTIONDAGISEL: global_store: - ; CHECK-SELECTIONDAGISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global - ; CHECK-SELECTIONDAGISEL: str wzr, [[[REG]], :lo12:global] - ; CHECK-SELECTIONDAGISEL: ret - - ; CHECK-GLOBALISEL: global_store: - ; CHECK-GLOBALISEL: adrp [[REG:x[0-9]+]], :pg_hi21_nc:global - ; CHECK-GLOBALISEL: movk [[REG]], #:prel_g3:global+4294967296 - ; CHECK-GLOBALISEL: add [[REG]], [[REG]], :lo12:global - ; CHECK-GLOBALISEL: str wzr, [[[REG]]] - ; CHECK-GLOBALISEL: ret - ; CHECK-PIC: global_store: ; CHECK-PIC: adrp [[REG:x[0-9]+]], :got:global ; CHECK-PIC: ldr [[REG]], [[[REG]], :got_lo12:global] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir index b49f516098513..0a2b3da7f7d94 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir @@ -28,16 +28,13 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test_ptradd_crash__offset_wider - ; CHECK: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 3 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[C]](s128) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[TRUNC]], [[C1]](s64) - ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[SHL]](s64) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1) ; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %1:_(p1) = G_CONSTANT i64 0 - %3:_(s128) = G_CONSTANT i128 3 + %3:_(s32) = G_CONSTANT i32 3 %0:_(<4 x s32>) = G_LOAD %1 :: (load (<4 x s32>) from `ptr addrspace(1) null`, addrspace 1) %2:_(s32) = G_EXTRACT_VECTOR_ELT
%0, %3 $sgpr0 = COPY %2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces-vectors.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces-vectors.ll index ae0556c1b21f0..c509cf4b1bf37 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces-vectors.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces-vectors.ll @@ -48,10 +48,10 @@ define <2 x ptr addrspace(7)> @gep_vector_splat(<2 x ptr addrspace(7)> %ptrs, i6 ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x p8>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV2]](s64), [[C]](s64) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV2]](s64), [[C]](s32) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[IVEC]](<2 x s64>), [[DEF]], shufflemask(0, 0) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[SHUF]](<2 x s64>) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<2 x s32>) = G_ADD [[BUILD_VECTOR1]], [[TRUNC]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir index af03b3e20ecb6..93155335e2086 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -294,7 +294,8 @@ body: | ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_CONSTANT i1 false - %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1 + %4:_(s32) = G_ZEXT %1 + %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %4 %3:_(s32) = G_ANYEXT %2 $vgpr0 = COPY %3 ... @@ -948,7 +949,8 @@ body: | ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + %3:_(s32) = G_TRUNC %1 + %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %3 $vgpr0 = COPY %2 ... --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extractelement-crash.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extractelement-crash.mir index b5dca793bc507..805890a75d402 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extractelement-crash.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extractelement-crash.mir @@ -18,8 +18,9 @@ body: | %7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0(<2 x s32>), %6(s32), %5(s32) %8:_(s1) = G_CONSTANT i1 1 - %9:_(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %8(s1) - %10:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0(<2 x s32>), %9(s32), %8(s1) + %11:_(s32) = G_ZEXT %8 + %9:_(s32) = G_EXTRACT_VECTOR_ELT %0(<2 x s32>), %11(s32) + %10:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0(<2 x s32>), %9(s32), %11(s32) SI_RETURN ... 
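The GlobalISel test updates above all make the same adjustment: the index operand of G_EXTRACT_VECTOR_ELT and G_INSERT_VECTOR_ELT now carries a fixed 32-bit type, so the MIR inputs route s1, s8, and s64 indices through an explicit G_ZEXT or G_TRUNC instead of feeding them to the instruction directly. As an illustration only, here is a minimal LLVM IR sketch of the case these tests guard; the function is hypothetical, not part of the patch, and assumes the index-narrowing behaviour the MIR above suggests:

define i32 @extract_wide_index(<2 x i32> %v, i64 %idx) {
  ; The IR-level index is i64; in the corresponding generic MIR the index
  ; operand of G_EXTRACT_VECTOR_ELT would be narrowed to s32 via G_TRUNC,
  ; mirroring the updated tests, rather than used at its original s64 width.
  %elt = extractelement <2 x i32> %v, i64 %idx
  ret i32 %elt
}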
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index 1dcb2bf3e42a6..b57dd396ae355 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -82,7 +82,8 @@ body: | %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s64) = COPY $vgpr3_vgpr4 - %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 + %4:_(s32) = G_TRUNC %2 + %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %4 $vgpr0_vgpr1 = COPY %3 ... @@ -105,7 +106,8 @@ body: | %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s32) = COPY $vgpr16 %2:_(s64) = COPY $vgpr17_vgpr18 - %3:_(<16 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 + %4:_(s32) = G_TRUNC %2 + %3:_(<16 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %4 S_ENDPGM 0, implicit %3 ... @@ -131,28 +133,6 @@ body: | S_ENDPGM 0, implicit %3 ... ---- -name: insert_vector_elt_0_v2s32_s8 - -body: | - bb.0: - liveins: $vgpr0_vgpr1, $vgpr2 - - ; CHECK-LABEL: name: insert_vector_elt_0_v2s32_s8 - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s8) = G_CONSTANT i8 0 - %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 - $vgpr0_vgpr1 = COPY %3 -... 
- --- name: insert_vector_elt_0_v2i8_i32 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll index d9e0464942182..6bda962d1b9ca 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll @@ -12,7 +12,7 @@ define amdgpu_kernel void @noop_fdiv_fpmath(ptr addrspace(1) %out, float %a, float %b) #0 { ; CHECK-LABEL: define amdgpu_kernel void @noop_fdiv_fpmath( ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[MD_25ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !0 +; CHECK-NEXT: [[MD_25ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META0:![0-9]+]] ; CHECK-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; CHECK-NEXT: ret void ; @@ -26,7 +26,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, floa ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1 +; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1:![0-9]+]] ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 @@ -61,9 +61,9 @@ define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, floa ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] ; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0 +; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0 +; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 @@ -89,7 +89,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, floa ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1 +; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1:![0-9]+]] ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 @@ -124,9 
+124,9 @@ define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, floa ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] ; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) ; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0 +; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0 +; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 @@ -152,7 +152,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, floa ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] { ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1 +; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1:![0-9]+]] ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) ; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 @@ -169,9 +169,9 @@ define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, floa ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) ; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0 +; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] ; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0 +; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] ; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] ; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 @@ -430,15 +430,15 @@ define amdgpu_kernel void @rcp_fdiv_f32_fpmath(ptr addrspace(1) %out, float %x) ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath !1 +; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath [[META1]] ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]] ; IEEE-GOODFREXP-NEXT: store volatile 
float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath !0 +; IEEE-GOODFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath [[META0]] ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]] ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath !0 +; IEEE-GOODFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath [[META0]] ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg float [[X]] ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) @@ -458,7 +458,7 @@ define amdgpu_kernel void @rcp_fdiv_f32_fpmath(ptr addrspace(1) %out, float %x) ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]] ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath !0 +; IEEE-GOODFREXP-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath [[META0]] ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]] ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 @@ -482,15 +482,15 @@ define amdgpu_kernel void @rcp_fdiv_f32_fpmath(ptr addrspace(1) %out, float %x) ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath !1 +; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath [[META1]] ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]] ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath !0 +; IEEE-BADFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath [[META0]] ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]] ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath !0 +; IEEE-BADFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath [[META0]] ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg float [[X]] ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) @@ -510,7 +510,7 @@ define 
amdgpu_kernel void @rcp_fdiv_f32_fpmath(ptr addrspace(1) %out, float %x) ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]] ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath !0 +; IEEE-BADFREXP-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath [[META0]] ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]] ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 @@ -524,15 +524,15 @@ define amdgpu_kernel void @rcp_fdiv_f32_fpmath(ptr addrspace(1) %out, float %x) ; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]]) ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath !1 +; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath [[META1]] ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]] ; DAZ-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath !0 +; DAZ-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath [[META0]] ; DAZ-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]] ; DAZ-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath !0 +; DAZ-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath [[META0]] ; DAZ-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[TMP1:%.*]] = fneg float [[X]] ; DAZ-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]]) @@ -542,7 +542,7 @@ define amdgpu_kernel void @rcp_fdiv_f32_fpmath(ptr addrspace(1) %out, float %x) ; DAZ-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]] ; DAZ-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath !0 +; DAZ-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath [[META0]] ; DAZ-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]] ; DAZ-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 @@ -1179,7 +1179,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_vector(ptr addrspace(1) %out, <2 x fl ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] { ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]] ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 -; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], 
[[B]], !fpmath !1 +; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath [[META1]] ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1 @@ -1241,7 +1241,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_vector(ptr addrspace(1) %out, <2 x fl ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] { ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]] ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 -; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath !1 +; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath [[META1]] ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1 @@ -1303,7 +1303,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_vector(ptr addrspace(1) %out, <2 x fl ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] { ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]] ; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 -; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath !1 +; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath [[META1]] ; DAZ-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1 @@ -1359,15 +1359,15 @@ define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath(ptr addrspace(1) %out, <2 ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> , [[X]] ; CHECK-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> , [[X]], !fpmath !1 +; CHECK-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> , [[X]], !fpmath [[META1:![0-9]+]] ; CHECK-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 ; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> , [[X]] ; CHECK-NEXT: store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8 ; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> , [[X]] ; CHECK-NEXT: store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> , [[X]], !fpmath !0 +; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> , [[X]], !fpmath [[META0]] ; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> , [[X]], !fpmath !0 +; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> , [[X]], !fpmath [[META0]] ; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8 ; CHECK-NEXT: ret void ; @@ -1395,9 +1395,9 @@ define amdgpu_kernel void @rcp_fdiv_f32_fpmath_vector_nonsplat(ptr addrspace(1) ; CHECK-NEXT: store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8 ; CHECK-NEXT: 
[[FAST_NO_MD:%.*]] = fdiv fast <2 x float> , [[X]] ; CHECK-NEXT: store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> , [[X]], !fpmath !0 +; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> , [[X]], !fpmath [[META0]] ; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> , [[X]], !fpmath !0 +; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> , [[X]], !fpmath [[META0]] ; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8 ; CHECK-NEXT: ret void ; @@ -1418,9 +1418,9 @@ define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant(ptr addrs ; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant( ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] { ; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0 -; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> [[X_INSERT]], [[Y]], !fpmath !0 +; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> [[X_INSERT]], [[Y]], !fpmath [[META0]] ; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8 -; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> [[X_INSERT]], [[Y]], !fpmath !0 +; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> [[X_INSERT]], [[Y]], !fpmath [[META0]] ; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8 ; CHECK-NEXT: ret void ; @@ -1520,7 +1520,7 @@ define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) { ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = fmul contract float [[TMP4]], [[TMP5]] ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2 +; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2:![0-9]+]] ; IEEE-GOODFREXP-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MD_1ULP_MULTI_USE]]) ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 @@ -1536,8 +1536,8 @@ define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) { ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00 ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = fmul contract float [[TMP14]], [[TMP15]] ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1 -; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath !1 +; IEEE-GOODFREXP-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] +; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath [[META1]] ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract 
afn float @llvm.sqrt.f32(float [[X]]) ; IEEE-GOODFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]] @@ -1563,7 +1563,7 @@ define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) { ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = select contract i1 [[TMP21]], float -4.096000e+03, float -1.000000e+00 ; IEEE-GOODFREXP-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fmul contract float [[TMP24]], [[TMP25]] ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1 +; IEEE-GOODFREXP-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_HALF_ULP]]) ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0 ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP26]], 1 @@ -1601,7 +1601,7 @@ define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) { ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00 ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = fmul contract float [[TMP4]], [[TMP5]] ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2 +; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2:![0-9]+]] ; IEEE-BADFREXP-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MD_1ULP_MULTI_USE]]) ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 @@ -1617,8 +1617,8 @@ define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) { ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00 ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = fmul contract float [[TMP14]], [[TMP15]] ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1 -; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath !1 +; IEEE-BADFREXP-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] +; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath [[META1]] ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]) ; IEEE-BADFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]] @@ -1644,7 +1644,7 @@ define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) { ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = select contract i1 [[TMP21]], float -4.096000e+03, float -1.000000e+00 ; IEEE-BADFREXP-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fmul contract float [[TMP24]], [[TMP25]] ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float 
@llvm.sqrt.f32(float [[X]]), !fpmath !1 +; IEEE-BADFREXP-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_HALF_ULP]]) ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0 ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_HALF_ULP]]) @@ -1683,8 +1683,8 @@ define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) { ; DAZ-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[MD_25ULP:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]]) ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1 -; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath !1 +; DAZ-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] +; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath [[META1]] ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]) ; DAZ-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]] @@ -1701,7 +1701,7 @@ define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) { ; DAZ-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]]) ; DAZ-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fneg contract float [[TMP1]] ; DAZ-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !1 +; DAZ-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] ; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_HALF_ULP]]) ; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]) @@ -1873,7 +1873,7 @@ define amdgpu_kernel void @rsq_f32_fpmath_flags(ptr addrspace(1) %out, float %x) define float @rsq_f32_missing_contract0(float %x) { ; IEEE-GOODFREXP-LABEL: define float @rsq_f32_missing_contract0( ; IEEE-GOODFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] { -; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2 +; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]] ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]]) ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 @@ -1884,7 +1884,7 @@ define float @rsq_f32_missing_contract0(float %x) { ; ; IEEE-BADFREXP-LABEL: define float @rsq_f32_missing_contract0( ; IEEE-BADFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] { -; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2 +; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]] ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]]) ; 
IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_3ULP]]) @@ -1907,7 +1907,7 @@ define float @rsq_f32_missing_contract0(float %x) { define float @rsq_f32_missing_contract1(float %x) { ; IEEE-GOODFREXP-LABEL: define float @rsq_f32_missing_contract1( ; IEEE-GOODFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] { -; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2 +; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]] ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]]) ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 @@ -1918,7 +1918,7 @@ define float @rsq_f32_missing_contract1(float %x) { ; ; IEEE-BADFREXP-LABEL: define float @rsq_f32_missing_contract1( ; IEEE-BADFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] { -; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath !2 +; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]] ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]]) ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_3ULP]]) @@ -2116,7 +2116,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; IEEE-GOODFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> , [[SQRT_X_NO_MD]] ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2 +; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0 @@ -2136,7 +2136,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i64 1 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2 +; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0 ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1 ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[X]], i64 0 @@ -2160,7 +2160,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = 
insertelement <2 x float> poison, float [[TMP27]], i64 0 ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1 ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !3 +; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]] ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0 ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1 ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractelement <2 x float> [[X]], i64 0 @@ -2187,7 +2187,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; IEEE-BADFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> , [[SQRT_X_NO_MD]] ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2 +; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0 @@ -2207,7 +2207,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0 ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i64 1 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2 +; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0 ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1 ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[X]], i64 0 @@ -2231,7 +2231,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i64 0 ; IEEE-BADFREXP-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1 ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !3 +; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]] ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0 ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1 ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = extractelement <2 x float> [[X]], i64 0 @@ -2258,7 +2258,7 @@ define amdgpu_kernel void 
@rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; DAZ-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> , [[SQRT_X_NO_MD]] ; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2 +; DAZ-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2:![0-9]+]] ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1 ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0 @@ -2290,7 +2290,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo ; DAZ-NEXT: [[TMP26:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i64 0 ; DAZ-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP26]], float [[TMP25]], i64 1 ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !3 +; DAZ-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]] ; DAZ-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0 ; DAZ-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1 ; DAZ-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[X]], i64 0 @@ -3086,7 +3086,7 @@ define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3(ptr addrspace( define <4 x float> @rsq_f32_vector_mixed_constant_numerator(<4 x float> %arg) { ; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator( ; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { -; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2 +; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 @@ -3135,7 +3135,7 @@ define <4 x float> @rsq_f32_vector_mixed_constant_numerator(<4 x float> %arg) { ; ; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator( ; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { -; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2 +; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 @@ -3381,7 +3381,7 @@ define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt(<4 x float> define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div(<4 x float> %arg) { ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div( ; IEEE-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { -; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> 
[[ARG]]), !fpmath !2 +; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2:![0-9]+]] ; IEEE-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract afn <4 x float> , [[DENOM]] ; IEEE-NEXT: ret <4 x float> [[PARTIAL_RSQ]] ; @@ -3410,7 +3410,7 @@ define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div(<4 x float> define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv(<4 x float> %arg) { ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv( ; IEEE-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { -; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2 +; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] ; IEEE-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract <4 x float> , [[DENOM]] ; IEEE-NEXT: ret <4 x float> [[PARTIAL_RSQ]] ; @@ -3573,7 +3573,7 @@ define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt(<4 x fl define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp(<4 x float> %arg) { ; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp( ; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { -; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2 +; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 @@ -3616,7 +3616,7 @@ define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp(<4 x float> %ar ; ; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp( ; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { -; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2 +; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 @@ -3696,7 +3696,7 @@ define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp(<4 x float> %ar define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct(<4 x float> %arg) { ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct( ; IEEE-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { -; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath !2 +; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] ; IEEE-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv arcp contract <4 x float> , [[DENOM]] ; IEEE-NEXT: ret <4 x float> [[PARTIAL_RSQ]] ; @@ -3857,21 +3857,21 @@ define <4 x float> @rsq_f32_vector_const_denom(ptr addrspace(1) %out, <2 x float ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fneg contract float 
[[TMP9]] -; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]]) -; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 -; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 -; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = sub i32 0, [[TMP21]] -; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP20]]) -; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP23]], i32 [[TMP22]]) -; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]]) +; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]]) ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1 +; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = sub i32 0, [[TMP27]] ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP26]]) +; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP22]]) +; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]]) +; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0 +; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = extractvalue { float, i32 } [[TMP48]], 1 +; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP49]]) ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef) ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0 ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP29]], 1 -; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP30]], [[TMP28]] -; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP27]] +; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP30]], [[TMP51]] +; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP50]] ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP33]]) ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 @@ -3910,20 +3910,20 @@ define <4 x float> @rsq_f32_vector_const_denom(ptr addrspace(1) %out, <2 x float ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fneg contract float [[TMP9]] -; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]]) -; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 +; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]]) +; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP18]]) ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = sub i32 0, [[TMP21]] -; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP20]]) -; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP23]], i32 [[TMP22]]) -; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]]) -; 
IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 -; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP10]]) ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP26]]) +; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP22]]) +; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]]) +; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0 +; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP10]]) +; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP49]]) ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float undef) ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0 ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float undef) -; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP30]], [[TMP28]] +; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP30]], [[TMP50]] ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP27]] ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP33]]) ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) @@ -4110,7 +4110,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(ptr addrspace(1) %out, floa ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1 +; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 @@ -4145,9 +4145,9 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(ptr addrspace(1) %out, floa ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] ; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0 +; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0 +; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 @@ -4173,7 +4173,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(ptr addrspace(1) %out, floa ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] 
{ ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1 +; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 @@ -4208,9 +4208,9 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(ptr addrspace(1) %out, floa ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] ; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) ; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0 +; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0 +; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 @@ -4236,7 +4236,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(ptr addrspace(1) %out, floa ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1 +; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) ; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 @@ -4253,9 +4253,9 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(ptr addrspace(1) %out, floa ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) ; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0 +; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] ; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0 +; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] ; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] ; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 @@ -4295,7 +4295,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(ptr addrspace(1) %out, floa ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float 
nofpclass(sub) [[B:%.*]]) #[[ATTR1]] { ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1 +; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 @@ -4330,9 +4330,9 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(ptr addrspace(1) %out, floa ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] ; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0 +; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0 +; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 @@ -4358,7 +4358,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(ptr addrspace(1) %out, floa ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] { ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1 +; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 @@ -4393,9 +4393,9 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(ptr addrspace(1) %out, floa ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] ; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) ; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0 +; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0 +; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] ; IEEE-BADFREXP-NEXT: store volatile float 
[[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 @@ -4421,7 +4421,7 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(ptr addrspace(1) %out, floa ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] { ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath !1 +; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) ; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 @@ -4438,9 +4438,9 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(ptr addrspace(1) %out, floa ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) ; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath !0 +; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] ; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 -; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath !0 +; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] ; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 ; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] ; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 98658834e8978..bf4302c156d83 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -5678,22 +5678,18 @@ define { <32 x i32>, bfloat } @test_overflow_stack(bfloat %a, <32 x i32> %b) { ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8 ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_clause 0x4 -; GFX11-NEXT: scratch_store_b128 off, v[18:21], s0 offset:64 -; GFX11-NEXT: scratch_store_b128 off, v[10:13], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[6:9], s0 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[2:5], s0 -; GFX11-NEXT: scratch_store_b16 off, v1, s0 offset:128 -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: s_add_i32 s3, s0, 0x50 -; GFX11-NEXT: s_add_i32 s0, s0, 48 +; GFX11-NEXT: s_clause 0x5 +; GFX11-NEXT: scratch_store_b128 v0, v[22:25], off offset:80 +; GFX11-NEXT: scratch_store_b128 v0, v[18:21], off offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[14:17], off offset:48 +; GFX11-NEXT: scratch_store_b128 v0, v[10:13], off offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[6:9], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[2:5], off ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b128 off, v[30:33], s1 -; GFX11-NEXT: scratch_store_b128 off, v[26:29], s2 -; GFX11-NEXT: scratch_store_b128 off, v[22:25], s3 -; GFX11-NEXT: scratch_store_b128 off, v[14:17], s0 +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: scratch_store_b128 v0, v[30:33], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[26:29], off offset:96 +; GFX11-NEXT: scratch_store_b16 v0, v1, off 
offset:128 ; GFX11-NEXT: s_setpc_b64 s[30:31] %ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0 %ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1 @@ -8827,19 +8823,6 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX11-NEXT: global_load_u16 v32, v[1:2], off offset:54 ; GFX11-NEXT: global_load_u16 v33, v[1:2], off offset:58 ; GFX11-NEXT: global_load_u16 v1, v[1:2], off offset:62 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0xf0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s3, s0, 0xd0 -; GFX11-NEXT: s_add_i32 s4, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s5, s0, 0xb0 -; GFX11-NEXT: s_add_i32 s6, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s7, s0, 0x90 -; GFX11-NEXT: s_add_i32 s8, s0, 0x70 -; GFX11-NEXT: s_add_i32 s9, s0, 0x60 -; GFX11-NEXT: s_add_i32 s10, s0, 0x50 -; GFX11-NEXT: s_add_i32 s11, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(31) ; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v3 ; GFX11-NEXT: s_waitcnt vmcnt(30) @@ -8936,23 +8919,23 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX11-NEXT: v_cvt_f64_f32_e32 v[5:6], v5 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[3:4], v2 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[1:2], v37 -; GFX11-NEXT: scratch_store_b128 off, v[96:99], s1 -; GFX11-NEXT: scratch_store_b128 off, v[84:87], s2 -; GFX11-NEXT: scratch_store_b128 off, v[80:83], s3 -; GFX11-NEXT: scratch_store_b128 off, v[68:71], s4 -; GFX11-NEXT: scratch_store_b128 off, v[64:67], s5 -; GFX11-NEXT: scratch_store_b128 off, v[52:55], s6 -; GFX11-NEXT: scratch_store_b128 off, v[48:51], s7 -; GFX11-NEXT: scratch_store_b128 off, v[33:36], s0 offset:128 -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s8 -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s9 -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s10 -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s0 offset:64 -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s11 -; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_store_b128 v0, v[96:99], off offset:240 +; GFX11-NEXT: scratch_store_b128 v0, v[84:87], off offset:224 +; GFX11-NEXT: scratch_store_b128 v0, v[80:83], off offset:208 +; GFX11-NEXT: scratch_store_b128 v0, v[68:71], off offset:192 +; GFX11-NEXT: scratch_store_b128 v0, v[64:67], off offset:176 +; GFX11-NEXT: scratch_store_b128 v0, v[52:55], off offset:160 +; GFX11-NEXT: scratch_store_b128 v0, v[48:51], off offset:144 +; GFX11-NEXT: scratch_store_b128 v0, v[33:36], off offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:96 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:80 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: s_setpc_b64 s[30:31] %load = load <32 x bfloat>, ptr addrspace(1) %ptr %fpext = fpext <32 x bfloat> %load to <32 x double> diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir index c48231f3851a7..29621a0477418 100644 --- 
a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir @@ -586,7 +586,7 @@ name: dpp_reg_sequence_both_combined tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo %0:vreg_64 = COPY $vgpr0_vgpr1 %1:vreg_64 = COPY $vgpr2_vgpr3 @@ -606,12 +606,12 @@ body: | # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec # GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1 # GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec -# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec +# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec name: dpp_reg_sequence_first_combined tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo %0:vreg_64 = COPY $vgpr0_vgpr1 %1:vreg_64 = COPY $vgpr2_vgpr3 @@ -636,7 +636,7 @@ name: dpp_reg_sequence_second_combined tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo %0:vreg_64 = COPY $vgpr0_vgpr1 %1:vreg_64 = COPY $vgpr2_vgpr3 @@ -656,12 +656,12 @@ body: | # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1 # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec -# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec +# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec name: dpp_reg_sequence_none_combined tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo %0:vreg_64 = COPY $vgpr0_vgpr1 %1:vreg_64 = COPY $vgpr2_vgpr3 @@ -683,12 +683,12 @@ body: | # GCN: S_BRANCH %bb.1 # GCN: bb.1: # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec -# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec +# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec name: dpp_reg_sequence_exec_changed tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo %0:vreg_64 = COPY $vgpr0_vgpr1 %1:vreg_64 = COPY $vgpr2_vgpr3 @@ -699,6 +699,7 @@ body: | S_BRANCH %bb.1 bb.1: + liveins: $vcc_lo %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec ... 
@@ -712,12 +713,12 @@ body: | # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1 # GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1 # GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec -# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec +# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec name: dpp_reg_sequence_subreg tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo %0:vreg_64 = COPY $vgpr0_vgpr1 %1:vreg_64 = COPY $vgpr2_vgpr3 @@ -782,6 +783,7 @@ name: dpp64_add64_impdef tracksRegLiveness: true body: | bb.0: + liveins: $vcc_lo %0:vreg_64 = IMPLICIT_DEF %1:vreg_64 = IMPLICIT_DEF %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec @@ -796,6 +798,7 @@ name: dpp64_add64_undef tracksRegLiveness: true body: | bb.0: + liveins: $vcc_lo %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec @@ -860,12 +863,12 @@ body: | # GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence # GCN: %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec -# GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec +# GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec name: dont_combine_more_than_one_operand_dpp_reg_sequence tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo %0:vreg_64 = COPY $vgpr0_vgpr1 %1:vreg_64 = COPY $vgpr2_vgpr3 %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll index 66bf0d5abb732..99818df6175bd 100644 --- a/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/fptoi.i128.ll @@ -1490,13 +1490,704 @@ define i128 @fptoui_f16_to_i128(half %x) { ret i128 %cvt } -; FIXME: ExpandLargeFpConvert asserts on bfloat -; define i128 @fptosi_bf16_to_i128(bfloat %x) { -; %cvt = fptosi bfloat %x to i128 -; ret i128 %cvt -; } +define i128 @fptosi_bf16_to_i128(bfloat %x) { +; SDAG-LABEL: fptosi_bf16_to_i128: +; SDAG: ; %bb.0: ; %fp-to-i-entry +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v4, v0 +; SDAG-NEXT: v_bfe_u32 v5, v4, 7, 8 +; SDAG-NEXT: s_movk_i32 s4, 0x7e +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: v_mov_b32_e32 v2, 0 +; SDAG-NEXT: v_mov_b32_e32 v6, 0 +; SDAG-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-NEXT: v_mov_b32_e32 v3, 0 +; SDAG-NEXT: v_cmp_lt_u32_e32 vcc, s4, v5 +; SDAG-NEXT: s_and_saveexec_b64 s[8:9], vcc +; SDAG-NEXT: s_cbranch_execz .LBB6_10 +; SDAG-NEXT: ; %bb.1: ; %fp-to-i-if-end +; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5 +; SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v6, vcc +; SDAG-NEXT: v_addc_co_u32_e32 v2, vcc, -1, v6, vcc +; SDAG-NEXT: s_movk_i32 s4, 0xff7f +; SDAG-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v6, vcc +; SDAG-NEXT: s_mov_b32 s5, -1 +; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[0:1] +; SDAG-NEXT: v_cmp_eq_u64_e64 s[6:7], -1, v[2:3] +; SDAG-NEXT: v_cmp_lt_i16_e32 vcc, -1, v4 +; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] +; SDAG-NEXT: ; implicit-def: 
$vgpr0_vgpr1 +; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3 +; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; SDAG-NEXT: s_xor_b64 s[10:11], exec, s[6:7] +; SDAG-NEXT: s_cbranch_execz .LBB6_7 +; SDAG-NEXT: ; %bb.2: ; %fp-to-i-if-end9 +; SDAG-NEXT: s_movk_i32 s4, 0x7f +; SDAG-NEXT: v_and_b32_sdwa v0, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDAG-NEXT: s_mov_b64 s[4:5], 0x85 +; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6] +; SDAG-NEXT: v_mov_b32_e32 v7, 0 +; SDAG-NEXT: v_cndmask_b32_e64 v9, -1, 0, vcc +; SDAG-NEXT: v_cndmask_b32_e64 v8, -1, 1, vcc +; SDAG-NEXT: v_or_b32_e32 v6, 0x80, v0 +; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1 +; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3 +; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; SDAG-NEXT: s_xor_b64 s[12:13], exec, s[6:7] +; SDAG-NEXT: s_cbranch_execz .LBB6_4 +; SDAG-NEXT: ; %bb.3: ; %fp-to-i-if-else +; SDAG-NEXT: v_sub_u32_e32 v0, 0xc6, v5 +; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff3a, v5 +; SDAG-NEXT: v_add_u32_e32 v4, 0xffffff7a, v5 +; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[6:7] +; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7] +; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v4 +; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5] +; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4 +; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v1, s[6:7] +; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5] +; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[6:7] +; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7] +; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v0, s[4:5] +; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v1, s[4:5] +; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v8, 0 +; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; SDAG-NEXT: v_mul_lo_u32 v13, v9, v2 +; SDAG-NEXT: v_mov_b32_e32 v6, v1 +; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v8, v[6:7] +; SDAG-NEXT: v_mul_lo_u32 v14, v8, v3 +; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v2, 0 +; SDAG-NEXT: v_add_co_u32_e64 v6, s[4:5], -1, v10 +; SDAG-NEXT: v_mov_b32_e32 v10, v5 +; SDAG-NEXT: v_mov_b32_e32 v5, v7 +; SDAG-NEXT: v_addc_co_u32_e64 v8, s[4:5], 0, -1, s[4:5] +; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v9, v[4:5] +; SDAG-NEXT: v_add3_u32 v3, v3, v14, v13 +; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v12, v[2:3] +; SDAG-NEXT: v_add_co_u32_e64 v5, s[4:5], v10, v5 +; SDAG-NEXT: v_mul_lo_u32 v3, v6, v11 +; SDAG-NEXT: v_addc_co_u32_e64 v6, s[4:5], 0, 0, s[4:5] +; SDAG-NEXT: v_mul_lo_u32 v7, v8, v12 +; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6] +; SDAG-NEXT: ; implicit-def: $vgpr8 +; SDAG-NEXT: v_add3_u32 v3, v7, v2, v3 +; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v5, v1 +; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v6, v3, s[4:5] +; SDAG-NEXT: ; implicit-def: $vgpr5_vgpr6 +; SDAG-NEXT: v_mov_b32_e32 v1, v4 +; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7 +; SDAG-NEXT: .LBB6_4: ; %Flow +; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13] +; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-then12 +; SDAG-NEXT: v_sub_u32_e32 v2, 0x86, v5 +; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[6:7] +; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5] +; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 +; SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5] +; SDAG-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v8 +; SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; SDAG-NEXT: v_mul_i32_i24_e32 v0, v0, v8 +; SDAG-NEXT: v_mov_b32_e32 v3, v2 +; SDAG-NEXT: ; %bb.6: ; %Flow1 +; SDAG-NEXT: s_or_b64 exec, exec, s[6:7] +; SDAG-NEXT: .LBB6_7: ; %Flow2 +; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] +; SDAG-NEXT: ; %bb.8: ; 
%fp-to-i-if-then5 +; SDAG-NEXT: v_bfrev_b32_e32 v0, 1 +; SDAG-NEXT: v_bfrev_b32_e32 v1, -2 +; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc +; SDAG-NEXT: v_mov_b32_e32 v0, v2 +; SDAG-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-NEXT: ; %bb.9: ; %Flow3 +; SDAG-NEXT: s_or_b64 exec, exec, s[4:5] +; SDAG-NEXT: .LBB6_10: ; %fp-to-i-cleanup +; SDAG-NEXT: s_or_b64 exec, exec, s[8:9] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fptosi_bf16_to_i128: +; GISEL: ; %bb.0: ; %fp-to-i-entry +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v4, v0 +; GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v4 +; GISEL-NEXT: v_mov_b32_e32 v6, 0 +; GISEL-NEXT: v_lshrrev_b64 v[0:1], 7, v[5:6] +; GISEL-NEXT: v_mov_b32_e32 v1, 0x7f +; GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfe_u32 v5, v0, 0, 8 +; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[5:6], v[1:2] +; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] +; GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-NEXT: v_mov_b32_e32 v2, s6 +; GISEL-NEXT: v_mov_b32_e32 v3, s7 +; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_cbranch_execz .LBB6_10 +; GISEL-NEXT: ; %bb.1: ; %fp-to-i-if-end +; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5 +; GISEL-NEXT: v_mov_b32_e32 v2, 0xffffff80 +; GISEL-NEXT: v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc +; GISEL-NEXT: v_mov_b32_e32 v3, -1 +; GISEL-NEXT: v_addc_co_u32_e64 v7, s[6:7], 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_cmp_le_u64_e32 vcc, -1, v[7:8] +; GISEL-NEXT: v_cmp_lt_i16_e64 s[4:5], -1, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, -1, v[7:8] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_xor_b64 s[14:15], exec, s[6:7] +; GISEL-NEXT: s_cbranch_execz .LBB6_7 +; GISEL-NEXT: ; %bb.2: ; %fp-to-i-if-end9 +; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], -1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[6:7] +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v2, 1, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[6:7] +; GISEL-NEXT: v_lshlrev_b16_e32 v3, 2, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v7, 3, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v8, 4, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v9, 5, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v10, 6, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v11, 7, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v12, 8, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v13, 9, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v14, 10, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v15, 11, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v16, 12, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v17, 13, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v18, 14, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v19, 15, v0 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v2 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v2 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v3 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v3 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v7 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v7 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v8 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v8 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v9 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v9 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v10 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v10 +; GISEL-NEXT: 
v_or_b32_e32 v0, v0, v11 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v11 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v12 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v12 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v13 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v13 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v14 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v14 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v15 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v15 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v16 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v16 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v17 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v17 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v18 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v18 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v19 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v19 +; GISEL-NEXT: v_and_b32_e32 v11, 0xffff, v0 +; GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v11 +; GISEL-NEXT: v_or3_b32 v9, v1, v0, 1 +; GISEL-NEXT: v_or3_b32 v10, v11, v0, 0 +; GISEL-NEXT: v_mov_b32_e32 v0, 0x86 +; GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-NEXT: v_and_b32_e32 v2, 0x7f, v4 +; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[5:6], v[0:1] +; GISEL-NEXT: v_or_b32_e32 v7, 0x80, v2 +; GISEL-NEXT: v_mov_b32_e32 v8, 0 +; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7] +; GISEL-NEXT: s_cbranch_execz .LBB6_4 +; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else +; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff7a, v5 +; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[7:8] +; GISEL-NEXT: v_subrev_u32_e32 v4, 64, v6 +; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6 +; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 +; GISEL-NEXT: v_lshl_or_b32 v11, v11, 16, v11 +; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[7:8] +; GISEL-NEXT: v_lshlrev_b64 v[4:5], v4, v[7:8] +; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v8, v11, 0 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v10, v[0:1] +; GISEL-NEXT: v_cndmask_b32_e64 v13, v2, 0, s[6:7] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v8, v9, 0 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v13, v9, v[6:7] +; GISEL-NEXT: v_mul_lo_u32 v4, v12, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, v6 +; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[8:9], v8, v10, v[1:2] +; GISEL-NEXT: v_mul_lo_u32 v6, v8, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[10:11], v12, v9, v[1:2] +; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] +; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v13, v10, v[4:5] +; GISEL-NEXT: ; implicit-def: $vgpr5 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v9, v[6:7] +; GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8 +; GISEL-NEXT: ; implicit-def: $vgpr9 +; GISEL-NEXT: .LBB6_4: ; %Flow +; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[16:17] +; GISEL-NEXT: s_cbranch_execz .LBB6_6 +; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12 +; GISEL-NEXT: v_sub_co_u32_e32 v3, vcc, 0x86, v5 +; GISEL-NEXT: v_subrev_u32_e32 v2, 64, v3 +; GISEL-NEXT: v_lshrrev_b64 v[0:1], v3, v[7:8] +; GISEL-NEXT: v_lshrrev_b64 v[1:2], v2, 0 +; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; GISEL-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v9 +; GISEL-NEXT: 
v_ashrrev_i32_e32 v2, 31, v1 +; GISEL-NEXT: v_mul_i32_i24_e32 v0, v0, v9 +; GISEL-NEXT: v_mov_b32_e32 v3, v2 +; GISEL-NEXT: .LBB6_6: ; %Flow1 +; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] +; GISEL-NEXT: .LBB6_7: ; %Flow2 +; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[14:15] +; GISEL-NEXT: s_cbranch_execz .LBB6_9 +; GISEL-NEXT: ; %bb.8: ; %fp-to-i-if-then5 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] +; GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 1, v1 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v2 +; GISEL-NEXT: v_lshlrev_b32_e32 v3, 2, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v1 +; GISEL-NEXT: v_or_b32_e32 v2, v1, v2 +; GISEL-NEXT: v_or3_b32 v0, v0, v3, v4 +; GISEL-NEXT: v_lshlrev_b32_e32 v5, 4, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v6, 5, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v3, v4 +; GISEL-NEXT: v_or3_b32 v0, v0, v5, v6 +; GISEL-NEXT: v_lshlrev_b32_e32 v7, 6, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v8, 7, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v5, v6 +; GISEL-NEXT: v_or3_b32 v0, v0, v7, v8 +; GISEL-NEXT: v_lshlrev_b32_e32 v9, 8, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v10, 9, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v7, v8 +; GISEL-NEXT: v_or3_b32 v0, v0, v9, v10 +; GISEL-NEXT: v_lshlrev_b32_e32 v11, 10, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v12, 11, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v9, v10 +; GISEL-NEXT: v_or3_b32 v0, v0, v11, v12 +; GISEL-NEXT: v_lshlrev_b32_e32 v13, 12, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v14, 13, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v11, v12 +; GISEL-NEXT: v_or3_b32 v0, v0, v13, v14 +; GISEL-NEXT: v_lshlrev_b32_e32 v15, 14, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v16, 15, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v13, v14 +; GISEL-NEXT: v_or3_b32 v0, v0, v15, v16 +; GISEL-NEXT: v_lshlrev_b32_e32 v17, 16, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v18, 17, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v15, v16 +; GISEL-NEXT: v_or3_b32 v0, v0, v17, v18 +; GISEL-NEXT: v_lshlrev_b32_e32 v19, 18, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v20, 19, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v17, v18 +; GISEL-NEXT: v_or3_b32 v0, v0, v19, v20 +; GISEL-NEXT: v_lshlrev_b32_e32 v3, 20, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v4, 21, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v19, v20 +; GISEL-NEXT: v_or3_b32 v0, v0, v3, v4 +; GISEL-NEXT: v_lshlrev_b32_e32 v5, 22, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v6, 23, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v3, v4 +; GISEL-NEXT: v_or3_b32 v0, v0, v5, v6 +; GISEL-NEXT: v_lshlrev_b32_e32 v7, 24, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v8, 25, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v5, v6 +; GISEL-NEXT: v_or3_b32 v0, v0, v7, v8 +; GISEL-NEXT: v_lshlrev_b32_e32 v9, 26, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v10, 27, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v7, v8 +; GISEL-NEXT: v_or3_b32 v0, v0, v9, v10 +; GISEL-NEXT: v_lshlrev_b32_e32 v11, 28, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v12, 29, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v9, v10 +; GISEL-NEXT: v_or3_b32 v0, v0, v11, v12 +; GISEL-NEXT: v_lshlrev_b32_e32 v13, 30, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v1, 31, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v11, v12 +; GISEL-NEXT: v_or3_b32 v0, v0, v13, v1 +; GISEL-NEXT: v_or3_b32 v1, v2, v13, v1 +; GISEL-NEXT: v_add_u32_e32 v3, 0x80000000, v1 +; GISEL-NEXT: v_mov_b32_e32 v2, v1 +; GISEL-NEXT: .LBB6_9: ; %Flow3 +; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] +; GISEL-NEXT: .LBB6_10: ; %fp-to-i-cleanup +; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] +; GISEL-NEXT: s_setpc_b64 s[30:31] + %cvt = fptosi bfloat %x to i128 + ret i128 %cvt +} -; define i128 @fptoui_bf16_to_i128(bfloat 
%x) { -; %cvt = fptoui bfloat %x to i128 -; ret i128 %cvt -; } +define i128 @fptoui_bf16_to_i128(bfloat %x) { +; SDAG-LABEL: fptoui_bf16_to_i128: +; SDAG: ; %bb.0: ; %fp-to-i-entry +; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SDAG-NEXT: v_mov_b32_e32 v4, v0 +; SDAG-NEXT: v_bfe_u32 v5, v4, 7, 8 +; SDAG-NEXT: s_movk_i32 s4, 0x7e +; SDAG-NEXT: v_mov_b32_e32 v0, 0 +; SDAG-NEXT: v_mov_b32_e32 v2, 0 +; SDAG-NEXT: v_mov_b32_e32 v6, 0 +; SDAG-NEXT: v_mov_b32_e32 v1, 0 +; SDAG-NEXT: v_mov_b32_e32 v3, 0 +; SDAG-NEXT: v_cmp_lt_u32_e32 vcc, s4, v5 +; SDAG-NEXT: s_and_saveexec_b64 s[8:9], vcc +; SDAG-NEXT: s_cbranch_execz .LBB7_10 +; SDAG-NEXT: ; %bb.1: ; %fp-to-i-if-end +; SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5 +; SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v6, vcc +; SDAG-NEXT: v_addc_co_u32_e32 v2, vcc, -1, v6, vcc +; SDAG-NEXT: s_movk_i32 s4, 0xff7f +; SDAG-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v6, vcc +; SDAG-NEXT: s_mov_b32 s5, -1 +; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[0:1] +; SDAG-NEXT: v_cmp_eq_u64_e64 s[6:7], -1, v[2:3] +; SDAG-NEXT: v_cmp_lt_i16_e32 vcc, -1, v4 +; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] +; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1 +; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3 +; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; SDAG-NEXT: s_xor_b64 s[10:11], exec, s[6:7] +; SDAG-NEXT: s_cbranch_execz .LBB7_7 +; SDAG-NEXT: ; %bb.2: ; %fp-to-i-if-end9 +; SDAG-NEXT: s_movk_i32 s4, 0x7f +; SDAG-NEXT: v_and_b32_sdwa v0, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; SDAG-NEXT: s_mov_b64 s[4:5], 0x85 +; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[4:5], v[5:6] +; SDAG-NEXT: v_mov_b32_e32 v7, 0 +; SDAG-NEXT: v_cndmask_b32_e64 v9, -1, 0, vcc +; SDAG-NEXT: v_cndmask_b32_e64 v8, -1, 1, vcc +; SDAG-NEXT: v_or_b32_e32 v6, 0x80, v0 +; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1 +; SDAG-NEXT: ; implicit-def: $vgpr2_vgpr3 +; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; SDAG-NEXT: s_xor_b64 s[12:13], exec, s[6:7] +; SDAG-NEXT: s_cbranch_execz .LBB7_4 +; SDAG-NEXT: ; %bb.3: ; %fp-to-i-if-else +; SDAG-NEXT: v_sub_u32_e32 v0, 0xc6, v5 +; SDAG-NEXT: v_add_u32_e32 v2, 0xffffff3a, v5 +; SDAG-NEXT: v_add_u32_e32 v4, 0xffffff7a, v5 +; SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, v[6:7] +; SDAG-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7] +; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v4 +; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5] +; SDAG-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4 +; SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v1, s[6:7] +; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v0, s[4:5] +; SDAG-NEXT: v_lshlrev_b64 v[0:1], v4, v[6:7] +; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[6:7] +; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, v0, s[4:5] +; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v1, s[4:5] +; SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v8, 0 +; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; SDAG-NEXT: v_mul_lo_u32 v13, v9, v2 +; SDAG-NEXT: v_mov_b32_e32 v6, v1 +; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v8, v[6:7] +; SDAG-NEXT: v_mul_lo_u32 v14, v8, v3 +; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v2, 0 +; SDAG-NEXT: v_add_co_u32_e64 v6, s[4:5], -1, v10 +; SDAG-NEXT: v_mov_b32_e32 v10, v5 +; SDAG-NEXT: v_mov_b32_e32 v5, v7 +; SDAG-NEXT: v_addc_co_u32_e64 v8, s[4:5], 0, -1, s[4:5] +; SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v9, v[4:5] +; SDAG-NEXT: v_add3_u32 v3, v3, v14, v13 +; SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v6, v12, v[2:3] +; SDAG-NEXT: v_add_co_u32_e64 v5, s[4:5], v10, v5 +; SDAG-NEXT: v_mul_lo_u32 v3, v6, v11 +; SDAG-NEXT: v_addc_co_u32_e64 v6, s[4:5], 0, 
0, s[4:5] +; SDAG-NEXT: v_mul_lo_u32 v7, v8, v12 +; SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v11, v9, v[5:6] +; SDAG-NEXT: ; implicit-def: $vgpr8 +; SDAG-NEXT: v_add3_u32 v3, v7, v2, v3 +; SDAG-NEXT: v_add_co_u32_e64 v2, s[4:5], v5, v1 +; SDAG-NEXT: v_addc_co_u32_e64 v3, s[4:5], v6, v3, s[4:5] +; SDAG-NEXT: ; implicit-def: $vgpr5_vgpr6 +; SDAG-NEXT: v_mov_b32_e32 v1, v4 +; SDAG-NEXT: ; implicit-def: $vgpr6_vgpr7 +; SDAG-NEXT: .LBB7_4: ; %Flow +; SDAG-NEXT: s_andn2_saveexec_b64 s[6:7], s[12:13] +; SDAG-NEXT: ; %bb.5: ; %fp-to-i-if-then12 +; SDAG-NEXT: v_sub_u32_e32 v2, 0x86, v5 +; SDAG-NEXT: v_lshrrev_b64 v[0:1], v2, v[6:7] +; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v2 +; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, v0, s[4:5] +; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v2 +; SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5] +; SDAG-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v8 +; SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; SDAG-NEXT: v_mul_i32_i24_e32 v0, v0, v8 +; SDAG-NEXT: v_mov_b32_e32 v3, v2 +; SDAG-NEXT: ; %bb.6: ; %Flow1 +; SDAG-NEXT: s_or_b64 exec, exec, s[6:7] +; SDAG-NEXT: .LBB7_7: ; %Flow2 +; SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] +; SDAG-NEXT: ; %bb.8: ; %fp-to-i-if-then5 +; SDAG-NEXT: v_bfrev_b32_e32 v0, 1 +; SDAG-NEXT: v_bfrev_b32_e32 v1, -2 +; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; SDAG-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc +; SDAG-NEXT: v_mov_b32_e32 v0, v2 +; SDAG-NEXT: v_mov_b32_e32 v1, v2 +; SDAG-NEXT: ; %bb.9: ; %Flow3 +; SDAG-NEXT: s_or_b64 exec, exec, s[4:5] +; SDAG-NEXT: .LBB7_10: ; %fp-to-i-cleanup +; SDAG-NEXT: s_or_b64 exec, exec, s[8:9] +; SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-LABEL: fptoui_bf16_to_i128: +; GISEL: ; %bb.0: ; %fp-to-i-entry +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_mov_b32_e32 v4, v0 +; GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v4 +; GISEL-NEXT: v_mov_b32_e32 v6, 0 +; GISEL-NEXT: v_lshrrev_b64 v[0:1], 7, v[5:6] +; GISEL-NEXT: v_mov_b32_e32 v1, 0x7f +; GISEL-NEXT: s_mov_b64 s[4:5], 0 +; GISEL-NEXT: v_mov_b32_e32 v2, 0 +; GISEL-NEXT: v_bfe_u32 v5, v0, 0, 8 +; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[5:6], v[1:2] +; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] +; GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GISEL-NEXT: v_mov_b32_e32 v1, s5 +; GISEL-NEXT: v_mov_b32_e32 v2, s6 +; GISEL-NEXT: v_mov_b32_e32 v3, s7 +; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_cbranch_execz .LBB7_10 +; GISEL-NEXT: ; %bb.1: ; %fp-to-i-if-end +; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffff01, v5 +; GISEL-NEXT: v_mov_b32_e32 v2, 0xffffff80 +; GISEL-NEXT: v_addc_co_u32_e64 v1, s[6:7], 0, -1, vcc +; GISEL-NEXT: v_mov_b32_e32 v3, -1 +; GISEL-NEXT: v_addc_co_u32_e64 v7, s[6:7], 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[0:1], v[2:3] +; GISEL-NEXT: v_addc_co_u32_e64 v8, s[6:7], 0, -1, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GISEL-NEXT: v_cmp_le_u64_e32 vcc, -1, v[7:8] +; GISEL-NEXT: v_cmp_lt_i16_e64 s[4:5], -1, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, -1, v[7:8] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_xor_b64 s[14:15], exec, s[6:7] +; GISEL-NEXT: s_cbranch_execz .LBB7_7 +; GISEL-NEXT: ; %bb.2: ; %fp-to-i-if-end9 +; GISEL-NEXT: s_xor_b64 s[6:7], s[4:5], -1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[6:7] +; GISEL-NEXT: v_and_b32_e32 v0, 1, v0 +; GISEL-NEXT: 
v_lshlrev_b16_e32 v2, 1, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[6:7] +; GISEL-NEXT: v_lshlrev_b16_e32 v3, 2, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v7, 3, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v8, 4, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v9, 5, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v10, 6, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v11, 7, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v12, 8, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v13, 9, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v14, 10, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v15, 11, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v16, 12, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v17, 13, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v18, 14, v0 +; GISEL-NEXT: v_lshlrev_b16_e32 v19, 15, v0 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v2 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v2 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v3 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v3 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v7 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v7 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v8 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v8 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v9 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v9 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v10 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v10 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v11 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v11 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v12 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v12 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v13 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v13 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v14 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v14 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v15 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v15 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v16 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v16 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v17 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v17 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v18 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v18 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v19 +; GISEL-NEXT: v_or_b32_e32 v1, v1, v19 +; GISEL-NEXT: v_and_b32_e32 v11, 0xffff, v0 +; GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v11 +; GISEL-NEXT: v_or3_b32 v9, v1, v0, 1 +; GISEL-NEXT: v_or3_b32 v10, v11, v0, 0 +; GISEL-NEXT: v_mov_b32_e32 v0, 0x86 +; GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-NEXT: v_and_b32_e32 v2, 0x7f, v4 +; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[5:6], v[0:1] +; GISEL-NEXT: v_or_b32_e32 v7, 0x80, v2 +; GISEL-NEXT: v_mov_b32_e32 v8, 0 +; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 +; GISEL-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GISEL-NEXT: s_xor_b64 s[16:17], exec, s[6:7] +; GISEL-NEXT: s_cbranch_execz .LBB7_4 +; GISEL-NEXT: ; %bb.3: ; %fp-to-i-if-else +; GISEL-NEXT: v_add_u32_e32 v6, 0xffffff7a, v5 +; GISEL-NEXT: v_lshlrev_b64 v[0:1], v6, v[7:8] +; GISEL-NEXT: v_subrev_u32_e32 v4, 64, v6 +; GISEL-NEXT: v_sub_u32_e32 v2, 64, v6 +; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 +; GISEL-NEXT: v_lshl_or_b32 v11, v11, 16, v11 +; GISEL-NEXT: v_lshrrev_b64 v[2:3], v2, v[7:8] +; GISEL-NEXT: v_lshlrev_b64 v[4:5], v4, v[7:8] +; GISEL-NEXT: v_cndmask_b32_e32 v8, 0, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v12, 0, v1, vcc +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v8, v11, 0 +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v12, v10, v[0:1] +; GISEL-NEXT: v_cndmask_b32_e64 v13, v2, 0, s[6:7] +; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v8, v9, 0 +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v13, v9, v[6:7] +; GISEL-NEXT: v_mul_lo_u32 v4, v12, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GISEL-NEXT: v_mov_b32_e32 v2, v6 +; 
GISEL-NEXT: v_mad_u64_u32 v[1:2], s[8:9], v8, v10, v[1:2] +; GISEL-NEXT: v_mul_lo_u32 v6, v8, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[6:7] +; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[10:11], v12, v9, v[1:2] +; GISEL-NEXT: v_addc_co_u32_e64 v6, s[10:11], v7, v6, s[10:11] +; GISEL-NEXT: v_addc_co_u32_e64 v4, s[8:9], v6, v4, s[8:9] +; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[8:9], v13, v10, v[4:5] +; GISEL-NEXT: ; implicit-def: $vgpr5 +; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v3, v9, v[6:7] +; GISEL-NEXT: ; implicit-def: $vgpr7_vgpr8 +; GISEL-NEXT: ; implicit-def: $vgpr9 +; GISEL-NEXT: .LBB7_4: ; %Flow +; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[16:17] +; GISEL-NEXT: s_cbranch_execz .LBB7_6 +; GISEL-NEXT: ; %bb.5: ; %fp-to-i-if-then12 +; GISEL-NEXT: v_sub_co_u32_e32 v3, vcc, 0x86, v5 +; GISEL-NEXT: v_subrev_u32_e32 v2, 64, v3 +; GISEL-NEXT: v_lshrrev_b64 v[0:1], v3, v[7:8] +; GISEL-NEXT: v_lshrrev_b64 v[1:2], v2, 0 +; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; GISEL-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v9 +; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; GISEL-NEXT: v_mul_i32_i24_e32 v0, v0, v9 +; GISEL-NEXT: v_mov_b32_e32 v3, v2 +; GISEL-NEXT: .LBB7_6: ; %Flow1 +; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] +; GISEL-NEXT: .LBB7_7: ; %Flow2 +; GISEL-NEXT: s_andn2_saveexec_b64 s[6:7], s[14:15] +; GISEL-NEXT: s_cbranch_execz .LBB7_9 +; GISEL-NEXT: ; %bb.8: ; %fp-to-i-if-then5 +; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5] +; GISEL-NEXT: v_and_b32_e32 v1, 1, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 1, v1 +; GISEL-NEXT: v_or_b32_e32 v0, v0, v2 +; GISEL-NEXT: v_lshlrev_b32_e32 v3, 2, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v4, 3, v1 +; GISEL-NEXT: v_or_b32_e32 v2, v1, v2 +; GISEL-NEXT: v_or3_b32 v0, v0, v3, v4 +; GISEL-NEXT: v_lshlrev_b32_e32 v5, 4, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v6, 5, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v3, v4 +; GISEL-NEXT: v_or3_b32 v0, v0, v5, v6 +; GISEL-NEXT: v_lshlrev_b32_e32 v7, 6, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v8, 7, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v5, v6 +; GISEL-NEXT: v_or3_b32 v0, v0, v7, v8 +; GISEL-NEXT: v_lshlrev_b32_e32 v9, 8, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v10, 9, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v7, v8 +; GISEL-NEXT: v_or3_b32 v0, v0, v9, v10 +; GISEL-NEXT: v_lshlrev_b32_e32 v11, 10, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v12, 11, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v9, v10 +; GISEL-NEXT: v_or3_b32 v0, v0, v11, v12 +; GISEL-NEXT: v_lshlrev_b32_e32 v13, 12, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v14, 13, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v11, v12 +; GISEL-NEXT: v_or3_b32 v0, v0, v13, v14 +; GISEL-NEXT: v_lshlrev_b32_e32 v15, 14, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v16, 15, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v13, v14 +; GISEL-NEXT: v_or3_b32 v0, v0, v15, v16 +; GISEL-NEXT: v_lshlrev_b32_e32 v17, 16, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v18, 17, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v15, v16 +; GISEL-NEXT: v_or3_b32 v0, v0, v17, v18 +; GISEL-NEXT: v_lshlrev_b32_e32 v19, 18, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v20, 19, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v17, v18 +; GISEL-NEXT: v_or3_b32 v0, v0, v19, v20 +; GISEL-NEXT: v_lshlrev_b32_e32 v3, 20, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v4, 21, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v19, v20 +; GISEL-NEXT: v_or3_b32 v0, v0, v3, v4 +; GISEL-NEXT: v_lshlrev_b32_e32 v5, 22, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v6, 23, v1 +; 
GISEL-NEXT: v_or3_b32 v2, v2, v3, v4 +; GISEL-NEXT: v_or3_b32 v0, v0, v5, v6 +; GISEL-NEXT: v_lshlrev_b32_e32 v7, 24, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v8, 25, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v5, v6 +; GISEL-NEXT: v_or3_b32 v0, v0, v7, v8 +; GISEL-NEXT: v_lshlrev_b32_e32 v9, 26, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v10, 27, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v7, v8 +; GISEL-NEXT: v_or3_b32 v0, v0, v9, v10 +; GISEL-NEXT: v_lshlrev_b32_e32 v11, 28, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v12, 29, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v9, v10 +; GISEL-NEXT: v_or3_b32 v0, v0, v11, v12 +; GISEL-NEXT: v_lshlrev_b32_e32 v13, 30, v1 +; GISEL-NEXT: v_lshlrev_b32_e32 v1, 31, v1 +; GISEL-NEXT: v_or3_b32 v2, v2, v11, v12 +; GISEL-NEXT: v_or3_b32 v0, v0, v13, v1 +; GISEL-NEXT: v_or3_b32 v1, v2, v13, v1 +; GISEL-NEXT: v_add_u32_e32 v3, 0x80000000, v1 +; GISEL-NEXT: v_mov_b32_e32 v2, v1 +; GISEL-NEXT: .LBB7_9: ; %Flow3 +; GISEL-NEXT: s_or_b64 exec, exec, s[6:7] +; GISEL-NEXT: .LBB7_10: ; %fp-to-i-cleanup +; GISEL-NEXT: s_or_b64 exec, exec, s[12:13] +; GISEL-NEXT: s_setpc_b64 s[30:31] + %cvt = fptoui bfloat %x to i128 + ret i128 %cvt +} diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll index acadee2798171..401cbce00ac9a 100644 --- a/llvm/test/CodeGen/AMDGPU/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll @@ -1561,34 +1561,28 @@ define <33 x i32> @v33i32_func_void() #0 { ; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80 ; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64 ; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48 -; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:16 -; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 -; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:32 +; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32 +; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16 +; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 ; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: s_add_i32 s3, s0, 0x50 -; GFX11-NEXT: s_add_i32 s4, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s2 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s3 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s0 offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s4 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s0 offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s0 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0 offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off ; GFX11-NEXT: s_waitcnt vmcnt(0) 
-; GFX11-NEXT: scratch_store_b32 off, v33, s0 offset:128 +; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128 ; GFX11-NEXT: s_setpc_b64 s[30:31] %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <33 x i32>, ptr addrspace(1) %ptr @@ -1850,34 +1844,28 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80 ; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64 ; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48 -; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:16 -; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 -; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:32 +; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32 +; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16 +; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 ; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: s_add_i32 s3, s0, 0x50 -; GFX11-NEXT: s_add_i32 s4, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s2 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s3 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s0 offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s4 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s0 offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s0 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0 offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 off, v33, s0 offset:128 +; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128 ; GFX11-NEXT: s_setpc_b64 s[30:31] %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr @@ -2143,33 +2131,24 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:144 ; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:128 ; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0xf0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s3, s0, 0xd0 -; GFX11-NEXT: s_add_i32 s4, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s5, s0, 0xb0 -; GFX11-NEXT: s_add_i32 s6, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s7, s0, 0x90 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:240 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, 
v[5:8], s2 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:224 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s3 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:208 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s4 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:192 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s5 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:176 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s6 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:160 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s7 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:144 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0 offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:128 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 off, v33, s0 +; GFX11-NEXT: scratch_store_b32 v0, v33, off ; GFX11-NEXT: s_setpc_b64 s[30:31] %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index c1d682689903a..3b078c41f4a84 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -1989,256 +1989,138 @@ define amdgpu_gfx <512 x i32> @return_512xi32() #0 { ; GFX11-NEXT: s_mov_b32 s2, s0 ; GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2 ; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_clause 0x7 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:1024 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:512 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:256 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:128 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:64 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 -; GFX11-NEXT: s_add_i32 s1, s0, 0x7f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x7e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x7d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x7c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x7b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x7a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x790 -; GFX11-NEXT: s_add_i32 s2, s0, 0x780 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x770 -; GFX11-NEXT: s_add_i32 s2, s0, 0x760 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x750 -; GFX11-NEXT: s_add_i32 s2, s0, 0x740 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x730 -; GFX11-NEXT: s_add_i32 s2, s0, 0x720 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 
-; GFX11-NEXT: s_add_i32 s1, s0, 0x710 -; GFX11-NEXT: s_add_i32 s2, s0, 0x700 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x6f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x6e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x6d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x6c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x6b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x6a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x690 -; GFX11-NEXT: s_add_i32 s2, s0, 0x680 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x670 -; GFX11-NEXT: s_add_i32 s2, s0, 0x660 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x650 -; GFX11-NEXT: s_add_i32 s2, s0, 0x640 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x630 -; GFX11-NEXT: s_add_i32 s2, s0, 0x620 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x610 -; GFX11-NEXT: s_add_i32 s2, s0, 0x600 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x5f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x5e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x5d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x5c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x5b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x5a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x590 -; GFX11-NEXT: s_add_i32 s2, s0, 0x580 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x570 -; GFX11-NEXT: s_add_i32 s2, s0, 0x560 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x550 -; GFX11-NEXT: s_add_i32 s2, s0, 0x540 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x530 -; GFX11-NEXT: s_add_i32 s2, s0, 0x520 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x510 -; GFX11-NEXT: s_add_i32 s2, s0, 0x500 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x4f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x4e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x4d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x4c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x4b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x4a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, 
v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x490 -; GFX11-NEXT: s_add_i32 s2, s0, 0x480 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x470 -; GFX11-NEXT: s_add_i32 s2, s0, 0x460 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x450 -; GFX11-NEXT: s_add_i32 s2, s0, 0x440 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x430 -; GFX11-NEXT: s_add_i32 s2, s0, 0x420 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x410 -; GFX11-NEXT: s_add_i32 s2, s0, 0x3f0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x3e0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x3d0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x3c0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x3b0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x3a0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x390 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x380 -; GFX11-NEXT: s_add_i32 s2, s0, 0x370 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x360 -; GFX11-NEXT: s_add_i32 s2, s0, 0x350 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x340 -; GFX11-NEXT: s_add_i32 s2, s0, 0x330 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x320 -; GFX11-NEXT: s_add_i32 s2, s0, 0x310 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x300 -; GFX11-NEXT: s_add_i32 s2, s0, 0x2f0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x2e0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x2d0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x2c0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x2b0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x2a0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x290 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x280 -; GFX11-NEXT: s_add_i32 s2, s0, 0x270 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x260 -; GFX11-NEXT: s_add_i32 s2, s0, 0x250 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x240 -; GFX11-NEXT: s_add_i32 s2, s0, 0x230 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x220 -; GFX11-NEXT: s_add_i32 s2, s0, 0x210 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 
off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x1f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x1e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x1d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x1c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x1b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x1a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x190 -; GFX11-NEXT: s_add_i32 s2, s0, 0x180 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x170 -; GFX11-NEXT: s_add_i32 s2, s0, 0x160 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x150 -; GFX11-NEXT: s_add_i32 s2, s0, 0x140 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x130 -; GFX11-NEXT: s_add_i32 s2, s0, 0x120 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x110 -; GFX11-NEXT: s_add_i32 s2, s0, 0xf0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xd0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xb0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x90 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x50 -; GFX11-NEXT: s_add_i32 s0, s0, 48 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2032 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2016 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2000 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1984 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1968 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1952 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1936 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1920 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1904 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1888 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1872 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1856 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1840 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1824 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1808 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1792 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1776 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1760 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1744 +; GFX11-NEXT: 
scratch_store_b128 v0, v[1:4], off offset:1728 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1712 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1696 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1680 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1664 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1648 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1632 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1616 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1600 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1584 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1568 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1552 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1536 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1520 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1504 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1488 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1472 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1456 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1440 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1424 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1408 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1392 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1376 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1360 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1344 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1328 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1312 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1296 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1280 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1264 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1248 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1232 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1216 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1200 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1184 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1168 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1152 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1136 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1120 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1104 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1088 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1072 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1056 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1040 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1024 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1008 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:992 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:976 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:960 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:944 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:928 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:912 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:896 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:880 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:864 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:848 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off 
offset:832 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:816 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:800 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:784 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:768 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:752 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:736 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:720 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:704 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:688 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:672 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:656 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:640 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:624 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:608 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:592 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:576 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:560 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:544 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:528 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:512 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:496 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:480 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:464 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:448 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:432 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:416 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:400 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:384 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:368 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:352 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:336 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:320 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:304 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:288 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:272 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:256 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:240 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:224 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:208 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:192 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:176 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:160 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:144 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:96 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:80 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: ret <512 x i32> zeroinitializer @@ -2636,7 +2518,6 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 { ; GFX11-LABEL: return_72xi32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: s_clause 0xc ; GFX11-NEXT: 
scratch_store_b32 off, v40, s32 offset:212 ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:208 @@ -2651,93 +2532,82 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 { ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:172 ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:168 ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:164 -; GFX11-NEXT: s_clause 0x14 -; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:40 -; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:96 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:92 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:88 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:112 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:108 -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:104 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:128 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:124 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:120 -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s0 offset:64 +; GFX11-NEXT: s_clause 0x11 +; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:16 +; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:12 +; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:8 +; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:32 +; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:28 +; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:24 +; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:48 +; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:44 +; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:40 +; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:64 +; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:60 +; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:56 +; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:80 +; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:76 +; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:72 +; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:96 +; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:92 +; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:88 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:80 +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: scratch_load_b32 v23, off, s32 offset:112 +; GFX11-NEXT: scratch_load_b32 v22, off, s32 offset:108 +; GFX11-NEXT: scratch_load_b32 v21, off, s32 offset:104 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:64 ; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_load_b32 v20, off, s32 offset:144 -; GFX11-NEXT: scratch_load_b32 v19, off, s32 offset:140 -; GFX11-NEXT: scratch_load_b32 v18, off, s32 offset:136 -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s0 offset:32 +; GFX11-NEXT: scratch_load_b32 v19, off, s32 offset:128 +; GFX11-NEXT: scratch_load_b32 v18, off, s32 offset:124 +; GFX11-NEXT: scratch_load_b32 v17, off, s32 offset:120 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 ; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_load_b32 v12, off, 
s32 offset:160 -; GFX11-NEXT: scratch_load_b32 v11, off, s32 offset:156 -; GFX11-NEXT: scratch_load_b32 v10, off, s32 offset:152 -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:16 +; GFX11-NEXT: scratch_load_b32 v15, off, s32 offset:144 +; GFX11-NEXT: scratch_load_b32 v14, off, s32 offset:140 +; GFX11-NEXT: scratch_load_b32 v13, off, s32 offset:136 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 ; GFX11-NEXT: s_clause 0xd -; GFX11-NEXT: scratch_load_b32 v8, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v7, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v6, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v5, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v9, off, s32 offset:148 -; GFX11-NEXT: scratch_load_b32 v17, off, s32 offset:132 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:116 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:100 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:20 +; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:160 +; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:156 +; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:152 +; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:148 +; GFX11-NEXT: scratch_load_b32 v12, off, s32 offset:132 +; GFX11-NEXT: scratch_load_b32 v16, off, s32 offset:116 +; GFX11-NEXT: scratch_load_b32 v20, off, s32 offset:100 +; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:84 +; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:68 +; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:52 +; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:36 +; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:20 +; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:4 ; GFX11-NEXT: scratch_load_b32 v32, off, s32 -; GFX11-NEXT: s_add_i32 s1, s0, 0x110 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x100 -; GFX11-NEXT: s_add_i32 s3, s0, 0xf0 -; GFX11-NEXT: s_add_i32 s34, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s35, s0, 0xd0 -; GFX11-NEXT: s_add_i32 s36, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s37, s0, 0xb0 -; GFX11-NEXT: s_add_i32 s38, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s39, s0, 0x90 -; GFX11-NEXT: s_add_i32 s40, s0, 0x70 -; GFX11-NEXT: s_add_i32 s41, s0, 0x60 -; GFX11-NEXT: s_add_i32 s42, s0, 0x50 -; GFX11-NEXT: s_add_i32 s43, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(10) -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[60:63], off offset:272 ; GFX11-NEXT: s_waitcnt vmcnt(9) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[12:15], off offset:256 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s2 +; GFX11-NEXT: scratch_store_b128 v0, v[16:19], off offset:240 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, v[60:63], s3 +; GFX11-NEXT: scratch_store_b128 v0, v[20:23], off offset:224 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: scratch_store_b128 off, v[56:59], s34 +; GFX11-NEXT: scratch_store_b128 v0, v[56:59], off offset:208 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[41:44], s35 +; GFX11-NEXT: scratch_store_b128 v0, v[41:44], off offset:192 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[37:40], s36 +; GFX11-NEXT: scratch_store_b128 v0, v[37:40], off offset:176 
; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[52:55], s37 +; GFX11-NEXT: scratch_store_b128 v0, v[52:55], off offset:160 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, v[48:51], s38 +; GFX11-NEXT: scratch_store_b128 v0, v[48:51], off offset:144 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[33:36], s39 +; GFX11-NEXT: scratch_store_b128 v0, v[33:36], off offset:128 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s40 -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s41 -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s42 -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s43 +; GFX11-NEXT: s_clause 0x3 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:96 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: s_clause 0xc ; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:164 ; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:168 @@ -3306,7 +3176,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-LABEL: call_72xi32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s46, s33 +; GFX11-NEXT: s_mov_b32 s34, s33 ; GFX11-NEXT: s_add_i32 s33, s32, 0x1ff ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s33, s33, 0xfffffe00 @@ -3353,11 +3223,11 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 ; GFX11-NEXT: s_add_i32 s0, s32, 32 ; GFX11-NEXT: s_add_i32 s1, s32, 16 +; GFX11-NEXT: s_add_i32 s2, s33, 0x200 +; GFX11-NEXT: v_writelane_b32 v60, s30, 0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 -; GFX11-NEXT: s_add_i32 s0, s33, 0x200 -; GFX11-NEXT: v_writelane_b32 v60, s30, 0 -; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, 0 +; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 ; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0 ; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0 @@ -3373,14 +3243,14 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0 ; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0 ; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0 -; GFX11-NEXT: s_mov_b32 s45, return_72xi32@abs32@hi -; GFX11-NEXT: s_mov_b32 s44, return_72xi32@abs32@lo +; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v60, s31, 1 -; GFX11-NEXT: s_swappc_b64 s[30:31], s[44:45] +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_load_b128 v[45:48], off, s33 offset:624 ; GFX11-NEXT: scratch_load_b128 v[33:36], off, s33 offset:640 -; GFX11-NEXT: s_add_i32 s0, s32, 0xa0 +; GFX11-NEXT: s_add_i32 s2, s32, 0xa0 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: v_mov_b32_e32 v32, v48 ; GFX11-NEXT: s_clause 0x9 @@ -3431,38 +3301,38 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v6 ; GFX11-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v6, v9 ; GFX11-NEXT: v_mov_b32_e32 v9, v20 -; GFX11-NEXT: scratch_store_b32 off, v11, s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x90 +; GFX11-NEXT: scratch_store_b32 off, v11, s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x90 ; GFX11-NEXT: 
v_mov_b32_e32 v11, v22 -; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x80 +; GFX11-NEXT: scratch_store_b128 off, v[4:7], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x80 ; GFX11-NEXT: v_mov_b32_e32 v5, v16 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 ; GFX11-NEXT: v_mov_b32_e32 v0, 24 -; GFX11-NEXT: s_add_i32 s0, s32, 0x70 +; GFX11-NEXT: s_add_i32 s2, s32, 0x70 ; GFX11-NEXT: v_mov_b32_e32 v6, v17 -; GFX11-NEXT: scratch_store_b128 off, v[12:15], s0 +; GFX11-NEXT: scratch_store_b128 off, v[12:15], s2 ; GFX11-NEXT: v_mov_b32_e32 v13, v24 -; GFX11-NEXT: s_add_i32 s0, s32, 0x6c +; GFX11-NEXT: s_add_i32 s2, s32, 0x6c ; GFX11-NEXT: v_mov_b32_e32 v7, v18 -; GFX11-NEXT: scratch_store_b32 off, v0, s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x60 +; GFX11-NEXT: scratch_store_b32 off, v0, s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x60 ; GFX11-NEXT: v_dual_mov_b32 v8, v19 :: v_dual_mov_b32 v15, v26 -; GFX11-NEXT: scratch_store_b96 off, v[56:58], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x50 +; GFX11-NEXT: scratch_store_b96 off, v[56:58], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x50 ; GFX11-NEXT: v_dual_mov_b32 v12, v23 :: v_dual_mov_b32 v29, v45 -; GFX11-NEXT: scratch_store_b128 off, v[40:43], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 64 +; GFX11-NEXT: scratch_store_b128 off, v[40:43], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 64 ; GFX11-NEXT: v_mov_b32_e32 v14, v25 -; GFX11-NEXT: scratch_store_b128 off, v[52:55], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 48 +; GFX11-NEXT: scratch_store_b128 off, v[52:55], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 48 ; GFX11-NEXT: v_mov_b32_e32 v16, v27 -; GFX11-NEXT: scratch_store_b128 off, v[36:39], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 32 +; GFX11-NEXT: scratch_store_b128 off, v[36:39], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 32 ; GFX11-NEXT: v_mov_b32_e32 v30, v46 -; GFX11-NEXT: scratch_store_b128 off, v[48:51], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 16 -; GFX11-NEXT: scratch_store_b128 off, v[32:35], s0 +; GFX11-NEXT: scratch_store_b128 off, v[48:51], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 16 +; GFX11-NEXT: scratch_store_b128 off, v[32:35], s2 ; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1588 ; 16-byte Folded Reload ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, 42 @@ -3470,10 +3340,10 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1572 ; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1556 ; GFX11-NEXT: scratch_load_b128 v[25:28], off, s33 offset:1540 -; GFX11-NEXT: s_add_i32 s0, s33, 0x400 +; GFX11-NEXT: s_add_i32 s2, s33, 0x400 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v0, s0 -; GFX11-NEXT: s_swappc_b64 s[30:31], s[44:45] +; GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0xb ; GFX11-NEXT: scratch_load_b32 v59, off, s33 ; GFX11-NEXT: scratch_load_b32 v58, off, s33 offset:4 @@ -3493,7 +3363,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:1536 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_addk_i32 s32, 0xf600 -; GFX11-NEXT: s_mov_b32 s33, s46 +; GFX11-NEXT: s_mov_b32 s33, s34 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll new file mode 100644 index 0000000000000..f950717c591a9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll @@ 
-0,0 +1,275 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,SDAG %s +; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck -check-prefix=GISEL %s + +; FIXME: GISEL can't handle the "fptrunc float to bfloat" that expand-large-fp-convert emits. + +; GISEL: unable to translate instruction: fptrunc + +define bfloat @sitofp_i128_to_bf16(i128 %x) { +; GCN-LABEL: sitofp_i128_to_bf16: +; GCN: ; %bb.0: ; %itofp-entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_or_b32_e32 v5, v1, v3 +; GCN-NEXT: v_or_b32_e32 v4, v0, v2 +; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GCN-NEXT: v_mov_b32_e32 v4, 0 +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GCN-NEXT: s_cbranch_execz .LBB0_14 +; GCN-NEXT: ; %bb.1: ; %itofp-if-end +; GCN-NEXT: v_ashrrev_i32_e32 v5, 31, v3 +; GCN-NEXT: v_xor_b32_e32 v0, v5, v0 +; GCN-NEXT: v_xor_b32_e32 v1, v5, v1 +; GCN-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v5 +; GCN-NEXT: v_xor_b32_e32 v2, v5, v2 +; GCN-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v5, vcc +; GCN-NEXT: v_xor_b32_e32 v6, v5, v3 +; GCN-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v5, vcc +; GCN-NEXT: v_subb_co_u32_e32 v5, vcc, v6, v5, vcc +; GCN-NEXT: v_ffbh_u32_e32 v2, v4 +; GCN-NEXT: v_add_u32_e32 v2, 32, v2 +; GCN-NEXT: v_ffbh_u32_e32 v6, v5 +; GCN-NEXT: v_min_u32_e32 v2, v2, v6 +; GCN-NEXT: v_ffbh_u32_e32 v6, v0 +; GCN-NEXT: v_add_u32_e32 v6, 32, v6 +; GCN-NEXT: v_ffbh_u32_e32 v7, v1 +; GCN-NEXT: v_min_u32_e32 v6, v6, v7 +; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GCN-NEXT: v_add_u32_e32 v6, 64, v6 +; GCN-NEXT: v_cndmask_b32_e32 v7, v6, v2, vcc +; GCN-NEXT: v_sub_u32_e32 v6, 0x80, v7 +; GCN-NEXT: v_sub_u32_e32 v2, 0x7f, v7 +; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 25, v6 +; GCN-NEXT: ; implicit-def: $vgpr8 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GCN-NEXT: ; %bb.2: ; %itofp-if-else +; GCN-NEXT: v_add_u32_e32 v4, 0xffffff98, v7 +; GCN-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] +; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4 +; GCN-NEXT: v_cndmask_b32_e32 v8, 0, v0, vcc +; GCN-NEXT: ; implicit-def: $vgpr6 +; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GCN-NEXT: ; implicit-def: $vgpr7 +; GCN-NEXT: ; implicit-def: $vgpr4_vgpr5 +; GCN-NEXT: ; %bb.3: ; %Flow3 +; GCN-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5] +; GCN-NEXT: s_cbranch_execz .LBB0_13 +; GCN-NEXT: ; %bb.4: ; %NodeBlock +; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 25, v6 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: s_xor_b64 s[10:11], exec, s[4:5] +; GCN-NEXT: s_cbranch_execz .LBB0_8 +; GCN-NEXT: ; %bb.5: ; %LeafBlock +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 26, v6 +; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GCN-NEXT: s_cbranch_execz .LBB0_7 +; GCN-NEXT: ; %bb.6: ; %itofp-sw-default +; GCN-NEXT: v_sub_u32_e32 v12, 0x66, v7 +; GCN-NEXT: v_sub_u32_e32 v10, 64, v12 +; GCN-NEXT: v_lshrrev_b64 v[8:9], v12, v[0:1] +; GCN-NEXT: v_lshlrev_b64 v[10:11], v10, v[4:5] +; GCN-NEXT: v_sub_u32_e32 v13, 38, v7 +; GCN-NEXT: v_or_b32_e32 v11, v9, v11 +; GCN-NEXT: v_or_b32_e32 v10, v8, v10 +; GCN-NEXT: v_lshrrev_b64 v[8:9], v13, v[4:5] +; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v12 +; GCN-NEXT: v_add_u32_e32 v14, 26, v7 +; GCN-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc +; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v12 +; GCN-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc +; GCN-NEXT: v_lshrrev_b64 v[10:11], v13, v[0:1] +; GCN-NEXT: v_lshlrev_b64 
v[12:13], v14, v[4:5] +; GCN-NEXT: v_subrev_u32_e32 v7, 38, v7 +; GCN-NEXT: v_cndmask_b32_e64 v15, v8, v0, s[4:5] +; GCN-NEXT: v_lshlrev_b64 v[7:8], v7, v[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v9, v9, v1, s[4:5] +; GCN-NEXT: v_or_b32_e32 v11, v13, v11 +; GCN-NEXT: v_or_b32_e32 v10, v12, v10 +; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 +; GCN-NEXT: v_lshlrev_b64 v[0:1], v14, v[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v8, v8, v11, vcc +; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 +; GCN-NEXT: v_cndmask_b32_e32 v7, v7, v10, vcc +; GCN-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[4:5] +; GCN-NEXT: v_cndmask_b32_e64 v4, v7, v4, s[4:5] +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_or_b32_e32 v1, v1, v5 +; GCN-NEXT: v_or_b32_e32 v0, v0, v4 +; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GCN-NEXT: v_or_b32_e32 v8, v15, v0 +; GCN-NEXT: v_mov_b32_e32 v0, v8 +; GCN-NEXT: v_mov_b32_e32 v1, v9 +; GCN-NEXT: .LBB0_7: ; %Flow1 +; GCN-NEXT: s_or_b64 exec, exec, s[12:13] +; GCN-NEXT: .LBB0_8: ; %Flow2 +; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] +; GCN-NEXT: ; %bb.9: ; %itofp-sw-bb +; GCN-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GCN-NEXT: ; %bb.10: ; %itofp-sw-epilog +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: v_lshrrev_b32_e32 v4, 2, v0 +; GCN-NEXT: v_and_or_b32 v0, v4, 1, v0 +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: v_and_b32_e32 v4, 0x4000000, v0 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GCN-NEXT: v_alignbit_b32 v8, v1, v0, 2 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: ; %bb.11: ; %itofp-if-then20 +; GCN-NEXT: v_alignbit_b32 v8, v1, v0, 3 +; GCN-NEXT: v_mov_b32_e32 v2, v6 +; GCN-NEXT: ; %bb.12: ; %Flow +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: .LBB0_13: ; %Flow4 +; GCN-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-NEXT: v_and_b32_e32 v0, 0x80000000, v3 +; GCN-NEXT: v_lshl_add_u32 v1, v2, 23, 1.0 +; GCN-NEXT: v_and_b32_e32 v2, 0x7fffff, v8 +; GCN-NEXT: v_or3_b32 v0, v2, v0, v1 +; GCN-NEXT: v_bfe_u32 v1, v8, 16, 1 +; GCN-NEXT: s_movk_i32 s4, 0x7fff +; GCN-NEXT: v_add3_u32 v1, v1, v0, s4 +; GCN-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GCN-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GCN-NEXT: .LBB0_14: ; %Flow5 +; GCN-NEXT: s_or_b64 exec, exec, s[6:7] +; GCN-NEXT: v_mov_b32_e32 v0, v4 +; GCN-NEXT: s_setpc_b64 s[30:31] + %cvt = sitofp i128 %x to bfloat + ret bfloat %cvt +} + +define bfloat @uitofp_i128_to_bf16(i128 %x) { +; GCN-LABEL: uitofp_i128_to_bf16: +; GCN: ; %bb.0: ; %itofp-entry +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_or_b32_e32 v5, v1, v3 +; GCN-NEXT: v_or_b32_e32 v4, v0, v2 +; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GCN-NEXT: v_mov_b32_e32 v4, 0 +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GCN-NEXT: s_cbranch_execz .LBB1_14 +; GCN-NEXT: ; %bb.1: ; %itofp-if-end +; GCN-NEXT: v_ffbh_u32_e32 v4, v2 +; GCN-NEXT: v_add_u32_e32 v4, 32, v4 +; GCN-NEXT: v_ffbh_u32_e32 v5, v3 +; GCN-NEXT: v_min_u32_e32 v4, v4, v5 +; GCN-NEXT: v_ffbh_u32_e32 v5, v0 +; GCN-NEXT: v_add_u32_e32 v5, 32, v5 +; GCN-NEXT: v_ffbh_u32_e32 v6, v1 +; GCN-NEXT: v_min_u32_e32 v5, v5, v6 +; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; GCN-NEXT: v_add_u32_e32 v5, 64, v5 +; GCN-NEXT: v_cndmask_b32_e32 v6, v5, v4, vcc +; GCN-NEXT: v_sub_u32_e32 v5, 0x80, v6 +; GCN-NEXT: v_sub_u32_e32 v4, 0x7f, v6 +; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 25, v5 
+; GCN-NEXT: ; implicit-def: $vgpr7 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: s_xor_b64 s[4:5], exec, s[4:5] +; GCN-NEXT: ; %bb.2: ; %itofp-if-else +; GCN-NEXT: v_add_u32_e32 v2, 0xffffff98, v6 +; GCN-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] +; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2 +; GCN-NEXT: v_cndmask_b32_e32 v7, 0, v0, vcc +; GCN-NEXT: ; implicit-def: $vgpr5 +; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GCN-NEXT: ; implicit-def: $vgpr6 +; GCN-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GCN-NEXT: ; %bb.3: ; %Flow3 +; GCN-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5] +; GCN-NEXT: s_cbranch_execz .LBB1_13 +; GCN-NEXT: ; %bb.4: ; %NodeBlock +; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 25, v5 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: s_xor_b64 s[10:11], exec, s[4:5] +; GCN-NEXT: s_cbranch_execz .LBB1_8 +; GCN-NEXT: ; %bb.5: ; %LeafBlock +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 26, v5 +; GCN-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GCN-NEXT: s_cbranch_execz .LBB1_7 +; GCN-NEXT: ; %bb.6: ; %itofp-sw-default +; GCN-NEXT: v_sub_u32_e32 v11, 0x66, v6 +; GCN-NEXT: v_sub_u32_e32 v9, 64, v11 +; GCN-NEXT: v_lshrrev_b64 v[7:8], v11, v[0:1] +; GCN-NEXT: v_lshlrev_b64 v[9:10], v9, v[2:3] +; GCN-NEXT: v_sub_u32_e32 v12, 38, v6 +; GCN-NEXT: v_or_b32_e32 v10, v8, v10 +; GCN-NEXT: v_or_b32_e32 v9, v7, v9 +; GCN-NEXT: v_lshrrev_b64 v[7:8], v12, v[2:3] +; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11 +; GCN-NEXT: v_add_u32_e32 v13, 26, v6 +; GCN-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc +; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 +; GCN-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; GCN-NEXT: v_lshrrev_b64 v[9:10], v12, v[0:1] +; GCN-NEXT: v_lshlrev_b64 v[11:12], v13, v[2:3] +; GCN-NEXT: v_subrev_u32_e32 v6, 38, v6 +; GCN-NEXT: v_cndmask_b32_e64 v14, v7, v0, s[4:5] +; GCN-NEXT: v_lshlrev_b64 v[6:7], v6, v[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v8, v8, v1, s[4:5] +; GCN-NEXT: v_or_b32_e32 v10, v12, v10 +; GCN-NEXT: v_or_b32_e32 v9, v11, v9 +; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13 +; GCN-NEXT: v_lshlrev_b64 v[0:1], v13, v[0:1] +; GCN-NEXT: v_cndmask_b32_e32 v7, v7, v10, vcc +; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13 +; GCN-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc +; GCN-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5] +; GCN-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5] +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GCN-NEXT: v_or_b32_e32 v1, v1, v3 +; GCN-NEXT: v_or_b32_e32 v0, v0, v2 +; GCN-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GCN-NEXT: v_or_b32_e32 v7, v14, v0 +; GCN-NEXT: v_mov_b32_e32 v0, v7 +; GCN-NEXT: v_mov_b32_e32 v1, v8 +; GCN-NEXT: .LBB1_7: ; %Flow1 +; GCN-NEXT: s_or_b64 exec, exec, s[12:13] +; GCN-NEXT: .LBB1_8: ; %Flow2 +; GCN-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11] +; GCN-NEXT: ; %bb.9: ; %itofp-sw-bb +; GCN-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] +; GCN-NEXT: ; %bb.10: ; %itofp-sw-epilog +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: v_lshrrev_b32_e32 v2, 2, v0 +; GCN-NEXT: v_and_or_b32 v0, v2, 1, v0 +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc +; GCN-NEXT: v_and_b32_e32 v2, 0x4000000, v0 +; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GCN-NEXT: v_alignbit_b32 v7, v1, v0, 2 +; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc +; GCN-NEXT: ; %bb.11: ; %itofp-if-then20 +; GCN-NEXT: v_alignbit_b32 v7, v1, v0, 3 +; GCN-NEXT: v_mov_b32_e32 v4, v5 +; GCN-NEXT: ; %bb.12: ; %Flow +; GCN-NEXT: s_or_b64 exec, exec, s[4:5] +; GCN-NEXT: .LBB1_13: ; %Flow4 +; GCN-NEXT: s_or_b64 
exec, exec, s[8:9] +; GCN-NEXT: v_and_b32_e32 v0, 0x7fffff, v7 +; GCN-NEXT: v_lshl_or_b32 v0, v4, 23, v0 +; GCN-NEXT: v_add_u32_e32 v0, 1.0, v0 +; GCN-NEXT: v_bfe_u32 v1, v0, 16, 1 +; GCN-NEXT: s_movk_i32 s4, 0x7fff +; GCN-NEXT: v_add3_u32 v1, v1, v0, s4 +; GCN-NEXT: v_or_b32_e32 v2, 0x400000, v0 +; GCN-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 +; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GCN-NEXT: .LBB1_14: ; %Flow5 +; GCN-NEXT: s_or_b64 exec, exec, s[6:7] +; GCN-NEXT: v_mov_b32_e32 v0, v4 +; GCN-NEXT: s_setpc_b64 s[30:31] + %cvt = uitofp i128 %x to bfloat + ret bfloat %cvt +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; SDAG: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll index bfeb214c5af8f..c6aa2182aec80 100644 --- a/llvm/test/CodeGen/AMDGPU/itofp.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/itofp.i128.ll @@ -1604,15 +1604,5 @@ define half @uitofp_i128_to_f16(i128 %x) { ret half %cvt } -; FIXME: ExpandLargeFpConvert asserts on bfloat -; define bfloat @sitofp_i128_to_bf16(i128 %x) { -; %cvt = sitofp i128 %x to bfloat -; ret bfloat %cvt -; } - -; define bfloat @uitofp_i128_to_bf16(i128 %x) { -; %cvt = uitofp i128 %x to bfloat -; ret bfloat %cvt -; } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GCN: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir index 2e8219f99f1d1..9607889c71793 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir @@ -39,19 +39,19 @@ body: | S_CMP_EQ_U32 %15, undef %15, implicit-def $scc %19:sreg_32_xm0_xexec = S_CSELECT_B32 -1, 0, implicit killed undef $scc %20:sreg_32 = IMPLICIT_DEF - dead $vcc_lo = COPY undef %20 + $vcc_lo = COPY undef %20 S_CBRANCH_VCCNZ %bb.3, implicit $vcc S_BRANCH %bb.3 bb.3: - dead $vcc_lo = S_AND_B32 $exec_lo, undef %19, implicit-def dead $scc + $vcc_lo = S_AND_B32 $exec_lo, undef %19, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.6, implicit $vcc S_BRANCH %bb.4 bb.4: %21:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %19, implicit $exec %22:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 1, undef %21, implicit $exec - dead $vcc_lo = S_AND_B32 $exec_lo, undef %22, implicit-def dead $scc + $vcc_lo = S_AND_B32 $exec_lo, undef %22, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.7, implicit $vcc S_BRANCH %bb.5 @@ -174,7 +174,7 @@ body: | S_BRANCH %bb.20 bb.28: - dead $vcc_lo = S_AND_B32 $exec_lo, %22, implicit-def dead $scc + $vcc_lo = S_AND_B32 $exec_lo, %22, implicit-def dead $scc S_CBRANCH_VCCNZ %bb.29, implicit $vcc S_BRANCH %bb.29 diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir index e680eb2845f8e..116c04dea8b0f 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond-exec-masking-wave32.mir @@ -4,7 +4,7 @@ # GCN: name: negated_cond_vop2 # GCN: %0:sgpr_32 = IMPLICIT_DEF # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop2 body: | @@ -26,7 +26,7 @@ body: | # GCN: name: 
negated_cond_vop3 # GCN: %0:sgpr_32 = IMPLICIT_DEF # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop3 body: | @@ -48,10 +48,10 @@ body: | # GCN: name: negated_cond_vop2_redef_vcc1 # GCN: %0:sgpr_32 = IMPLICIT_DEF # GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec -# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec +# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc_lo, implicit $exec # GCN-NEXT: $vcc_lo = COPY $sgpr0 # GCN-NEXT: $vcc_lo = S_AND_B32 $exec_lo, $vcc_lo, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop2_redef_vcc1 body: | @@ -77,7 +77,7 @@ body: | # GCN-NEXT: dead %3:sgpr_32 = V_CMP_NE_U32_e64 %1, 1, implicit $exec # GCN-NEXT: %2:sgpr_32 = COPY $sgpr0 # GCN-NEXT: $vcc_lo = S_AND_B32 %2, $exec_lo, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop3_redef_cmp body: | @@ -99,7 +99,7 @@ body: | # GCN: name: negated_cond_undef_vcc # GCN: $vcc_lo = S_AND_B32 $exec_lo, undef $vcc_lo, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_undef_vcc body: | @@ -118,7 +118,7 @@ body: | # GCN: name: negated_cond_vop3_imp_vcc # GCN: $vcc_lo = IMPLICIT_DEF # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, $vcc_lo, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop3_imp_vcc body: | @@ -140,7 +140,7 @@ body: | # GCN: name: negated_cond_vop2_imp_vcc # GCN: $vcc_lo = IMPLICIT_DEF # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, $vcc_lo, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop2_imp_vcc body: | @@ -165,7 +165,7 @@ body: | # GCN-NEXT: %1:vgpr_32 = COPY $vgpr0 # GCN-NEXT: %2:sgpr_32 = V_CMP_NE_U32_e64 %1, 1, implicit $exec # GCN-NEXT: $vcc_lo = S_AND_B32 %2, $exec_lo, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop3_redef_sel body: | @@ -189,7 +189,7 @@ body: | # GCN: %0:sgpr_32 = IMPLICIT_DEF # GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop2_used_sel body: | @@ -212,10 +212,10 @@ body: | # GCN: name: negated_cond_vop2_used_vcc # GCN: %0:sgpr_32 = IMPLICIT_DEF # GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec -# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec +# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc_lo, implicit $exec # GCN-NEXT: $sgpr0_sgpr1 = COPY $vcc # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop2_used_vcc body: | @@ -241,7 +241,7 @@ body: | # GCN-NEXT: %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec # GCN-NEXT: %2:sgpr_32 = V_CMP_NE_U32_e64 %1.sub1, 1, 
implicit $exec # GCN-NEXT: $vcc_lo = S_AND_B32 %2, $exec_lo, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop3_sel_wrong_subreg1 body: | @@ -267,7 +267,7 @@ body: | # GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF # GCN-NEXT: %2:sgpr_32 = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec # GCN-NEXT: $vcc_lo = S_AND_B32 %2, $exec_lo, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop3_sel_wrong_subreg2 body: | @@ -291,7 +291,7 @@ body: | # GCN: %0:sgpr_32 = IMPLICIT_DEF # GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop3_sel_right_subreg1 body: | @@ -315,7 +315,7 @@ body: | # GCN: %0:sgpr_32 = IMPLICIT_DEF # GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF # GCN-NEXT: $vcc_lo = S_ANDN2_B32 $exec_lo, %0, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop3_sel_right_subreg2 body: | @@ -341,7 +341,7 @@ body: | # GCN-NEXT: %1.sub2_sub3:vreg_128 = IMPLICIT_DEF # GCN-NEXT: %2:sgpr_32 = V_CMP_NE_U32_e64 %1.sub2, 1, implicit $exec # GCN-NEXT: $vcc_lo = S_AND_B32 %2, $exec_lo, implicit-def dead $scc -# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc +# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc_lo --- name: negated_cond_vop3_sel_subreg_overlap body: | diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir index 6a2532147f886..f8e7cb397b475 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir @@ -118,7 +118,7 @@ body: | ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: KILL implicit-def $vcc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103 + ; GCN-NEXT: KILL implicit-def $vcc_lo, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def 
$sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: ; GCN-NEXT: successors: %bb.3(0x80000000) diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir index 695beab8dd24d..ecbd47a9e8d0d 100644 --- a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir @@ -68,7 +68,7 @@ body: | ; GCN-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.1, [[S_OR_B32_1]], %bb.2 ; GCN-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 4 - ; GCN-NEXT: [[V_ADD_U:%[0-9]+]]:vreg_64 = V_ADD_U64_PSEUDO [[PHI3]], killed [[S_MOV_B64_]], implicit-def dead $vcc, implicit $exec + ; GCN-NEXT: [[V_ADD_U:%[0-9]+]]:vreg_64 = V_ADD_U64_PSEUDO [[PHI3]], killed [[S_MOV_B64_]], implicit-def dead $vcc_lo, implicit $exec ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 1 ; GCN-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = nsw S_ADD_I32 [[PHI2]], killed [[S_MOV_B32_3]], implicit-def dead $scc ; GCN-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 9 diff --git a/llvm/test/CodeGen/AMDGPU/verify-vopd-gfx12.mir b/llvm/test/CodeGen/AMDGPU/verify-vopd-gfx12.mir index 39822d8754f61..6614d8f9c4b09 100644 --- a/llvm/test/CodeGen/AMDGPU/verify-vopd-gfx12.mir +++ b/llvm/test/CodeGen/AMDGPU/verify-vopd-gfx12.mir @@ -1,7 +1,7 @@ # RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX12-ERR %s # GFX12-ERR: *** Bad machine code: VOP* instruction violates constant bus restriction *** -# GFX12-ERR: $vgpr2, $vgpr3 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx12 $sgpr0, $vgpr0, $sgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $vcc_lo +# GFX12-ERR: $vgpr2, $vgpr3 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx12 $sgpr0, $vgpr0, $sgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc_lo, implicit $vcc_lo --- name: vopd_cndmask_2sgpr body: | diff --git a/llvm/test/CodeGen/AMDGPU/verify-vopd.mir b/llvm/test/CodeGen/AMDGPU/verify-vopd.mir index 9bcc766466af2..374f898957193 100644 --- a/llvm/test/CodeGen/AMDGPU/verify-vopd.mir +++ b/llvm/test/CodeGen/AMDGPU/verify-vopd.mir @@ -1,7 +1,7 @@ # RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass machineverifier -o /dev/null %s 2>&1 | FileCheck -check-prefix=GFX11-ERR %s # GFX11-ERR: *** Bad machine code: VOP* instruction violates constant bus restriction *** -# GFX11-ERR: $vgpr2, $vgpr3 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx11 $sgpr0, $vgpr0, $sgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc, implicit $vcc_lo +# GFX11-ERR: $vgpr2, $vgpr3 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx11 $sgpr0, $vgpr0, $sgpr1, $vgpr1, implicit $exec, implicit $mode, implicit $vcc_lo, implicit $vcc_lo --- name: vopd_cndmask_2sgpr body: | diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir index feba06789f7f8..123893674ff5e 100644 --- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir +++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir @@ -18,7 
+18,7 @@ body: | ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: V_CMP_LT_F32_e32_dpp 0, [[COPY1]], 0, [[COPY]], 1, 15, 15, 1, implicit-def $vcc, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec - ; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: V_CMPX_EQ_I16_t16_nosdst_e64 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $exec, implicit-def $vcc_lo, implicit $mode, implicit $exec ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64_dpp 0, [[COPY1]], [[COPY]], 1, 15, 15, 1, implicit $exec ; GCN-NEXT: [[V_CMP_GE_F16_t16_e64_dpp:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_t16_e64_dpp 1, [[COPY1]], 0, [[COPY]], 1, 1, 15, 15, 1, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec @@ -89,7 +89,7 @@ body: | ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec - ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_CMP_CLASS_F16_t16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_CLASS_F16_t16_e64 0, [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc_lo, implicit $mode, implicit $exec ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec ; GCN-NEXT: [[V_CMP_GE_F32_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F32_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec %0:vgpr_32 = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir index 3c1da043bcd6c..63bef40c34742 100644 --- a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir +++ b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir @@ -135,42 +135,49 @@ name: vopd_cndmask tracksRegLiveness: true body: | bb.0: + liveins: $vcc_lo ; SCHED-LABEL: name: vopd_cndmask - ; SCHED: $vgpr2 = IMPLICIT_DEF + ; SCHED: liveins: $vcc_lo + ; SCHED-NEXT: {{ $}} + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF ; SCHED-NEXT: $sgpr20 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 $sgpr20, killed $vgpr1, killed $vgpr2, implicit $mode, implicit $exec - ; SCHED-NEXT: $vgpr5 = V_CNDMASK_B32_e32 $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc - ; SCHED-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc + ; SCHED-NEXT: $vgpr5 = V_CNDMASK_B32_e32 $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo + ; SCHED-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo ; SCHED-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec - ; SCHED-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc + ; SCHED-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit killed $vcc_lo ; ; PAIR-GFX11-LABEL: name: vopd_cndmask - ; 
PAIR-GFX11: $vgpr2 = IMPLICIT_DEF + ; PAIR-GFX11: liveins: $vcc_lo + ; PAIR-GFX11-NEXT: {{ $}} + ; PAIR-GFX11-NEXT: $vgpr2 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $vgpr0 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $vgpr3 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $sgpr20 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32_gfx11 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc - ; PAIR-GFX11-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc + ; PAIR-GFX11-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32_gfx11 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX11-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo ; PAIR-GFX11-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc + ; PAIR-GFX11-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit killed $vcc_lo ; ; PAIR-GFX12-LABEL: name: vopd_cndmask - ; PAIR-GFX12: $vgpr2 = IMPLICIT_DEF + ; PAIR-GFX12: liveins: $vcc_lo + ; PAIR-GFX12-NEXT: {{ $}} + ; PAIR-GFX12-NEXT: $vgpr2 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $vgpr0 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $vgpr1 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $vgpr3 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $sgpr20 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32_gfx12 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc - ; PAIR-GFX12-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc + ; PAIR-GFX12-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32_gfx12 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX12-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo ; PAIR-GFX12-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc + ; PAIR-GFX12-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit killed $vcc_lo $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF $vgpr2 = IMPLICIT_DEF @@ -417,9 +424,12 @@ name: vopd_schedule_unconstrained tracksRegLiveness: true body: | bb.0: + liveins: $vcc_lo ; SCHED-LABEL: name: vopd_schedule_unconstrained - ; SCHED: $vgpr2 = IMPLICIT_DEF + ; SCHED: liveins: $vcc_lo + ; SCHED-NEXT: {{ $}} + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF @@ -429,16 +439,18 @@ body: 
| ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr12 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; SCHED-NEXT: $vgpr19 = V_CNDMASK_B32_e32 $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc - ; SCHED-NEXT: $vgpr11 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc + ; SCHED-NEXT: $vgpr19 = V_CNDMASK_B32_e32 $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo + ; SCHED-NEXT: $vgpr11 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo ; SCHED-NEXT: $vgpr17 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec - ; SCHED-NEXT: $vgpr10 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc - ; SCHED-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $vcc + ; SCHED-NEXT: $vgpr10 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo + ; SCHED-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo ; SCHED-NEXT: $vgpr16 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr14 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec ; ; PAIR-GFX11-LABEL: name: vopd_schedule_unconstrained - ; PAIR-GFX11: $vgpr2 = IMPLICIT_DEF + ; PAIR-GFX11: liveins: $vcc_lo + ; PAIR-GFX11-NEXT: {{ $}} + ; PAIR-GFX11-NEXT: $vgpr2 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $vgpr3 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $vgpr0 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -446,15 +458,17 @@ body: | ; PAIR-GFX11-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr12, $vgpr19 = V_DUAL_ADD_F32_e32_X_CNDMASK_B32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc - ; PAIR-GFX11-NEXT: $vgpr11 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc - ; PAIR-GFX11-NEXT: $vgpr17, $vgpr10 = V_DUAL_MUL_F32_e32_X_CNDMASK_B32_e32_gfx11 killed $vgpr0, $vgpr0, $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc - ; PAIR-GFX11-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $vcc + ; PAIR-GFX11-NEXT: $vgpr12, $vgpr19 = V_DUAL_ADD_F32_e32_X_CNDMASK_B32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX11-NEXT: $vgpr11 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX11-NEXT: $vgpr17, $vgpr10 = V_DUAL_MUL_F32_e32_X_CNDMASK_B32_e32_gfx11 killed $vgpr0, $vgpr0, $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX11-NEXT: $vgpr15 = 
V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo ; PAIR-GFX11-NEXT: $vgpr16 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr14 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec ; ; PAIR-GFX12-LABEL: name: vopd_schedule_unconstrained - ; PAIR-GFX12: $vgpr2 = IMPLICIT_DEF + ; PAIR-GFX12: liveins: $vcc_lo + ; PAIR-GFX12-NEXT: {{ $}} + ; PAIR-GFX12-NEXT: $vgpr2 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $vgpr3 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $vgpr0 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $vgpr1 = IMPLICIT_DEF @@ -462,10 +476,10 @@ body: | ; PAIR-GFX12-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx12 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr12, $vgpr19 = V_DUAL_ADD_F32_e32_X_CNDMASK_B32_e32_gfx12 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc - ; PAIR-GFX12-NEXT: $vgpr11 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc - ; PAIR-GFX12-NEXT: $vgpr17, $vgpr10 = V_DUAL_MUL_F32_e32_X_CNDMASK_B32_e32_gfx12 killed $vgpr0, $vgpr0, $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc - ; PAIR-GFX12-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $vcc + ; PAIR-GFX12-NEXT: $vgpr12, $vgpr19 = V_DUAL_ADD_F32_e32_X_CNDMASK_B32_e32_gfx12 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX12-NEXT: $vgpr11 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX12-NEXT: $vgpr17, $vgpr10 = V_DUAL_MUL_F32_e32_X_CNDMASK_B32_e32_gfx12 killed $vgpr0, $vgpr0, $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX12-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo ; PAIR-GFX12-NEXT: $vgpr16 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr14 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec $vgpr0 = IMPLICIT_DEF @@ -496,9 +510,12 @@ name: vopd_schedule_unconstrained_2 tracksRegLiveness: true body: | bb.0: + liveins: $vcc_lo ; SCHED-LABEL: name: vopd_schedule_unconstrained_2 - ; SCHED: $vgpr2 = IMPLICIT_DEF + ; SCHED: liveins: $vcc_lo + ; SCHED-NEXT: {{ $}} + ; SCHED-NEXT: $vgpr2 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr3 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr0 = IMPLICIT_DEF ; SCHED-NEXT: $vgpr1 = IMPLICIT_DEF @@ -510,28 +527,30 @@ body: | ; SCHED-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr4 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; SCHED-NEXT: $vgpr29 = V_CNDMASK_B32_e32 $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc - ; SCHED-NEXT: $vgpr19 = V_CNDMASK_B32_e32 $vgpr0, 
$vgpr3, implicit $mode, implicit $exec, implicit $vcc + ; SCHED-NEXT: $vgpr29 = V_CNDMASK_B32_e32 $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo + ; SCHED-NEXT: $vgpr19 = V_CNDMASK_B32_e32 $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo ; SCHED-NEXT: $vgpr20 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr20, implicit $mode, implicit $exec - ; SCHED-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc - ; SCHED-NEXT: $vgpr10 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc + ; SCHED-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo + ; SCHED-NEXT: $vgpr10 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo ; SCHED-NEXT: $vgpr17 = V_MUL_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec - ; SCHED-NEXT: $vgpr11 = V_CNDMASK_B32_e32 $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc + ; SCHED-NEXT: $vgpr11 = V_CNDMASK_B32_e32 $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo ; SCHED-NEXT: $vgpr12 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; SCHED-NEXT: $vgpr37 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc + ; SCHED-NEXT: $vgpr37 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo ; SCHED-NEXT: $vgpr14 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr20 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr21 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr24 = V_MUL_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec - ; SCHED-NEXT: $vgpr28 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc + ; SCHED-NEXT: $vgpr28 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo ; SCHED-NEXT: $vgpr22 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr31 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; SCHED-NEXT: $vgpr33 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $vcc + ; SCHED-NEXT: $vgpr33 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo ; SCHED-NEXT: $vgpr34 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; SCHED-NEXT: $vgpr32 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec ; ; PAIR-GFX11-LABEL: name: vopd_schedule_unconstrained_2 - ; PAIR-GFX11: $vgpr2 = IMPLICIT_DEF + ; PAIR-GFX11: liveins: $vcc_lo + ; PAIR-GFX11-NEXT: {{ $}} + ; PAIR-GFX11-NEXT: $vgpr2 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $vgpr3 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $vgpr0 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -540,23 +559,25 @@ body: | ; PAIR-GFX11-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc - ; 
PAIR-GFX11-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32_gfx11 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc - ; PAIR-GFX11-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx11 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr37, $vgpr14 = V_DUAL_CNDMASK_B32_e32_X_SUB_F32_e32_gfx11 $vgpr0, killed $vgpr3, $vgpr1, $vgpr1, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec + ; PAIR-GFX11-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX11-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32_gfx11 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX11-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX11-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX11-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx11 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX11-NEXT: $vgpr37, $vgpr14 = V_DUAL_CNDMASK_B32_e32_X_SUB_F32_e32_gfx11 $vgpr0, killed $vgpr3, $vgpr1, $vgpr1, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr20 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr21, $vgpr24 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr28 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc + ; PAIR-GFX11-NEXT: $vgpr28 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo ; PAIR-GFX11-NEXT: $vgpr22 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr31 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr33 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $vcc + ; PAIR-GFX11-NEXT: $vgpr33 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo ; PAIR-GFX11-NEXT: $vgpr34 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr32 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit 
$mode, implicit $exec ; ; PAIR-GFX12-LABEL: name: vopd_schedule_unconstrained_2 - ; PAIR-GFX12: $vgpr2 = IMPLICIT_DEF + ; PAIR-GFX12: liveins: $vcc_lo + ; PAIR-GFX12-NEXT: {{ $}} + ; PAIR-GFX12-NEXT: $vgpr2 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $vgpr3 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $vgpr0 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $vgpr1 = IMPLICIT_DEF @@ -565,18 +586,18 @@ body: | ; PAIR-GFX12-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx12 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32_gfx12 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc - ; PAIR-GFX12-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32_gfx12 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc - ; PAIR-GFX12-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx12 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx12 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr37, $vgpr14 = V_DUAL_CNDMASK_B32_e32_X_SUB_F32_e32_gfx12 $vgpr0, killed $vgpr3, $vgpr1, $vgpr1, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec + ; PAIR-GFX12-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32_gfx12 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX12-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32_gfx12 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX12-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX12-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx12 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX12-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx12 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX12-NEXT: $vgpr37, $vgpr14 = V_DUAL_CNDMASK_B32_e32_X_SUB_F32_e32_gfx12 $vgpr0, killed $vgpr3, $vgpr1, $vgpr1, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr20 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: 
$vgpr21, $vgpr24 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx12 $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr28 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc + ; PAIR-GFX12-NEXT: $vgpr28 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo ; PAIR-GFX12-NEXT: $vgpr22 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr31 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr33 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit $vcc + ; PAIR-GFX12-NEXT: $vgpr33 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo ; PAIR-GFX12-NEXT: $vgpr34 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr32 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec $vgpr0 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/ARM/misched-branch-targets.mir b/llvm/test/CodeGen/ARM/misched-branch-targets.mir deleted file mode 100644 index b071fbd4538a6..0000000000000 --- a/llvm/test/CodeGen/ARM/misched-branch-targets.mir +++ /dev/null @@ -1,166 +0,0 @@ -# RUN: llc -o - -run-pass=machine-scheduler -misched=shuffle %s | FileCheck %s -# RUN: llc -o - -run-pass=postmisched %s | FileCheck %s - ---- | - target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv8.1m.main-arm-none-eabi" - - define i32 @foo_bti() #0 { - entry: - ret i32 0 - } - - define i32 @foo_pac() #0 { - entry: - ret i32 0 - } - - define i32 @foo_pacbti() #0 { - entry: - ret i32 0 - } - - define i32 @foo_setjmp() #0 { - entry: - ret i32 0 - if.then: - ret i32 0 - } - - define i32 @foo_sg() #0 { - entry: - ret i32 0 - } - - declare i32 @setjmp(ptr noundef) #1 - declare void @longjmp(ptr noundef, i32 noundef) #2 - - attributes #0 = { "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" } - attributes #1 = { nounwind returns_twice "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" } - attributes #2 = { noreturn nounwind "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" } - -... ---- -name: foo_bti -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r0 - - t2BTI - renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg - tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 - -... - -# CHECK-LABEL: name: foo_bti -# CHECK: body: -# CHECK-NEXT: bb.0.entry: -# CHECK-NEXT: liveins: $r0 -# CHECK-NEXT: {{^ +$}} -# CHECK-NEXT: t2BTI - ---- -name: foo_pac -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r0, $lr, $r12 - - frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp - renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg - $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr - $r7 = frame-setup tMOVr killed $sp, 14 /* CC::al */, $noreg - early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg - $r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg - $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr - t2AUT implicit $r12, implicit $lr, implicit $sp - tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - -... 
- -# CHECK-LABEL: name: foo_pac -# CHECK: body: -# CHECK-NEXT: bb.0.entry: -# CHECK-NEXT: liveins: $r0, $lr, $r12 -# CHECK-NEXT: {{^ +$}} -# CHECK-NEXT: frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp - ---- -name: foo_pacbti -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r0, $lr, $r12 - - frame-setup t2PACBTI implicit-def $r12, implicit $lr, implicit $sp - renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg - $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr - $r7 = frame-setup tMOVr killed $sp, 14 /* CC::al */, $noreg - early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg - $r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg - $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr - t2AUT implicit $r12, implicit $lr, implicit $sp - tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - -... - -# CHECK-LABEL: name: foo_pacbti -# CHECK: body: -# CHECK-NEXT: bb.0.entry: -# CHECK-NEXT: liveins: $r0, $lr, $r12 -# CHECK-NEXT: {{^ +$}} -# CHECK-NEXT: frame-setup t2PACBTI implicit-def $r12, implicit $lr, implicit $sp - ---- -name: foo_setjmp -tracksRegLiveness: true -body: | - bb.0.entry: - successors: %bb.1 - liveins: $lr - - frame-setup tPUSH 14 /* CC::al */, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp - $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg - $sp = frame-setup tSUBspi $sp, 40, 14 /* CC::al */, $noreg - renamable $r0 = tMOVr $sp, 14 /* CC::al */, $noreg - tBL 14 /* CC::al */, $noreg, @setjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0 - t2BTI - renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg - tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr - t2IT 0, 2, implicit-def $itstate - renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit $itstate - $sp = frame-destroy tADDspi $sp, 40, 0 /* CC::eq */, $cpsr, implicit $itstate - frame-destroy tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $r0, implicit $sp, implicit killed $itstate - - bb.1.if.then: - renamable $r0 = tMOVr $sp, 14 /* CC::al */, $noreg - renamable $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg - tBL 14 /* CC::al */, $noreg, @longjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit-def $sp - -... - -# CHECK-LABEL: name: foo_setjmp -# CHECK: body: -# CHECK: tBL 14 /* CC::al */, $noreg, @setjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0 -# CHECK-NEXT: t2BTI - ---- -name: foo_sg -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r0 - - t2SG 14 /* CC::al */, $noreg - renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg - tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 - -... 
- -# CHECK-LABEL: name: foo_sg -# CHECK: body: -# CHECK-NEXT: bb.0.entry: -# CHECK-NEXT: liveins: $r0 -# CHECK-NEXT: {{^ +$}} -# CHECK-NEXT: t2SG diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll index 1632c4e86c762..0060b4458081b 100644 --- a/llvm/test/CodeGen/ARM/sadd_sat.ll +++ b/llvm/test/CodeGen/ARM/sadd_sat.ll @@ -130,8 +130,8 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-T15TE-NEXT: bics r4, r1 ; CHECK-T15TE-NEXT: asrs r1, r3, #31 ; CHECK-T15TE-NEXT: cmp r4, #0 -; CHECK-T15TE-NEXT: push {r1} -; CHECK-T15TE-NEXT: pop {r0} +; CHECK-T15TE-NEXT: mov r12, r1 +; CHECK-T15TE-NEXT: mov r0, r12 ; CHECK-T15TE-NEXT: bmi .LBB1_2 ; CHECK-T15TE-NEXT: @ %bb.1: ; CHECK-T15TE-NEXT: movs r0, r2 diff --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll index e12dd02f16c2f..df95af313eac6 100644 --- a/llvm/test/CodeGen/ARM/select_const.ll +++ b/llvm/test/CodeGen/ARM/select_const.ll @@ -665,8 +665,8 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) { ; THUMB-NEXT: movs r7, #1 ; THUMB-NEXT: ands r0, r7 ; THUMB-NEXT: subs r1, r0, #1 -; THUMB-NEXT: push {r0} -; THUMB-NEXT: pop {r4} +; THUMB-NEXT: mov r12, r0 +; THUMB-NEXT: mov r4, r12 ; THUMB-NEXT: sbcs r4, r1 ; THUMB-NEXT: cmp r0, #0 ; THUMB-NEXT: bne .LBB24_2 @@ -681,8 +681,8 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) { ; THUMB-NEXT: ands r5, r0 ; THUMB-NEXT: movs r6, #0 ; THUMB-NEXT: subs r0, r5, #1 -; THUMB-NEXT: push {r4} -; THUMB-NEXT: pop {r1} +; THUMB-NEXT: mov r12, r4 +; THUMB-NEXT: mov r1, r12 ; THUMB-NEXT: sbcs r1, r6 ; THUMB-NEXT: eors r3, r7 ; THUMB-NEXT: ldr r6, .LCPI24_0 @@ -786,11 +786,11 @@ define i64 @func(i64 %arg) { ; THUMB-NEXT: push {r4, lr} ; THUMB-NEXT: movs r2, #0 ; THUMB-NEXT: adds r3, r0, #1 -; THUMB-NEXT: push {r1} -; THUMB-NEXT: pop {r3} +; THUMB-NEXT: mov r12, r1 +; THUMB-NEXT: mov r3, r12 ; THUMB-NEXT: adcs r3, r2 -; THUMB-NEXT: push {r2} -; THUMB-NEXT: pop {r3} +; THUMB-NEXT: mov r12, r2 +; THUMB-NEXT: mov r3, r12 ; THUMB-NEXT: adcs r3, r2 ; THUMB-NEXT: subs r4, r3, #1 ; THUMB-NEXT: adds r0, r0, #1 diff --git a/llvm/test/CodeGen/ARM/wide-compares.ll b/llvm/test/CodeGen/ARM/wide-compares.ll index 6584f0c7616c5..09e3592b6d420 100644 --- a/llvm/test/CodeGen/ARM/wide-compares.ll +++ b/llvm/test/CodeGen/ARM/wide-compares.ll @@ -257,12 +257,12 @@ define {i32, i32} @test_slt_not(i32 %c, i32 %d, i64 %a, i64 %b) { ; CHECK-THUMB1-NOMOV-NEXT: ldr r5, [sp, #16] ; CHECK-THUMB1-NOMOV-NEXT: subs r2, r2, r5 ; CHECK-THUMB1-NOMOV-NEXT: sbcs r3, r0 -; CHECK-THUMB1-NOMOV-NEXT: push {r1} -; CHECK-THUMB1-NOMOV-NEXT: pop {r0} +; CHECK-THUMB1-NOMOV-NEXT: mov r12, r1 +; CHECK-THUMB1-NOMOV-NEXT: mov r0, r12 ; CHECK-THUMB1-NOMOV-NEXT: blt .LBB3_2 ; CHECK-THUMB1-NOMOV-NEXT: @ %bb.1: @ %entry -; CHECK-THUMB1-NOMOV-NEXT: push {r4} -; CHECK-THUMB1-NOMOV-NEXT: pop {r0} +; CHECK-THUMB1-NOMOV-NEXT: mov r12, r4 +; CHECK-THUMB1-NOMOV-NEXT: mov r0, r12 ; CHECK-THUMB1-NOMOV-NEXT: .LBB3_2: @ %entry ; CHECK-THUMB1-NOMOV-NEXT: bge .LBB3_4 ; CHECK-THUMB1-NOMOV-NEXT: @ %bb.3: @ %entry diff --git a/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll index 08426b07bf74b..4565c63f08d99 100644 --- a/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll +++ b/llvm/test/CodeGen/LoongArch/emergency-spill-slot.ll @@ -6,9 +6,9 @@ define void @func() { ; CHECK-LABEL: func: ; CHECK: # %bb.0: -; CHECK-NEXT: addi.d $sp, $sp, -2048 -; CHECK-NEXT: addi.d $sp, $sp, -2048 -; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: lu12i.w $a0, 1 +; CHECK-NEXT: 
ori $a0, $a0, 16 +; CHECK-NEXT: sub.d $sp, $sp, $a0 ; CHECK-NEXT: .cfi_def_cfa_offset 4112 ; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(var) ; CHECK-NEXT: ld.d $a1, $a0, %got_pc_lo12(var) diff --git a/llvm/test/CodeGen/M68k/Arith/unary.ll b/llvm/test/CodeGen/M68k/Arith/unary.ll new file mode 100644 index 0000000000000..a28ac7328d260 --- /dev/null +++ b/llvm/test/CodeGen/M68k/Arith/unary.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out ";*\.cfi_*" +; RUN: llc < %s -mtriple=m68k-linux -verify-machineinstrs | FileCheck %s + +define i64 @notll(i64 %x) { +; CHECK-LABEL: notll: +; CHECK: ; %bb.0: +; CHECK: move.l (4,%sp), %d0 +; CHECK: not.l %d0 +; CHECK: move.l (8,%sp), %d1 +; CHECK: not.l %d1 +; CHECK: rts + %not = xor i64 %x, -1 + ret i64 %not +} + +define i32 @notl(i32 %x) { +; CHECK-LABEL: notl: +; CHECK: ; %bb.0: +; CHECK: move.l (4,%sp), %d0 +; CHECK: not.l %d0 +; CHECK: rts + %not = xor i32 %x, -1 + ret i32 %not +} + +define i16 @nots(i16 %x) { +; CHECK-LABEL: nots: +; CHECK: ; %bb.0: +; CHECK: move.w (6,%sp), %d0 +; CHECK: not.w %d0 +; CHECK: rts + %not = xor i16 %x, -1 + ret i16 %not +} + +define i8 @notb(i8 %x) { +; CHECK-LABEL: notb: +; CHECK: ; %bb.0: +; CHECK: move.b (7,%sp), %d0 +; CHECK: not.b %d0 +; CHECK: rts + %not = xor i8 %x, -1 + ret i8 %not +} + +define i64 @negll(i64 %x) { +; CHECK-LABEL: negll: +; CHECK: ; %bb.0: +; CHECK: move.l (4,%sp), %d0 +; CHECK: move.l (8,%sp), %d1 +; CHECK: neg.l %d1 +; CHECK: negx.l %d0 +; CHECK: rts + %neg = sub i64 0, %x + ret i64 %neg +} + +define i32 @negl(i32 %x) { +; CHECK-LABEL: negl: +; CHECK: ; %bb.0: +; CHECK: move.l (4,%sp), %d0 +; CHECK: neg.l %d0 +; CHECK: rts + %neg = sub i32 0, %x + ret i32 %neg +} + +define i16 @negs(i16 %x) { +; CHECK-LABEL: negs: +; CHECK: ; %bb.0: +; CHECK: move.w (6,%sp), %d0 +; CHECK: neg.w %d0 +; CHECK: rts + %neg = sub i16 0, %x + ret i16 %neg +} + +define i8 @negb(i8 %x) { +; CHECK-LABEL: negb: +; CHECK: ; %bb.0: +; CHECK: move.b (7,%sp), %d0 +; CHECK: neg.b %d0 +; CHECK: rts + %neg = sub i8 0, %x + ret i8 %neg +} diff --git a/llvm/test/CodeGen/M68k/Atomics/rmw.ll b/llvm/test/CodeGen/M68k/Atomics/rmw.ll index b589e7751d80e..1036a0a8ba3d2 100644 --- a/llvm/test/CodeGen/M68k/Atomics/rmw.ll +++ b/llvm/test/CodeGen/M68k/Atomics/rmw.ll @@ -237,7 +237,7 @@ define i16 @atmoicrmw_nand_i16(i16 %val, ptr %ptr) { ; ATOMIC-NEXT: ; =>This Inner Loop Header: Depth=1 ; ATOMIC-NEXT: move.w %d2, %d3 ; ATOMIC-NEXT: and.w %d0, %d3 -; ATOMIC-NEXT: eori.w #-1, %d3 +; ATOMIC-NEXT: not.w %d3 ; ATOMIC-NEXT: cas.w %d1, %d3, (%a0) ; ATOMIC-NEXT: move.w %d1, %d3 ; ATOMIC-NEXT: sub.w %d2, %d3 diff --git a/llvm/test/CodeGen/PowerPC/ppcsoftops.ll b/llvm/test/CodeGen/PowerPC/ppcsoftops.ll index dee2701bf6dc1..fcb7ce6db5295 100644 --- a/llvm/test/CodeGen/PowerPC/ppcsoftops.ll +++ b/llvm/test/CodeGen/PowerPC/ppcsoftops.ll @@ -312,8 +312,68 @@ define dso_local zeroext i32 @func(double noundef %0, double noundef %1) #0 { ret i32 %9 } +; Check that softening ppc_fp128 does not crash. +define zeroext i1 @ppcf128_soften(ppc_fp128 %a) #0 { +; PPC-LABEL: ppcf128_soften: +; PPC: # %bb.0: # %entry +; PPC-NEXT: stwu 1, -16(1) +; PPC-NEXT: stw 5, 8(1) # 4-byte Folded Spill +; PPC-NEXT: mr 5, 4 +; PPC-NEXT: lwz 4, 8(1) # 4-byte Folded Reload +; PPC-NEXT: stw 5, 12(1) # 4-byte Folded Spill +; PPC-NEXT: mr 5, 3 +; PPC-NEXT: lwz 3, 12(1) # 4-byte Folded Reload +; PPC-NEXT: # kill: def $r4 killed $r3 +; PPC-NEXT: # kill: def $r4 killed $r5 +; PPC-NEXT: xoris 4, 5, 65520 +; PPC-NEXT: or 4, 3,
4 +; PPC-NEXT: cntlzw 4, 4 +; PPC-NEXT: clrlwi 5, 5, 1 +; PPC-NEXT: or 3, 3, 5 +; PPC-NEXT: cntlzw 3, 3 +; PPC-NEXT: or 3, 3, 4 +; PPC-NEXT: srwi 3, 3, 5 +; PPC-NEXT: addi 1, 1, 16 +; PPC-NEXT: blr +; +; PPC64-LABEL: ppcf128_soften: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT: li 4, 4095 +; PPC64-NEXT: rldic 4, 4, 52, 0 +; PPC64-NEXT: cmpld 7, 3, 4 +; PPC64-NEXT: mfcr 4 # cr7 +; PPC64-NEXT: rlwinm 4, 4, 31, 31, 31 +; PPC64-NEXT: clrldi 3, 3, 1 +; PPC64-NEXT: cmpldi 7, 3, 0 +; PPC64-NEXT: mfcr 3 # cr7 +; PPC64-NEXT: rlwinm 3, 3, 31, 31, 31 +; PPC64-NEXT: or 4, 3, 4 +; PPC64-NEXT: # implicit-def: $x3 +; PPC64-NEXT: mr 3, 4 +; PPC64-NEXT: clrldi 3, 3, 32 +; PPC64-NEXT: blr +; +; PPC64LE-LABEL: ppcf128_soften: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: li 3, 4095 +; PPC64LE-NEXT: rldic 3, 3, 52, 0 +; PPC64LE-NEXT: cmpd 4, 3 +; PPC64LE-NEXT: crmove 21, 2 +; PPC64LE-NEXT: clrldi. 3, 4, 1 +; PPC64LE-NEXT: crmove 20, 2 +; PPC64LE-NEXT: cror 20, 20, 21 +; PPC64LE-NEXT: li 4, 0 +; PPC64LE-NEXT: li 3, 1 +; PPC64LE-NEXT: isel 3, 3, 4, 20 +; PPC64LE-NEXT: blr +entry: + %fpclass = tail call i1 @llvm.is.fpclass.ppcf128(ppc_fp128 %a, i32 100) + ret i1 %fpclass +} + ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) declare double @llvm.fmuladd.f64(double, double, double) #1 +declare i1 @llvm.is.fpclass.ppcf128(ppc_fp128, i32 immarg) #1 attributes #0 = {"use-soft-float"="true" nounwind } attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } diff --git a/llvm/test/CodeGen/PowerPC/rldimi.ll b/llvm/test/CodeGen/PowerPC/rldimi.ll index 78ea9aa862f2c..4ce015849d9ea 100644 --- a/llvm/test/CodeGen/PowerPC/rldimi.ll +++ b/llvm/test/CodeGen/PowerPC/rldimi.ll @@ -139,4 +139,158 @@ define i64 @rldimi11(i64 %a, i64 %b) { ret i64 %r } +define i64 @rldimi12(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi12: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 20 +; CHECK-NEXT: rldimi 4, 3, 44, 31 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 0, i64 18446726490113441791) + ret i64 %r +} + +define i64 @rldimi13(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi13: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 62 +; CHECK-NEXT: rldimi 4, 3, 32, 2 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 30, i64 4611686014132420608) + ret i64 %r +} + +define i64 @rldimi14(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi14: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 23 +; CHECK-NEXT: rldimi 4, 3, 53, 0 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437736874454810624) ; mb=0, me=10 + ret i64 %r +} + +define i64 @rldimi15(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi15: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 36 +; CHECK-NEXT: rldimi 4, 3, 40, 10 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18013298997854208) ; mb=10, me=23 + ret i64 %r +} + +define i64 @rldimi16(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi16: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 57 +; CHECK-NEXT: rldimi 4, 3, 19, 10 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18014398508957696) ; mb=10, me=44 + ret i64 %r +} + +define i64 @rldimi17(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi17: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 43 +; CHECK-NEXT: rldimi 4, 3, 33, 25 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 
%b, i32 12, i64 541165879296) ; mb=25, me=30 + ret i64 %r +} + +define i64 @rldimi18(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi18: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 57 +; CHECK-NEXT: rldimi 4, 3, 19, 25 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 549755289600) ; mb=25, me=44 + ret i64 %r +} + +define i64 @rldimi19(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi19: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 57 +; CHECK-NEXT: rldimi 4, 3, 19, 33 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 2146959360) ; mb=33, me=44 + ret i64 %r +} + +define i64 @rldimi20(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi20: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 23 +; CHECK-NEXT: rldimi 4, 3, 53, 15 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18438299824408231935) ; mb=15, me=10 + ret i64 %r +} + +define i64 @rldimi21(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi21: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 23 +; CHECK-NEXT: rldimi 4, 3, 53, 25 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437737424210624511) ; mb=25, me=10 + ret i64 %r +} + +define i64 @rldimi22(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi22: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 34 +; CHECK-NEXT: rldimi 4, 3, 42, 25 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446740225418854399) ; mb=25, me=21 + ret i64 %r +} + +define i64 @rldimi23(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi23: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 23 +; CHECK-NEXT: rldimi 4, 3, 53, 44 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18437736874455859199) ; mb=44, me=10 + ret i64 %r +} + +define i64 @rldimi24(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi24: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 38 +; CHECK-NEXT: rldimi 4, 3, 38, 44 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446743798832693247) ; mb=44, me=25 + ret i64 %r +} + +define i64 @rldimi25(i64 %a, i64 %b) { +; CHECK-LABEL: rldimi25: +; CHECK: # %bb.0: +; CHECK-NEXT: rotldi 3, 3, 48 +; CHECK-NEXT: rldimi 4, 3, 28, 44 +; CHECK-NEXT: mr 3, 4 +; CHECK-NEXT: blr + %r = call i64 @llvm.ppc.rldimi(i64 %a, i64 %b, i32 12, i64 18446744073442164735) ; mb=44, me=35 + ret i64 %r +} + declare i64 @llvm.ppc.rldimi(i64, i64, i32 immarg, i64 immarg) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/insertelement.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/insertelement.ll index c23d1e7c70992..a1347d2306cad 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/insertelement.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/insertelement.ll @@ -18,8 +18,8 @@ define @insertelement_nxv1i1_0() { ; RV64: bb.1 (%ir-block.0): ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF ; RV64-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s32) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement poison, i1 0, i32 0 @@ -40,8 +40,8 @@ define @insertelement_nxv1i1_1() { ; 
RV64: bb.1 (%ir-block.0): ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF ; RV64-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s32) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement poison, i1 -1, i32 0 @@ -70,7 +70,8 @@ define @insertelement_nxv1i1_2(i1 %x, i32 %idx) { ; RV64-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 ; RV64-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[TRUNC1]](s32) + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s32) + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[ZEXT]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement poison, i1 %x, i32 %idx @@ -91,8 +92,8 @@ define @insertelement_nxv2i1_0() { ; RV64: bb.1 (%ir-block.0): ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF ; RV64-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s32) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement poison, i1 0, i32 1 @@ -113,8 +114,8 @@ define @insertelement_nxv2i1_1() { ; RV64: bb.1 (%ir-block.0): ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF ; RV64-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s32) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement poison, i1 -1, i32 0 @@ -143,7 +144,8 @@ define @insertelement_nxv2i1_2(i1 %x, i32 %idx) { ; RV64-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 ; RV64-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[TRUNC1]](s32) + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s32) + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[ZEXT]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement poison, i1 %x, i32 %idx @@ -164,8 +166,8 @@ define @insertelement_nxv4i1_0() { ; RV64: bb.1 (%ir-block.0): ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF ; RV64-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s32) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement poison, i1 0, i32 2 @@ -186,8 +188,8 @@ define 
@insertelement_nxv4i1_1() { ; RV64: bb.1 (%ir-block.0): ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF ; RV64-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s32) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement poison, i1 -1, i32 0 @@ -214,8 +216,8 @@ define @insertelement_nxv4i1_2(i1 %x) { ; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 ; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s64) ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C]](s32) + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement poison, i1 %x, i32 0 @@ -236,8 +238,8 @@ define @insertelement_nxv8i1_0() { ; RV64: bb.1 (%ir-block.0): ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF ; RV64-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s32) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement poison, i1 0, i32 0 @@ -258,8 +260,8 @@ define @insertelement_nxv8i1_1() { ; RV64: bb.1 (%ir-block.0): ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF ; RV64-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s32) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement poison, i1 -1, i32 0 @@ -288,7 +290,8 @@ define @insertelement_nxv8i1_2(i1 %x, i32 %idx) { ; RV64-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11 ; RV64-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[TRUNC1]](s32) + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s32) + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[ZEXT]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement poison, i1 %x, i32 %idx @@ -309,8 +312,8 @@ define @insertelement_nxv16i1_0() { ; RV64: bb.1 (%ir-block.0): ; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF ; RV64-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s32) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s64) ; RV64-NEXT: $v0 = COPY [[IVEC]]() ; RV64-NEXT: PseudoRET implicit $v0 %a = insertelement 
poison, i1 0, i32 15
@@ -331,8 +334,8 @@ define @insertelement_nxv16i1_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s1), [[C1]](s64)
; RV64-NEXT: $v0 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v0
%a = insertelement poison, i1 -1, i32 0
@@ -361,7 +364,8 @@ define @insertelement_nxv16i1_2(i1 %x, i32 %idx) {
; RV64-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
; RV64-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[TRUNC1]](s32)
+ ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s32)
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[ZEXT]](s64)
; RV64-NEXT: $v0 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v0
%a = insertelement poison, i1 %x, i32 %idx
@@ -388,8 +392,8 @@ define @insertelement_nxv4i1_3( %v, i1 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0
; RV64-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s64)
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s1), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s1), [[C]](s64)
; RV64-NEXT: $v0 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v0
%a = insertelement %v, i1 %x, i32 0
@@ -410,8 +414,8 @@ define @insertelement_nxv1i8_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i8 0, i32 0
@@ -432,8 +436,8 @@ define @insertelement_nxv1i8_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i8 -1, i32 0
@@ -460,8 +464,8 @@ define @insertelement_nxv1i8_2(i8 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i8 %x, i32 0
@@ -482,8 +486,8 @@ define @insertelement_nxv2i8_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i8 0, i32 0
@@ -504,8 +508,8 @@ define @insertelement_nxv2i8_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i8 -1, i32 0
@@ -532,8 +536,8 @@ define @insertelement_nxv2i8_2(i8 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i8 %x, i32 0
@@ -554,8 +558,8 @@ define @insertelement_nxv4i8_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i8 0, i32 0
@@ -576,8 +580,8 @@ define @insertelement_nxv4i8_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i8 -1, i32 0
@@ -604,8 +608,8 @@ define @insertelement_nxv4i8_2(i8 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i8 %x, i32 0
@@ -626,8 +630,8 @@ define @insertelement_nxv8i8_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i8 0, i32 0
@@ -648,8 +652,8 @@ define @insertelement_nxv8i8_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i8 -1, i32 0
@@ -676,8 +680,8 @@ define @insertelement_nxv8i8_2(i8 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i8 %x, i32 0
@@ -689,8 +693,8 @@ define @insertelement_nxv16i8_0() {
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV32-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
- ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s64)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s32)
; RV32-NEXT: $v8m2 = COPY [[IVEC]]()
; RV32-NEXT: PseudoRET implicit $v8m2
;
@@ -720,8 +724,8 @@ define @insertelement_nxv16i8_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s8), [[C1]](s64)
; RV64-NEXT: $v8m2 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m2
%a = insertelement poison, i8 -1, i32 0
@@ -739,7 +743,8 @@ define @insertelement_nxv16i8_2(i8 %x, i64 %idx) {
; RV32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[MV]](s64)
+ ; RV32-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; RV32-NEXT: [[IVEC:%[0-9]+]]:_() =
G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s8), [[TRUNC1]](s32)
; RV32-NEXT: $v8m2 = COPY [[IVEC]]()
; RV32-NEXT: PseudoRET implicit $v8m2
;
@@ -778,8 +783,8 @@ define @insertelement_nxv4i8_3( %v, i8 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8
; RV64-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s64)
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s8), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s8), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement %v, i8 %x, i32 0
@@ -800,8 +805,8 @@ define @insertelement_nxv1i16_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i16 0, i32 0
@@ -822,8 +827,8 @@ define @insertelement_nxv1i16_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i16 -1, i32 0
@@ -850,8 +855,8 @@ define @insertelement_nxv1i16_2(i16 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i16 %x, i32 0
@@ -863,8 +868,8 @@ define @insertelement_nxv2i16_0() {
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV32-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; RV32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; RV32-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s64)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s32)
; RV32-NEXT: $v8 = COPY [[IVEC]]()
; RV32-NEXT: PseudoRET implicit $v8
;
@@ -894,8 +899,8 @@ define @insertelement_nxv2i16_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i16 -1, i32 0
@@ -922,8 +927,8 @@ define @insertelement_nxv2i16_2(i16 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i16 %x, i32 0
@@ -944,8 +949,8 @@ define @insertelement_nxv4i16_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i16 0, i32 0
@@ -966,8 +971,8 @@ define @insertelement_nxv4i16_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i16 -1, i32 0
@@ -994,8 +999,8 @@ define @insertelement_nxv4i16_2(i16 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i16 %x, i32 0
@@ -1016,8 +1021,8 @@ define @insertelement_nxv8i16_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s64)
; RV64-NEXT: $v8m2 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m2
%a = insertelement poison, i16 0, i32 0
@@ -1038,8 +1043,8 @@ define @insertelement_nxv8i16_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s64)
; RV64-NEXT: $v8m2 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m2
%a = insertelement poison, i16 -1, i32 0
@@ -1066,8 +1071,8 @@ define @insertelement_nxv8i16_2(i16 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64)
; RV64-NEXT: $v8m2 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m2
%a = insertelement poison, i16 %x, i32 0
@@ -1088,8 +1093,8 @@ define @insertelement_nxv16i16_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s64)
; RV64-NEXT: $v8m4 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m4
%a = insertelement poison, i16 0, i32 0
@@ -1110,8 +1115,8 @@ define @insertelement_nxv16i16_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s16), [[C1]](s64)
; RV64-NEXT: $v8m4 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m4
%a = insertelement poison, i16 -1, i32 0
@@ -1138,8 +1143,8 @@ define @insertelement_nxv16i16_2(i16 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s16), [[C]](s64)
; RV64-NEXT: $v8m4 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m4
%a = insertelement poison, i16 %x, i32 0
@@ -1166,8 +1171,8 @@ define @insertelement_nxv4i16( %v, i16 %x)
; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8
; RV64-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s64)
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s16), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s16), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement %v, i16 %x, i32 0
@@ -1187,7 +1192,8 @@ define @insertelement_nxv1i32_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT:
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i32 0, i32 0
@@ -1208,8 +1214,8 @@ define @insertelement_nxv1i32_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i32 -1, i32 0
@@ -1235,8 +1241,8 @@ define @insertelement_nxv1i32_2(i32 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i32 %x, i32 0
@@ -1256,7 +1262,8 @@ define @insertelement_nxv2i32_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i32 0, i32 0
@@ -1277,8 +1284,8 @@ define @insertelement_nxv2i32_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i32 -1, i32 0
@@ -1304,8 +1311,8 @@ define @insertelement_nxv2i32_2(i32 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i32 %x, i32 0
@@ -1325,7 +1332,8 @@ define @insertelement_nxv4i32_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s64)
; RV64-NEXT: $v8m2 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m2
%a = insertelement poison, i32 0, i32 0
@@ -1346,8 +1354,8 @@ define @insertelement_nxv4i32_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s64)
; RV64-NEXT: $v8m2 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m2
%a = insertelement poison, i32 -1, i32 0
@@ -1373,8 +1381,8 @@ define @insertelement_nxv4i32_2(i32 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64)
; RV64-NEXT: $v8m2 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m2
%a = insertelement poison, i32 %x, i32 0
@@ -1394,7 +1402,8 @@ define @insertelement_nxv8i32_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s64)
; RV64-NEXT: $v8m4 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m4
%a = insertelement poison, i32 0, i32 0
@@ -1415,8 +1424,8 @@ define @insertelement_nxv8i32_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s64)
; RV64-NEXT: $v8m4 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m4
%a = insertelement poison, i32 -1, i32 0
@@ -1442,8 +1451,8 @@ define @insertelement_nxv8i32_2(i32 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64)
; RV64-NEXT: $v8m4 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m4
%a = insertelement poison, i32 %x, i32 0
@@ -1463,7 +1472,8 @@ define @insertelement_nxv16i32_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s64)
; RV64-NEXT: $v8m8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m8
%a = insertelement poison, i32 0, i32 0
@@ -1484,8 +1494,8 @@ define @insertelement_nxv16i32_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s32), [[C1]](s64)
; RV64-NEXT: $v8m8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m8
%a = insertelement poison, i32 -1, i32 0
@@ -1511,8 +1521,8 @@ define @insertelement_nxv16i32_2(i32 %x) {
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s32), [[C]](s64)
; RV64-NEXT: $v8m8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m8
%a = insertelement poison, i32 %x, i32 0
@@ -1538,8 +1548,8 @@ define @insertelement_nxv4i32( %v, i32 %x)
; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2
; RV64-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s32), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[COPY]], [[TRUNC]](s32), [[C]](s64)
; RV64-NEXT: $v8m2 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m2
%a = insertelement %v, i32 %x, i32 0
@@ -1560,8 +1570,7 @@ define @insertelement_nxv1i64_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s32)
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i64 0, i32 0
@@ -1582,8 +1591,8 @@ define @insertelement_nxv1i64_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i64 -1, i32 0
@@ -1610,8 +1619,8 @@ define @insertelement_nxv1i64_2(i64 %x) {
; RV64-NEXT: {{ $}}
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
;
RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64)
; RV64-NEXT: $v8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8
%a = insertelement poison, i64 %x, i32 0
@@ -1632,8 +1641,7 @@ define @insertelement_nxv2i64_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s32)
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64)
; RV64-NEXT: $v8m2 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m2
%a = insertelement poison, i64 0, i32 0
@@ -1654,8 +1662,8 @@ define @insertelement_nxv2i64_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64)
; RV64-NEXT: $v8m2 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m2
%a = insertelement poison, i64 -1, i32 0
@@ -1682,8 +1690,8 @@ define @insertelement_nxv2i64_2(i64 %x) {
; RV64-NEXT: {{ $}}
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64)
; RV64-NEXT: $v8m2 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m2
%a = insertelement poison, i64 %x, i32 0
@@ -1704,8 +1712,7 @@ define @insertelement_nxv4i64_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s32)
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64)
; RV64-NEXT: $v8m4 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m4
%a = insertelement poison, i64 0, i32 0
@@ -1726,8 +1733,8 @@ define @insertelement_nxv4i64_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64)
; RV64-NEXT: $v8m4 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m4
%a = insertelement poison, i64 -1, i32 0
@@ -1754,8 +1761,8 @@ define @insertelement_nxv4i64_2(i64 %x) {
; RV64-NEXT: {{ $}}
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64)
; RV64-NEXT: $v8m4 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m4
%a = insertelement poison, i64 %x, i32 0
@@ -1776,8 +1783,7 @@ define @insertelement_nxv8i64_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s32)
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64)
; RV64-NEXT: $v8m8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m8
%a = insertelement poison, i64 0, i32 0
@@ -1798,8 +1804,8 @@ define @insertelement_nxv8i64_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64)
; RV64-NEXT: $v8m8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m8
%a = insertelement poison, i64 -1, i32 0
@@ -1826,8 +1832,8 @@ define @insertelement_nxv8i64_2(i64 %x) {
; RV64-NEXT: {{ $}}
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64)
; RV64-NEXT: $v8m8 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m8
%a = insertelement poison, i64 %x, i32 0
@@ -1850,8 +1856,7 @@ define @insertelement_nxv16i64_0() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s32)
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C]](s64)
; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[IVEC]]()
; RV64-NEXT: $v8m8 = COPY [[UV]]()
; RV64-NEXT: $v16m8 = COPY [[UV1]]()
@@ -1876,8 +1881,8 @@ define @insertelement_nxv16i64_1() {
; RV64: bb.1 (%ir-block.0):
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
- ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s32)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[C]](s64), [[C1]](s64)
; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[IVEC]]()
; RV64-NEXT: $v8m8 = COPY [[UV]]()
; RV64-NEXT: $v16m8 = COPY [[UV1]]()
@@ -1908,8 +1913,8 @@ define @insertelement_nxv16i64_2(i64 %x) {
; RV64-NEXT: {{ $}}
; RV64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
; RV64-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s64), [[C]](s64)
; RV64-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[IVEC]]()
; RV64-NEXT: $v8m8 = COPY [[UV]]()
; RV64-NEXT: $v16m8 = COPY [[UV1]]()
@@ -1938,8 +1943,8 @@ define @insertelement_nxv4i64( %v, i64 %x)
; RV64-NEXT: {{ $}}
; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4
; RV64-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
- ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s64), [[C]](s32)
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; RV64-NEXT: [[IVEC:%[0-9]+]]:_() = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s64), [[C]](s64)
; RV64-NEXT: $v8m4 = COPY [[IVEC]]()
; RV64-NEXT: PseudoRET implicit $v8m4
%a = insertelement %v, i64 %x, i32 0
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll
index df7778899b0d0..7ea67073bc28d 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/shufflevector.ll
@@ -8,8 +8,8 @@ define @shufflevector_nxv1i1_0() {
; RV32-LABEL: name: shufflevector_nxv1i1_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -30,8 +30,8 @@ define @shufflevector_nxv1i1_1() {
; RV32-LABEL: name: shufflevector_nxv1i1_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -54,8 +54,8 @@ define @shufflevector_nxv1i1_2( %a) {
; RV32-NEXT: liveins: $v0
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -78,8 +78,8 @@ define @shufflevector_nxv2i1_0() {
; RV32-LABEL: name: shufflevector_nxv2i1_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](),
[[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -100,8 +100,8 @@ define @shufflevector_nxv2i1_1() {
; RV32-LABEL: name: shufflevector_nxv2i1_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -124,8 +124,8 @@ define @shufflevector_nxv2i1_2( %a) {
; RV32-NEXT: liveins: $v0
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -148,8 +148,8 @@ define @shufflevector_nxv4i1_0() {
; RV32-LABEL: name: shufflevector_nxv4i1_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -170,8 +170,8 @@ define @shufflevector_nxv4i1_1() {
; RV32-LABEL: name: shufflevector_nxv4i1_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -194,8 +194,8 @@ define @shufflevector_nxv4i1_2( %a) {
; RV32-NEXT: liveins: $v0
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -218,8 +218,8 @@ define @shufflevector_nxv8i1_0() {
; RV32-LABEL: name: shufflevector_nxv8i1_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) =
G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -240,8 +240,8 @@ define @shufflevector_nxv8i1_1() {
; RV32-LABEL: name: shufflevector_nxv8i1_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -264,8 +264,8 @@ define @shufflevector_nxv8i1_2( %a) {
; RV32-NEXT: liveins: $v0
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -288,8 +288,8 @@ define @shufflevector_nxv16i1_0() {
; RV32-LABEL: name: shufflevector_nxv16i1_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -310,8 +310,8 @@ define @shufflevector_nxv16i1_1() {
; RV32-LABEL: name: shufflevector_nxv16i1_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -334,8 +334,8 @@ define @shufflevector_nxv16i1_2( %a) {
; RV32-NEXT: liveins: $v0
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s1) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s1)
; RV32-NEXT: $v0 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v0
@@ -358,8 +358,8 @@ define @shufflevector_nxv1i8_0() {
; RV32-LABEL: name: shufflevector_nxv1i8_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT:
[[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -380,8 +380,8 @@ define @shufflevector_nxv1i8_1() {
; RV32-LABEL: name: shufflevector_nxv1i8_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -404,8 +404,8 @@ define @shufflevector_nxv1i8_2( %a) {
; RV32-NEXT: liveins: $v8
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -428,8 +428,8 @@ define @shufflevector_nxv2i8_0() {
; RV32-LABEL: name: shufflevector_nxv2i8_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -450,8 +450,8 @@ define @shufflevector_nxv2i8_1() {
; RV32-LABEL: name: shufflevector_nxv2i8_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -474,8 +474,8 @@ define @shufflevector_nxv2i8_2( %a) {
; RV32-NEXT: liveins: $v8
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -498,8 +498,8 @@ define @shufflevector_nxv4i8_0() {
; RV32-LABEL: name: shufflevector_nxv4i8_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+
; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -520,8 +520,8 @@ define @shufflevector_nxv4i8_1() {
; RV32-LABEL: name: shufflevector_nxv4i8_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -544,8 +544,8 @@ define @shufflevector_nxv4i8_2( %a) {
; RV32-NEXT: liveins: $v8
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -568,8 +568,8 @@ define @shufflevector_nxv8i8_0() {
; RV32-LABEL: name: shufflevector_nxv8i8_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -590,8 +590,8 @@ define @shufflevector_nxv8i8_1() {
; RV32-LABEL: name: shufflevector_nxv8i8_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -614,8 +614,8 @@ define @shufflevector_nxv8i8_2( %a) {
; RV32-NEXT: liveins: $v8
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -638,8 +638,8 @@ define @shufflevector_nxv16i8_0() {
; RV32-LABEL: name: shufflevector_nxv16i8_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) =
G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m2
@@ -660,8 +660,8 @@ define @shufflevector_nxv16i8_1() {
; RV32-LABEL: name: shufflevector_nxv16i8_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m2
@@ -684,8 +684,8 @@ define @shufflevector_nxv16i8_2( %a) {
; RV32-NEXT: liveins: $v8m2
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s8)
; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m2
@@ -708,8 +708,8 @@ define @shufflevector_nxv1i16_0() {
; RV32-LABEL: name: shufflevector_nxv1i16_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -730,8 +730,8 @@ define @shufflevector_nxv1i16_1() {
; RV32-LABEL: name: shufflevector_nxv1i16_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -754,8 +754,8 @@ define @shufflevector_nxv1i16_2( %a) {
; RV32-NEXT: liveins: $v8
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -778,8 +778,8 @@ define @shufflevector_nxv2i16_0() {
; RV32-LABEL: name: shufflevector_nxv2i16_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](),
[[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -800,8 +800,8 @@ define @shufflevector_nxv2i16_1() {
; RV32-LABEL: name: shufflevector_nxv2i16_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -824,8 +824,8 @@ define @shufflevector_nxv2i16_2( %a) {
; RV32-NEXT: liveins: $v8
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -848,8 +848,8 @@ define @shufflevector_nxv4i16_0() {
; RV32-LABEL: name: shufflevector_nxv4i16_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -870,8 +870,8 @@ define @shufflevector_nxv4i16_1() {
; RV32-LABEL: name: shufflevector_nxv4i16_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -894,8 +894,8 @@ define @shufflevector_nxv4i16_2( %a) {
; RV32-NEXT: liveins: $v8
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -918,8 +918,8 @@ define @shufflevector_nxv8i16_0() {
; RV32-LABEL: name: shufflevector_nxv8i16_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT:
[[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m2
@@ -940,8 +940,8 @@ define @shufflevector_nxv8i16_1() {
; RV32-LABEL: name: shufflevector_nxv8i16_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m2
@@ -964,8 +964,8 @@ define @shufflevector_nxv8i16_2( %a) {
; RV32-NEXT: liveins: $v8m2
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m2
@@ -988,8 +988,8 @@ define @shufflevector_nxv16i16_0() {
; RV32-LABEL: name: shufflevector_nxv16i16_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m4
@@ -1010,8 +1010,8 @@ define @shufflevector_nxv16i16_1() {
; RV32-LABEL: name: shufflevector_nxv16i16_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m4
@@ -1034,8 +1034,8 @@ define @shufflevector_nxv16i16_2( %a) {
; RV32-NEXT: liveins: $v8m4
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s16)
; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m4
@@ -1058,8 +1058,8 @@ define @shufflevector_nxv1i32_0() {
; RV32-LABEL: name: shufflevector_nxv1i32_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() =
G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -1080,8 +1080,8 @@ define @shufflevector_nxv1i32_1() {
; RV32-LABEL: name: shufflevector_nxv1i32_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -1104,8 +1104,8 @@ define @shufflevector_nxv1i32_2( %a) {
; RV32-NEXT: liveins: $v8
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -1128,8 +1128,8 @@ define @shufflevector_nxv2i32_0() {
; RV32-LABEL: name: shufflevector_nxv2i32_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -1150,8 +1150,8 @@ define @shufflevector_nxv2i32_1() {
; RV32-LABEL: name: shufflevector_nxv2i32_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -1174,8 +1174,8 @@ define @shufflevector_nxv2i32_2( %a) {
; RV32-NEXT: liveins: $v8
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32)
; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8
@@ -1198,8 +1198,8 @@ define @shufflevector_nxv4i32_0() {
; RV32-LABEL: name: shufflevector_nxv4i32_0
; RV32:
bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32)
; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m2
@@ -1220,8 +1220,8 @@ define @shufflevector_nxv4i32_1() {
; RV32-LABEL: name: shufflevector_nxv4i32_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32)
; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m2
@@ -1244,8 +1244,8 @@ define @shufflevector_nxv4i32_2( %a) {
; RV32-NEXT: liveins: $v8m2
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32)
; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m2
@@ -1268,8 +1268,8 @@ define @shufflevector_nxv8i32_0() {
; RV32-LABEL: name: shufflevector_nxv8i32_0
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32)
; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m4
@@ -1290,8 +1290,8 @@ define @shufflevector_nxv8i32_1() {
; RV32-LABEL: name: shufflevector_nxv8i32_1
; RV32: bb.1 (%ir-block.0):
; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32)
; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m4
@@ -1314,8 +1314,8 @@ define @shufflevector_nxv8i32_2( %a) {
; RV32-NEXT: liveins: $v8m4
; RV32-NEXT: {{ $}}
; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4
- ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64)
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32)
; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32)
; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]()
; RV32-NEXT: PseudoRET implicit $v8m4
@@ -1338,8 +1338,8 @@
define @shufflevector_nxv16i32_0() { ; RV32-LABEL: name: shufflevector_nxv16i32_0 ; RV32: bb.1 (%ir-block.0): ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8m8 @@ -1360,8 +1360,8 @@ define @shufflevector_nxv16i32_1() { ; RV32-LABEL: name: shufflevector_nxv16i32_1 ; RV32: bb.1 (%ir-block.0): ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8m8 @@ -1384,8 +1384,8 @@ define @shufflevector_nxv16i32_2( %a) { ; RV32-NEXT: liveins: $v8m8 ; RV32-NEXT: {{ $}} ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s32) ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8m8 @@ -1408,8 +1408,8 @@ define @shufflevector_nxv1i64_0() { ; RV32-LABEL: name: shufflevector_nxv1i64_0 ; RV32: bb.1 (%ir-block.0): ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8 @@ -1430,8 +1430,8 @@ define @shufflevector_nxv1i64_1() { ; RV32-LABEL: name: shufflevector_nxv1i64_1 ; RV32: bb.1 (%ir-block.0): ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8 @@ -1454,8 +1454,8 @@ define @shufflevector_nxv1i64_2( %a) { ; RV32-NEXT: liveins: $v8 ; RV32-NEXT: {{ $}} ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: $v8 = COPY 
[[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8 @@ -1478,8 +1478,8 @@ define @shufflevector_nxv2i64_0() { ; RV32-LABEL: name: shufflevector_nxv2i64_0 ; RV32: bb.1 (%ir-block.0): ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8m2 @@ -1500,8 +1500,8 @@ define @shufflevector_nxv2i64_1() { ; RV32-LABEL: name: shufflevector_nxv2i64_1 ; RV32: bb.1 (%ir-block.0): ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8m2 @@ -1524,8 +1524,8 @@ define @shufflevector_nxv2i64_2( %a) { ; RV32-NEXT: liveins: $v8m2 ; RV32-NEXT: {{ $}} ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8m2 @@ -1548,8 +1548,8 @@ define @shufflevector_nxv4i64_0() { ; RV32-LABEL: name: shufflevector_nxv4i64_0 ; RV32: bb.1 (%ir-block.0): ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8m4 @@ -1570,8 +1570,8 @@ define @shufflevector_nxv4i64_1() { ; RV32-LABEL: name: shufflevector_nxv4i64_1 ; RV32: bb.1 (%ir-block.0): ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8m4 @@ -1594,8 +1594,8 @@ define @shufflevector_nxv4i64_2( %a) { ; RV32-NEXT: liveins: $v8m4 ; RV32-NEXT: {{ $}} ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32) ; RV32-NEXT: 
[[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8m4 @@ -1618,8 +1618,8 @@ define @shufflevector_nxv8i64_0() { ; RV32-LABEL: name: shufflevector_nxv8i64_0 ; RV32: bb.1 (%ir-block.0): ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8m8 @@ -1640,8 +1640,8 @@ define @shufflevector_nxv8i64_1() { ; RV32-LABEL: name: shufflevector_nxv8i64_1 ; RV32: bb.1 (%ir-block.0): ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8m8 @@ -1664,8 +1664,8 @@ define @shufflevector_nxv8i64_2( %a) { ; RV32-NEXT: liveins: $v8m8 ; RV32-NEXT: {{ $}} ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() ; RV32-NEXT: PseudoRET implicit $v8m8 @@ -1688,8 +1688,8 @@ define @shufflevector_nxv16i64_0() { ; RV32-LABEL: name: shufflevector_nxv16i64_0 ; RV32: bb.1 (%ir-block.0): ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() ; RV32-NEXT: $v8m8 = COPY [[UV]]() @@ -1714,8 +1714,8 @@ define @shufflevector_nxv16i64_1() { ; RV32-LABEL: name: shufflevector_nxv16i64_1 ; RV32: bb.1 (%ir-block.0): ; RV32-NEXT: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF - ; RV32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[DEF]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() ; RV32-NEXT: $v8m8 = COPY [[UV]]() @@ -1744,8 +1744,8 @@ define @shufflevector_nxv16i64_2( %a) { ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m8 ; RV32-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v16m8 ; RV32-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_() = G_CONCAT_VECTORS [[COPY]](), [[COPY1]]() - ; RV32-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](), [[C]](s64) + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](), [[C]](s32) ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[EVEC]](s64) ; RV32-NEXT: [[UV:%[0-9]+]]:_(), [[UV1:%[0-9]+]]:_() = G_UNMERGE_VALUES [[SPLAT_VECTOR]]() ; RV32-NEXT: $v8m8 = COPY [[UV]]() diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index cc332df271043..2326599bf3513 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -103,8 +103,8 @@ ; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+zvkt %s -o - | FileCheck --check-prefix=RV32ZVKT %s ; RUN: llc -mtriple=riscv32 -mattr=+zvfh %s -o - | FileCheck --check-prefix=RV32ZVFH %s ; RUN: llc -mtriple=riscv32 -mattr=+zicond %s -o - | FileCheck --check-prefix=RV32ZICOND %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-zimop %s -o - | FileCheck --check-prefix=RV32ZIMOP %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-zcmop %s -o - | FileCheck --check-prefix=RV32ZCMOP %s +; RUN: llc -mtriple=riscv32 -mattr=+zimop %s -o - | FileCheck --check-prefix=RV32ZIMOP %s +; RUN: llc -mtriple=riscv32 -mattr=+zcmop %s -o - | FileCheck --check-prefix=RV32ZCMOP %s ; RUN: llc -mtriple=riscv32 -mattr=+smaia %s -o - | FileCheck --check-prefixes=CHECK,RV32SMAIA %s ; RUN: llc -mtriple=riscv32 -mattr=+ssaia %s -o - | FileCheck --check-prefixes=CHECK,RV32SSAIA %s ; RUN: llc -mtriple=riscv32 -mattr=+smepmp %s -o - | FileCheck --check-prefixes=CHECK,RV32SMEPMP %s @@ -233,8 +233,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+zvkt %s -o - | FileCheck --check-prefix=RV64ZVKT %s ; RUN: llc -mtriple=riscv64 -mattr=+zvfh %s -o - | FileCheck --check-prefix=RV64ZVFH %s ; RUN: llc -mtriple=riscv64 -mattr=+zicond %s -o - | FileCheck --check-prefix=RV64ZICOND %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zimop %s -o - | FileCheck --check-prefix=RV64ZIMOP %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zcmop %s -o - | FileCheck --check-prefix=RV64ZCMOP %s +; RUN: llc -mtriple=riscv64 -mattr=+zimop %s -o - | FileCheck --check-prefix=RV64ZIMOP %s +; RUN: llc -mtriple=riscv64 -mattr=+zcmop %s -o - | FileCheck --check-prefix=RV64ZCMOP %s ; RUN: llc -mtriple=riscv64 -mattr=+smaia %s -o - | FileCheck --check-prefixes=CHECK,RV64SMAIA %s ; RUN: llc -mtriple=riscv64 -mattr=+ssaia %s -o - | FileCheck --check-prefixes=CHECK,RV64SSAIA %s ; RUN: llc -mtriple=riscv64 -mattr=+smepmp %s -o - | FileCheck --check-prefixes=CHECK,RV64SMEPMP %s @@ -358,8 +358,8 @@ ; RV32ZVKT: .attribute 5, "rv32i2p1_zicsr2p0_zve32x1p0_zvkt1p0_zvl32b1p0" ; RV32ZVFH: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zfhmin1p0_zve32f1p0_zve32x1p0_zvfh1p0_zvfhmin1p0_zvl32b1p0" ; RV32ZICOND: .attribute 5, "rv32i2p1_zicond1p0" -; RV32ZIMOP: .attribute 5, "rv32i2p1_zimop0p1" -; RV32ZCMOP: .attribute 5, "rv32i2p1_zca1p0_zcmop0p2" +; RV32ZIMOP: .attribute 5, "rv32i2p1_zimop1p0" +; RV32ZCMOP: .attribute 5, "rv32i2p1_zca1p0_zcmop1p0" ; RV32SMAIA: .attribute 5, "rv32i2p1_smaia1p0" ; RV32SSAIA: .attribute 5, "rv32i2p1_ssaia1p0" ; RV32SMEPMP: .attribute 5, "rv32i2p1_smepmp1p0" @@ -487,8 +487,8 @@ ; RV64ZVKT: .attribute 5, "rv64i2p1_zicsr2p0_zve32x1p0_zvkt1p0_zvl32b1p0" ; RV64ZVFH: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zfhmin1p0_zve32f1p0_zve32x1p0_zvfh1p0_zvfhmin1p0_zvl32b1p0" ; RV64ZICOND: .attribute 5, "rv64i2p1_zicond1p0" -; RV64ZIMOP: 
.attribute 5, "rv64i2p1_zimop0p1" -; RV64ZCMOP: .attribute 5, "rv64i2p1_zca1p0_zcmop0p2" +; RV64ZIMOP: .attribute 5, "rv64i2p1_zimop1p0" +; RV64ZCMOP: .attribute 5, "rv64i2p1_zca1p0_zcmop1p0" ; RV64SMAIA: .attribute 5, "rv64i2p1_smaia1p0" ; RV64SSAIA: .attribute 5, "rv64i2p1_ssaia1p0" ; RV64SMEPMP: .attribute 5, "rv64i2p1_smepmp1p0" diff --git a/llvm/test/CodeGen/RISCV/compress-opt-branch.ll b/llvm/test/CodeGen/RISCV/compress-opt-branch.ll index b6ae6419c4cd0..f740e7fc4e313 100644 --- a/llvm/test/CodeGen/RISCV/compress-opt-branch.ll +++ b/llvm/test/CodeGen/RISCV/compress-opt-branch.ll @@ -3,45 +3,32 @@ ; The optimization should appear only with +c, otherwise default isel should be ; choosen. ; -; RUN: cat %s | sed 's/CMPCOND/eq/g' | sed 's/RESBRNORMAL/bne/g' | \ -; RUN: sed 's/RESBROPT/c.bnez/g' > %t.compress_eq ; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+c,+f,+d -filetype=obj \ -; RUN: -disable-block-placement < %t.compress_eq \ +; RUN: -disable-block-placement < %s \ ; RUN: | llvm-objdump -d --triple=riscv32 --mattr=+c,+f,+d -M no-aliases - \ -; RUN: | FileCheck -check-prefix=RV32IFDC %t.compress_eq +; RUN: | FileCheck -check-prefix=RV32IFDC %s ; -; RUN: cat %s | sed -e 's/CMPCOND/eq/g' | sed -e 's/RESBRNORMAL/bne/g'\ -; RUN: | sed -e 's/RESBROPT/c.bnez/g' > %t.nocompr_eq -; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=-c,+f,+d -filetype=obj \ -; RUN: -disable-block-placement < %t.nocompr_eq \ -; RUN: | llvm-objdump -d --triple=riscv32 --mattr=-c,+f,+d -M no-aliases - \ -; RUN: | FileCheck -check-prefix=RV32IFD %t.nocompr_eq +; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+zca,+f,+d -filetype=obj \ +; RUN: -disable-block-placement < %s \ +; RUN: | llvm-objdump -d --triple=riscv32 --mattr=+zca,+f,+d -M no-aliases - \ +; RUN: | FileCheck -check-prefix=RV32IFDC %s ; -; RUN: cat %s | sed 's/CMPCOND/ne/g' | sed 's/RESBRNORMAL/beq/g' | \ -; RUN: sed 's/RESBROPT/c.beqz/g' > %t.compress_neq -; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+c,+f,+d -filetype=obj \ -; RUN: -disable-block-placement < %t.compress_neq \ -; RUN: | llvm-objdump -d --triple=riscv32 --mattr=+c,+f,+d -M no-aliases - \ -; RUN: | FileCheck -check-prefix=RV32IFDC %t.compress_neq -; -; RUN: cat %s | sed -e 's/CMPCOND/ne/g' | sed -e 's/RESBRNORMAL/beq/g'\ -; RUN: | sed -e 's/RESBROPT/c.beqz/g' > %t.nocompr_neq ; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=-c,+f,+d -filetype=obj \ -; RUN: -disable-block-placement < %t.nocompr_neq \ +; RUN: -disable-block-placement < %s \ ; RUN: | llvm-objdump -d --triple=riscv32 --mattr=-c,+f,+d -M no-aliases - \ -; RUN: | FileCheck -check-prefix=RV32IFD %t.nocompr_neq +; RUN: | FileCheck -check-prefix=RV32IFD %s ; constant is small and fit in 6 bit (compress imm) -; RV32IFDC-LABEL: : +; RV32IFDC-LABEL: : ; RV32IFDC: c.li [[REG:.*]], 0x14 -; RV32IFDC: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +; RV32IFDC: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] ; --- no compress extension -; RV32IFD-LABEL: : +; RV32IFD-LABEL: : ; RV32IFD: addi [[REG:.*]], zero, 0x14 -; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] -define i32 @f_small_pos(i32 %in0) minsize { - %cmp = icmp CMPCOND i32 %in0, 20 +; RV32IFD: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_small_pos_eq(i32 %in0) minsize { + %cmp = icmp eq i32 %in0, 20 br i1 %cmp, label %if.then, label %if.else if.then: %call = shl i32 %in0, 1 @@ -56,15 +43,84 @@ if.end: } ; constant is small and fit in 6 bit (compress imm) -; RV32IFDC-LABEL: : +; RV32IFDC-LABEL: : +; RV32IFDC: c.li [[REG:.*]], 
0x14 +; RV32IFDC: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +; --- no compress extension +; RV32IFD-LABEL: : +; RV32IFD: addi [[REG:.*]], zero, 0x14 +; RV32IFD: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_small_pos_ne(i32 %in0) minsize { + %cmp = icmp ne i32 %in0, 20 + br i1 %cmp, label %if.then, label %if.else +if.then: + %call = shl i32 %in0, 1 + br label %if.end +if.else: + %call2 = add i32 %in0, 42 + br label %if.end + +if.end: + %toRet = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %toRet +} + +; constant is small and fit in 6 bit (compress imm) +; RV32IFDC-LABEL: : ; RV32IFDC: c.li [[REG:.*]], -0x14 -; RV32IFDC: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +; RV32IFDC: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] ; --- no compress extension -; RV32IFD-LABEL: : +; RV32IFD-LABEL: : ; RV32IFD: addi [[REG:.*]], zero, -0x14 -; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] -define i32 @f_small_neg(i32 %in0) minsize { - %cmp = icmp CMPCOND i32 %in0, -20 +; RV32IFD: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_small_neg_eq(i32 %in0) minsize { + %cmp = icmp eq i32 %in0, -20 + br i1 %cmp, label %if.then, label %if.else +if.then: + %call = shl i32 %in0, 1 + br label %if.end +if.else: + %call2 = add i32 %in0, 42 + br label %if.end + +if.end: + %toRet = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %toRet +} + +; constant is small and fit in 6 bit (compress imm) +; RV32IFDC-LABEL: : +; RV32IFDC: c.li [[REG:.*]], -0x14 +; RV32IFDC: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +; --- no compress extension +; RV32IFD-LABEL: : +; RV32IFD: addi [[REG:.*]], zero, -0x14 +; RV32IFD: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_small_neg_ne(i32 %in0) minsize { + %cmp = icmp ne i32 %in0, -20 + br i1 %cmp, label %if.then, label %if.else +if.then: + %call = shl i32 %in0, 1 + br label %if.end +if.else: + %call2 = add i32 %in0, 42 + br label %if.end + +if.end: + %toRet = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %toRet +} + +; constant is small and fit in 6 bit (compress imm) +; RV32IFDC-LABEL: : +; RV32IFDC: c.li [[REG:.*]], 0x1f +; RV32IFDC: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +; --- no compress extension +; RV32IFD-LABEL: : +; RV32IFD: addi [[REG:.*]], zero, 0x1f +; RV32IFD: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_small_edge_pos_eq(i32 %in0) minsize { + %cmp = icmp eq i32 %in0, 31 br i1 %cmp, label %if.then, label %if.else if.then: %call = shl i32 %in0, 1 @@ -79,15 +135,15 @@ if.end: } ; constant is small and fit in 6 bit (compress imm) -; RV32IFDC-LABEL: : +; RV32IFDC-LABEL: : ; RV32IFDC: c.li [[REG:.*]], 0x1f -; RV32IFDC: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +; RV32IFDC: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] ; --- no compress extension -; RV32IFD-LABEL: : +; RV32IFD-LABEL: : ; RV32IFD: addi [[REG:.*]], zero, 0x1f -; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] -define i32 @f_small_edge_pos(i32 %in0) minsize { - %cmp = icmp CMPCOND i32 %in0, 31 +; RV32IFD: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_small_edge_pos_ne(i32 %in0) minsize { + %cmp = icmp ne i32 %in0, 31 br i1 %cmp, label %if.then, label %if.else if.then: %call = shl i32 %in0, 1 @@ -102,15 +158,38 @@ if.end: } ; constant is small and fit in 6 bit (compress imm) -; RV32IFDC-LABEL: : +; RV32IFDC-LABEL: : ; RV32IFDC: c.li [[REG:.*]], -0x20 -; RV32IFDC: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +; RV32IFDC: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] ; --- no compress extension -; 
RV32IFD-LABEL: : +; RV32IFD-LABEL: : ; RV32IFD: addi [[REG:.*]], zero, -0x20 -; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] -define i32 @f_small_edge_neg(i32 %in0) minsize { - %cmp = icmp CMPCOND i32 %in0, -32 +; RV32IFD: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_small_edge_neg_eq(i32 %in0) minsize { + %cmp = icmp eq i32 %in0, -32 + br i1 %cmp, label %if.then, label %if.else +if.then: + %call = shl i32 %in0, 1 + br label %if.end +if.else: + %call2 = add i32 %in0, 42 + br label %if.end + +if.end: + %toRet = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %toRet +} + +; constant is small and fit in 6 bit (compress imm) +; RV32IFDC-LABEL: : +; RV32IFDC: c.li [[REG:.*]], -0x20 +; RV32IFDC: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +; --- no compress extension +; RV32IFD-LABEL: : +; RV32IFD: addi [[REG:.*]], zero, -0x20 +; RV32IFD: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_small_edge_neg_ne(i32 %in0) minsize { + %cmp = icmp ne i32 %in0, -32 br i1 %cmp, label %if.then, label %if.else if.then: %call = shl i32 %in0, 1 @@ -126,15 +205,15 @@ if.end: ; constant is medium and not fit in 6 bit (compress imm), ; but fit in 12 bit (imm) -; RV32IFDC-LABEL: : +; RV32IFDC-LABEL: : ; RV32IFDC: addi [[MAYZEROREG:.*]], [[REG:.*]], -0x20 -; RV32IFDC: RESBROPT [[MAYZEROREG]], [[PLACE:.*]] +; RV32IFDC: c.bnez [[MAYZEROREG]], [[PLACE:.*]] ; --- no compress extension -; RV32IFD-LABEL: : +; RV32IFD-LABEL: : ; RV32IFD: addi [[REG:.*]], zero, 0x20 -; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] -define i32 @f_medium_ledge_pos(i32 %in0) minsize { - %cmp = icmp CMPCOND i32 %in0, 32 +; RV32IFD: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_medium_ledge_pos_eq(i32 %in0) minsize { + %cmp = icmp eq i32 %in0, 32 br i1 %cmp, label %if.then, label %if.else if.then: %call = shl i32 %in0, 1 @@ -150,15 +229,39 @@ if.end: ; constant is medium and not fit in 6 bit (compress imm), ; but fit in 12 bit (imm) -; RV32IFDC-LABEL: : +; RV32IFDC-LABEL: : +; RV32IFDC: addi [[MAYZEROREG:.*]], [[REG:.*]], -0x20 +; RV32IFDC: c.beqz [[MAYZEROREG]], [[PLACE:.*]] +; --- no compress extension +; RV32IFD-LABEL: : +; RV32IFD: addi [[REG:.*]], zero, 0x20 +; RV32IFD: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_medium_ledge_pos_ne(i32 %in0) minsize { + %cmp = icmp ne i32 %in0, 32 + br i1 %cmp, label %if.then, label %if.else +if.then: + %call = shl i32 %in0, 1 + br label %if.end +if.else: + %call2 = add i32 %in0, 42 + br label %if.end + +if.end: + %toRet = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %toRet +} + +; constant is medium and not fit in 6 bit (compress imm), +; but fit in 12 bit (imm) +; RV32IFDC-LABEL: : ; RV32IFDC: addi [[MAYZEROREG:.*]], [[REG:.*]], 0x21 -; RV32IFDC: RESBROPT [[MAYZEROREG]], [[PLACE:.*]] +; RV32IFDC: c.bnez [[MAYZEROREG]], [[PLACE:.*]] ; --- no compress extension -; RV32IFD-LABEL: : +; RV32IFD-LABEL: : ; RV32IFD: addi [[REG:.*]], zero, -0x21 -; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] -define i32 @f_medium_ledge_neg(i32 %in0) minsize { - %cmp = icmp CMPCOND i32 %in0, -33 +; RV32IFD: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_medium_ledge_neg_eq(i32 %in0) minsize { + %cmp = icmp eq i32 %in0, -33 br i1 %cmp, label %if.then, label %if.else if.then: %call = shl i32 %in0, 1 @@ -174,15 +277,39 @@ if.end: ; constant is medium and not fit in 6 bit (compress imm), ; but fit in 12 bit (imm) -; RV32IFDC-LABEL: : +; RV32IFDC-LABEL: : +; RV32IFDC: addi [[MAYZEROREG:.*]], [[REG:.*]], 0x21 +; 
RV32IFDC: c.beqz [[MAYZEROREG]], [[PLACE:.*]] +; --- no compress extension +; RV32IFD-LABEL: : +; RV32IFD: addi [[REG:.*]], zero, -0x21 +; RV32IFD: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_medium_ledge_neg_ne(i32 %in0) minsize { + %cmp = icmp ne i32 %in0, -33 + br i1 %cmp, label %if.then, label %if.else +if.then: + %call = shl i32 %in0, 1 + br label %if.end +if.else: + %call2 = add i32 %in0, 42 + br label %if.end + +if.end: + %toRet = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %toRet +} + +; constant is medium and not fit in 6 bit (compress imm), +; but fit in 12 bit (imm) +; RV32IFDC-LABEL: : ; RV32IFDC: addi [[MAYZEROREG:.*]], [[REG:.*]], -0x3f -; RV32IFDC: RESBROPT [[MAYZEROREG]], [[PLACE:.*]] +; RV32IFDC: c.bnez [[MAYZEROREG]], [[PLACE:.*]] ; --- no compress extension -; RV32IFD-LABEL: : +; RV32IFD-LABEL: : ; RV32IFD: addi [[REG:.*]], zero, 0x3f -; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] -define i32 @f_medium_pos(i32 %in0) minsize { - %cmp = icmp CMPCOND i32 %in0, 63 +; RV32IFD: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_medium_pos_eq(i32 %in0) minsize { + %cmp = icmp eq i32 %in0, 63 br i1 %cmp, label %if.then, label %if.else if.then: %call = shl i32 %in0, 1 @@ -198,15 +325,39 @@ if.end: ; constant is medium and not fit in 6 bit (compress imm), ; but fit in 12 bit (imm) -; RV32IFDC-LABEL: : +; RV32IFDC-LABEL: : +; RV32IFDC: addi [[MAYZEROREG:.*]], [[REG:.*]], -0x3f +; RV32IFDC: c.beqz [[MAYZEROREG]], [[PLACE:.*]] +; --- no compress extension +; RV32IFD-LABEL: : +; RV32IFD: addi [[REG:.*]], zero, 0x3f +; RV32IFD: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_medium_pos_ne(i32 %in0) minsize { + %cmp = icmp ne i32 %in0, 63 + br i1 %cmp, label %if.then, label %if.else +if.then: + %call = shl i32 %in0, 1 + br label %if.end +if.else: + %call2 = add i32 %in0, 42 + br label %if.end + +if.end: + %toRet = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %toRet +} + +; constant is medium and not fit in 6 bit (compress imm), +; but fit in 12 bit (imm) +; RV32IFDC-LABEL: : ; RV32IFDC: addi [[MAYZEROREG:.*]], [[REG:.*]], 0x3f -; RV32IFDC: RESBROPT [[MAYZEROREG]], [[PLACE:.*]] +; RV32IFDC: c.bnez [[MAYZEROREG]], [[PLACE:.*]] ; --- no compress extension -; RV32IFD-LABEL: : +; RV32IFD-LABEL: : ; RV32IFD: addi [[REG:.*]], zero, -0x3f -; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] -define i32 @f_medium_neg(i32 %in0) minsize { - %cmp = icmp CMPCOND i32 %in0, -63 +; RV32IFD: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_medium_neg_eq(i32 %in0) minsize { + %cmp = icmp eq i32 %in0, -63 br i1 %cmp, label %if.then, label %if.else if.then: %call = shl i32 %in0, 1 @@ -222,15 +373,87 @@ if.end: ; constant is medium and not fit in 6 bit (compress imm), ; but fit in 12 bit (imm) -; RV32IFDC-LABEL: : +; RV32IFDC-LABEL: : +; RV32IFDC: addi [[MAYZEROREG:.*]], [[REG:.*]], 0x3f +; RV32IFDC: c.beqz [[MAYZEROREG]], [[PLACE:.*]] +; --- no compress extension +; RV32IFD-LABEL: : +; RV32IFD: addi [[REG:.*]], zero, -0x3f +; RV32IFD: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_medium_neg_ne(i32 %in0) minsize { + %cmp = icmp ne i32 %in0, -63 + br i1 %cmp, label %if.then, label %if.else +if.then: + %call = shl i32 %in0, 1 + br label %if.end +if.else: + %call2 = add i32 %in0, 42 + br label %if.end + +if.end: + %toRet = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %toRet +} + +; constant is medium and not fit in 6 bit (compress imm), +; but fit in 12 bit (imm) +; RV32IFDC-LABEL: : ; RV32IFDC: 
addi [[MAYZEROREG:.*]], [[REG:.*]], -0x7ff -; RV32IFDC: RESBROPT [[MAYZEROREG]], [[PLACE:.*]] +; RV32IFDC: c.bnez [[MAYZEROREG]], [[PLACE:.*]] ; --- no compress extension -; RV32IFD-LABEL: : +; RV32IFD-LABEL: : ; RV32IFD: addi [[REG:.*]], zero, 0x7ff -; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] -define i32 @f_medium_bedge_pos(i32 %in0) minsize { - %cmp = icmp CMPCOND i32 %in0, 2047 +; RV32IFD: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_medium_bedge_pos_eq(i32 %in0) minsize { + %cmp = icmp eq i32 %in0, 2047 + br i1 %cmp, label %if.then, label %if.else +if.then: + %call = shl i32 %in0, 1 + br label %if.end +if.else: + %call2 = add i32 %in0, 42 + br label %if.end + +if.end: + %toRet = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %toRet +} + +; constant is medium and not fit in 6 bit (compress imm), +; but fit in 12 bit (imm) +; RV32IFDC-LABEL: : +; RV32IFDC: addi [[MAYZEROREG:.*]], [[REG:.*]], -0x7ff +; RV32IFDC: c.beqz [[MAYZEROREG]], [[PLACE:.*]] +; --- no compress extension +; RV32IFD-LABEL: : +; RV32IFD: addi [[REG:.*]], zero, 0x7ff +; RV32IFD: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_medium_bedge_pos_ne(i32 %in0) minsize { + %cmp = icmp ne i32 %in0, 2047 + br i1 %cmp, label %if.then, label %if.else +if.then: + %call = shl i32 %in0, 1 + br label %if.end +if.else: + %call2 = add i32 %in0, 42 + br label %if.end + +if.end: + %toRet = phi i32 [ %call, %if.then ], [ %call2, %if.else ] + ret i32 %toRet +} + +; constant is medium and not fit in 6 bit (compress imm), +; but fit in 12 bit (imm), negative value fit in 12 bit too. +; RV32IFDC-LABEL: : +; RV32IFDC: addi [[MAYZEROREG:.*]], [[REG:.*]], 0x7ff +; RV32IFDC: c.bnez [[MAYZEROREG]], [[PLACE:.*]] +; --- no compress extension +; RV32IFD-LABEL: : +; RV32IFD: addi [[REG:.*]], zero, -0x7ff +; RV32IFD: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]] +define i32 @f_medium_bedge_neg_eq(i32 %in0) minsize { + %cmp = icmp eq i32 %in0, -2047 br i1 %cmp, label %if.then, label %if.else if.then: %call = shl i32 %in0, 1 @@ -246,15 +469,65 @@ if.end: ; constant is medium and not fit in 6 bit (compress imm), ; but fit in 12 bit (imm), negative value fit in 12 bit too. 
-; RV32IFDC-LABEL: <f_medium_bedge_neg>:
+; RV32IFDC-LABEL: <f_medium_bedge_neg_ne>:
 ; RV32IFDC: addi [[MAYZEROREG:.*]], [[REG:.*]], 0x7ff
-; RV32IFDC: RESBROPT [[MAYZEROREG]], [[PLACE:.*]]
+; RV32IFDC: c.beqz [[MAYZEROREG]], [[PLACE:.*]]
 ; --- no compress extension
-; RV32IFD-LABEL: <f_medium_bedge_neg>:
+; RV32IFD-LABEL: <f_medium_bedge_neg_ne>:
 ; RV32IFD: addi [[REG:.*]], zero, -0x7ff
-; RV32IFD: RESBRNORMAL [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
+; RV32IFD: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
-define i32 @f_medium_bedge_neg(i32 %in0) minsize {
- %cmp = icmp CMPCOND i32 %in0, -2047
+define i32 @f_medium_bedge_neg_ne(i32 %in0) minsize {
+ %cmp = icmp ne i32 %in0, -2047
+ br i1 %cmp, label %if.then, label %if.else
+if.then:
+ %call = shl i32 %in0, 1
+ br label %if.end
+if.else:
+ %call2 = add i32 %in0, 42
+ br label %if.end
+
+if.end:
+ %toRet = phi i32 [ %call, %if.then ], [ %call2, %if.else ]
+ ret i32 %toRet
+}
+
+; constant is big and do not fit in 12 bit (imm), fit in i32
+; RV32IFDC-LABEL: <f_big_ledge_pos_eq>:
+; RV32IFDC: c.li [[REG:.*]], 0x1
+; RV32IFDC: c.slli [[REG]], 0xb
+; RV32IFDC: bne [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
+; --- no compress extension
+; RV32IFD-LABEL: <f_big_ledge_pos_eq>:
+; RV32IFD: addi [[REG1:.*]], zero, 0x1
+; RV32IFD: slli [[REG2:.*]], [[REG1]], 0xb
+; RV32IFD: bne [[ANOTHER:.*]], [[REG2]], [[PLACE:.*]]
+define i32 @f_big_ledge_pos_eq(i32 %in0) minsize {
+ %cmp = icmp eq i32 %in0, 2048
+ br i1 %cmp, label %if.then, label %if.else
+if.then:
+ %call = shl i32 %in0, 1
+ br label %if.end
+if.else:
+ %call2 = add i32 %in0, 42
+ br label %if.end
+
+if.end:
+ %toRet = phi i32 [ %call, %if.then ], [ %call2, %if.else ]
+ ret i32 %toRet
+}
+
+; constant is big and do not fit in 12 bit (imm), fit in i32
+; RV32IFDC-LABEL: <f_big_ledge_pos_ne>:
+; RV32IFDC: c.li [[REG:.*]], 0x1
+; RV32IFDC: c.slli [[REG]], 0xb
+; RV32IFDC: beq [[ANOTHER:.*]], [[REG]], [[PLACE:.*]]
+; --- no compress extension
+; RV32IFD-LABEL: <f_big_ledge_pos_ne>:
+; RV32IFD: addi [[REG1:.*]], zero, 0x1
+; RV32IFD: slli [[REG2:.*]], [[REG1]], 0xb
+; RV32IFD: beq [[ANOTHER:.*]], [[REG2]], [[PLACE:.*]]
+define i32 @f_big_ledge_pos_ne(i32 %in0) minsize {
+ %cmp = icmp ne i32 %in0, 2048
 br i1 %cmp, label %if.then, label %if.else
 if.then:
 %call = shl i32 %in0, 1
@@ -269,12 +542,17 @@ if.end:
 }
 ; constant is big and do not fit in 12 bit (imm), fit in i32
-; RV32IFDC-LABEL: <f_big_ledge_pos>:
-; RV32IFDC-NOT: RESBROPT
+; RV32IFDC-LABEL: <f_big_ledge_neg_eq>:
+; RV32IFDC: c.lui [[REG1:.*]], 0xfffff
+; RV32IFDC: addi [[REG2:.*]], [[REG1]], 0x7ff
+; RV32IFDC: bne [[ANOTHER:.*]], [[REG2]], [[PLACE:.*]]
 ; --- no compress extension
-; nothing to check.
-define i32 @f_big_ledge_pos(i32 %in0) minsize {
- %cmp = icmp CMPCOND i32 %in0, 2048
+; RV32IFD-LABEL: <f_big_ledge_neg_eq>:
+; RV32IFD: lui [[REG1:.*]], 0xfffff
+; RV32IFD: addi [[REG2:.*]], [[REG1]], 0x7ff
+; RV32IFD: bne [[ANOTHER:.*]], [[REG2]], [[PLACE:.*]]
+define i32 @f_big_ledge_neg_eq(i32 %in0) minsize {
+ %cmp = icmp eq i32 %in0, -2049
 br i1 %cmp, label %if.then, label %if.else
 if.then:
 %call = shl i32 %in0, 1
@@ -289,12 +567,17 @@ if.end:
 }
 ; constant is big and do not fit in 12 bit (imm), fit in i32
-; RV32IFDC-LABEL: <f_big_ledge_neg>:
-; RV32IFDC-NOT: c.beqz
+; RV32IFDC-LABEL: <f_big_ledge_neg_ne>:
+; RV32IFDC: c.lui [[REG1:.*]], 0xfffff
+; RV32IFDC: addi [[REG2:.*]], [[REG1]], 0x7ff
+; RV32IFDC: beq [[ANOTHER:.*]], [[REG2]], [[PLACE:.*]]
 ; --- no compress extension
-; nothing to check.
-define i32 @f_big_ledge_neg(i32 %in0) minsize { - %cmp = icmp CMPCOND i32 %in0, -2048 +; RV32IFD-LABEL: : +; RV32IFD: lui [[REG1:.*]], 0xfffff +; RV32IFD: addi [[REG2:.*]], [[REG1]], 0x7ff +; RV32IFD: beq [[ANOTHER:.*]], [[REG2]], [[PLACE:.*]] +define i32 @f_big_ledge_neg_ne(i32 %in0) minsize { + %cmp = icmp ne i32 %in0, -2049 br i1 %cmp, label %if.then, label %if.else if.then: %call = shl i32 %in0, 1 diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr.ll b/llvm/test/CodeGen/RISCV/interrupt-attr.ll index e22720d6b3e2b..50c789f8f86dc 100644 --- a/llvm/test/CodeGen/RISCV/interrupt-attr.ll +++ b/llvm/test/CodeGen/RISCV/interrupt-attr.ll @@ -6,6 +6,13 @@ ; RUN: llc -mtriple riscv32-unknown-elf -mattr=+f,+d -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV32-FD ; +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+f -target-abi ilp32 -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV32-F-ILP3 +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+f,+d -target-abi ilp32f -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV32-FD-ILP32F +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+f,+d -target-abi ilp32 -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV32-FD-ILP32 + ; RUN: llc -mtriple riscv32-unknown-elf -mattr=+i -target-abi ilp32e -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV32I-ILP32E ; RUN: llc -mtriple riscv32-unknown-elf -mattr=+e -o - %s \ @@ -19,6 +26,13 @@ ; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-F ; RUN: llc -mtriple riscv64-unknown-elf -mattr=+f,+d -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-FD + +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+f -target-abi=lp64 -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-F-LP64 +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+f,+d -target-abi=lp64f -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-FD-LP64F +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+f,+d -target-abi=lp64 -o - %s \ +; RUN: 2>&1 | FileCheck %s -check-prefix CHECK -check-prefix CHECK-RV64-FD-LP64 ; ; RUN: llc -mtriple riscv64-unknown-elf -mattr=+i -target-abi lp64e -o - %s \ ; RUN: 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-RV64I-LP64E @@ -305,6 +319,315 @@ define void @foo_with_call() #1 { ; CHECK-RV32-FD-NEXT: addi sp, sp, 320 ; CHECK-RV32-FD-NEXT: mret ; +; CHECK-RV32-F-ILP3-LABEL: foo_with_call: +; CHECK-RV32-F-ILP3: # %bb.0: +; CHECK-RV32-F-ILP3-NEXT: addi sp, sp, -192 +; CHECK-RV32-F-ILP3-NEXT: sw ra, 188(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw t0, 184(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw t1, 180(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw t2, 176(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw a0, 172(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw a1, 168(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw a2, 164(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw a3, 160(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw a4, 156(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw a5, 152(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw a6, 148(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw a7, 144(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw t3, 140(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw t4, 136(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw t5, 132(sp) # 4-byte 
Folded Spill +; CHECK-RV32-F-ILP3-NEXT: sw t6, 128(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fa0, 84(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fa3, 72(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fa7, 56(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: fsw ft11, 0(sp) # 4-byte Folded Spill +; CHECK-RV32-F-ILP3-NEXT: call otherfoo +; CHECK-RV32-F-ILP3-NEXT: lw ra, 188(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw t0, 184(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw t1, 180(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw t2, 176(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw a0, 172(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw a1, 168(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw a2, 164(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw a3, 160(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw a4, 156(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw a5, 152(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw a6, 148(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw a7, 144(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw t3, 140(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw t4, 136(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw t5, 132(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: lw t6, 128(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw ft4, 
108(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fs1, 88(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fs2, 52(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload +; CHECK-RV32-F-ILP3-NEXT: addi sp, sp, 192 +; CHECK-RV32-F-ILP3-NEXT: mret +; +; CHECK-RV32-FD-ILP32F-LABEL: foo_with_call: +; CHECK-RV32-FD-ILP32F: # %bb.0: +; CHECK-RV32-FD-ILP32F-NEXT: addi sp, sp, -320 +; CHECK-RV32-FD-ILP32F-NEXT: sw ra, 316(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t0, 312(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t1, 308(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t2, 304(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a0, 300(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a1, 296(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a2, 292(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a3, 288(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a4, 284(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a5, 280(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a6, 276(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a7, 272(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t3, 268(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t4, 264(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t5, 260(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t6, 256(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft1, 240(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft2, 232(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft3, 224(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft4, 216(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft5, 208(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft6, 
200(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft7, 192(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs0, 184(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs1, 176(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa0, 168(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa1, 160(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa2, 152(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa3, 144(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa4, 136(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa6, 120(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa7, 112(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs2, 104(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs3, 96(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs4, 88(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs5, 80(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs6, 72(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs7, 64(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs8, 56(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs9, 48(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs10, 40(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs11, 32(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft8, 24(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft9, 16(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft10, 8(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft11, 0(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: call otherfoo +; CHECK-RV32-FD-ILP32F-NEXT: lw ra, 316(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t0, 312(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t1, 308(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t2, 304(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a0, 300(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a1, 296(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a2, 292(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a3, 288(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a4, 284(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a5, 280(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a6, 276(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a7, 272(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t3, 268(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t4, 264(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t5, 260(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t6, 256(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft1, 240(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft2, 232(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft3, 224(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft4, 216(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft5, 208(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft6, 200(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft7, 192(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs0, 184(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs1, 176(sp) # 
8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa0, 168(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa1, 160(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa2, 152(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa3, 144(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa4, 136(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa6, 120(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa7, 112(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs2, 104(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs3, 96(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs4, 88(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs5, 80(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs6, 72(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs7, 64(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs8, 56(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs9, 48(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs10, 40(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs11, 32(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft8, 24(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft9, 16(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft10, 8(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: addi sp, sp, 320 +; CHECK-RV32-FD-ILP32F-NEXT: mret +; +; CHECK-RV32-FD-ILP32-LABEL: foo_with_call: +; CHECK-RV32-FD-ILP32: # %bb.0: +; CHECK-RV32-FD-ILP32-NEXT: addi sp, sp, -320 +; CHECK-RV32-FD-ILP32-NEXT: sw ra, 316(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t0, 312(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t1, 308(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t2, 304(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a0, 300(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a1, 296(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a2, 292(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a3, 288(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a4, 284(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a5, 280(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a6, 276(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a7, 272(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t3, 268(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t4, 264(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t5, 260(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t6, 256(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft1, 240(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft2, 232(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft3, 224(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft4, 216(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft5, 208(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft6, 200(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft7, 192(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs0, 184(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs1, 176(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa0, 168(sp) # 8-byte 
Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa1, 160(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa2, 152(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa3, 144(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa4, 136(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa6, 120(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa7, 112(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs2, 104(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs3, 96(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs4, 88(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs5, 80(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs6, 72(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs7, 64(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs8, 56(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs9, 48(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs10, 40(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs11, 32(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft8, 24(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft9, 16(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft10, 8(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft11, 0(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: call otherfoo +; CHECK-RV32-FD-ILP32-NEXT: lw ra, 316(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t0, 312(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t1, 308(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t2, 304(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a0, 300(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a1, 296(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a2, 292(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a3, 288(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a4, 284(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a5, 280(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a6, 276(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a7, 272(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t3, 268(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t4, 264(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t5, 260(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t6, 256(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft1, 240(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft2, 232(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft3, 224(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft4, 216(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft5, 208(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft6, 200(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft7, 192(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs0, 184(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs1, 176(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa0, 168(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa1, 160(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa2, 152(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa3, 144(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa4, 
136(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa6, 120(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa7, 112(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs2, 104(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs3, 96(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs4, 88(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs5, 80(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs6, 72(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs7, 64(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs8, 56(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs9, 48(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs10, 40(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs11, 32(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft8, 24(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft9, 16(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft10, 8(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: addi sp, sp, 320 +; CHECK-RV32-FD-ILP32-NEXT: mret +; ; CHECK-RV32I-ILP32E-LABEL: foo_with_call: ; CHECK-RV32I-ILP32E: # %bb.0: ; CHECK-RV32I-ILP32E-NEXT: addi sp, sp, -104 @@ -727,6 +1050,315 @@ define void @foo_with_call() #1 { ; CHECK-RV64-FD-NEXT: addi sp, sp, 384 ; CHECK-RV64-FD-NEXT: mret ; +; CHECK-RV64-F-LP64-LABEL: foo_with_call: +; CHECK-RV64-F-LP64: # %bb.0: +; CHECK-RV64-F-LP64-NEXT: addi sp, sp, -256 +; CHECK-RV64-F-LP64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t0, 240(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t1, 232(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t2, 224(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a0, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a1, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a2, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a3, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a4, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a5, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a6, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a7, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t3, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t4, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t5, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t6, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft0, 124(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft1, 120(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft2, 116(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft3, 112(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft4, 108(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft5, 104(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft6, 100(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft7, 96(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs0, 92(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs1, 88(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa0, 84(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa1, 80(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa2, 76(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa3, 
72(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa4, 68(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa5, 64(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa6, 60(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa7, 56(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs2, 52(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs3, 48(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs4, 44(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs5, 40(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs6, 36(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs7, 32(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs8, 28(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs9, 24(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs10, 20(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs11, 16(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft8, 12(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft9, 8(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft10, 4(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft11, 0(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: call otherfoo +; CHECK-RV64-F-LP64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t0, 240(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t1, 232(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t2, 224(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a1, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a2, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a3, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a4, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a5, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a6, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a7, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t3, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t4, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t5, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t6, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft0, 124(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft1, 120(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft2, 116(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft3, 112(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft4, 108(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft5, 104(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft6, 100(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft7, 96(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs0, 92(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs1, 88(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa0, 84(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa1, 80(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa2, 76(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa3, 72(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa4, 68(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa5, 64(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa6, 60(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa7, 56(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs2, 52(sp) # 4-byte Folded Reload +; 
CHECK-RV64-F-LP64-NEXT: flw fs3, 48(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs4, 44(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs5, 40(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs6, 36(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs7, 32(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs8, 28(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs9, 24(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs10, 20(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs11, 16(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft8, 12(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft9, 8(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft10, 4(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft11, 0(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: addi sp, sp, 256 +; CHECK-RV64-F-LP64-NEXT: mret +; +; CHECK-RV64-FD-LP64F-LABEL: foo_with_call: +; CHECK-RV64-FD-LP64F: # %bb.0: +; CHECK-RV64-FD-LP64F-NEXT: addi sp, sp, -384 +; CHECK-RV64-FD-LP64F-NEXT: sd ra, 376(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t0, 368(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t1, 360(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t2, 352(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a0, 344(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a1, 336(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a2, 328(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a3, 320(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a4, 312(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a5, 304(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a6, 296(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a7, 288(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t3, 280(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t4, 272(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t5, 264(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t6, 256(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft1, 240(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft2, 232(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft3, 224(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft4, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft5, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft6, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft7, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs0, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs1, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa0, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa1, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa2, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa3, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa4, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa6, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa7, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs2, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs3, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs4, 88(sp) # 8-byte Folded Spill +; 
CHECK-RV64-FD-LP64F-NEXT: fsd fs5, 80(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs6, 72(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs7, 64(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs8, 56(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs9, 48(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs10, 40(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs11, 32(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft8, 24(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft9, 16(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft10, 8(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft11, 0(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: call otherfoo +; CHECK-RV64-FD-LP64F-NEXT: ld ra, 376(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t0, 368(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t1, 360(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t2, 352(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a0, 344(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a1, 336(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a2, 328(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a3, 320(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a4, 312(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a5, 304(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a6, 296(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a7, 288(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t3, 280(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t4, 272(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t5, 264(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t6, 256(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft1, 240(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft2, 232(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft3, 224(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft4, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft5, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft6, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft7, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs0, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs1, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa0, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa1, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa2, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa3, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa4, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa6, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa7, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs2, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs3, 96(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs4, 88(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs5, 80(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs6, 72(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs7, 64(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs8, 56(sp) # 
8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs9, 48(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs10, 40(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs11, 32(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft8, 24(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft9, 16(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft10, 8(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft11, 0(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: addi sp, sp, 384 +; CHECK-RV64-FD-LP64F-NEXT: mret +; +; CHECK-RV64-FD-LP64-LABEL: foo_with_call: +; CHECK-RV64-FD-LP64: # %bb.0: +; CHECK-RV64-FD-LP64-NEXT: addi sp, sp, -384 +; CHECK-RV64-FD-LP64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t0, 368(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t1, 360(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t2, 352(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a0, 344(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a1, 336(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a2, 328(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a3, 320(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a4, 312(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a5, 304(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a6, 296(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a7, 288(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t3, 280(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t4, 272(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t5, 264(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t6, 256(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft0, 248(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft1, 240(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft2, 232(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft3, 224(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft4, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft5, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft6, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft7, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs0, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs1, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa0, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa1, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa2, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa3, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa4, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa5, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa6, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa7, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs2, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs3, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs4, 88(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs5, 80(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs6, 72(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs7, 64(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs8, 56(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs9, 48(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs10, 40(sp) # 8-byte Folded Spill +; 
CHECK-RV64-FD-LP64-NEXT: fsd fs11, 32(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft8, 24(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft9, 16(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft10, 8(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft11, 0(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: call otherfoo +; CHECK-RV64-FD-LP64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t0, 368(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t1, 360(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t2, 352(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a0, 344(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a1, 336(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a2, 328(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a3, 320(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a4, 312(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a5, 304(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a6, 296(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a7, 288(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t3, 280(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t4, 272(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t5, 264(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t6, 256(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft0, 248(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft1, 240(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft2, 232(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft3, 224(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft4, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft5, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft6, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft7, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs0, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs1, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa0, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa1, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa2, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa3, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa4, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa5, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa6, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa7, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs2, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs3, 96(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs4, 88(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs5, 80(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs6, 72(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs7, 64(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs8, 56(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs9, 48(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs10, 40(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs11, 32(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft8, 24(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft9, 16(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft10, 8(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: 
fld ft11, 0(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-LP64-NEXT: addi sp, sp, 384
+; CHECK-RV64-FD-LP64-NEXT: mret
+;
 ; CHECK-RV64I-LP64E-LABEL: foo_with_call:
 ; CHECK-RV64I-LP64E: # %bb.0:
 ; CHECK-RV64I-LP64E-NEXT: addi sp, sp, -208
@@ -1289,6 +1921,324 @@ define void @foo_fp_with_call() #2 {
 ; CHECK-RV32-FD-NEXT: addi sp, sp, 336
 ; CHECK-RV32-FD-NEXT: mret
 ;
+; CHECK-RV32-F-ILP32-LABEL: foo_fp_with_call:
+; CHECK-RV32-F-ILP32: # %bb.0:
+; CHECK-RV32-F-ILP32-NEXT: addi sp, sp, -208
+; CHECK-RV32-F-ILP32-NEXT: sw ra, 204(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw t0, 200(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw t1, 196(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw t2, 192(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw s0, 188(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw a0, 184(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw a1, 180(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw a2, 176(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw a3, 172(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw a4, 168(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw a5, 164(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw a6, 160(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw a7, 156(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw t3, 152(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw t4, 148(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw t5, 144(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: sw t6, 140(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw ft0, 136(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw ft1, 132(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw ft2, 128(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw ft3, 124(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw ft4, 120(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw ft5, 116(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw ft6, 112(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw ft7, 108(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fs0, 104(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fs1, 100(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fa0, 96(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fa1, 92(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fa2, 88(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fa3, 84(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fa4, 80(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fa5, 76(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fa6, 72(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fa7, 68(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fs2, 64(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fs3, 60(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fs4, 56(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fs5, 52(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fs6, 48(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fs7, 44(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fs8, 40(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fs9, 36(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fs10, 32(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw fs11, 28(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw ft8, 24(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw ft9, 20(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw ft10, 16(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: fsw ft11, 12(sp) # 4-byte Folded Spill
+; CHECK-RV32-F-ILP32-NEXT: addi s0, sp, 208
+; CHECK-RV32-F-ILP32-NEXT: call otherfoo
+; CHECK-RV32-F-ILP32-NEXT: lw ra, 204(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw t0, 200(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw t1, 196(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw t2, 192(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw s0, 188(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw a0, 184(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw a1, 180(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw a2, 176(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw a3, 172(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw a4, 168(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw a5, 164(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw a6, 160(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw a7, 156(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw t3, 152(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw t4, 148(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw t5, 144(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: lw t6, 140(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw ft0, 136(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw ft1, 132(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw ft2, 128(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw ft3, 124(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw ft4, 120(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw ft5, 116(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw ft6, 112(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw ft7, 108(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fs0, 104(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fs1, 100(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fa0, 96(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fa1, 92(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fa2, 88(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fa3, 84(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fa4, 80(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fa5, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fa6, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fa7, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fs2, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fs3, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fs4, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fs5, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fs6, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fs7, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fs8, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fs9, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fs10, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw fs11, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw ft8, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw ft9, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw ft10, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: flw ft11, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-ILP32-NEXT: addi sp, sp, 208
+; CHECK-RV32-F-ILP32-NEXT: mret
+;
+; CHECK-RV32-FD-ILP32F-LABEL:
foo_fp_with_call: +; CHECK-RV32-FD-ILP32F: # %bb.0: +; CHECK-RV32-FD-ILP32F-NEXT: addi sp, sp, -336 +; CHECK-RV32-FD-ILP32F-NEXT: sw ra, 332(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t0, 328(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t1, 324(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t2, 320(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw s0, 316(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a0, 312(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a1, 308(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a2, 304(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a3, 300(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a4, 296(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a5, 292(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a6, 288(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw a7, 284(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t3, 280(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t4, 276(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t5, 272(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: sw t6, 268(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft0, 256(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft1, 248(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft2, 240(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft3, 232(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft4, 224(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft5, 216(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft6, 208(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft7, 200(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs0, 192(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs1, 184(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa0, 176(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa1, 168(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa2, 160(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa3, 152(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa4, 144(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa5, 136(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa6, 128(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fa7, 120(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs2, 112(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs3, 104(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs4, 96(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs5, 88(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs6, 80(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs7, 72(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs8, 64(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs9, 56(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs10, 48(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd fs11, 40(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft8, 32(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft9, 24(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft10, 16(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: fsd ft11, 8(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32F-NEXT: addi s0, sp, 336 +; CHECK-RV32-FD-ILP32F-NEXT: call otherfoo +; 
CHECK-RV32-FD-ILP32F-NEXT: lw ra, 332(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t0, 328(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t1, 324(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t2, 320(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw s0, 316(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a0, 312(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a1, 308(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a2, 304(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a3, 300(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a4, 296(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a5, 292(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a6, 288(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw a7, 284(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t3, 280(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t4, 276(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t5, 272(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: lw t6, 268(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft0, 256(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft1, 248(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft2, 240(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft3, 232(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft4, 224(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft5, 216(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft6, 208(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft7, 200(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs0, 192(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs1, 184(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa0, 176(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa1, 168(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa2, 160(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa3, 152(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa4, 144(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa5, 136(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa6, 128(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fa7, 120(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs2, 112(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs3, 104(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs4, 96(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs5, 88(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs6, 80(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs7, 72(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs8, 64(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs9, 56(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs10, 48(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld fs11, 40(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft8, 32(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft9, 24(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft10, 16(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: fld ft11, 8(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32F-NEXT: addi sp, sp, 336 +; CHECK-RV32-FD-ILP32F-NEXT: mret +; +; CHECK-RV32-FD-ILP32-LABEL: foo_fp_with_call: +; CHECK-RV32-FD-ILP32: # %bb.0: +; 
CHECK-RV32-FD-ILP32-NEXT: addi sp, sp, -336 +; CHECK-RV32-FD-ILP32-NEXT: sw ra, 332(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t0, 328(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t1, 324(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t2, 320(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw s0, 316(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a0, 312(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a1, 308(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a2, 304(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a3, 300(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a4, 296(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a5, 292(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a6, 288(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw a7, 284(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t3, 280(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t4, 276(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t5, 272(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: sw t6, 268(sp) # 4-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft0, 256(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft1, 248(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft2, 240(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft3, 232(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft4, 224(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft5, 216(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft6, 208(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft7, 200(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs0, 192(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs1, 184(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa0, 176(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa1, 168(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa2, 160(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa3, 152(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa4, 144(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa5, 136(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa6, 128(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fa7, 120(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs2, 112(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs3, 104(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs4, 96(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs5, 88(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs6, 80(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs7, 72(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs8, 64(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs9, 56(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs10, 48(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd fs11, 40(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft8, 32(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft9, 24(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft10, 16(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: fsd ft11, 8(sp) # 8-byte Folded Spill +; CHECK-RV32-FD-ILP32-NEXT: addi s0, sp, 336 +; CHECK-RV32-FD-ILP32-NEXT: call otherfoo +; CHECK-RV32-FD-ILP32-NEXT: lw ra, 332(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t0, 328(sp) # 4-byte Folded 
Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t1, 324(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t2, 320(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw s0, 316(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a0, 312(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a1, 308(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a2, 304(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a3, 300(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a4, 296(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a5, 292(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a6, 288(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw a7, 284(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t3, 280(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t4, 276(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t5, 272(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: lw t6, 268(sp) # 4-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft0, 256(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft1, 248(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft2, 240(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft3, 232(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft4, 224(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft5, 216(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft6, 208(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft7, 200(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs0, 192(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs1, 184(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa0, 176(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa1, 168(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa2, 160(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa3, 152(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa4, 144(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa5, 136(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa6, 128(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fa7, 120(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs2, 112(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs3, 104(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs4, 96(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs5, 88(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs6, 80(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs7, 72(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs8, 64(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs9, 56(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs10, 48(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld fs11, 40(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft8, 32(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft9, 24(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft10, 16(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: fld ft11, 8(sp) # 8-byte Folded Reload +; CHECK-RV32-FD-ILP32-NEXT: addi sp, sp, 336 +; CHECK-RV32-FD-ILP32-NEXT: mret +; ; CHECK-RV32I-ILP32E-LABEL: foo_fp_with_call: ; CHECK-RV32I-ILP32E: # %bb.0: ; CHECK-RV32I-ILP32E-NEXT: addi sp, sp, -108 @@ -1729,6 +2679,324 @@ define void @foo_fp_with_call() #2 { ; CHECK-RV64-FD-NEXT: addi sp, sp, 400 ; CHECK-RV64-FD-NEXT: mret ; +; 
CHECK-RV64-F-LP64-LABEL: foo_fp_with_call: +; CHECK-RV64-F-LP64: # %bb.0: +; CHECK-RV64-F-LP64-NEXT: addi sp, sp, -272 +; CHECK-RV64-F-LP64-NEXT: sd ra, 264(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t0, 256(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t1, 248(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t2, 240(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd s0, 232(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a0, 224(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a1, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a2, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a3, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a4, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a5, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a6, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd a7, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t3, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t4, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t5, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: sd t6, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft0, 132(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft1, 128(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft2, 124(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft3, 120(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft4, 116(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft5, 112(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft6, 108(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft7, 104(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs0, 100(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs1, 96(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa0, 92(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa1, 88(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa2, 84(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa3, 80(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa4, 76(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa5, 72(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa6, 68(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fa7, 64(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs2, 60(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs3, 56(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs4, 52(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs5, 48(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs6, 44(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs7, 40(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs8, 36(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs9, 32(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs10, 28(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw fs11, 24(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft8, 20(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft9, 16(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft10, 12(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: fsw ft11, 8(sp) # 4-byte Folded Spill +; CHECK-RV64-F-LP64-NEXT: addi s0, sp, 272 +; CHECK-RV64-F-LP64-NEXT: call otherfoo +; CHECK-RV64-F-LP64-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t0, 256(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t1, 
248(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t2, 240(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld s0, 232(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a1, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a2, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a3, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a4, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a5, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a6, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld a7, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t3, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t4, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t5, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: ld t6, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft0, 132(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft1, 128(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft2, 124(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft3, 120(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft4, 116(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft5, 112(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft6, 108(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft7, 104(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs0, 100(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs1, 96(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa0, 92(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa1, 88(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa2, 84(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa3, 80(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa4, 76(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa5, 72(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa6, 68(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fa7, 64(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs2, 60(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs3, 56(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs4, 52(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs5, 48(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs6, 44(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs7, 40(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs8, 36(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs9, 32(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs10, 28(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw fs11, 24(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft8, 20(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft9, 16(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft10, 12(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: flw ft11, 8(sp) # 4-byte Folded Reload +; CHECK-RV64-F-LP64-NEXT: addi sp, sp, 272 +; CHECK-RV64-F-LP64-NEXT: mret +; +; CHECK-RV64-FD-LP64F-LABEL: foo_fp_with_call: +; CHECK-RV64-FD-LP64F: # %bb.0: +; CHECK-RV64-FD-LP64F-NEXT: addi sp, sp, -400 +; CHECK-RV64-FD-LP64F-NEXT: sd ra, 392(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t0, 384(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t1, 376(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t2, 368(sp) # 8-byte Folded Spill +; 
CHECK-RV64-FD-LP64F-NEXT: sd s0, 360(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a0, 352(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a1, 344(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a2, 336(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a3, 328(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a4, 320(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a5, 312(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a6, 304(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd a7, 296(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t3, 288(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t4, 280(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t5, 272(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: sd t6, 264(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft0, 256(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft1, 248(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft2, 240(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft3, 232(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft4, 224(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft5, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft6, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft7, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs0, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs1, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa0, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa1, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa2, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa3, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa4, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa5, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa6, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fa7, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs2, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs3, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs4, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs5, 88(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs6, 80(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs7, 72(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs8, 64(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs9, 56(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs10, 48(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd fs11, 40(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft8, 32(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft9, 24(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft10, 16(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: fsd ft11, 8(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64F-NEXT: addi s0, sp, 400 +; CHECK-RV64-FD-LP64F-NEXT: call otherfoo +; CHECK-RV64-FD-LP64F-NEXT: ld ra, 392(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t0, 384(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t1, 376(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t2, 368(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld s0, 360(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a0, 352(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a1, 
344(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a2, 336(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a3, 328(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a4, 320(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a5, 312(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a6, 304(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld a7, 296(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t3, 288(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t4, 280(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t5, 272(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: ld t6, 264(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft0, 256(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft1, 248(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft2, 240(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft3, 232(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft4, 224(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft5, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft6, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft7, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs0, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs1, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa0, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa1, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa2, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa3, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa4, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa5, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa6, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fa7, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs2, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs3, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs4, 96(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs5, 88(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs6, 80(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs7, 72(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs8, 64(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs9, 56(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs10, 48(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld fs11, 40(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft8, 32(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft9, 24(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft10, 16(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: fld ft11, 8(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64F-NEXT: addi sp, sp, 400 +; CHECK-RV64-FD-LP64F-NEXT: mret +; +; CHECK-RV64-FD-LP64-LABEL: foo_fp_with_call: +; CHECK-RV64-FD-LP64: # %bb.0: +; CHECK-RV64-FD-LP64-NEXT: addi sp, sp, -400 +; CHECK-RV64-FD-LP64-NEXT: sd ra, 392(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t0, 384(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t1, 376(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t2, 368(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd s0, 360(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a0, 352(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a1, 344(sp) # 8-byte 
Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a2, 336(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a3, 328(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a4, 320(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a5, 312(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a6, 304(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd a7, 296(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t3, 288(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t4, 280(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t5, 272(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: sd t6, 264(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft0, 256(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft1, 248(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft2, 240(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft3, 232(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft4, 224(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft5, 216(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft6, 208(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft7, 200(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs0, 192(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs1, 184(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa0, 176(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa1, 168(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa2, 160(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa3, 152(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa4, 144(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa5, 136(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa6, 128(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fa7, 120(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs2, 112(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs3, 104(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs4, 96(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs5, 88(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs6, 80(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs7, 72(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs8, 64(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs9, 56(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs10, 48(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd fs11, 40(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft8, 32(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft9, 24(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft10, 16(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: fsd ft11, 8(sp) # 8-byte Folded Spill +; CHECK-RV64-FD-LP64-NEXT: addi s0, sp, 400 +; CHECK-RV64-FD-LP64-NEXT: call otherfoo +; CHECK-RV64-FD-LP64-NEXT: ld ra, 392(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t0, 384(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t1, 376(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t2, 368(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld s0, 360(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a0, 352(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a1, 344(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a2, 336(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a3, 328(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a4, 320(sp) # 8-byte Folded Reload +; 
CHECK-RV64-FD-LP64-NEXT: ld a5, 312(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a6, 304(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld a7, 296(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t3, 288(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t4, 280(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t5, 272(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: ld t6, 264(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft0, 256(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft1, 248(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft2, 240(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft3, 232(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft4, 224(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft5, 216(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft6, 208(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft7, 200(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs0, 192(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs1, 184(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa0, 176(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa1, 168(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa2, 160(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa3, 152(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa4, 144(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa5, 136(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa6, 128(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fa7, 120(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs2, 112(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs3, 104(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs4, 96(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs5, 88(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs6, 80(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs7, 72(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs8, 64(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs9, 56(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs10, 48(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld fs11, 40(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft8, 32(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft9, 24(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft10, 16(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: fld ft11, 8(sp) # 8-byte Folded Reload +; CHECK-RV64-FD-LP64-NEXT: addi sp, sp, 400 +; CHECK-RV64-FD-LP64-NEXT: mret +; ; CHECK-RV64I-LP64E-LABEL: foo_fp_with_call: ; CHECK-RV64I-LP64E: # %bb.0: ; CHECK-RV64I-LP64E-NEXT: addi sp, sp, -216 diff --git a/llvm/test/CodeGen/RISCV/prolog-epilogue.ll b/llvm/test/CodeGen/RISCV/prolog-epilogue.ll new file mode 100644 index 0000000000000..50b236470ae64 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/prolog-epilogue.ll @@ -0,0 +1,534 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32,RV32I +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+zba < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32,RV32ZBA +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64,RV64I +; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+zba < %s \ +; 
RUN: | FileCheck %s -check-prefixes=RV64,RV64ZBA + +declare void @callee(ptr) + +define void @frame_16b() { +; RV32-LABEL: frame_16b: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: li a0, 0 +; RV32-NEXT: call callee +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: frame_16b: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: li a0, 0 +; RV64-NEXT: call callee +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + call void @callee(ptr null) + ret void +} + +define void @frame_1024b() { +; RV32-LABEL: frame_1024b: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -1024 +; RV32-NEXT: .cfi_def_cfa_offset 1024 +; RV32-NEXT: sw ra, 1020(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: addi a0, sp, 12 +; RV32-NEXT: call callee +; RV32-NEXT: lw ra, 1020(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 1024 +; RV32-NEXT: ret +; +; RV64-LABEL: frame_1024b: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -1024 +; RV64-NEXT: .cfi_def_cfa_offset 1024 +; RV64-NEXT: sd ra, 1016(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: call callee +; RV64-NEXT: ld ra, 1016(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 1024 +; RV64-NEXT: ret + %a = alloca [1008 x i8] + call void @callee(ptr %a) + ret void +} + +define void @frame_2048b() { +; RV32-LABEL: frame_2048b: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -2032 +; RV32-NEXT: .cfi_def_cfa_offset 2032 +; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 2048 +; RV32-NEXT: addi a0, sp, 12 +; RV32-NEXT: call callee +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 2032 +; RV32-NEXT: ret +; +; RV64-LABEL: frame_2048b: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -2032 +; RV64-NEXT: .cfi_def_cfa_offset 2032 +; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 2048 +; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: call callee +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 2032 +; RV64-NEXT: ret + %a = alloca [2032 x i8] + call void @callee(ptr %a) + ret void +} + +define void @frame_4096b() { +; RV32-LABEL: frame_4096b: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -2032 +; RV32-NEXT: .cfi_def_cfa_offset 2032 +; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: addi sp, sp, -2048 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 4096 +; RV32-NEXT: addi a0, sp, 12 +; RV32-NEXT: call callee +; RV32-NEXT: addi sp, sp, 2032 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 2032 +; RV32-NEXT: ret +; +; RV64-LABEL: frame_4096b: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -2032 +; RV64-NEXT: .cfi_def_cfa_offset 2032 +; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: addi sp, sp, -2048 +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 
4096 +; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: call callee +; RV64-NEXT: addi sp, sp, 2032 +; RV64-NEXT: addi sp, sp, 32 +; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 2032 +; RV64-NEXT: ret + %a = alloca [4080 x i8] + call void @callee(ptr %a) + ret void +} + +;; 2^12-16+2032 +define void @frame_4kb() { +; RV32-LABEL: frame_4kb: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -2032 +; RV32-NEXT: .cfi_def_cfa_offset 2032 +; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa_offset 6128 +; RV32-NEXT: addi a0, sp, 12 +; RV32-NEXT: call callee +; RV32-NEXT: lui a0, 1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 2032 +; RV32-NEXT: ret +; +; RV64-LABEL: frame_4kb: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -2032 +; RV64-NEXT: .cfi_def_cfa_offset 2032 +; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: lui a0, 1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa_offset 6128 +; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: call callee +; RV64-NEXT: lui a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 2032 +; RV64-NEXT: ret + %a = alloca [6112 x i8] + call void @callee(ptr %a) + ret void +} + +define void @frame_4kb_offset_128() { +; RV32I-LABEL: frame_4kb_offset_128: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -2032 +; RV32I-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, 128 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: .cfi_def_cfa_offset 6256 +; RV32I-NEXT: addi a0, sp, 12 +; RV32I-NEXT: call callee +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, 128 +; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: ret +; +; RV32ZBA-LABEL: frame_4kb_offset_128: +; RV32ZBA: # %bb.0: +; RV32ZBA-NEXT: addi sp, sp, -2032 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 2032 +; RV32ZBA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset ra, -4 +; RV32ZBA-NEXT: li a0, -528 +; RV32ZBA-NEXT: sh3add sp, a0, sp +; RV32ZBA-NEXT: .cfi_def_cfa_offset 6256 +; RV32ZBA-NEXT: addi a0, sp, 12 +; RV32ZBA-NEXT: call callee +; RV32ZBA-NEXT: li a0, 528 +; RV32ZBA-NEXT: sh3add sp, a0, sp +; RV32ZBA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 2032 +; RV32ZBA-NEXT: ret +; +; RV64I-LABEL: frame_4kb_offset_128: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -2032 +; RV64I-NEXT: .cfi_def_cfa_offset 2032 +; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addiw a0, a0, 128 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: .cfi_def_cfa_offset 6256 +; RV64I-NEXT: addi a0, sp, 8 +; RV64I-NEXT: call callee +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addiw a0, a0, 128 +; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 2032 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: frame_4kb_offset_128: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: addi sp, sp, -2032 +; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032 +; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64ZBA-NEXT: .cfi_offset ra, -8 +; RV64ZBA-NEXT: li a0, -528 +; RV64ZBA-NEXT: sh3add sp, a0, sp +; RV64ZBA-NEXT: 
.cfi_def_cfa_offset 6256 +; RV64ZBA-NEXT: addi a0, sp, 8 +; RV64ZBA-NEXT: call callee +; RV64ZBA-NEXT: li a0, 528 +; RV64ZBA-NEXT: sh3add sp, a0, sp +; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64ZBA-NEXT: addi sp, sp, 2032 +; RV64ZBA-NEXT: ret + %a = alloca [6240 x i8] + call void @callee(ptr %a) + ret void +} + + +;; 2^13-16+2032 +define void @frame_8kb() { +; RV32-LABEL: frame_8kb: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -2032 +; RV32-NEXT: .cfi_def_cfa_offset 2032 +; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: lui a0, 2 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa_offset 10224 +; RV32-NEXT: addi a0, sp, 12 +; RV32-NEXT: call callee +; RV32-NEXT: lui a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 2032 +; RV32-NEXT: ret +; +; RV64-LABEL: frame_8kb: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -2032 +; RV64-NEXT: .cfi_def_cfa_offset 2032 +; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: lui a0, 2 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa_offset 10224 +; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: call callee +; RV64-NEXT: lui a0, 2 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 2032 +; RV64-NEXT: ret + %a = alloca [10208 x i8] + call void @callee(ptr %a) + ret void +} + +define void @frame_8kb_offset_128() { +; RV32I-LABEL: frame_8kb_offset_128: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -2032 +; RV32I-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: lui a0, 2 +; RV32I-NEXT: addi a0, a0, 128 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: .cfi_def_cfa_offset 10352 +; RV32I-NEXT: addi a0, sp, 12 +; RV32I-NEXT: call callee +; RV32I-NEXT: lui a0, 2 +; RV32I-NEXT: addi a0, a0, 128 +; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: ret +; +; RV32ZBA-LABEL: frame_8kb_offset_128: +; RV32ZBA: # %bb.0: +; RV32ZBA-NEXT: addi sp, sp, -2032 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 2032 +; RV32ZBA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset ra, -4 +; RV32ZBA-NEXT: li a0, -1040 +; RV32ZBA-NEXT: sh3add sp, a0, sp +; RV32ZBA-NEXT: .cfi_def_cfa_offset 10352 +; RV32ZBA-NEXT: addi a0, sp, 12 +; RV32ZBA-NEXT: call callee +; RV32ZBA-NEXT: li a0, 1040 +; RV32ZBA-NEXT: sh3add sp, a0, sp +; RV32ZBA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 2032 +; RV32ZBA-NEXT: ret +; +; RV64I-LABEL: frame_8kb_offset_128: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -2032 +; RV64I-NEXT: .cfi_def_cfa_offset 2032 +; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: lui a0, 2 +; RV64I-NEXT: addiw a0, a0, 128 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: .cfi_def_cfa_offset 10352 +; RV64I-NEXT: addi a0, sp, 8 +; RV64I-NEXT: call callee +; RV64I-NEXT: lui a0, 2 +; RV64I-NEXT: addiw a0, a0, 128 +; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 2032 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: frame_8kb_offset_128: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: addi sp, sp, -2032 +; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032 +; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64ZBA-NEXT: .cfi_offset ra, -8 +; RV64ZBA-NEXT: li a0, -1040 +; 
RV64ZBA-NEXT: sh3add sp, a0, sp +; RV64ZBA-NEXT: .cfi_def_cfa_offset 10352 +; RV64ZBA-NEXT: addi a0, sp, 8 +; RV64ZBA-NEXT: call callee +; RV64ZBA-NEXT: li a0, 1040 +; RV64ZBA-NEXT: sh3add sp, a0, sp +; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64ZBA-NEXT: addi sp, sp, 2032 +; RV64ZBA-NEXT: ret + %a = alloca [10336 x i8] + call void @callee(ptr %a) + ret void +} + +define void @frame_16kb_minus_80() { +; RV32I-LABEL: frame_16kb_minus_80: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -2032 +; RV32I-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: lui a0, 4 +; RV32I-NEXT: addi a0, a0, -80 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: .cfi_def_cfa_offset 18336 +; RV32I-NEXT: addi a0, sp, 12 +; RV32I-NEXT: call callee +; RV32I-NEXT: lui a0, 4 +; RV32I-NEXT: addi a0, a0, -80 +; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: ret +; +; RV32ZBA-LABEL: frame_16kb_minus_80: +; RV32ZBA: # %bb.0: +; RV32ZBA-NEXT: addi sp, sp, -2032 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 2032 +; RV32ZBA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset ra, -4 +; RV32ZBA-NEXT: li a0, -2038 +; RV32ZBA-NEXT: sh3add sp, a0, sp +; RV32ZBA-NEXT: .cfi_def_cfa_offset 18336 +; RV32ZBA-NEXT: addi a0, sp, 12 +; RV32ZBA-NEXT: call callee +; RV32ZBA-NEXT: li a0, 2038 +; RV32ZBA-NEXT: sh3add sp, a0, sp +; RV32ZBA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 2032 +; RV32ZBA-NEXT: ret +; +; RV64I-LABEL: frame_16kb_minus_80: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -2032 +; RV64I-NEXT: .cfi_def_cfa_offset 2032 +; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: lui a0, 4 +; RV64I-NEXT: addiw a0, a0, -80 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: .cfi_def_cfa_offset 18336 +; RV64I-NEXT: addi a0, sp, 8 +; RV64I-NEXT: call callee +; RV64I-NEXT: lui a0, 4 +; RV64I-NEXT: addiw a0, a0, -80 +; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 2032 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: frame_16kb_minus_80: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: addi sp, sp, -2032 +; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032 +; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64ZBA-NEXT: .cfi_offset ra, -8 +; RV64ZBA-NEXT: li a0, -2038 +; RV64ZBA-NEXT: sh3add sp, a0, sp +; RV64ZBA-NEXT: .cfi_def_cfa_offset 18336 +; RV64ZBA-NEXT: addi a0, sp, 8 +; RV64ZBA-NEXT: call callee +; RV64ZBA-NEXT: li a0, 2038 +; RV64ZBA-NEXT: sh3add sp, a0, sp +; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64ZBA-NEXT: addi sp, sp, 2032 +; RV64ZBA-NEXT: ret + %a = alloca [18320 x i8] + call void @callee(ptr %a) + ret void +} + +;; 2^14-16+2032 +define void @frame_16kb() { +; RV32-LABEL: frame_16kb: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -2032 +; RV32-NEXT: .cfi_def_cfa_offset 2032 +; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: lui a0, 4 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa_offset 18416 +; RV32-NEXT: addi a0, sp, 12 +; RV32-NEXT: call callee +; RV32-NEXT: lui a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 2032 +; RV32-NEXT: ret +; +; RV64-LABEL: frame_16kb: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -2032 +; RV64-NEXT: .cfi_def_cfa_offset 2032 +; RV64-NEXT: sd ra, 2024(sp) # 8-byte 
Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: lui a0, 4 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa_offset 18416 +; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: call callee +; RV64-NEXT: lui a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 2032 +; RV64-NEXT: ret + %a = alloca [18400 x i8] + call void @callee(ptr %a) + ret void +} + +;; 2^15-16+2032 +define void @frame_32kb() { +; RV32-LABEL: frame_32kb: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -2032 +; RV32-NEXT: .cfi_def_cfa_offset 2032 +; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: lui a0, 8 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa_offset 34800 +; RV32-NEXT: addi a0, sp, 12 +; RV32-NEXT: call callee +; RV32-NEXT: lui a0, 8 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 2032 +; RV32-NEXT: ret +; +; RV64-LABEL: frame_32kb: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -2032 +; RV64-NEXT: .cfi_def_cfa_offset 2032 +; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: lui a0, 8 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa_offset 34800 +; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: call callee +; RV64-NEXT: lui a0, 8 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 2032 +; RV64-NEXT: ret + %a = alloca [34784 x i8] + call void @callee(ptr %a) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rv32zimop-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zimop-intrinsic.ll index e5f36086f1cfc..8e843fa47db69 100644 --- a/llvm/test/CodeGen/RISCV/rv32zimop-intrinsic.ll +++ b/llvm/test/CodeGen/RISCV/rv32zimop-intrinsic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-zimop -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+zimop -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32ZIMOP declare i32 @llvm.riscv.mopr.i32(i32 %a, i32 %b) diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index c81c6aeaab890..7e32253c8653f 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -2036,3 +2036,364 @@ define i64 @pack_i64_disjoint_2(i32 signext %a, i64 %b) nounwind { %or = or disjoint i64 %b, %zexta ret i64 %or } + +define i8 @array_index_sh1_sh0(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh1_sh0: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 1 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh1_sh0: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh1add a0, a1, a0 +; RV64ZBA-NEXT: add a0, a0, a2 +; RV64ZBA-NEXT: lbu a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [2 x i8], ptr %p, i64 %idx1, i64 %idx2 + %b = load i8, ptr %a, align 1 + ret i8 %b +} + +define i16 @array_index_sh1_sh1(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh1_sh1: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a2, a2, 1 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lh a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh1_sh1: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh2add a0, a1, a0 +; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: lh a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr 
inbounds [2 x i16], ptr %p, i64 %idx1, i64 %idx2 + %b = load i16, ptr %a, align 2 + ret i16 %b +} + +define i32 @array_index_sh1_sh2(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh1_sh2: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 3 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a2, a2, 2 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh1_sh2: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh3add a0, a1, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: lw a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [2 x i32], ptr %p, i64 %idx1, i64 %idx2 + %b = load i32, ptr %a, align 4 + ret i32 %b +} + +define i64 @array_index_sh1_sh3(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh1_sh3: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a2, a2, 3 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: ld a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh1_sh3: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a1, 4 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: ld a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [2 x i64], ptr %p, i64 %idx1, i64 %idx2 + %b = load i64, ptr %a, align 8 + ret i64 %b +} + +define i8 @array_index_sh2_sh0(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh2_sh0: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh2_sh0: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh2add a0, a1, a0 +; RV64ZBA-NEXT: add a0, a0, a2 +; RV64ZBA-NEXT: lbu a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [4 x i8], ptr %p, i64 %idx1, i64 %idx2 + %b = load i8, ptr %a, align 1 + ret i8 %b +} + +define i16 @array_index_sh2_sh1(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh2_sh1: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 3 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a2, a2, 1 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lh a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh2_sh1: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh3add a0, a1, a0 +; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: lh a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [4 x i16], ptr %p, i64 %idx1, i64 %idx2 + %b = load i16, ptr %a, align 2 + ret i16 %b +} + +define i32 @array_index_sh2_sh2(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh2_sh2: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a2, a2, 2 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh2_sh2: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a1, 4 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: lw a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [4 x i32], ptr %p, i64 %idx1, i64 %idx2 + %b = load i32, ptr %a, align 4 + ret i32 %b +} + +define i64 @array_index_sh2_sh3(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh2_sh3: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 5 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a2, a2, 3 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: ld a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh2_sh3: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a1, 5 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: 
ld a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [4 x i64], ptr %p, i64 %idx1, i64 %idx2 + %b = load i64, ptr %a, align 8 + ret i64 %b +} + +define i8 @array_index_sh3_sh0(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh3_sh0: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 3 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh3_sh0: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh3add a0, a1, a0 +; RV64ZBA-NEXT: add a0, a0, a2 +; RV64ZBA-NEXT: lbu a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [8 x i8], ptr %p, i64 %idx1, i64 %idx2 + %b = load i8, ptr %a, align 1 + ret i8 %b +} + +define i16 @array_index_sh3_sh1(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh3_sh1: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 4 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a2, a2, 1 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lh a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh3_sh1: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a1, 4 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: lh a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [8 x i16], ptr %p, i64 %idx1, i64 %idx2 + %b = load i16, ptr %a, align 2 + ret i16 %b +} + +define i32 @array_index_sh3_sh2(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh3_sh2: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 5 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a2, a2, 2 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh3_sh2: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a1, 5 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: lw a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [8 x i32], ptr %p, i64 %idx1, i64 %idx2 + %b = load i32, ptr %a, align 4 + ret i32 %b +} + +define i64 @array_index_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh3_sh3: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 6 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a2, a2, 3 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: ld a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh3_sh3: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a1, 6 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: ld a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [8 x i64], ptr %p, i64 %idx1, i64 %idx2 + %b = load i64, ptr %a, align 8 + ret i64 %b +} + +; Similar to above, but with a lshr on one of the indices. This requires +; special handling during isel to form a shift pair. 
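+; For example, the address computed below is p + (%idx1 >> 58)*64 + %idx2*8:
+; the outer [8 x i64] index needs a scale of 64 (a left shift by 6), which is
+; outside the 1-3 shift range that sh1add/sh2add/sh3add cover, so the
+; srli/slli pair is kept for %idx1 while the inner i64 index still folds
+; into sh3add.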
+define i64 @array_index_lshr_sh3_sh3(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_lshr_sh3_sh3: +; RV64I: # %bb.0: +; RV64I-NEXT: srli a1, a1, 58 +; RV64I-NEXT: slli a1, a1, 6 +; RV64I-NEXT: slli a2, a2, 3 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ld a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_lshr_sh3_sh3: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: srli a1, a1, 58 +; RV64ZBA-NEXT: slli a1, a1, 6 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: ld a0, 0(a0) +; RV64ZBA-NEXT: ret + %shr = lshr i64 %idx1, 58 + %a = getelementptr inbounds [8 x i64], ptr %p, i64 %shr, i64 %idx2 + %b = load i64, ptr %a, align 8 + ret i64 %b +} + +define i8 @array_index_sh4_sh0(ptr %p, i64 %idx1, i64 %idx2) { +; CHECK-LABEL: array_index_sh4_sh0: +; CHECK: # %bb.0: +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a0, a0, a2 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: lbu a0, 0(a0) +; CHECK-NEXT: ret + %a = getelementptr inbounds [16 x i8], ptr %p, i64 %idx1, i64 %idx2 + %b = load i8, ptr %a, align 1 + ret i8 %b +} + +define i16 @array_index_sh4_sh1(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh4_sh1: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 5 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a2, a2, 1 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lh a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh4_sh1: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a1, 5 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: lh a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [16 x i16], ptr %p, i64 %idx1, i64 %idx2 + %b = load i16, ptr %a, align 2 + ret i16 %b +} + +define i32 @array_index_sh4_sh2(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh4_sh2: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 6 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a2, a2, 2 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh4_sh2: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a1, 6 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: lw a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [16 x i32], ptr %p, i64 %idx1, i64 %idx2 + %b = load i32, ptr %a, align 4 + ret i32 %b +} + +define i64 @array_index_sh4_sh3(ptr %p, i64 %idx1, i64 %idx2) { +; RV64I-LABEL: array_index_sh4_sh3: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 7 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a2, a2, 3 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: ld a0, 0(a0) +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: array_index_sh4_sh3: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a1, 7 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: ld a0, 0(a0) +; RV64ZBA-NEXT: ret + %a = getelementptr inbounds [16 x i64], ptr %p, i64 %idx1, i64 %idx2 + %b = load i64, ptr %a, align 8 + ret i64 %b +} diff --git a/llvm/test/CodeGen/RISCV/rv64zimop-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zimop-intrinsic.ll index cd57739a955d5..a407fe552ff74 100644 --- a/llvm/test/CodeGen/RISCV/rv64zimop-intrinsic.ll +++ b/llvm/test/CodeGen/RISCV/rv64zimop-intrinsic.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-zimop -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+zimop -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV64ZIMOP declare i64 
@llvm.riscv.mopr.i64(i64 %a, i64 %b) diff --git a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll index 8898ce509ecb7..eb74e2d302f1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll @@ -24,9 +24,7 @@ define @vp_abs_nxv1i8_unmasked( %va, i32 zero ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv1i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv1i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -50,9 +48,7 @@ define @vp_abs_nxv2i8_unmasked( %va, i32 zero ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv2i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv2i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -76,9 +72,7 @@ define @vp_abs_nxv4i8_unmasked( %va, i32 zero ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv4i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv4i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -102,9 +96,7 @@ define @vp_abs_nxv8i8_unmasked( %va, i32 zero ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv8i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv8i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -128,9 +120,7 @@ define @vp_abs_nxv16i8_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v10, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv16i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv16i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -154,9 +144,7 @@ define @vp_abs_nxv32i8_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v12, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv32i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv32i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -180,9 +168,7 @@ define @vp_abs_nxv64i8_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v16, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv64i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv64i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -206,9 +192,7 @@ define @vp_abs_nxv1i16_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv1i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv1i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -232,9 +216,7 @@ define @vp_abs_nxv2i16_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; 
CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv2i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv2i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -258,9 +240,7 @@ define @vp_abs_nxv4i16_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv4i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv4i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -284,9 +264,7 @@ define @vp_abs_nxv8i16_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v10, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv8i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv8i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -310,9 +288,7 @@ define @vp_abs_nxv16i16_unmasked( %va, i3 ; CHECK-NEXT: vrsub.vi v12, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv16i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv16i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -336,9 +312,7 @@ define @vp_abs_nxv32i16_unmasked( %va, i3 ; CHECK-NEXT: vrsub.vi v16, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv32i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv32i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -362,9 +336,7 @@ define @vp_abs_nxv1i32_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv1i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv1i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -388,9 +360,7 @@ define @vp_abs_nxv2i32_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv2i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv2i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -414,9 +384,7 @@ define @vp_abs_nxv4i32_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v10, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv4i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv4i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -440,9 +408,7 @@ define @vp_abs_nxv8i32_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v12, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv8i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv8i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -466,9 +432,7 @@ define @vp_abs_nxv16i32_unmasked( %va, i3 ; CHECK-NEXT: vrsub.vi v16, v8, 0 ; CHECK-NEXT: vmax.vv v8, 
v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv16i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv16i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -492,9 +456,7 @@ define @vp_abs_nxv1i64_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv1i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv1i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -518,9 +480,7 @@ define @vp_abs_nxv2i64_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v10, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv2i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv2i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -544,9 +504,7 @@ define @vp_abs_nxv4i64_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v12, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv4i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv4i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -570,9 +528,7 @@ define @vp_abs_nxv7i64_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v16, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv7i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv7i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -596,9 +552,7 @@ define @vp_abs_nxv8i64_unmasked( %va, i32 z ; CHECK-NEXT: vrsub.vi v16, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv8i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv8i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -675,8 +629,6 @@ define @vp_abs_nxv16i64_unmasked( %va, i3 ; CHECK-NEXT: vrsub.vi v24, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.abs.nxv16i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.abs.nxv16i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll index 466ab085b266b..90794820ddd84 100644 --- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll +++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll @@ -7,29 +7,67 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK,NOMUL define void @lmul1() nounwind { -; CHECK-LABEL: lmul1: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: ret +; NOZBA-LABEL: lmul1: +; NOZBA: # %bb.0: +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a0, a0, 1 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a0, a0, 1 +; NOZBA-NEXT: add sp, sp, a0 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul1: +; ZBA: # %bb.0: +; 
ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: slli a0, a0, 1 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: sh1add sp, a0, sp +; ZBA-NEXT: ret +; +; NOMUL-LABEL: lmul1: +; NOMUL: # %bb.0: +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: add sp, sp, a0 +; NOMUL-NEXT: ret %v = alloca ret void } define void @lmul2() nounwind { -; CHECK-LABEL: lmul2: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: ret +; NOZBA-LABEL: lmul2: +; NOZBA: # %bb.0: +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a0, a0, 1 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a0, a0, 1 +; NOZBA-NEXT: add sp, sp, a0 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul2: +; ZBA: # %bb.0: +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: slli a0, a0, 1 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: sh1add sp, a0, sp +; ZBA-NEXT: ret +; +; NOMUL-LABEL: lmul2: +; NOMUL: # %bb.0: +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: add sp, sp, a0 +; NOMUL-NEXT: ret %v = alloca ret void } @@ -75,15 +113,34 @@ define void @lmul8() nounwind { } define void @lmul1_and_2() nounwind { -; CHECK-LABEL: lmul1_and_2: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: ret +; NOZBA-LABEL: lmul1_and_2: +; NOZBA: # %bb.0: +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a0, a0, 2 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a0, a0, 2 +; NOZBA-NEXT: add sp, sp, a0 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul1_and_2: +; ZBA: # %bb.0: +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: slli a0, a0, 2 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: sh2add sp, a0, sp +; ZBA-NEXT: ret +; +; NOMUL-LABEL: lmul1_and_2: +; NOMUL: # %bb.0: +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 2 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 2 +; NOMUL-NEXT: add sp, sp, a0 +; NOMUL-NEXT: ret %v1 = alloca %v2 = alloca ret void @@ -132,15 +189,34 @@ define void @lmul1_and_4() nounwind { } define void @lmul2_and_1() nounwind { -; CHECK-LABEL: lmul2_and_1: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: ret +; NOZBA-LABEL: lmul2_and_1: +; NOZBA: # %bb.0: +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a0, a0, 2 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a0, a0, 2 +; NOZBA-NEXT: add sp, sp, a0 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul2_and_1: +; ZBA: # %bb.0: +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: slli a0, a0, 2 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: sh2add sp, a0, sp +; ZBA-NEXT: ret +; +; NOMUL-LABEL: lmul2_and_1: +; NOMUL: # %bb.0: +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 2 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 2 +; NOMUL-NEXT: add sp, sp, a0 +; NOMUL-NEXT: ret %v1 = alloca %v2 = 
alloca ret void @@ -273,19 +349,46 @@ define void @lmul4_and_2_x2_1() nounwind { define void @gpr_and_lmul1_and_2() nounwind { -; CHECK-LABEL: gpr_and_lmul1_and_2: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: li a0, 3 -; CHECK-NEXT: sd a0, 8(sp) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret +; NOZBA-LABEL: gpr_and_lmul1_and_2: +; NOZBA: # %bb.0: +; NOZBA-NEXT: addi sp, sp, -16 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a0, a0, 2 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: li a0, 3 +; NOZBA-NEXT: sd a0, 8(sp) +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a0, a0, 2 +; NOZBA-NEXT: add sp, sp, a0 +; NOZBA-NEXT: addi sp, sp, 16 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: gpr_and_lmul1_and_2: +; ZBA: # %bb.0: +; ZBA-NEXT: addi sp, sp, -16 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: slli a0, a0, 2 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: li a0, 3 +; ZBA-NEXT: sd a0, 8(sp) +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: sh2add sp, a0, sp +; ZBA-NEXT: addi sp, sp, 16 +; ZBA-NEXT: ret +; +; NOMUL-LABEL: gpr_and_lmul1_and_2: +; NOMUL: # %bb.0: +; NOMUL-NEXT: addi sp, sp, -16 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 2 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: li a0, 3 +; NOMUL-NEXT: sd a0, 8(sp) +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 2 +; NOMUL-NEXT: add sp, sp, a0 +; NOMUL-NEXT: addi sp, sp, 16 +; NOMUL-NEXT: ret %x1 = alloca i64 %v1 = alloca %v2 = alloca @@ -396,15 +499,34 @@ define void @lmul_1_2_4_8_x2_1() nounwind { } define void @masks() nounwind { -; CHECK-LABEL: masks: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: ret +; NOZBA-LABEL: masks: +; NOZBA: # %bb.0: +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a0, a0, 2 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: slli a0, a0, 2 +; NOZBA-NEXT: add sp, sp, a0 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: masks: +; ZBA: # %bb.0: +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: slli a0, a0, 2 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: sh2add sp, a0, sp +; ZBA-NEXT: ret +; +; NOMUL-LABEL: masks: +; NOMUL: # %bb.0: +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 2 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 2 +; NOMUL-NEXT: add sp, sp, a0 +; NOMUL-NEXT: ret %v1 = alloca %v2 = alloca %v4 = alloca diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index 66eab2f653622..879dff4a6e490 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -70,9 +70,7 @@ define @vp_bitreverse_nxv1i8_unmasked( %va, i ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv1i8( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv1i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -138,9 +136,7 @@ define @vp_bitreverse_nxv2i8_unmasked( %va, i ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 
0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv2i8( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv2i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -206,9 +202,7 @@ define @vp_bitreverse_nxv4i8_unmasked( %va, i ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv4i8( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv4i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -274,9 +268,7 @@ define @vp_bitreverse_nxv8i8_unmasked( %va, i ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv8i8( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv8i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -342,9 +334,7 @@ define @vp_bitreverse_nxv16i8_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv16i8( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv16i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -410,9 +400,7 @@ define @vp_bitreverse_nxv32i8_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv32i8( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv32i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -478,9 +466,7 @@ define @vp_bitreverse_nxv64i8_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv64i8( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv64i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -560,9 +546,7 @@ define @vp_bitreverse_nxv1i16_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv1i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv1i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -642,9 +626,7 @@ define @vp_bitreverse_nxv2i16_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv2i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv2i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -724,9 +706,7 @@ define @vp_bitreverse_nxv4i16_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv4i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv4i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -806,9 +786,7 @@ define 
@vp_bitreverse_nxv8i16_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv8i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv8i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -888,9 +866,7 @@ define @vp_bitreverse_nxv16i16_unmasked( ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv16i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv16i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -970,9 +946,7 @@ define @vp_bitreverse_nxv32i16_unmasked( ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv32i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv32i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -1068,9 +1042,7 @@ define @vp_bitreverse_nxv1i32_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv1i32( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv1i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -1166,9 +1138,7 @@ define @vp_bitreverse_nxv2i32_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv2i32( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv2i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -1264,9 +1234,7 @@ define @vp_bitreverse_nxv4i32_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv4i32( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv4i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -1362,9 +1330,7 @@ define @vp_bitreverse_nxv8i32_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv8i32( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv8i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -1460,9 +1426,7 @@ define @vp_bitreverse_nxv16i32_unmasked( ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv16i32( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv16i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -1746,9 +1710,7 @@ define @vp_bitreverse_nxv1i64_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, 
poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv1i64( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv1i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -2032,9 +1994,7 @@ define @vp_bitreverse_nxv2i64_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv2i64( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv2i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -2318,9 +2278,7 @@ define @vp_bitreverse_nxv4i64_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv4i64( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv4i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -2678,9 +2636,7 @@ define @vp_bitreverse_nxv7i64_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv7i64( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv7i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -3038,9 +2994,7 @@ define @vp_bitreverse_nxv8i64_unmasked( %va ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv8i64( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv8i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -3237,9 +3191,7 @@ define @vp_bitreverse_nxv64i16_unmasked( ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-ZVBB-NEXT: vbrev.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bitreverse.nxv64i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bitreverse.nxv64i16( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll index 800dc7ec38859..800d06c5a78f5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -42,9 +42,7 @@ define @vp_bswap_nxv1i16_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv1i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv1i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -82,9 +80,7 @@ define @vp_bswap_nxv2i16_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv2i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv2i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -122,9 +118,7 @@ define @vp_bswap_nxv4i16_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, 
zeroinitializer - %v = call @llvm.vp.bswap.nxv4i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv4i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -162,9 +156,7 @@ define @vp_bswap_nxv8i16_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv8i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv8i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -202,9 +194,7 @@ define @vp_bswap_nxv16i16_unmasked( %va, ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv16i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv16i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -242,9 +232,7 @@ define @vp_bswap_nxv32i16_unmasked( %va, ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv32i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv32i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -298,9 +286,7 @@ define @vp_bswap_nxv1i32_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv1i32( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv1i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -354,9 +340,7 @@ define @vp_bswap_nxv2i32_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv2i32( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv2i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -410,9 +394,7 @@ define @vp_bswap_nxv4i32_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv4i32( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv4i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -466,9 +448,7 @@ define @vp_bswap_nxv8i32_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv8i32( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv8i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -522,9 +502,7 @@ define @vp_bswap_nxv16i32_unmasked( %va, ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv16i32( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv16i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -694,9 +672,7 @@ define @vp_bswap_nxv1i64_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; 
CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv1i64( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv1i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -866,9 +842,7 @@ define @vp_bswap_nxv2i64_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv2i64( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv2i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1038,9 +1012,7 @@ define @vp_bswap_nxv4i64_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv4i64( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv4i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1283,9 +1255,7 @@ define @vp_bswap_nxv7i64_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv7i64( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv7i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1528,9 +1498,7 @@ define @vp_bswap_nxv8i64_unmasked( %va, i32 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv8i64( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv8i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1655,9 +1623,7 @@ define @vp_bswap_nxv64i16_unmasked( %va, ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-ZVKB-NEXT: vrev8.v v8, v8 ; CHECK-ZVKB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.bswap.nxv64i16( %va, %m, i32 %evl) + %v = call @llvm.vp.bswap.nxv64i16( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll index edc348ebc68ff..5b271606f08ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -42,9 +42,7 @@ define @vp_ceil_vv_nxv1f16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv1f16( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -86,9 +84,7 @@ define @vp_ceil_vv_nxv2f16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv2f16( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -130,9 +126,7 @@ define @vp_ceil_vv_nxv4f16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, 
poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv4f16( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -176,9 +170,7 @@ define @vp_ceil_vv_nxv8f16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv8f16( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -222,9 +214,7 @@ define @vp_ceil_vv_nxv16f16_unmasked( % ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv16f16( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -268,9 +258,7 @@ define @vp_ceil_vv_nxv32f16_unmasked( % ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -312,9 +300,7 @@ define @vp_ceil_vv_nxv1f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv1f32( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv1f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -356,9 +342,7 @@ define @vp_ceil_vv_nxv2f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv2f32( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv2f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -402,9 +386,7 @@ define @vp_ceil_vv_nxv4f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv4f32( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv4f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -448,9 +430,7 @@ define @vp_ceil_vv_nxv8f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv8f32( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv8f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -494,9 +474,7 @@ define @vp_ceil_vv_nxv16f32_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv16f32( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv16f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -538,9 +516,7 @@ define @vp_ceil_vv_nxv1f64_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 
true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv1f64( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -584,9 +560,7 @@ define @vp_ceil_vv_nxv2f64_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv2f64( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -630,9 +604,7 @@ define @vp_ceil_vv_nxv4f64_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv4f64( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -676,9 +648,7 @@ define @vp_ceil_vv_nxv7f64_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv7f64( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -722,9 +692,7 @@ define @vp_ceil_vv_nxv8f64_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv8f64( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -837,8 +805,6 @@ define @vp_ceil_vv_nxv16f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ceil.nxv16f64( %va, %m, i32 %evl) + %v = call @llvm.vp.ceil.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/cmp-folds.ll b/llvm/test/CodeGen/RISCV/rvv/cmp-folds.ll index 7831512c658e7..4c40b7c74451d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cmp-folds.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cmp-folds.ll @@ -9,9 +9,7 @@ define @not_icmp_sle_nxv8i16( %a, %a, %b - %tmp = insertelement poison, i1 true, i32 0 - %ones = shufflevector %tmp, poison, zeroinitializer - %not = xor %ones, %icmp + %not = xor splat (i1 true), %icmp ret %not } @@ -22,9 +20,7 @@ define @not_icmp_sgt_nxv4i32( %a, %a, %b - %tmp = insertelement poison, i1 true, i32 0 - %ones = shufflevector %tmp, poison, zeroinitializer - %not = xor %icmp, %ones + %not = xor %icmp, splat (i1 true) ret %not } @@ -35,9 +31,7 @@ define @not_fcmp_une_nxv2f64( %a, %a, %b - %tmp = insertelement poison, i1 true, i32 0 - %ones = shufflevector %tmp, poison, zeroinitializer - %not = xor %icmp, %ones + %not = xor %icmp, splat (i1 true) ret %not } @@ -48,8 +42,6 @@ define @not_fcmp_uge_nxv4f32( %a, %a, %b - %tmp = insertelement poison, i1 true, i32 0 - %ones = shufflevector %tmp, poison, zeroinitializer - %not = xor %icmp, %ones + %not = xor %icmp, splat (i1 true) ret %not } diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll index 46bedcd4e9666..8f917becafec0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll @@ -25,12 +25,8 @@ define @add_umax_nxv2i64( %a0) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; 
CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %ins1 = insertelement poison, i64 7, i32 0 - %splat1 = shufflevector %ins1, poison, zeroinitializer - %ins2 = insertelement poison, i64 -7, i32 0 - %splat2 = shufflevector %ins2, poison, zeroinitializer - %v1 = call @llvm.umax.nxv2i64( %a0, %splat1) - %v2 = add %v1, %splat2 + %v1 = call @llvm.umax.nxv2i64( %a0, splat (i64 7)) + %v2 = add %v1, splat (i64 -7) ret %v2 } @@ -162,12 +158,8 @@ define @vselect_add_const_nxv2i64( %a0) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %cm1 = insertelement poison, i64 -6, i32 0 - %splatcm1 = shufflevector %cm1, poison, zeroinitializer - %nc = insertelement poison, i64 5, i32 0 - %splatnc = shufflevector %nc, poison, zeroinitializer - %v1 = add %a0, %splatcm1 - %cmp = icmp ugt %a0, %splatnc + %v1 = add %a0, splat (i64 -6) + %cmp = icmp ugt %a0, splat (i64 5) %v2 = select %cmp, %v1, zeroinitializer ret %v2 } @@ -194,12 +186,8 @@ define @vselect_add_const_signbit_nxv2i16( ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %cm1 = insertelement poison, i16 32766, i32 0 - %splatcm1 = shufflevector %cm1, poison, zeroinitializer - %nc = insertelement poison, i16 -32767, i32 0 - %splatnc = shufflevector %nc, poison, zeroinitializer - %cmp = icmp ugt %a0, %splatcm1 - %v1 = add %a0, %splatnc + %cmp = icmp ugt %a0, splat (i16 32766) + %v1 = add %a0, splat (i16 -32767) %v2 = select %cmp, %v1, zeroinitializer ret %v2 } @@ -227,9 +215,7 @@ define @vselect_xor_const_signbit_nxv2i16( ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret %cmp = icmp slt %a0, zeroinitializer - %ins = insertelement poison, i16 -32768, i32 0 - %splat = shufflevector %ins, poison, zeroinitializer - %v1 = xor %a0, %splat + %v1 = xor %a0, splat (i16 -32768) %v2 = select %cmp, %v1, zeroinitializer ret %v2 } @@ -259,9 +245,7 @@ define @vselect_add_nxv2i64( %a0, %a0, %a1 %cmp = icmp ule %a0, %v1 - %allones = insertelement poison, i64 -1, i32 0 - %splatallones = shufflevector %allones, poison, zeroinitializer - %v2 = select %cmp, %v1, %splatallones + %v2 = select %cmp, %v1, splat (i64 -1) ret %v2 } @@ -286,15 +270,9 @@ define @vselect_add_const_2_nxv2i64( %a0) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 6 ; CHECK-NEXT: ret - %cm1 = insertelement poison, i64 6, i32 0 - %splatcm1 = shufflevector %cm1, poison, zeroinitializer - %nc = insertelement poison, i64 -7, i32 0 - %splatnc = shufflevector %nc, poison, zeroinitializer - %v1 = add %a0, %splatcm1 - %cmp = icmp ule %a0, %splatnc - %allones = insertelement poison, i64 -1, i32 0 - %splatallones = shufflevector %allones, poison, zeroinitializer - %v2 = select %cmp, %v1, %splatallones + %v1 = add %a0, splat (i64 6) + %cmp = icmp ule %a0, splat (i64 -7) + %v2 = select %cmp, %v1, splat (i64 -1) ret %v2 } diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll index 80b6c8ced3a45..5a67d0fbebf31 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll @@ -10,12 +10,8 @@ define @and_or_nxv4i32( %A) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 8 ; CHECK-NEXT: ret - %ins1 = insertelement poison, i32 255, i32 0 - %splat1 = shufflevector %ins1, poison, zeroinitializer - %ins2 = insertelement poison, i32 8, i32 0 - %splat2 = shufflevector %ins2, poison, zeroinitializer - %v1 = or %A, %splat1 - %v2 = and %v1, %splat2 + %v1 = or 
%A, splat (i32 255) + %v2 = and %v1, splat (i32 8) ret %v2 } @@ -28,12 +24,8 @@ define @or_and_nxv2i64( %a0) { ; CHECK-NEXT: vor.vi v8, v8, 3 ; CHECK-NEXT: vand.vi v8, v8, 7 ; CHECK-NEXT: ret - %ins1 = insertelement poison, i64 7, i32 0 - %splat1 = shufflevector %ins1, poison, zeroinitializer - %ins2 = insertelement poison, i64 3, i32 0 - %splat2 = shufflevector %ins2, poison, zeroinitializer - %v1 = and %a0, %splat1 - %v2 = or %v1, %splat2 + %v1 = and %a0, splat (i64 7) + %v2 = or %v1, splat (i64 3) ret %v2 } @@ -45,12 +37,8 @@ define @or_and_nxv2i64_fold( %a0) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 3 ; CHECK-NEXT: ret - %ins1 = insertelement poison, i64 1, i32 0 - %splat1 = shufflevector %ins1, poison, zeroinitializer - %ins2 = insertelement poison, i64 3, i32 0 - %splat2 = shufflevector %ins2, poison, zeroinitializer - %v1 = and %a0, %splat1 - %v2 = or %v1, %splat2 + %v1 = and %a0, splat (i64 1) + %v2 = or %v1, splat (i64 3) ret %v2 } @@ -62,12 +50,8 @@ define @combine_vec_shl_shl( %x) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %ins1 = insertelement poison, i32 2, i32 0 - %splat1 = shufflevector %ins1, poison, zeroinitializer - %ins2 = insertelement poison, i32 4, i32 0 - %splat2 = shufflevector %ins2, poison, zeroinitializer - %v1 = shl %x, %splat1 - %v2 = shl %v1, %splat2 + %v1 = shl %x, splat (i32 2) + %v2 = shl %v1, splat (i32 4) ret %v2 } @@ -79,12 +63,8 @@ define @combine_vec_ashr_ashr( %x) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 6 ; CHECK-NEXT: ret - %ins1 = insertelement poison, i32 2, i32 0 - %splat1 = shufflevector %ins1, poison, zeroinitializer - %ins2 = insertelement poison, i32 4, i32 0 - %splat2 = shufflevector %ins2, poison, zeroinitializer - %v1 = ashr %x, %splat1 - %v2 = ashr %v1, %splat2 + %v1 = ashr %x, splat (i32 2) + %v2 = ashr %v1, splat (i32 4) ret %v2 } @@ -96,12 +76,8 @@ define @combine_vec_lshr_lshr( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 8 ; CHECK-NEXT: ret - %ins1 = insertelement poison, i16 2, i32 0 - %splat1 = shufflevector %ins1, poison, zeroinitializer - %ins2 = insertelement poison, i16 4, i32 0 - %splat2 = shufflevector %ins2, poison, zeroinitializer - %v1 = lshr %x, %splat2 - %v2 = lshr %v1, %splat2 + %v1 = lshr %x, splat (i16 4) + %v2 = lshr %v1, splat (i16 4) ret %v2 } @@ -110,9 +86,7 @@ define @combine_fmul_one( %x) { ; CHECK-LABEL: combine_fmul_one: ; CHECK: # %bb.0: ; CHECK-NEXT: ret - %ins = insertelement poison, float 1.0, i32 0 - %splat = shufflevector %ins, poison, zeroinitializer - %v = fmul %x, %splat + %v = fmul %x, splat (float 1.0) ret %v } @@ -121,8 +95,6 @@ define @combine_fmul_one_commuted( %x) ; CHECK-LABEL: combine_fmul_one_commuted: ; CHECK: # %bb.0: ; CHECK-NEXT: ret - %ins = insertelement poison, float 1.0, i32 0 - %splat = shufflevector %ins, poison, zeroinitializer - %v = fmul %splat, %x + %v = fmul splat (float 1.0), %x ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll index b3f561a52f411..05747ff0d049a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll +++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll @@ -21,9 +21,7 @@ define <2 x i16> @fixedlen(<2 x i32> %x) { ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v8, 0 ; CHECK-NEXT: ret - %v41 = insertelement <2 x i32> poison, i32 16, i32 0 - %v42 = shufflevector <2 x i32> %v41, <2 x i32> 
poison, <2 x i32> zeroinitializer - %v43 = lshr <2 x i32> %x, %v42 + %v43 = lshr <2 x i32> %x, splat (i32 16) %v44 = trunc <2 x i32> %v43 to <2 x i16> %v45 = insertelement <2 x i32> poison, i32 -32768, i32 0 %v46 = shufflevector <2 x i32> %v45, <2 x i32> poison, <2 x i32> zeroinitializer @@ -40,9 +38,7 @@ define @scalable( %x) { ; CHECK-NEXT: lui a0, 1048568 ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %v41 = insertelement poison, i32 16, i32 0 - %v42 = shufflevector %v41, poison, zeroinitializer - %v43 = lshr %x, %v42 + %v43 = lshr %x, splat (i32 16) %v44 = trunc %v43 to %v45 = insertelement poison, i32 -32768, i32 0 %v46 = shufflevector %v45, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll index fc94f8c2a5279..d756cfcf70772 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I ; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I -; RUN: llc -mtriple=riscv32 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32F +; RUN: llc -mtriple=riscv64 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64F ; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB @@ -1229,21 +1229,36 @@ define @ctlz_nxv1i64( %va) { ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret ; -; CHECK-F-LABEL: ctlz_nxv1i64: -; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-F-NEXT: vmv.v.x v9, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8 -; CHECK-F-NEXT: vsrl.vi v8, v10, 23 -; CHECK-F-NEXT: vwsubu.wv v9, v9, v8 -; CHECK-F-NEXT: li a1, 64 -; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-F-NEXT: vminu.vx v8, v9, a1 -; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: ret +; RV32F-LABEL: ctlz_nxv1i64: +; RV32F: # %bb.0: +; RV32F-NEXT: li a0, 190 +; RV32F-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32F-NEXT: vmv.v.x v9, a0 +; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32F-NEXT: fsrmi a0, 1 +; RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; RV32F-NEXT: vsrl.vi v8, v10, 23 +; RV32F-NEXT: vwsubu.wv v9, v9, v8 +; RV32F-NEXT: li a1, 64 +; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32F-NEXT: vminu.vx v8, v9, a1 +; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: ret +; +; RV64F-LABEL: ctlz_nxv1i64: +; RV64F: # %bb.0: +; RV64F-NEXT: li a0, 190 +; RV64F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; RV64F-NEXT: vmv.v.x v9, a0 +; RV64F-NEXT: fsrmi a0, 1 +; RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; RV64F-NEXT: vsrl.vi v8, v10, 23 +; RV64F-NEXT: vwsubu.vv v10, v9, 
v8 +; RV64F-NEXT: li a1, 64 +; RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV64F-NEXT: vminu.vx v8, v10, a1 +; RV64F-NEXT: fsrm a0 +; RV64F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv1i64: ; CHECK-D: # %bb.0: @@ -1370,21 +1385,36 @@ define @ctlz_nxv2i64( %va) { ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret ; -; CHECK-F-LABEL: ctlz_nxv2i64: -; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; CHECK-F-NEXT: vmv.v.x v10, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8 -; CHECK-F-NEXT: vsrl.vi v8, v12, 23 -; CHECK-F-NEXT: vwsubu.wv v10, v10, v8 -; CHECK-F-NEXT: li a1, 64 -; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-F-NEXT: vminu.vx v8, v10, a1 -; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: ret +; RV32F-LABEL: ctlz_nxv2i64: +; RV32F: # %bb.0: +; RV32F-NEXT: li a0, 190 +; RV32F-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32F-NEXT: vmv.v.x v10, a0 +; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV32F-NEXT: fsrmi a0, 1 +; RV32F-NEXT: vfncvt.f.xu.w v12, v8 +; RV32F-NEXT: vsrl.vi v8, v12, 23 +; RV32F-NEXT: vwsubu.wv v10, v10, v8 +; RV32F-NEXT: li a1, 64 +; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32F-NEXT: vminu.vx v8, v10, a1 +; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: ret +; +; RV64F-LABEL: ctlz_nxv2i64: +; RV64F: # %bb.0: +; RV64F-NEXT: li a0, 190 +; RV64F-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV64F-NEXT: vmv.v.x v10, a0 +; RV64F-NEXT: fsrmi a0, 1 +; RV64F-NEXT: vfncvt.f.xu.w v11, v8 +; RV64F-NEXT: vsrl.vi v8, v11, 23 +; RV64F-NEXT: vwsubu.vv v12, v10, v8 +; RV64F-NEXT: li a1, 64 +; RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV64F-NEXT: vminu.vx v8, v12, a1 +; RV64F-NEXT: fsrm a0 +; RV64F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv2i64: ; CHECK-D: # %bb.0: @@ -1511,21 +1541,36 @@ define @ctlz_nxv4i64( %va) { ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret ; -; CHECK-F-LABEL: ctlz_nxv4i64: -; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; CHECK-F-NEXT: vmv.v.x v12, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8 -; CHECK-F-NEXT: vsrl.vi v8, v16, 23 -; CHECK-F-NEXT: vwsubu.wv v12, v12, v8 -; CHECK-F-NEXT: li a1, 64 -; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-F-NEXT: vminu.vx v8, v12, a1 -; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: ret +; RV32F-LABEL: ctlz_nxv4i64: +; RV32F: # %bb.0: +; RV32F-NEXT: li a0, 190 +; RV32F-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32F-NEXT: vmv.v.x v12, a0 +; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32F-NEXT: fsrmi a0, 1 +; RV32F-NEXT: vfncvt.f.xu.w v16, v8 +; RV32F-NEXT: vsrl.vi v8, v16, 23 +; RV32F-NEXT: vwsubu.wv v12, v12, v8 +; RV32F-NEXT: li a1, 64 +; RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32F-NEXT: vminu.vx v8, v12, a1 +; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: ret +; +; RV64F-LABEL: ctlz_nxv4i64: +; RV64F: # %bb.0: +; RV64F-NEXT: li a0, 190 +; RV64F-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV64F-NEXT: vmv.v.x v12, a0 +; RV64F-NEXT: fsrmi a0, 1 +; RV64F-NEXT: vfncvt.f.xu.w v14, v8 +; RV64F-NEXT: vsrl.vi v8, v14, 23 +; RV64F-NEXT: vwsubu.vv v16, v12, v8 +; RV64F-NEXT: li a1, 64 +; RV64F-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV64F-NEXT: vminu.vx v8, v16, a1 +; RV64F-NEXT: fsrm a0 +; RV64F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv4i64: ; CHECK-D: # %bb.0: @@ -1652,21 +1697,36 @@ define @ctlz_nxv8i64( %va) { ; RV64I-NEXT: vsrl.vx v8, 
v8, a0 ; RV64I-NEXT: ret ; -; CHECK-F-LABEL: ctlz_nxv8i64: -; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; CHECK-F-NEXT: vmv.v.x v16, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v24, v8 -; CHECK-F-NEXT: vsrl.vi v8, v24, 23 -; CHECK-F-NEXT: vwsubu.wv v16, v16, v8 -; CHECK-F-NEXT: li a1, 64 -; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-F-NEXT: vminu.vx v8, v16, a1 -; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: ret +; RV32F-LABEL: ctlz_nxv8i64: +; RV32F: # %bb.0: +; RV32F-NEXT: li a0, 190 +; RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32F-NEXT: vmv.v.x v16, a0 +; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32F-NEXT: fsrmi a0, 1 +; RV32F-NEXT: vfncvt.f.xu.w v24, v8 +; RV32F-NEXT: vsrl.vi v8, v24, 23 +; RV32F-NEXT: vwsubu.wv v16, v16, v8 +; RV32F-NEXT: li a1, 64 +; RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32F-NEXT: vminu.vx v8, v16, a1 +; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: ret +; +; RV64F-LABEL: ctlz_nxv8i64: +; RV64F: # %bb.0: +; RV64F-NEXT: li a0, 190 +; RV64F-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV64F-NEXT: vmv.v.x v16, a0 +; RV64F-NEXT: fsrmi a0, 1 +; RV64F-NEXT: vfncvt.f.xu.w v20, v8 +; RV64F-NEXT: vsrl.vi v8, v20, 23 +; RV64F-NEXT: vwsubu.vv v24, v16, v8 +; RV64F-NEXT: li a1, 64 +; RV64F-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64F-NEXT: vminu.vx v8, v24, a1 +; RV64F-NEXT: fsrm a0 +; RV64F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv8i64: ; CHECK-D: # %bb.0: @@ -2835,19 +2895,31 @@ define @ctlz_zero_undef_nxv1i64( %va) { ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret ; -; CHECK-F-LABEL: ctlz_zero_undef_nxv1i64: -; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-F-NEXT: vmv.v.x v9, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8 -; CHECK-F-NEXT: vsrl.vi v8, v10, 23 -; CHECK-F-NEXT: vwsubu.wv v9, v9, v8 -; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: vmv1r.v v8, v9 -; CHECK-F-NEXT: ret +; RV32F-LABEL: ctlz_zero_undef_nxv1i64: +; RV32F: # %bb.0: +; RV32F-NEXT: li a0, 190 +; RV32F-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; RV32F-NEXT: vmv.v.x v9, a0 +; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32F-NEXT: fsrmi a0, 1 +; RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; RV32F-NEXT: vsrl.vi v8, v10, 23 +; RV32F-NEXT: vwsubu.wv v9, v9, v8 +; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: vmv1r.v v8, v9 +; RV32F-NEXT: ret +; +; RV64F-LABEL: ctlz_zero_undef_nxv1i64: +; RV64F: # %bb.0: +; RV64F-NEXT: li a0, 190 +; RV64F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; RV64F-NEXT: vmv.v.x v9, a0 +; RV64F-NEXT: fsrmi a0, 1 +; RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; RV64F-NEXT: vsrl.vi v10, v10, 23 +; RV64F-NEXT: vwsubu.vv v8, v9, v10 +; RV64F-NEXT: fsrm a0 +; RV64F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv1i64: ; CHECK-D: # %bb.0: @@ -2971,19 +3043,31 @@ define @ctlz_zero_undef_nxv2i64( %va) { ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret ; -; CHECK-F-LABEL: ctlz_zero_undef_nxv2i64: -; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; CHECK-F-NEXT: vmv.v.x v10, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8 -; CHECK-F-NEXT: vsrl.vi v8, v12, 23 -; CHECK-F-NEXT: vwsubu.wv v10, v10, v8 -; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: vmv2r.v v8, v10 -; CHECK-F-NEXT: ret +; RV32F-LABEL: 
ctlz_zero_undef_nxv2i64: +; RV32F: # %bb.0: +; RV32F-NEXT: li a0, 190 +; RV32F-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; RV32F-NEXT: vmv.v.x v10, a0 +; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV32F-NEXT: fsrmi a0, 1 +; RV32F-NEXT: vfncvt.f.xu.w v12, v8 +; RV32F-NEXT: vsrl.vi v8, v12, 23 +; RV32F-NEXT: vwsubu.wv v10, v10, v8 +; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: vmv2r.v v8, v10 +; RV32F-NEXT: ret +; +; RV64F-LABEL: ctlz_zero_undef_nxv2i64: +; RV64F: # %bb.0: +; RV64F-NEXT: li a0, 190 +; RV64F-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; RV64F-NEXT: vmv.v.x v10, a0 +; RV64F-NEXT: fsrmi a0, 1 +; RV64F-NEXT: vfncvt.f.xu.w v11, v8 +; RV64F-NEXT: vsrl.vi v11, v11, 23 +; RV64F-NEXT: vwsubu.vv v8, v10, v11 +; RV64F-NEXT: fsrm a0 +; RV64F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv2i64: ; CHECK-D: # %bb.0: @@ -3107,19 +3191,31 @@ define @ctlz_zero_undef_nxv4i64( %va) { ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret ; -; CHECK-F-LABEL: ctlz_zero_undef_nxv4i64: -; CHECK-F: # %bb.0: -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; CHECK-F-NEXT: vmv.v.x v12, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8 -; CHECK-F-NEXT: vsrl.vi v8, v16, 23 -; CHECK-F-NEXT: vwsubu.wv v12, v12, v8 -; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: vmv4r.v v8, v12 -; CHECK-F-NEXT: ret +; RV32F-LABEL: ctlz_zero_undef_nxv4i64: +; RV32F: # %bb.0: +; RV32F-NEXT: li a0, 190 +; RV32F-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; RV32F-NEXT: vmv.v.x v12, a0 +; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32F-NEXT: fsrmi a0, 1 +; RV32F-NEXT: vfncvt.f.xu.w v16, v8 +; RV32F-NEXT: vsrl.vi v8, v16, 23 +; RV32F-NEXT: vwsubu.wv v12, v12, v8 +; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: vmv4r.v v8, v12 +; RV32F-NEXT: ret +; +; RV64F-LABEL: ctlz_zero_undef_nxv4i64: +; RV64F: # %bb.0: +; RV64F-NEXT: li a0, 190 +; RV64F-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; RV64F-NEXT: vmv.v.x v12, a0 +; RV64F-NEXT: fsrmi a0, 1 +; RV64F-NEXT: vfncvt.f.xu.w v14, v8 +; RV64F-NEXT: vsrl.vi v14, v14, 23 +; RV64F-NEXT: vwsubu.vv v8, v12, v14 +; RV64F-NEXT: fsrm a0 +; RV64F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i64: ; CHECK-D: # %bb.0: @@ -3243,19 +3339,31 @@ define @ctlz_zero_undef_nxv8i64( %va) { ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret ; -; CHECK-F-LABEL: ctlz_zero_undef_nxv8i64: -; CHECK-F: # %bb.0: -; CHECK-F-NEXT: vmv8r.v v16, v8 -; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; CHECK-F-NEXT: vmv.v.x v8, a0 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v24, v16 -; CHECK-F-NEXT: vsrl.vi v16, v24, 23 -; CHECK-F-NEXT: vwsubu.wv v8, v8, v16 -; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: ret +; RV32F-LABEL: ctlz_zero_undef_nxv8i64: +; RV32F: # %bb.0: +; RV32F-NEXT: vmv8r.v v16, v8 +; RV32F-NEXT: li a0, 190 +; RV32F-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32F-NEXT: vmv.v.x v8, a0 +; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32F-NEXT: fsrmi a0, 1 +; RV32F-NEXT: vfncvt.f.xu.w v24, v16 +; RV32F-NEXT: vsrl.vi v16, v24, 23 +; RV32F-NEXT: vwsubu.wv v8, v8, v16 +; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: ret +; +; RV64F-LABEL: ctlz_zero_undef_nxv8i64: +; RV64F: # %bb.0: +; RV64F-NEXT: li a0, 190 +; RV64F-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; RV64F-NEXT: vmv.v.x v16, a0 +; RV64F-NEXT: fsrmi a0, 1 +; RV64F-NEXT: vfncvt.f.xu.w v20, v8 +; RV64F-NEXT: vsrl.vi v20, v20, 23 +; RV64F-NEXT: vwsubu.vv v8, v16, v20 +; RV64F-NEXT: 
fsrm a0 +; RV64F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv8i64: ; CHECK-D: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index e4f030a642f7d..2a75e5ce7175d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -57,9 +57,7 @@ define @vp_ctlz_nxv1i8_unmasked( %va, i32 zer ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv1i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -112,9 +110,7 @@ define @vp_ctlz_nxv2i8_unmasked( %va, i32 zer ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -167,9 +163,7 @@ define @vp_ctlz_nxv4i8_unmasked( %va, i32 zer ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv4i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -222,9 +216,7 @@ define @vp_ctlz_nxv8i8_unmasked( %va, i32 zer ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -277,9 +269,7 @@ define @vp_ctlz_nxv16i8_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -349,9 +339,7 @@ define @vp_ctlz_nxv32i8_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv32i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv32i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -421,9 +409,7 @@ define @vp_ctlz_nxv64i8_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv64i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv64i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -470,9 +456,7 @@ define @vp_ctlz_nxv1i16_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.ctlz.nxv1i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -519,9 +503,7 @@ define @vp_ctlz_nxv2i16_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -568,9 +550,7 @@ define @vp_ctlz_nxv4i16_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -617,9 +597,7 @@ define @vp_ctlz_nxv8i16_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -666,9 +644,7 @@ define @vp_ctlz_nxv16i16_unmasked( %va, i ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -756,9 +732,7 @@ define @vp_ctlz_nxv32i16_unmasked( %va, i ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -807,9 +781,7 @@ define @vp_ctlz_nxv1i32_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -858,9 +830,7 @@ define @vp_ctlz_nxv2i32_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -909,9 +879,7 @@ define @vp_ctlz_nxv4i32_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -960,9 +928,7 @@ define 
@vp_ctlz_nxv8i32_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1010,9 +976,7 @@ define @vp_ctlz_nxv16i32_unmasked( %va, i ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1062,9 +1026,7 @@ define @vp_ctlz_nxv1i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1114,9 +1076,7 @@ define @vp_ctlz_nxv2i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1166,9 +1126,7 @@ define @vp_ctlz_nxv4i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1218,9 +1176,7 @@ define @vp_ctlz_nxv7i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1270,9 +1226,7 @@ define @vp_ctlz_nxv8i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1386,9 +1340,7 @@ define @vp_ctlz_nxv16i64_unmasked( %va, i ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1435,9 +1387,7 @@ define @vp_ctlz_zero_undef_nxv1i8_unmasked( % ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = 
insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv1i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -1485,9 +1435,7 @@ define @vp_ctlz_zero_undef_nxv2i8_unmasked( % ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv2i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -1535,9 +1483,7 @@ define @vp_ctlz_zero_undef_nxv4i8_unmasked( % ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv4i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -1585,9 +1531,7 @@ define @vp_ctlz_zero_undef_nxv8i8_unmasked( % ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-ZVBB-NEXT: vclz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv8i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -1635,9 +1579,7 @@ define @vp_ctlz_zero_undef_nxv16i8_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv16i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -1706,9 +1648,7 @@ define @vp_ctlz_zero_undef_nxv32i8_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv32i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv32i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -1777,9 +1717,7 @@ define @vp_ctlz_zero_undef_nxv64i8_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv64i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv64i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -1821,9 +1759,7 @@ define @vp_ctlz_zero_undef_nxv1i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv1i16( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -1865,9 +1801,7 @@ define @vp_ctlz_zero_undef_nxv2i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv2i16( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -1909,9 +1843,7 @@ define @vp_ctlz_zero_undef_nxv4i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv4i16( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -1953,9 +1885,7 @@ define @vp_ctlz_zero_undef_nxv8i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv8i16( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -1997,9 +1927,7 @@ 
define @vp_ctlz_zero_undef_nxv16i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv16i16( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2086,9 +2014,7 @@ define @vp_ctlz_zero_undef_nxv32i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv32i16( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2132,9 +2058,7 @@ define @vp_ctlz_zero_undef_nxv1i32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv1i32( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2178,9 +2102,7 @@ define @vp_ctlz_zero_undef_nxv2i32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv2i32( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2224,9 +2146,7 @@ define @vp_ctlz_zero_undef_nxv4i32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv4i32( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2270,9 +2190,7 @@ define @vp_ctlz_zero_undef_nxv8i32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv8i32( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2315,9 +2233,7 @@ define @vp_ctlz_zero_undef_nxv16i32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv16i32( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2362,9 +2278,7 @@ define @vp_ctlz_zero_undef_nxv1i64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv1i64( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2409,9 +2323,7 @@ define @vp_ctlz_zero_undef_nxv2i64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv2i64( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2456,9 +2368,7 @@ define @vp_ctlz_zero_undef_nxv4i64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv4i64( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2503,9 +2413,7 @@ define @vp_ctlz_zero_undef_nxv7i64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv7i64( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2550,9 +2458,7 @@ define @vp_ctlz_zero_undef_nxv8i64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv8i64( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2658,9 +2564,7 @@ define @vp_ctlz_zero_undef_nxv16i64_unmasked( poison, i1 true, 
i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 true, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll index 2310f85b1fba9..883f68aec1f42 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -60,9 +60,7 @@ define @vp_ctpop_nxv1i8_unmasked( %va, i32 ze ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv1i8( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv1i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -118,9 +116,7 @@ define @vp_ctpop_nxv2i8_unmasked( %va, i32 ze ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv2i8( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv2i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -176,9 +172,7 @@ define @vp_ctpop_nxv4i8_unmasked( %va, i32 ze ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv4i8( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv4i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -234,9 +228,7 @@ define @vp_ctpop_nxv8i8_unmasked( %va, i32 ze ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv8i8( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv8i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -292,9 +284,7 @@ define @vp_ctpop_nxv16i8_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv16i8( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv16i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -350,9 +340,7 @@ define @vp_ctpop_nxv32i8_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv32i8( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv32i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -408,9 +396,7 @@ define @vp_ctpop_nxv64i8_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv64i8( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv64i8( %va, splat (i1 true), i32 %evl) ret %v } @@ -480,9 +466,7 @@ define @vp_ctpop_nxv1i16_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.ctpop.nxv1i16( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv1i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -552,9 +536,7 @@ define @vp_ctpop_nxv2i16_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv2i16( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv2i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -624,9 +606,7 @@ define @vp_ctpop_nxv4i16_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv4i16( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv4i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -696,9 +676,7 @@ define @vp_ctpop_nxv8i16_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv8i16( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv8i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -768,9 +746,7 @@ define @vp_ctpop_nxv16i16_unmasked( %va, ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv16i16( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv16i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -840,9 +816,7 @@ define @vp_ctpop_nxv32i16_unmasked( %va, ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv32i16( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv32i16( %va, splat (i1 true), i32 %evl) ret %v } @@ -914,9 +888,7 @@ define @vp_ctpop_nxv1i32_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv1i32( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv1i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -988,9 +960,7 @@ define @vp_ctpop_nxv2i32_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv2i32( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv2i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -1062,9 +1032,7 @@ define @vp_ctpop_nxv4i32_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv4i32( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv4i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -1136,9 +1104,7 @@ define @vp_ctpop_nxv8i32_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = 
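; NOTE on the CHECK-ZVBB blocks in these ctpop tests: each test runs once with
; the base vector extension and once with +zvbb. With Zvbb, @llvm.vp.ctpop
; lowers to a single vcpop.v (and @llvm.vp.cttz, later in this patch, to a
; single vctz.v), so the CHECK-ZVBB body is just a vsetvli plus one
; instruction. A minimal, self-contained form of the IR these hunks now use
; (the function name is illustrative; nxv1i16 shown, the other element types
; and LMULs follow the same shape):
define <vscale x 1 x i16> @vp_ctpop_unmasked_example(<vscale x 1 x i16> %va, i32 zeroext %evl) {
  %v = call <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i16> %v
}
declare <vscale x 1 x i16> @llvm.vp.ctpop.nxv1i16(<vscale x 1 x i16>, <vscale x 1 x i1>, i32)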
insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv8i32( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv8i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -1210,9 +1176,7 @@ define @vp_ctpop_nxv16i32_unmasked( %va, ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv16i32( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv16i32( %va, splat (i1 true), i32 %evl) ret %v } @@ -1378,9 +1342,7 @@ define @vp_ctpop_nxv1i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv1i64( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv1i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1546,9 +1508,7 @@ define @vp_ctpop_nxv2i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv2i64( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv2i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1714,9 +1674,7 @@ define @vp_ctpop_nxv4i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv4i64( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv4i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1882,9 +1840,7 @@ define @vp_ctpop_nxv7i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv7i64( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv7i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -2050,9 +2006,7 @@ define @vp_ctpop_nxv8i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv8i64( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv8i64( %va, splat (i1 true), i32 %evl) ret %v } @@ -2581,9 +2535,7 @@ define @vp_ctpop_nxv16i64_unmasked( %va, ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vcpop.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ctpop.nxv16i64( %va, %m, i32 %evl) + %v = call @llvm.vp.ctpop.nxv16i64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll index b14cde25aa85b..d13f4d2dca1ff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll @@ -1241,13 +1241,12 @@ define @cttz_nxv1i64( %va) { ; RV64F-NEXT: fsrmi a0, 1 ; RV64F-NEXT: vfncvt.f.xu.w v10, v9 ; RV64F-NEXT: vsrl.vi v9, v10, 23 -; RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; RV64F-NEXT: vzext.vf2 v10, v9 ; 
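; NOTE on the RV64F hunks in cttz-sdnode.ll here: the cttz lowering isolates
; the lowest set bit with x & -x, converts it to f32, and recovers the bit
; index from the biased exponent (the vsrl.vi ..., 23 above). The change folds
; the old vzext.vf2 + vsub.vx pair into one widening vwsubu.vx, which
; zero-extends from SEW=32 to SEW=64 while subtracting the exponent bias of
; 127; since only RV64 takes this path, the shared CHECK-F prefix for the
; zero-undef variants further down is split into separate RV32F and RV64F
; blocks. A scalar sketch of the same trick (illustrative names, not taken
; from the patch):
define i64 @cttz_via_float_exponent(i64 %x) {
  %neg = sub i64 0, %x
  %lsb = and i64 %x, %neg          ; isolate the lowest set bit (a power of two)
  %f = uitofp i64 %lsb to float    ; exact, since %lsb is a power of two
  %bits = bitcast float %f to i32
  %exp = lshr i32 %bits, 23        ; biased exponent
  %tz = sub i32 %exp, 127          ; remove the bias -> trailing-zero count
  %wide = zext i32 %tz to i64      ; vwsubu.vx performs this zext and the sub together
  ret i64 %wide
}
; The x == 0 case is handled by the vmseq.vi/vmerge.vxm pair, which selects
; the constant 64; the cttz_zero_undef variants drop that select because zero
; inputs are poison there.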
RV64F-NEXT: li a1, 127 -; RV64F-NEXT: vsub.vx v9, v10, a1 +; RV64F-NEXT: vwsubu.vx v10, v9, a1 +; RV64F-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64F-NEXT: vmseq.vi v0, v8, 0 ; RV64F-NEXT: li a1, 64 -; RV64F-NEXT: vmerge.vxm v8, v9, a1, v0 +; RV64F-NEXT: vmerge.vxm v8, v10, a1, v0 ; RV64F-NEXT: fsrm a0 ; RV64F-NEXT: ret ; @@ -1404,13 +1403,12 @@ define @cttz_nxv2i64( %va) { ; RV64F-NEXT: fsrmi a0, 1 ; RV64F-NEXT: vfncvt.f.xu.w v12, v10 ; RV64F-NEXT: vsrl.vi v10, v12, 23 -; RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64F-NEXT: vzext.vf2 v12, v10 ; RV64F-NEXT: li a1, 127 -; RV64F-NEXT: vsub.vx v10, v12, a1 +; RV64F-NEXT: vwsubu.vx v12, v10, a1 +; RV64F-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV64F-NEXT: vmseq.vi v0, v8, 0 ; RV64F-NEXT: li a1, 64 -; RV64F-NEXT: vmerge.vxm v8, v10, a1, v0 +; RV64F-NEXT: vmerge.vxm v8, v12, a1, v0 ; RV64F-NEXT: fsrm a0 ; RV64F-NEXT: ret ; @@ -1567,13 +1565,12 @@ define @cttz_nxv4i64( %va) { ; RV64F-NEXT: fsrmi a0, 1 ; RV64F-NEXT: vfncvt.f.xu.w v16, v12 ; RV64F-NEXT: vsrl.vi v12, v16, 23 -; RV64F-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV64F-NEXT: vzext.vf2 v16, v12 ; RV64F-NEXT: li a1, 127 -; RV64F-NEXT: vsub.vx v12, v16, a1 +; RV64F-NEXT: vwsubu.vx v16, v12, a1 +; RV64F-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; RV64F-NEXT: vmseq.vi v0, v8, 0 ; RV64F-NEXT: li a1, 64 -; RV64F-NEXT: vmerge.vxm v8, v12, a1, v0 +; RV64F-NEXT: vmerge.vxm v8, v16, a1, v0 ; RV64F-NEXT: fsrm a0 ; RV64F-NEXT: ret ; @@ -1730,13 +1727,12 @@ define @cttz_nxv8i64( %va) { ; RV64F-NEXT: fsrmi a0, 1 ; RV64F-NEXT: vfncvt.f.xu.w v24, v16 ; RV64F-NEXT: vsrl.vi v16, v24, 23 -; RV64F-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64F-NEXT: vzext.vf2 v24, v16 ; RV64F-NEXT: li a1, 127 -; RV64F-NEXT: vsub.vx v16, v24, a1 +; RV64F-NEXT: vwsubu.vx v24, v16, a1 +; RV64F-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64F-NEXT: vmseq.vi v0, v8, 0 ; RV64F-NEXT: li a1, 64 -; RV64F-NEXT: vmerge.vxm v8, v16, a1, v0 +; RV64F-NEXT: vmerge.vxm v8, v24, a1, v0 ; RV64F-NEXT: fsrm a0 ; RV64F-NEXT: ret ; @@ -2891,21 +2887,35 @@ define @cttz_zero_undef_nxv1i64( %va) { ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret ; -; CHECK-F-LABEL: cttz_zero_undef_nxv1i64: -; CHECK-F: # %bb.0: -; CHECK-F-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-F-NEXT: vrsub.vi v9, v8, 0 -; CHECK-F-NEXT: vand.vv v8, v8, v9 -; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8 -; CHECK-F-NEXT: vsrl.vi v8, v9, 23 -; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-F-NEXT: vzext.vf2 v9, v8 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vsub.vx v8, v9, a1 -; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: ret +; RV32F-LABEL: cttz_zero_undef_nxv1i64: +; RV32F: # %bb.0: +; RV32F-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32F-NEXT: vrsub.vi v9, v8, 0 +; RV32F-NEXT: vand.vv v8, v8, v9 +; RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV32F-NEXT: fsrmi a0, 1 +; RV32F-NEXT: vfncvt.f.xu.w v9, v8 +; RV32F-NEXT: vsrl.vi v8, v9, 23 +; RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; RV32F-NEXT: vzext.vf2 v9, v8 +; RV32F-NEXT: li a1, 127 +; RV32F-NEXT: vsub.vx v8, v9, a1 +; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: ret +; +; RV64F-LABEL: cttz_zero_undef_nxv1i64: +; RV64F: # %bb.0: +; RV64F-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64F-NEXT: vrsub.vi v9, v8, 0 +; RV64F-NEXT: vand.vv v8, v8, v9 +; RV64F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; RV64F-NEXT: fsrmi a0, 1 +; RV64F-NEXT: vfncvt.f.xu.w v9, v8 +; RV64F-NEXT: vsrl.vi v9, v9, 23 +; RV64F-NEXT: li a1, 
127 +; RV64F-NEXT: vwsubu.vx v8, v9, a1 +; RV64F-NEXT: fsrm a0 +; RV64F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv1i64: ; CHECK-D: # %bb.0: @@ -3011,21 +3021,35 @@ define @cttz_zero_undef_nxv2i64( %va) { ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret ; -; CHECK-F-LABEL: cttz_zero_undef_nxv2i64: -; CHECK-F: # %bb.0: -; CHECK-F-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-F-NEXT: vrsub.vi v10, v8, 0 -; CHECK-F-NEXT: vand.vv v8, v8, v10 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8 -; CHECK-F-NEXT: vsrl.vi v8, v10, 23 -; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-F-NEXT: vzext.vf2 v10, v8 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vsub.vx v8, v10, a1 -; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: ret +; RV32F-LABEL: cttz_zero_undef_nxv2i64: +; RV32F: # %bb.0: +; RV32F-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32F-NEXT: vrsub.vi v10, v8, 0 +; RV32F-NEXT: vand.vv v8, v8, v10 +; RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV32F-NEXT: fsrmi a0, 1 +; RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; RV32F-NEXT: vsrl.vi v8, v10, 23 +; RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; RV32F-NEXT: vzext.vf2 v10, v8 +; RV32F-NEXT: li a1, 127 +; RV32F-NEXT: vsub.vx v8, v10, a1 +; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: ret +; +; RV64F-LABEL: cttz_zero_undef_nxv2i64: +; RV64F: # %bb.0: +; RV64F-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV64F-NEXT: vrsub.vi v10, v8, 0 +; RV64F-NEXT: vand.vv v8, v8, v10 +; RV64F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64F-NEXT: fsrmi a0, 1 +; RV64F-NEXT: vfncvt.f.xu.w v10, v8 +; RV64F-NEXT: vsrl.vi v10, v10, 23 +; RV64F-NEXT: li a1, 127 +; RV64F-NEXT: vwsubu.vx v8, v10, a1 +; RV64F-NEXT: fsrm a0 +; RV64F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv2i64: ; CHECK-D: # %bb.0: @@ -3131,21 +3155,35 @@ define @cttz_zero_undef_nxv4i64( %va) { ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret ; -; CHECK-F-LABEL: cttz_zero_undef_nxv4i64: -; CHECK-F: # %bb.0: -; CHECK-F-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-F-NEXT: vrsub.vi v12, v8, 0 -; CHECK-F-NEXT: vand.vv v8, v8, v12 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8 -; CHECK-F-NEXT: vsrl.vi v8, v12, 23 -; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-F-NEXT: vzext.vf2 v12, v8 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vsub.vx v8, v12, a1 -; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: ret +; RV32F-LABEL: cttz_zero_undef_nxv4i64: +; RV32F: # %bb.0: +; RV32F-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32F-NEXT: vrsub.vi v12, v8, 0 +; RV32F-NEXT: vand.vv v8, v8, v12 +; RV32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32F-NEXT: fsrmi a0, 1 +; RV32F-NEXT: vfncvt.f.xu.w v12, v8 +; RV32F-NEXT: vsrl.vi v8, v12, 23 +; RV32F-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; RV32F-NEXT: vzext.vf2 v12, v8 +; RV32F-NEXT: li a1, 127 +; RV32F-NEXT: vsub.vx v8, v12, a1 +; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: ret +; +; RV64F-LABEL: cttz_zero_undef_nxv4i64: +; RV64F: # %bb.0: +; RV64F-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV64F-NEXT: vrsub.vi v12, v8, 0 +; RV64F-NEXT: vand.vv v8, v8, v12 +; RV64F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV64F-NEXT: fsrmi a0, 1 +; RV64F-NEXT: vfncvt.f.xu.w v12, v8 +; RV64F-NEXT: vsrl.vi v12, v12, 23 +; RV64F-NEXT: li a1, 127 +; RV64F-NEXT: vwsubu.vx v8, v12, a1 +; RV64F-NEXT: fsrm a0 +; RV64F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv4i64: ; CHECK-D: # %bb.0: @@ -3251,21 +3289,35 @@ define 
@cttz_zero_undef_nxv8i64( %va) { ; RV64I-NEXT: vsrl.vx v8, v8, a0 ; RV64I-NEXT: ret ; -; CHECK-F-LABEL: cttz_zero_undef_nxv8i64: -; CHECK-F: # %bb.0: -; CHECK-F-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-F-NEXT: vrsub.vi v16, v8, 0 -; CHECK-F-NEXT: vand.vv v8, v8, v16 -; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8 -; CHECK-F-NEXT: vsrl.vi v8, v16, 23 -; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-F-NEXT: vzext.vf2 v16, v8 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vsub.vx v8, v16, a1 -; CHECK-F-NEXT: fsrm a0 -; CHECK-F-NEXT: ret +; RV32F-LABEL: cttz_zero_undef_nxv8i64: +; RV32F: # %bb.0: +; RV32F-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32F-NEXT: vrsub.vi v16, v8, 0 +; RV32F-NEXT: vand.vv v8, v8, v16 +; RV32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32F-NEXT: fsrmi a0, 1 +; RV32F-NEXT: vfncvt.f.xu.w v16, v8 +; RV32F-NEXT: vsrl.vi v8, v16, 23 +; RV32F-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32F-NEXT: vzext.vf2 v16, v8 +; RV32F-NEXT: li a1, 127 +; RV32F-NEXT: vsub.vx v8, v16, a1 +; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: ret +; +; RV64F-LABEL: cttz_zero_undef_nxv8i64: +; RV64F: # %bb.0: +; RV64F-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV64F-NEXT: vrsub.vi v16, v8, 0 +; RV64F-NEXT: vand.vv v8, v8, v16 +; RV64F-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV64F-NEXT: fsrmi a0, 1 +; RV64F-NEXT: vfncvt.f.xu.w v16, v8 +; RV64F-NEXT: vsrl.vi v16, v16, 23 +; RV64F-NEXT: li a1, 127 +; RV64F-NEXT: vwsubu.vx v8, v16, a1 +; RV64F-NEXT: fsrm a0 +; RV64F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv8i64: ; CHECK-D: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll index 145ce6e917f96..ef8a6c704a44b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -68,9 +68,7 @@ define @vp_cttz_nxv1i8_unmasked( %va, i32 zer ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv1i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv1i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -134,9 +132,7 @@ define @vp_cttz_nxv2i8_unmasked( %va, i32 zer ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv2i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv2i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -200,9 +196,7 @@ define @vp_cttz_nxv4i8_unmasked( %va, i32 zer ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv4i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -266,9 +260,7 @@ define @vp_cttz_nxv8i8_unmasked( %va, i32 zer ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv8i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv8i8( %va, i1 false, splat (i1 true), i32 %evl) ret 
%v } @@ -332,9 +324,7 @@ define @vp_cttz_nxv16i8_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv16i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv16i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -398,9 +388,7 @@ define @vp_cttz_nxv32i8_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv32i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv32i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -464,9 +452,7 @@ define @vp_cttz_nxv64i8_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv64i8( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv64i8( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -544,9 +530,7 @@ define @vp_cttz_nxv1i16_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv1i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv1i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -624,9 +608,7 @@ define @vp_cttz_nxv2i16_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv2i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv2i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -704,9 +686,7 @@ define @vp_cttz_nxv4i16_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv4i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -784,9 +764,7 @@ define @vp_cttz_nxv8i16_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv8i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv8i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -864,9 +842,7 @@ define @vp_cttz_nxv16i16_unmasked( %va, i ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv16i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv16i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -944,9 +920,7 @@ define @vp_cttz_nxv32i16_unmasked( %va, i ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = 
insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv32i16( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv32i16( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1026,9 +1000,7 @@ define @vp_cttz_nxv1i32_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv1i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv1i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1108,9 +1080,7 @@ define @vp_cttz_nxv2i32_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv2i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv2i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1190,9 +1160,7 @@ define @vp_cttz_nxv4i32_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv4i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1272,9 +1240,7 @@ define @vp_cttz_nxv8i32_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv8i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv8i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1354,9 +1320,7 @@ define @vp_cttz_nxv16i32_unmasked( %va, i ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv16i32( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv16i32( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1538,9 +1502,7 @@ define @vp_cttz_nxv1i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv1i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv1i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1722,9 +1684,7 @@ define @vp_cttz_nxv2i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv2i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv2i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -1906,9 +1866,7 @@ define @vp_cttz_nxv4i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i64( %va, i1 false, %m, i32 %evl) + %v = call 
@llvm.vp.cttz.nxv4i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -2090,9 +2048,7 @@ define @vp_cttz_nxv7i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv7i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv7i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -2274,9 +2230,7 @@ define @vp_cttz_nxv8i64_unmasked( %va, i32 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv8i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv8i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -2855,9 +2809,7 @@ define @vp_cttz_nxv16i64_unmasked( %va, i ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv16i64( %va, i1 false, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv16i64( %va, i1 false, splat (i1 true), i32 %evl) ret %v } @@ -2910,9 +2862,7 @@ define @vp_cttz_zero_undef_nxv1i8_unmasked( % ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv1i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv1i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -2966,9 +2916,7 @@ define @vp_cttz_zero_undef_nxv2i8_unmasked( % ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv2i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv2i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3022,9 +2970,7 @@ define @vp_cttz_zero_undef_nxv4i8_unmasked( % ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv4i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3078,9 +3024,7 @@ define @vp_cttz_zero_undef_nxv8i8_unmasked( % ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-ZVBB-NEXT: vctz.v v8, v8 ; CHECK-ZVBB-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv8i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv8i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3134,9 +3078,7 @@ define @vp_cttz_zero_undef_nxv16i8_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv16i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv16i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3199,9 +3141,7 @@ define @vp_cttz_zero_undef_nxv32i8_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv32i8( %va, i1 true, %m, i32 %evl) + %v = call 
@llvm.vp.cttz.nxv32i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3264,9 +3204,7 @@ define @vp_cttz_zero_undef_nxv64i8_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv64i8( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv64i8( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3312,9 +3250,7 @@ define @vp_cttz_zero_undef_nxv1i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv1i16( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv1i16( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3360,9 +3296,7 @@ define @vp_cttz_zero_undef_nxv2i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv2i16( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv2i16( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3408,9 +3342,7 @@ define @vp_cttz_zero_undef_nxv4i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i16( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv4i16( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3456,9 +3388,7 @@ define @vp_cttz_zero_undef_nxv8i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv8i16( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv8i16( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3504,9 +3434,7 @@ define @vp_cttz_zero_undef_nxv16i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv16i16( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv16i16( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3583,9 +3511,7 @@ define @vp_cttz_zero_undef_nxv32i16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv32i16( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv32i16( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3633,9 +3559,7 @@ define @vp_cttz_zero_undef_nxv1i32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv1i32( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv1i32( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3683,9 +3607,7 @@ define @vp_cttz_zero_undef_nxv2i32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv2i32( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv2i32( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3733,9 +3655,7 @@ define @vp_cttz_zero_undef_nxv4i32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i32( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv4i32( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3783,9 +3703,7 @@ define @vp_cttz_zero_undef_nxv8i32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv8i32( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv8i32( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3832,9 +3750,7 @@ define @vp_cttz_zero_undef_nxv16i32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv16i32( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv16i32( %va, i1 true, splat (i1 true), i32 %evl) 
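; NOTE: every unmasked test in this patch applies the same mechanical rewrite.
; The old two-instruction idiom for an all-true mask,
;   %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
;   %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
; becomes the single inline constant `splat (i1 true)`, which is valid for
; both scalable and fixed-length vector types. For @llvm.vp.cttz the rewritten
; call looks like this (the i1 immarg selects zero-is-poison semantics;
; function name illustrative, nxv16i32 chosen to match the adjacent hunk):
define <vscale x 16 x i32> @vp_cttz_unmasked_example(<vscale x 16 x i32> %va, i32 zeroext %evl) {
  %v = call <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32> %va, i1 true, <vscale x 16 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 16 x i32> %v
}
declare <vscale x 16 x i32> @llvm.vp.cttz.nxv16i32(<vscale x 16 x i32>, i1 immarg, <vscale x 16 x i1>, i32)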
ret %v } @@ -3883,9 +3799,7 @@ define @vp_cttz_zero_undef_nxv1i64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv1i64( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv1i64( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3934,9 +3848,7 @@ define @vp_cttz_zero_undef_nxv2i64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv2i64( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv2i64( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -3985,9 +3897,7 @@ define @vp_cttz_zero_undef_nxv4i64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv4i64( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv4i64( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -4036,9 +3946,7 @@ define @vp_cttz_zero_undef_nxv7i64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv7i64( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv7i64( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -4087,9 +3995,7 @@ define @vp_cttz_zero_undef_nxv8i64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv8i64( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv8i64( %va, i1 true, splat (i1 true), i32 %evl) ret %v } @@ -4227,9 +4133,7 @@ define @vp_cttz_zero_undef_nxv16i64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.cttz.nxv16i64( %va, i1 true, %m, i32 %evl) + %v = call @llvm.vp.cttz.nxv16i64( %va, i1 true, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll index f1ac9aa53f579..3b7952f9f5e6d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll @@ -523,9 +523,7 @@ define float @extractelt_fadd_nxv4f32_splat( %x) { ; CHECK-NEXT: fmv.w.x fa4, a0 ; CHECK-NEXT: fadd.s fa0, fa5, fa4 ; CHECK-NEXT: ret - %head = insertelement poison, float 3.0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = fadd %x, %splat + %bo = fadd %x, splat (float 3.0) %ext = extractelement %bo, i32 2 ret float %ext } @@ -540,9 +538,7 @@ define float @extractelt_fsub_nxv4f32_splat( %x) { ; CHECK-NEXT: fmv.w.x fa4, a0 ; CHECK-NEXT: fsub.s fa0, fa4, fa5 ; CHECK-NEXT: ret - %head = insertelement poison, float 3.0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = fsub %splat, %x + %bo = fsub splat (float 3.0), %x %ext = extractelement %bo, i32 1 ret float %ext } @@ -557,9 +553,7 @@ define float @extractelt_fmul_nxv4f32_splat( %x) { ; CHECK-NEXT: fmv.w.x fa4, a0 ; CHECK-NEXT: fmul.s fa0, fa5, fa4 ; CHECK-NEXT: ret - %head = insertelement poison, float 3.0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = fmul %x, %splat + %bo = fmul %x, splat (float 3.0) %ext = extractelement %bo, i32 3 ret float %ext } @@ -573,9 +567,7 @@ define float @extractelt_fdiv_nxv4f32_splat( %x) { ; CHECK-NEXT: fmv.w.x fa4, a0 ; CHECK-NEXT: fdiv.s fa0, fa5, fa4 ; CHECK-NEXT: ret - %head = insertelement poison, float 3.0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = fdiv %x, %splat + %bo = fdiv %x, splat (float 3.0) %ext = extractelement %bo, i32 0 ret float %ext } diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll 
b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll index d9fdec3041cb0..df9949e617b80 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll @@ -753,9 +753,7 @@ define i32 @extractelt_add_nxv4i32_splat( %x) { ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: addi a0, a0, 3 ; CHECK-NEXT: ret - %head = insertelement poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = add %x, %splat + %bo = add %x, splat (i32 3) %ext = extractelement %bo, i32 2 ret i32 %ext } @@ -769,9 +767,7 @@ define i32 @extractelt_sub_nxv4i32_splat( %x) { ; CHECK-NEXT: li a1, 3 ; CHECK-NEXT: sub a0, a1, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = sub %splat, %x + %bo = sub splat (i32 3), %x %ext = extractelement %bo, i32 1 ret i32 %ext } @@ -795,9 +791,7 @@ define i32 @extractelt_mul_nxv4i32_splat( %x) { ; RV32M-NEXT: slli a1, a0, 1 ; RV32M-NEXT: add a0, a1, a0 ; RV32M-NEXT: ret - %head = insertelement poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = mul %x, %splat + %bo = mul %x, splat (i32 3) %ext = extractelement %bo, i32 3 ret i32 %ext } @@ -824,9 +818,7 @@ define i32 @extractelt_sdiv_nxv4i32_splat( %x) { ; RV32M-NEXT: srli a1, a0, 31 ; RV32M-NEXT: add a0, a0, a1 ; RV32M-NEXT: ret - %head = insertelement poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = sdiv %x, %splat + %bo = sdiv %x, splat (i32 3) %ext = extractelement %bo, i32 0 ret i32 %ext } @@ -853,9 +845,7 @@ define i32 @extractelt_udiv_nxv4i32_splat( %x) { ; RV32M-NEXT: srli a1, a0, 31 ; RV32M-NEXT: add a0, a0, a1 ; RV32M-NEXT: ret - %head = insertelement poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = sdiv %x, %splat + %bo = sdiv %x, splat (i32 3) %ext = extractelement %bo, i32 0 ret i32 %ext } diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll index fcee77ae8d8e9..a96cf5807e6c1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll @@ -737,9 +737,7 @@ define i32 @extractelt_add_nxv4i32_splat( %x) { ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: addiw a0, a0, 3 ; CHECK-NEXT: ret - %head = insertelement poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = add %x, %splat + %bo = add %x, splat (i32 3) %ext = extractelement %bo, i32 2 ret i32 %ext } @@ -753,9 +751,7 @@ define i32 @extractelt_sub_nxv4i32_splat( %x) { ; CHECK-NEXT: li a1, 3 ; CHECK-NEXT: subw a0, a1, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = sub %splat, %x + %bo = sub splat (i32 3), %x %ext = extractelement %bo, i32 1 ret i32 %ext } @@ -779,9 +775,7 @@ define i32 @extractelt_mul_nxv4i32_splat( %x) { ; RV64M-NEXT: slli a1, a0, 1 ; RV64M-NEXT: addw a0, a1, a0 ; RV64M-NEXT: ret - %head = insertelement poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = mul %x, %splat + %bo = mul %x, splat (i32 3) %ext = extractelement %bo, i32 3 ret i32 %ext } @@ -809,9 +803,7 @@ define i32 @extractelt_sdiv_nxv4i32_splat( %x) { ; RV64M-NEXT: srli a0, a0, 32 ; RV64M-NEXT: addw a0, a0, a1 ; RV64M-NEXT: ret - %head = insertelement poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = sdiv %x, %splat + %bo = sdiv %x, splat (i32 3) 
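; NOTE: the extractelt tests check that a binary op against a splat operand
; followed by extractelement is scalarized: the lane is moved out first
; (vslidedown/vmv.x.s) and the op is done in scalar code, e.g. addi for the
; add case and the shift-and-add sequence above for multiply by 3. With the
; splat constant syntax each binop is now a single line of input IR; a sketch
; of the pattern (function name illustrative):
define i32 @extract_add_splat_example(<vscale x 4 x i32> %x) {
  %bo = add <vscale x 4 x i32> %x, splat (i32 3)
  %ext = extractelement <vscale x 4 x i32> %bo, i32 2
  ret i32 %ext
}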
%ext = extractelement %bo, i32 0 ret i32 %ext } @@ -839,9 +831,7 @@ define i32 @extractelt_udiv_nxv4i32_splat( %x) { ; RV64M-NEXT: srli a0, a0, 32 ; RV64M-NEXT: addw a0, a0, a1 ; RV64M-NEXT: ret - %head = insertelement poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %bo = sdiv %x, %splat + %bo = sdiv %x, splat (i32 3) %ext = extractelement %bo, i32 0 ret i32 %ext } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll index c273dcdfbca17..c0d366760d079 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll @@ -24,9 +24,7 @@ define <2 x i8> @vp_abs_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> %va, i1 false, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.abs.v2i8(<2 x i8> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -50,9 +48,7 @@ define <4 x i8> @vp_abs_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.abs.v4i8(<4 x i8> %va, i1 false, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.abs.v4i8(<4 x i8> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -76,9 +72,7 @@ define <8 x i8> @vp_abs_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.abs.v8i8(<8 x i8> %va, i1 false, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.abs.v8i8(<8 x i8> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -102,9 +96,7 @@ define <16 x i8> @vp_abs_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.abs.v16i8(<16 x i8> %va, i1 false, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.abs.v16i8(<16 x i8> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -128,9 +120,7 @@ define <2 x i16> @vp_abs_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.abs.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.abs.v2i16(<2 x i16> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -154,9 +144,7 @@ define <4 x i16> @vp_abs_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> 
poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.abs.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.abs.v4i16(<4 x i16> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -180,9 +168,7 @@ define <8 x i16> @vp_abs_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.abs.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.abs.v8i16(<8 x i16> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -206,9 +192,7 @@ define <16 x i16> @vp_abs_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v10, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.abs.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.abs.v16i16(<16 x i16> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -232,9 +216,7 @@ define <2 x i32> @vp_abs_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.abs.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.abs.v2i32(<2 x i32> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -258,9 +240,7 @@ define <4 x i32> @vp_abs_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v9, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.abs.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.abs.v4i32(<4 x i32> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -284,9 +264,7 @@ define <8 x i32> @vp_abs_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v10, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.abs.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.abs.v8i32(<8 x i32> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -310,9 +288,7 @@ define <16 x i32> @vp_abs_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v12, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.abs.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.abs.v16i32(<16 x i32> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -336,9 +312,7 @@ define <2 x i64> @vp_abs_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v9, 
v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.abs.v2i64(<2 x i64> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -362,9 +336,7 @@ define <4 x i64> @vp_abs_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v10, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.abs.v4i64(<4 x i64> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -388,9 +360,7 @@ define <8 x i64> @vp_abs_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v12, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.abs.v8i64(<8 x i64> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -414,9 +384,7 @@ define <15 x i64> @vp_abs_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v16, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <15 x i1> poison, i1 true, i32 0 - %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer - %v = call <15 x i64> @llvm.vp.abs.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl) + %v = call <15 x i64> @llvm.vp.abs.v15i64(<15 x i64> %va, i1 false, <15 x i1> splat (i1 true), i32 %evl) ret <15 x i64> %v } @@ -440,9 +408,7 @@ define <16 x i64> @vp_abs_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v16, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.abs.v16i64(<16 x i64> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -495,8 +461,6 @@ define <32 x i64> @vp_abs_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vrsub.vi v24, v16, 0 ; CHECK-NEXT: vmax.vv v16, v16, v24 ; CHECK-NEXT: ret - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.abs.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.abs.v32i64(<32 x i64> %va, i1 false, <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 595c650ffb82a..943fc58d637a0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -54,9 +54,7 @@ define <2 x i8> @vp_bitreverse_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v9, 
v8 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.bitreverse.v2i8(<2 x i8> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.bitreverse.v2i8(<2 x i8> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -110,9 +108,7 @@ define <4 x i8> @vp_bitreverse_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v9, v8 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.bitreverse.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.bitreverse.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -166,9 +162,7 @@ define <8 x i8> @vp_bitreverse_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v9, v8 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.bitreverse.v8i8(<8 x i8> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.bitreverse.v8i8(<8 x i8> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -222,9 +216,7 @@ define <16 x i8> @vp_bitreverse_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v9, v8 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.bitreverse.v16i8(<16 x i8> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.bitreverse.v16i8(<16 x i8> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -292,9 +284,7 @@ define <2 x i16> @vp_bitreverse_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v9, v8 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.bitreverse.v2i16(<2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -362,9 +352,7 @@ define <4 x i16> @vp_bitreverse_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v9, v8 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.bitreverse.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -432,9 +420,7 @@ define <8 x i16> @vp_bitreverse_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v9, v8 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.bitreverse.v8i16(<8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -502,9 +488,7 @@ define <16 x i16> @vp_bitreverse_v16i16_unmasked(<16 
x i16> %va, i32 zeroext %ev ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v10, v8 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.bitreverse.v16i16(<16 x i16> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -588,9 +572,7 @@ define <2 x i32> @vp_bitreverse_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v9, v8 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.bitreverse.v2i32(<2 x i32> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -674,9 +656,7 @@ define <4 x i32> @vp_bitreverse_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v9, v8 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.bitreverse.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -760,9 +740,7 @@ define <8 x i32> @vp_bitreverse_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v10, v8 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.bitreverse.v8i32(<8 x i32> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -846,9 +824,7 @@ define <16 x i32> @vp_bitreverse_v16i32_unmasked(<16 x i32> %va, i32 zeroext %ev ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: vor.vv v8, v12, v8 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.bitreverse.v16i32(<16 x i32> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -1120,9 +1096,7 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v9, v8 ; RV64-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.bitreverse.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.bitreverse.v2i64(<2 x i64> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1394,9 +1368,7 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.bitreverse.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.bitreverse.v4i64(<4 x 
i64> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1668,9 +1640,7 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v12, v8 ; RV64-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.bitreverse.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.bitreverse.v8i64(<8 x i64> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -2034,9 +2004,7 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: ret - %head = insertelement <15 x i1> poison, i1 true, i32 0 - %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer - %v = call <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl) + %v = call <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x i64> %v } @@ -2400,9 +2368,7 @@ define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v16, v8 ; RV64-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.bitreverse.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.bitreverse.v16i64(<16 x i64> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -2555,8 +2521,6 @@ define <128 x i16> @vp_bitreverse_v128i16_unmasked(<128 x i16> %va, i32 zeroext ; CHECK-NEXT: vadd.vv v16, v16, v16 ; CHECK-NEXT: vor.vv v16, v24, v16 ; CHECK-NEXT: ret - %head = insertelement <128 x i1> poison, i1 true, i32 0 - %m = shufflevector <128 x i1> %head, <128 x i1> poison, <128 x i32> zeroinitializer - %v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl) + %v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> splat (i1 true), i32 %evl) ret <128 x i16> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index 6308f73e219da..f80d4e5c0d7c3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -26,9 +26,7 @@ define <2 x i16> @vp_bswap_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsll.vi v8, v8, 8 ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -54,9 +52,7 @@ define <4 x i16> @vp_bswap_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsll.vi v8, v8, 8 ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -82,9 +78,7 @@ define <8 x i16> 
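; NOTE: with only the base V extension, @llvm.vp.bswap expands into the
; vsll.vi/vsrl.vi/vor.vv chains seen in the CHECK lines here, and wider
; element types need progressively longer chains. The unmasked IR form gets
; the same splat rewrite as elsewhere (function name illustrative):
define <8 x i16> @vp_bswap_unmasked_example(<8 x i16> %va, i32 zeroext %evl) {
  %v = call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}
declare <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16>, <8 x i1>, i32)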
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
index 6308f73e219da..f80d4e5c0d7c3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
@@ -26,9 +26,7 @@ define <2 x i16> @vp_bswap_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsll.vi v8, v8, 8
 ; CHECK-NEXT: vor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.bswap.v2i16(<2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -54,9 +52,7 @@ define <4 x i16> @vp_bswap_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsll.vi v8, v8, 8
 ; CHECK-NEXT: vor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.bswap.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -82,9 +78,7 @@ define <8 x i16> @vp_bswap_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsll.vi v8, v8, 8
 ; CHECK-NEXT: vor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.bswap.v8i16(<8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -110,9 +104,7 @@ define <16 x i16> @vp_bswap_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsll.vi v8, v8, 8
 ; CHECK-NEXT: vor.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.bswap.v16i16(<16 x i16> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -154,9 +146,7 @@ define <2 x i32> @vp_bswap_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vor.vv v8, v8, v10
 ; CHECK-NEXT: vor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.bswap.v2i32(<2 x i32> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -198,9 +188,7 @@ define <4 x i32> @vp_bswap_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vor.vv v8, v8, v10
 ; CHECK-NEXT: vor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.bswap.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -242,9 +230,7 @@ define <8 x i32> @vp_bswap_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vor.vv v8, v8, v12
 ; CHECK-NEXT: vor.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.bswap.v8i32(<8 x i32> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -286,9 +272,7 @@ define <16 x i32> @vp_bswap_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vor.vv v8, v8, v16
 ; CHECK-NEXT: vor.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.bswap.v16i32(<16 x i32> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -446,9 +430,7 @@ define <2 x i64> @vp_bswap_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: vor.vv v8, v8, v10
 ; RV64-NEXT: vor.vv v8, v9, v8
 ; RV64-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.bswap.v2i64(<2 x i64> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -606,9 +588,7 @@ define <4 x i64> @vp_bswap_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: vor.vv v8, v8, v12
 ; RV64-NEXT: vor.vv v8, v10, v8
 ; RV64-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.bswap.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -766,9 +746,7 @@ define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: vor.vv v8, v8, v16
 ; RV64-NEXT: vor.vv v8, v12, v8
 ; RV64-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.bswap.v8i64(<8 x i64> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -999,9 +977,7 @@ define <15 x i64> @vp_bswap_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: vor.vv v8, v8, v24
 ; RV64-NEXT: vor.vv v8, v16, v8
 ; RV64-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x i64> @llvm.vp.bswap.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl)
+ %v = call <15 x i64> @llvm.vp.bswap.v15i64(<15 x i64> %va, <15 x i1> splat (i1 true), i32 %evl)
 ret <15 x i64> %v
 }

@@ -1232,9 +1208,7 @@ define <16 x i64> @vp_bswap_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: vor.vv v8, v8, v24
 ; RV64-NEXT: vor.vv v8, v16, v8
 ; RV64-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.bswap.v16i64(<16 x i64> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }

@@ -1315,8 +1289,6 @@ define <128 x i16> @vp_bswap_v128i16_unmasked(<128 x i16> %va, i32 zeroext %evl)
 ; CHECK-NEXT: vsll.vi v16, v16, 8
 ; CHECK-NEXT: vor.vv v16, v16, v24
 ; CHECK-NEXT: ret
- %head = insertelement <128 x i1> poison, i1 true, i32 0
- %m = shufflevector <128 x i1> %head, <128 x i1> poison, <128 x i32> zeroinitializer
- %v = call <128 x i16> @llvm.vp.bswap.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl)
+ %v = call <128 x i16> @llvm.vp.bswap.v128i16(<128 x i16> %va, <128 x i1> splat (i1 true), i32 %evl)
 ret <128 x i16> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
index 194179f9f470e..5d024f140fd5d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
@@ -86,9 +86,7 @@ define <2 x half> @vp_ceil_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.ceil.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %v
 }

@@ -170,9 +168,7 @@ define <4 x half> @vp_ceil_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.ceil.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }

@@ -256,9 +252,7 @@ define <8 x half> @vp_ceil_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.ceil.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x half> %v
 }

@@ -344,9 +338,7 @@ define <16 x half> @vp_ceil_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.ceil.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x half> %v
 }

@@ -388,9 +380,7 @@ define <2 x float> @vp_ceil_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x float> %v
 }

@@ -432,9 +422,7 @@ define <4 x float> @vp_ceil_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }

@@ -478,9 +466,7 @@ define <8 x float> @vp_ceil_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x float> %v
 }

@@ -524,9 +510,7 @@ define <16 x float> @vp_ceil_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x float> %v
 }

@@ -568,9 +552,7 @@ define <2 x double> @vp_ceil_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x double> %v
 }

@@ -614,9 +596,7 @@ define <4 x double> @vp_ceil_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }

@@ -660,9 +640,7 @@ define <8 x double> @vp_ceil_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x double> %v
 }

@@ -706,9 +684,7 @@ define <15 x double> @vp_ceil_v15f64_unmasked(<15 x double> %va, i32 zeroext %ev
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
+ %v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl)
 ret <15 x double> %v
 }

@@ -752,9 +728,7 @@ define <16 x double> @vp_ceil_v16f64_unmasked(<16 x double> %va, i32 zeroext %ev
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x double> %v
 }

@@ -867,8 +841,6 @@ define <32 x double> @vp_ceil_v32f64_unmasked(<32 x double> %va, i32 zeroext %ev
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
 ; CHECK-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
+ %v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x double> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
index 36f22bd3259cf..2f4539d5038c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
@@ -58,9 +58,7 @@ define <2 x i8> @vp_ctlz_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 false, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }

@@ -118,9 +116,7 @@ define <4 x i8> @vp_ctlz_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> %va, i1 false, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }

@@ -178,9 +174,7 @@ define <8 x i8> @vp_ctlz_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> %va, i1 false, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }

@@ -238,9 +232,7 @@ define <16 x i8> @vp_ctlz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> %va, i1 false, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }

@@ -316,9 +308,7 @@ define <2 x i16> @vp_ctlz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -394,9 +384,7 @@ define <4 x i16> @vp_ctlz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -472,9 +460,7 @@ define <8 x i16> @vp_ctlz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -550,9 +536,7 @@ define <16 x i16> @vp_ctlz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -634,9 +618,7 @@ define <2 x i32> @vp_ctlz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -718,9 +700,7 @@ define <4 x i32> @vp_ctlz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -802,9 +782,7 @@ define <8 x i32> @vp_ctlz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -886,9 +864,7 @@ define <16 x i32> @vp_ctlz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -1098,9 +1074,7 @@ define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -1310,9 +1284,7 @@ define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -1522,9 +1494,7 @@ define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -1764,9 +1734,7 @@ define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
+ %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> splat (i1 true), i32 %evl)
 ret <15 x i64> %v
 }

@@ -2006,9 +1974,7 @@ define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }

@@ -2623,9 +2589,7 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: vmul.vx v16, v16, a5
 ; RV64-NEXT: vsrl.vx v16, v16, a6
 ; RV64-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl)
+ %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x i64> %v
 }

@@ -2681,9 +2645,7 @@ define <2 x i8> @vp_ctlz_zero_undef_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 true, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }

@@ -2739,9 +2701,7 @@ define <4 x i8> @vp_ctlz_zero_undef_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> %va, i1 true, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.ctlz.v4i8(<4 x i8> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }

@@ -2797,9 +2757,7 @@ define <8 x i8> @vp_ctlz_zero_undef_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> %va, i1 true, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.ctlz.v8i8(<8 x i8> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }

@@ -2855,9 +2813,7 @@ define <16 x i8> @vp_ctlz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> %va, i1 true, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.ctlz.v16i8(<16 x i8> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }

@@ -2931,9 +2887,7 @@ define <2 x i16> @vp_ctlz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.ctlz.v2i16(<2 x i16> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -3007,9 +2961,7 @@ define <4 x i16> @vp_ctlz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.ctlz.v4i16(<4 x i16> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -3083,9 +3035,7 @@ define <8 x i16> @vp_ctlz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.ctlz.v8i16(<8 x i16> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -3159,9 +3109,7 @@ define <16 x i16> @vp_ctlz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroex
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.ctlz.v16i16(<16 x i16> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -3241,9 +3189,7 @@ define <2 x i32> @vp_ctlz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.ctlz.v2i32(<2 x i32> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -3323,9 +3269,7 @@ define <4 x i32> @vp_ctlz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.ctlz.v4i32(<4 x i32> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -3405,9 +3349,7 @@ define <8 x i32> @vp_ctlz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.ctlz.v8i32(<8 x i32> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -3487,9 +3429,7 @@ define <16 x i32> @vp_ctlz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroex
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.ctlz.v16i32(<16 x i32> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -3697,9 +3637,7 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 true, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.ctlz.v2i64(<2 x i64> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -3907,9 +3845,7 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 true, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.ctlz.v4i64(<4 x i64> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -4117,9 +4053,7 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 true, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.ctlz.v8i64(<8 x i64> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -4357,9 +4291,7 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 true, <15 x i1> %m, i32 %evl)
+ %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 true, <15 x i1> splat (i1 true), i32 %evl)
 ret <15 x i64> %v
 }

@@ -4597,9 +4529,7 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 true, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }

@@ -5212,8 +5142,6 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex
 ; RV64-NEXT: vmul.vx v16, v16, a5
 ; RV64-NEXT: vsrl.vx v16, v16, a6
 ; RV64-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 true, <32 x i1> %m, i32 %evl)
+ %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 true, <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x i64> %v
 }
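The ctlz and cttz files differ from the other operations only in the extra i1 immediate between the source operand and the mask: it is the is-zero-poison flag, i1 false in the plain tests and i1 true in the *_zero_undef tests. A minimal sketch under the same assumptions as above (the function name @ctlz_unmasked_sketch is hypothetical; the intrinsic signature matches the calls in these tests):

declare <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32)

define <2 x i8> @ctlz_unmasked_sketch(<2 x i8> %va, i32 zeroext %evl) {
  ; i1 false: a zero element yields the defined result (the element bit width);
  ; the *_zero_undef variants pass i1 true, letting a zero element produce poison.
  %v = call <2 x i8> @llvm.vp.ctlz.v2i8(<2 x i8> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}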
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
index c4b22955f84c4..0b6d8b33394d5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
@@ -44,9 +44,7 @@ define <2 x i8> @vp_ctpop_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.ctpop.v2i8(<2 x i8> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }

@@ -90,9 +88,7 @@ define <4 x i8> @vp_ctpop_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.ctpop.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }

@@ -136,9 +132,7 @@ define <8 x i8> @vp_ctpop_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.ctpop.v8i8(<8 x i8> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }

@@ -182,9 +176,7 @@ define <16 x i8> @vp_ctpop_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.ctpop.v16i8(<16 x i8> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }

@@ -242,9 +234,7 @@ define <2 x i16> @vp_ctpop_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.ctpop.v2i16(<2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -302,9 +292,7 @@ define <4 x i16> @vp_ctpop_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.ctpop.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -362,9 +350,7 @@ define <8 x i16> @vp_ctpop_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.ctpop.v8i16(<8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -422,9 +408,7 @@ define <16 x i16> @vp_ctpop_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.ctpop.v16i16(<16 x i16> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -484,9 +468,7 @@ define <2 x i32> @vp_ctpop_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.ctpop.v2i32(<2 x i32> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -546,9 +528,7 @@ define <4 x i32> @vp_ctpop_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.ctpop.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -608,9 +588,7 @@ define <8 x i32> @vp_ctpop_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.ctpop.v8i32(<8 x i32> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -670,9 +648,7 @@ define <16 x i32> @vp_ctpop_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.ctpop.v16i32(<16 x i32> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -826,9 +802,7 @@ define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -982,9 +956,7 @@ define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -1138,9 +1110,7 @@ define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -1324,9 +1294,7 @@ define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl)
+ %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> splat (i1 true), i32 %evl)
 ret <15 x i64> %v
 }

@@ -1510,9 +1478,7 @@ define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }

@@ -1962,8 +1928,6 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: vmul.vx v16, v16, a4
 ; RV64-NEXT: vsrl.vx v16, v16, a5
 ; RV64-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl)
+ %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x i64> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
index 49f6ffd691292..f2926fa91e5c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -52,9 +52,7 @@ define <2 x i8> @vp_cttz_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> %va, i1 false, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }

@@ -106,9 +104,7 @@ define <4 x i8> @vp_cttz_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> %va, i1 false, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }

@@ -160,9 +156,7 @@ define <8 x i8> @vp_cttz_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> %va, i1 false, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }

@@ -214,9 +208,7 @@ define <16 x i8> @vp_cttz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> %va, i1 false, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }

@@ -282,9 +274,7 @@ define <2 x i16> @vp_cttz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 false, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -350,9 +340,7 @@ define <4 x i16> @vp_cttz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 false, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -418,9 +406,7 @@ define <8 x i16> @vp_cttz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 false, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -486,9 +472,7 @@ define <16 x i16> @vp_cttz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 false, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -556,9 +540,7 @@ define <2 x i32> @vp_cttz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 false, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -626,9 +608,7 @@ define <4 x i32> @vp_cttz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -696,9 +676,7 @@ define <8 x i32> @vp_cttz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 false, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -766,9 +744,7 @@ define <16 x i32> @vp_cttz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 false, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -938,9 +914,7 @@ define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 false, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -1110,9 +1084,7 @@ define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 false, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -1282,9 +1254,7 @@ define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 false, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -1484,9 +1454,7 @@ define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
+ %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 false, <15 x i1> splat (i1 true), i32 %evl)
 ret <15 x i64> %v
 }

@@ -1686,9 +1654,7 @@ define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 false, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }

@@ -2223,9 +2189,7 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: vmul.vx v16, v16, a5
 ; RV64-NEXT: vsrl.vx v16, v16, a6
 ; RV64-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl)
+ %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 false, <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x i64> %v
 }

@@ -2275,9 +2239,7 @@ define <2 x i8> @vp_cttz_zero_undef_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> %va, i1 true, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }

@@ -2327,9 +2289,7 @@ define <4 x i8> @vp_cttz_zero_undef_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> %va, i1 true, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }

@@ -2379,9 +2339,7 @@ define <8 x i8> @vp_cttz_zero_undef_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> %va, i1 true, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }

@@ -2431,9 +2389,7 @@ define <16 x i8> @vp_cttz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: vand.vi v8, v8, 15
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> %va, i1 true, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }

@@ -2497,9 +2453,7 @@ define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 true, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -2563,9 +2517,7 @@ define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 true, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -2629,9 +2581,7 @@ define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 true, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -2695,9 +2645,7 @@ define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroex
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 true, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -2763,9 +2711,7 @@ define <2 x i32> @vp_cttz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -2831,9 +2777,7 @@ define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 true, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -2899,9 +2843,7 @@ define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 true, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -2967,9 +2909,7 @@ define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroex
 ; CHECK-NEXT: vmul.vx v8, v8, a0
 ; CHECK-NEXT: vsrl.vi v8, v8, 24
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 true, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -3137,9 +3077,7 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 true, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 true, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -3307,9 +3245,7 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 true, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 true, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -3477,9 +3413,7 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 true, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 true, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -3677,9 +3611,7 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 true, <15 x i1> %m, i32 %evl)
+ %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 true, <15 x i1> splat (i1 true), i32 %evl)
 ret <15 x i64> %v
 }

@@ -3877,9 +3809,7 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex
 ; RV64-NEXT: li a0, 56
 ; RV64-NEXT: vsrl.vx v8, v8, a0
 ; RV64-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 true, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 true, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }

@@ -4412,8 +4342,6 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex
 ; RV64-NEXT: vmul.vx v16, v16, a5
 ; RV64-NEXT: vsrl.vx v16, v16, a6
 ; RV64-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 true, <32 x i1> %m, i32 %evl)
+ %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 true, <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x i64> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
index 583742224f8cf..6c2be509f7c22 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
@@ -86,9 +86,7 @@ define <2 x half> @vp_floor_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.floor.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %v
 }

@@ -170,9 +168,7 @@ define <4 x half> @vp_floor_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.floor.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }

@@ -256,9 +252,7 @@ define <8 x half> @vp_floor_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16,
m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.floor.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -344,9 +338,7 @@ define <16 x half> @vp_floor_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.floor.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -388,9 +380,7 @@ define <2 x float> @vp_floor_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -432,9 +422,7 @@ define <4 x float> @vp_floor_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -478,9 +466,7 @@ define <8 x float> @vp_floor_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -524,9 +510,7 @@ define <16 x float> @vp_floor_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -568,9 +552,7 @@ define <2 x double> @vp_floor_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> 
@llvm.vp.floor.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -614,9 +596,7 @@ define <4 x double> @vp_floor_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -660,9 +640,7 @@ define <8 x double> @vp_floor_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -706,9 +684,7 @@ define <15 x double> @vp_floor_v15f64_unmasked(<15 x double> %va, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <15 x i1> poison, i1 true, i32 0 - %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer - %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) + %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -752,9 +728,7 @@ define <16 x double> @vp_floor_v16f64_unmasked(<16 x double> %va, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -867,8 +841,6 @@ define <32 x double> @vp_floor_v32f64_unmasked(<32 x double> %va, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) + %v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index 3b7480117d375..edb33158e32eb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -76,9 +76,7 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement <2 x i1> poison, i1 
true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.maximum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.maximum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -150,9 +148,7 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.maximum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.maximum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -226,9 +222,7 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.maximum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.maximum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -304,9 +298,7 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.maximum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.maximum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -340,9 +332,7 @@ define <2 x float> @vfmax_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i3 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v11 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -376,9 +366,7 @@ define <4 x float> @vfmax_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i3 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v11 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -414,9 +402,7 @@ define <8 x float> @vfmax_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v14 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer 
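; A distilled sketch of the rewrite every unmasked hunk in these test files
; applies (illustrative only; %va, %vb and %evl stand in for each test's own
; values). The three-instruction all-true mask idiom
;
;   %head = insertelement <8 x i1> poison, i1 true, i32 0
;   %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
;   %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
;
; collapses into a single call using the splat constant syntax:
;
;   %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl)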
- %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -452,9 +438,7 @@ define <16 x float> @vfmax_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v20 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -488,9 +472,7 @@ define <2 x double> @vfmax_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb, ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v11 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -526,9 +508,7 @@ define <4 x double> @vfmax_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v14 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -564,9 +544,7 @@ define <8 x double> @vfmax_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v20 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -615,9 +593,7 @@ define <16 x double> @vfmax_vv_v16f64_unmasked(<16 x double> %va, <16 x double> ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -808,8 +784,6 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x double> @llvm.vp.maximum.v32f64(<32 x double> %va, <32 x 
double> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x double> @llvm.vp.maximum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index 57275df57a311..48649c43f782a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -76,9 +76,7 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.minimum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.minimum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -150,9 +148,7 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.minimum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.minimum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -226,9 +222,7 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.minimum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.minimum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -304,9 +298,7 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.minimum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.minimum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -340,9 +332,7 @@ define <2 x float> @vfmin_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i3 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v11 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.minimum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.minimum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -376,9 +366,7 @@ define <4 x float> @vfmin_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i3 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: vfmin.vv v8, 
v8, v11 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.minimum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.minimum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -414,9 +402,7 @@ define <8 x float> @vfmin_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v14 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.minimum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.minimum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -452,9 +438,7 @@ define <16 x float> @vfmin_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v20 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.minimum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.minimum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -488,9 +472,7 @@ define <2 x double> @vfmin_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb, ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v11 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.minimum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.minimum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -526,9 +508,7 @@ define <4 x double> @vfmin_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v14 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.minimum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.minimum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -564,9 +544,7 @@ define <8 x double> @vfmin_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v20 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.minimum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.minimum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -615,9 +593,7 @@ define <16 x double> @vfmin_vv_v16f64_unmasked(<16 x double> %va, <16 x double> ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 
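; Note that only the IR input of each unmasked test changes; the CHECK lines
; remain as hunk context. An all-true mask is presumably still lowered to the
; unmasked instruction forms (e.g. vfmin.vv with no v0.t operand above), so
;
;   %v = call <16 x double> @llvm.vp.minimum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl)
;
; is expected to produce the same assembly as the old three-line idiom.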
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.minimum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.minimum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -808,8 +784,6 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x double> @llvm.vp.minimum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x double> @llvm.vp.minimum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll index dc907eed16cce..97842c294036d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll @@ -87,9 +87,7 @@ define void @splat_zero_v8f16(ptr %x) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement <8 x half> poison, half 0.0, i32 0 - %b = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> zeroinitializer - store <8 x half> %b, ptr %x + store <8 x half> splat (half 0.0), ptr %x ret void } @@ -100,9 +98,7 @@ define void @splat_zero_v4f32(ptr %x) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement <4 x float> poison, float 0.0, i32 0 - %b = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> zeroinitializer - store <4 x float> %b, ptr %x + store <4 x float> splat (float 0.0), ptr %x ret void } @@ -113,9 +109,7 @@ define void @splat_zero_v2f64(ptr %x) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement <2 x double> poison, double 0.0, i32 0 - %b = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> zeroinitializer - store <2 x double> %b, ptr %x + store <2 x double> splat (double 0.0), ptr %x ret void } @@ -126,9 +120,7 @@ define void @splat_zero_16f16(ptr %x) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement <16 x half> poison, half 0.0, i32 0 - %b = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> zeroinitializer - store <16 x half> %b, ptr %x + store <16 x half> splat (half 0.0), ptr %x ret void } @@ -139,9 +131,7 @@ define void @splat_zero_v8f32(ptr %x) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement <8 x float> poison, float 0.0, i32 0 - %b = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> zeroinitializer - store <8 x float> %b, ptr %x + store <8 x float> splat (float 0.0), ptr %x ret void } @@ -152,9 +142,7 @@ define void @splat_zero_v4f64(ptr %x) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement <4 x double> poison, double 0.0, i32 0 - %b = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> zeroinitializer - store <4 x double> %b, ptr %x + store <4 x double> splat (double 0.0), ptr %x ret void } @@ -166,9 +154,7 @@ define void @splat_negzero_v8f16(ptr %x) { ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret - %a = 
insertelement <8 x half> poison, half -0.0, i32 0 - %b = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> zeroinitializer - store <8 x half> %b, ptr %x + store <8 x half> splat (half -0.0), ptr %x ret void } @@ -180,9 +166,7 @@ define void @splat_negzero_v4f32(ptr %x) { ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement <4 x float> poison, float -0.0, i32 0 - %b = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> zeroinitializer - store <4 x float> %b, ptr %x + store <4 x float> splat (float -0.0), ptr %x ret void } @@ -204,9 +188,7 @@ define void @splat_negzero_v2f64(ptr %x) { ; CHECK-RV64-NEXT: vmv.v.x v8, a1 ; CHECK-RV64-NEXT: vse64.v v8, (a0) ; CHECK-RV64-NEXT: ret - %a = insertelement <2 x double> poison, double -0.0, i32 0 - %b = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> zeroinitializer - store <2 x double> %b, ptr %x + store <2 x double> splat (double -0.0), ptr %x ret void } @@ -218,9 +200,7 @@ define void @splat_negzero_16f16(ptr %x) { ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement <16 x half> poison, half -0.0, i32 0 - %b = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> zeroinitializer - store <16 x half> %b, ptr %x + store <16 x half> splat (half -0.0), ptr %x ret void } @@ -232,9 +212,7 @@ define void @splat_negzero_v8f32(ptr %x) { ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement <8 x float> poison, float -0.0, i32 0 - %b = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> zeroinitializer - store <8 x float> %b, ptr %x + store <8 x float> splat (float -0.0), ptr %x ret void } @@ -256,8 +234,6 @@ define void @splat_negzero_v4f64(ptr %x) { ; CHECK-RV64-NEXT: vmv.v.x v8, a1 ; CHECK-RV64-NEXT: vse64.v v8, (a0) ; CHECK-RV64-NEXT: ret - %a = insertelement <4 x double> poison, double -0.0, i32 0 - %b = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> zeroinitializer - store <4 x double> %b, ptr %x + store <4 x double> splat (double -0.0), ptr %x ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll index 5de28a0d722d0..51ac27acaf470 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll @@ -24,7 +24,7 @@ define <2 x float> @vfpext_v2f16_v2f32_unmasked(<2 x half> %a, i32 zeroext %vl) ; CHECK-NEXT: vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret - %v = call <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + %v = call <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half> %a, <2 x i1> splat (i1 true), i32 %vl) ret <2 x float> %v } @@ -50,7 +50,7 @@ define <2 x double> @vfpext_v2f16_v2f64_unmasked(<2 x half> %a, i32 zeroext %vl) ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v8, v9 ; CHECK-NEXT: ret - %v = call <2 x double> @llvm.vp.fpext.v2f64.v2f16(<2 x half> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + %v = call <2 x double> @llvm.vp.fpext.v2f64.v2f16(<2 x half> %a, <2 x i1> splat (i1 true), i32 %vl) ret <2 x double> %v } @@ -74,7 +74,7 @@ define <2 x double> @vfpext_v2f32_v2f64_unmasked(<2 x float> %a, i32 zeroext %vl ; CHECK-NEXT: 
vfwcvt.f.f.v v9, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret - %v = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + %v = call <2 x double> @llvm.vp.fpext.v2f64.v2f32(<2 x float> %a, <2 x i1> splat (i1 true), i32 %vl) ret <2 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll index dab4b4d9926e1..602662b184290 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll @@ -42,7 +42,7 @@ define <4 x i1> @vfptosi_v4i1_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v9 ; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 ; ZVFHMIN-NEXT: ret - %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i1> %v } @@ -66,7 +66,7 @@ define <4 x i1> @vfptosi_v4i1_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret - %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f32(<4 x float> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i1> %v } @@ -91,6 +91,6 @@ define <4 x i1> @vfptosi_v4i1_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret - %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i1> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll index c673e396914bf..49a1b19b58a27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll @@ -67,7 +67,7 @@ define <4 x i8> @vfptosi_v4i8_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; ZVFHMIN-NEXT: vnsrl.wi v8, v8, 0 ; ZVFHMIN-NEXT: ret - %v = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -105,7 +105,7 @@ define <4 x i16> @vfptosi_v4i16_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %v = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) 
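; The conversion tests in these hunks use the older constant-expression
; spelling of the same idiom (shufflevector/insertelement over undef, rather
; than an instruction sequence over poison); both forms denote an all-true
; mask and both are rewritten identically, e.g. the operand
;
;   <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer)
;
; is now written simply as:
;
;   <4 x i1> splat (i1 true)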
ret <4 x i16> %v } @@ -145,7 +145,7 @@ define <4 x i32> @vfptosi_v4i32_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v9 ; ZVFHMIN-NEXT: ret - %v = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -187,7 +187,7 @@ define <4 x i64> @vfptosi_v4i64_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.rtz.x.f.v v8, v10 ; ZVFHMIN-NEXT: ret - %v = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -213,7 +213,7 @@ define <4 x i8> @vfptosi_v4i8_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v9, 0 ; CHECK-NEXT: ret - %v = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f32(<4 x float> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -237,7 +237,7 @@ define <4 x i16> @vfptosi_v4i16_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl ; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret - %v = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f32(<4 x float> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -259,7 +259,7 @@ define <4 x i32> @vfptosi_v4i32_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 ; CHECK-NEXT: ret - %v = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -283,7 +283,7 @@ define <4 x i64> @vfptosi_v4i64_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl ; CHECK-NEXT: vfwcvt.rtz.x.f.v v10, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f32(<4 x float> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -313,7 +313,7 @@ define <4 x i8> @vfptosi_v4i8_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v8, 0 ; CHECK-NEXT: ret - %v = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f64(<4 x double> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 
%evl) + %v = call <4 x i8> @llvm.vp.fptosi.v4i8.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -339,7 +339,7 @@ define <4 x i16> @vfptosi_v4i16_v4f64_unmasked(<4 x double> %va, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-NEXT: ret - %v = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f64(<4 x double> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i16> @llvm.vp.fptosi.v4i16.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -363,7 +363,7 @@ define <4 x i32> @vfptosi_v4i32_v4f64_unmasked(<4 x double> %va, i32 zeroext %ev ; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f64(<4 x double> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i32> @llvm.vp.fptosi.v4i32.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -385,7 +385,7 @@ define <4 x i64> @vfptosi_v4i64_v4f64_unmasked(<4 x double> %va, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8 ; CHECK-NEXT: ret - %v = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f64(<4 x double> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.fptosi.v4i64.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -434,6 +434,6 @@ define <32 x i64> @vfptosi_v32i64_v32f64_unmasked(<32 x double> %va, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.x.f.v v16, v16 ; CHECK-NEXT: ret - %v = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) + %v = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll index f1a78b25e1862..c5bfd41ec9510 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll @@ -42,7 +42,7 @@ define <4 x i1> @vfptoui_v4i1_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vfcvt.rtz.xu.f.v v8, v9 ; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 ; ZVFHMIN-NEXT: ret - %v = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i1> %v } @@ -66,7 +66,7 @@ define <4 x i1> @vfptoui_v4i1_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret - %v = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f32(<4 x float> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) 
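; The splat constant syntax is not limited to i1 masks: later hunks in this
; patch shorten scalar-splat test inputs for every element type the same way,
; for example (values taken from the int-setcc/int-splat tests that follow):
;
;   %d = icmp eq <16 x i8> %a, splat (i8 0)
;   store <8 x i16> splat (i16 0), ptr %x
;   %d = add <16 x i8> %a, splat (i8 -1)
;
; each of which previously needed its own insertelement/shufflevector pair.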
ret <4 x i1> %v } @@ -91,6 +91,6 @@ define <4 x i1> @vfptoui_v4i1_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret - %v = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f64(<4 x double> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i1> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll index 0a19dcb550b58..d44efa2f6133f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll @@ -67,7 +67,7 @@ define <4 x i8> @vfptoui_v4i8_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; ZVFHMIN-NEXT: vnsrl.wi v8, v8, 0 ; ZVFHMIN-NEXT: ret - %v = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -105,7 +105,7 @@ define <4 x i16> @vfptoui_v4i16_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %v = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -145,7 +145,7 @@ define <4 x i32> @vfptoui_v4i32_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.rtz.xu.f.v v8, v9 ; ZVFHMIN-NEXT: ret - %v = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -187,7 +187,7 @@ define <4 x i64> @vfptoui_v4i64_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.rtz.xu.f.v v8, v10 ; ZVFHMIN-NEXT: ret - %v = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -213,7 +213,7 @@ define <4 x i8> @vfptoui_v4i8_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v9, 0 ; CHECK-NEXT: ret - %v = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f32(<4 x float> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -237,7 +237,7 @@ define <4 x i16> @vfptoui_v4i16_v4f32_unmasked(<4 x float> %va, i32 
zeroext %evl ; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret - %v = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f32(<4 x float> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -259,7 +259,7 @@ define <4 x i32> @vfptoui_v4i32_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; CHECK-NEXT: ret - %v = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -283,7 +283,7 @@ define <4 x i64> @vfptoui_v4i64_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl ; CHECK-NEXT: vfwcvt.rtz.xu.f.v v10, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f32(<4 x float> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -313,7 +313,7 @@ define <4 x i8> @vfptoui_v4i8_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v8, 0 ; CHECK-NEXT: ret - %v = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f64(<4 x double> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i8> @llvm.vp.fptoui.v4i8.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -339,7 +339,7 @@ define <4 x i16> @vfptoui_v4i16_v4f64_unmasked(<4 x double> %va, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-NEXT: ret - %v = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f64(<4 x double> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i16> @llvm.vp.fptoui.v4i16.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -363,7 +363,7 @@ define <4 x i32> @vfptoui_v4i32_v4f64_unmasked(<4 x double> %va, i32 zeroext %ev ; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f64(<4 x double> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i32> @llvm.vp.fptoui.v4i32.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -385,7 +385,7 @@ define <4 x i64> @vfptoui_v4i64_v4f64_unmasked(<4 x double> %va, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8 ; CHECK-NEXT: ret - %v = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f64(<4 x double> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.fptoui.v4i64.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x 
i64> %v } @@ -434,6 +434,6 @@ define <32 x i64> @vfptoui_v32i64_v32f64_unmasked(<32 x double> %va, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.rtz.xu.f.v v16, v16 ; CHECK-NEXT: ret - %v = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) + %v = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll index 0d5b59b087b40..de11f9e8a9fa2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll @@ -25,7 +25,7 @@ define <2 x half> @vfptrunc_v2f16_v2f32_unmasked(<2 x float> %a, i32 zeroext %vl ; CHECK-NEXT: vfncvt.f.f.w v9, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret - %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float> %a, <2 x i1> splat (i1 true), i32 %vl) ret <2 x half> %v } @@ -51,7 +51,7 @@ define <2 x half> @vfptrunc_v2f16_v2f64_unmasked(<2 x double> %a, i32 zeroext %v ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v8, v9 ; CHECK-NEXT: ret - %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + %v = call <2 x half> @llvm.vp.fptrunc.v2f16.v2f64(<2 x double> %a, <2 x i1> splat (i1 true), i32 %vl) ret <2 x half> %v } @@ -75,7 +75,7 @@ define <2 x float> @vfptrunc_v2f32_v2f64_unmasked(<2 x double> %a, i32 zeroext % ; CHECK-NEXT: vfncvt.f.f.w v9, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret - %v = call <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl) + %v = call <2 x float> @llvm.vp.fptrunc.v2f64.v2f32(<2 x double> %a, <2 x i1> splat (i1 true), i32 %vl) ret <2 x float> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll index 4da778a354726..0b08d9401402b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll @@ -523,9 +523,7 @@ define void @seteq_vi_v16i8(ptr %x, ptr %z) { ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <16 x i8>, ptr %x - %b = insertelement <16 x i8> poison, i8 0, i32 0 - %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer - %d = icmp eq <16 x i8> %a, %c + %d = icmp eq <16 x i8> %a, splat (i8 0) store <16 x i1> %d, ptr %z ret void } @@ -540,9 +538,7 @@ define void @setne_vi_v32i8(ptr %x, ptr %z) { ; CHECK-NEXT: vsm.v v10, (a1) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x - %b = insertelement <32 x i8> poison, i8 0, i32 0 - %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer - %d = icmp ne <32 x i8> %a, %c + %d = icmp ne <32 x i8> %a, splat (i8 0) store <32 x i1> %d, ptr %z ret void } @@ -557,9 +553,7 @@ define void @setgt_vi_v64i8(ptr %x, ptr %z) { ; CHECK-NEXT: vsm.v v12, (a1) ; 
CHECK-NEXT: ret %a = load <64 x i8>, ptr %x - %b = insertelement <64 x i8> poison, i8 0, i32 0 - %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer - %d = icmp sgt <64 x i8> %a, %c + %d = icmp sgt <64 x i8> %a, splat (i8 0) store <64 x i1> %d, ptr %z ret void } @@ -574,9 +568,7 @@ define void @setgt_vi_v64i8_nonzero(ptr %x, ptr %z) { ; CHECK-NEXT: vsm.v v12, (a1) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x - %b = insertelement <64 x i8> poison, i8 5, i32 0 - %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer - %d = icmp sgt <64 x i8> %a, %c + %d = icmp sgt <64 x i8> %a, splat (i8 5) store <64 x i1> %d, ptr %z ret void } @@ -591,9 +583,7 @@ define void @setlt_vi_v128i8(ptr %x, ptr %z) { ; CHECK-NEXT: vsm.v v16, (a1) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x - %b = insertelement <128 x i8> poison, i8 0, i32 0 - %c = shufflevector <128 x i8> %b, <128 x i8> poison, <128 x i32> zeroinitializer - %d = icmp slt <128 x i8> %a, %c + %d = icmp slt <128 x i8> %a, splat (i8 0) store <128 x i1> %d, ptr %z ret void } @@ -607,9 +597,7 @@ define void @setge_vi_v8i8(ptr %x, ptr %z) { ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <8 x i8>, ptr %x - %b = insertelement <8 x i8> poison, i8 0, i32 0 - %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer - %d = icmp sge <8 x i8> %a, %c + %d = icmp sge <8 x i8> %a, splat (i8 0) store <8 x i1> %d, ptr %z ret void } @@ -623,9 +611,7 @@ define void @setle_vi_v16i8(ptr %x, ptr %z) { ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <16 x i8>, ptr %x - %b = insertelement <16 x i8> poison, i8 0, i32 0 - %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer - %d = icmp sle <16 x i8> %a, %c + %d = icmp sle <16 x i8> %a, splat (i8 0) store <16 x i1> %d, ptr %z ret void } @@ -640,9 +626,7 @@ define void @setugt_vi_v32i8(ptr %x, ptr %z) { ; CHECK-NEXT: vsm.v v10, (a1) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x - %b = insertelement <32 x i8> poison, i8 5, i32 0 - %c = shufflevector <32 x i8> %b, <32 x i8> poison, <32 x i32> zeroinitializer - %d = icmp ugt <32 x i8> %a, %c + %d = icmp ugt <32 x i8> %a, splat (i8 5) store <32 x i1> %d, ptr %z ret void } @@ -657,9 +641,7 @@ define void @setult_vi_v64i8(ptr %x, ptr %z) { ; CHECK-NEXT: vsm.v v12, (a1) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x - %b = insertelement <64 x i8> poison, i8 5, i32 0 - %c = shufflevector <64 x i8> %b, <64 x i8> poison, <64 x i32> zeroinitializer - %d = icmp ult <64 x i8> %a, %c + %d = icmp ult <64 x i8> %a, splat (i8 5) store <64 x i1> %d, ptr %z ret void } @@ -674,9 +656,7 @@ define void @setuge_vi_v128i8(ptr %x, ptr %z) { ; CHECK-NEXT: vsm.v v16, (a1) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x - %b = insertelement <128 x i8> poison, i8 5, i32 0 - %c = shufflevector <128 x i8> %b, <128 x i8> poison, <128 x i32> zeroinitializer - %d = icmp uge <128 x i8> %a, %c + %d = icmp uge <128 x i8> %a, splat (i8 5) store <128 x i1> %d, ptr %z ret void } @@ -690,9 +670,7 @@ define void @setule_vi_v8i8(ptr %x, ptr %z) { ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <8 x i8>, ptr %x - %b = insertelement <8 x i8> poison, i8 5, i32 0 - %c = shufflevector <8 x i8> %b, <8 x i8> poison, <8 x i32> zeroinitializer - %d = icmp ule <8 x i8> %a, %c + %d = icmp ule <8 x i8> %a, splat (i8 5) store <8 x i1> %d, ptr %z ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll index 60202cfba760d..649aa067b01af 
100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
@@ -140,9 +140,7 @@ define void @splat_zero_v16i8(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <16 x i8> poison, i8 0, i32 0
- %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer
- store <16 x i8> %b, ptr %x
+ store <16 x i8> splat (i8 0), ptr %x
 ret void
 }

@@ -153,9 +151,7 @@ define void @splat_zero_v8i16(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <8 x i16> poison, i16 0, i32 0
- %b = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> zeroinitializer
- store <8 x i16> %b, ptr %x
+ store <8 x i16> splat (i16 0), ptr %x
 ret void
 }

@@ -166,9 +162,7 @@ define void @splat_zero_v4i32(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <4 x i32> poison, i32 0, i32 0
- %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer
- store <4 x i32> %b, ptr %x
+ store <4 x i32> splat (i32 0), ptr %x
 ret void
 }

@@ -179,9 +173,7 @@ define void @splat_zero_v2i64(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <2 x i64> poison, i64 0, i32 0
- %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer
- store <2 x i64> %b, ptr %x
+ store <2 x i64> splat (i64 0), ptr %x
 ret void
 }

@@ -193,9 +185,7 @@ define void @splat_zero_v32i8(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <32 x i8> poison, i8 0, i32 0
- %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer
- store <32 x i8> %b, ptr %x
+ store <32 x i8> splat (i8 0), ptr %x
 ret void
 }

@@ -206,9 +196,7 @@ define void @splat_zero_v16i16(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <16 x i16> poison, i16 0, i32 0
- %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
- store <16 x i16> %b, ptr %x
+ store <16 x i16> splat (i16 0), ptr %x
 ret void
 }

@@ -219,9 +207,7 @@ define void @splat_zero_v8i32(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <8 x i32> poison, i32 0, i32 0
- %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
- store <8 x i32> %b, ptr %x
+ store <8 x i32> splat (i32 0), ptr %x
 ret void
 }

@@ -232,9 +218,7 @@ define void @splat_zero_v4i64(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, 0
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <4 x i64> poison, i64 0, i32 0
- %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer
- store <4 x i64> %b, ptr %x
+ store <4 x i64> splat (i64 0), ptr %x
 ret void
 }

@@ -311,9 +295,7 @@ define void @splat_allones_v16i8(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, -1
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <16 x i8> poison, i8 -1, i32 0
- %b = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> zeroinitializer
- store <16 x i8> %b, ptr %x
+ store <16 x i8> splat (i8 -1), ptr %x
 ret void
 }

@@ -324,9 +306,7 @@ define void @splat_allones_v8i16(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, -1
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <8 x i16> poison, i16 -1, i32 0
- %b = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> zeroinitializer
- store <8 x i16> %b, ptr %x
+ store <8 x i16> splat (i16 -1), ptr %x
 ret void
 }

@@ -337,9 +317,7 @@ define void @splat_allones_v4i32(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, -1
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <4 x i32> poison, i32 -1, i32 0
- %b = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> zeroinitializer
- store <4 x i32> %b, ptr %x
+ store <4 x i32> splat (i32 -1), ptr %x
 ret void
 }

@@ -350,9 +328,7 @@ define void @splat_allones_v2i64(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, -1
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <2 x i64> poison, i64 -1, i32 0
- %b = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> zeroinitializer
- store <2 x i64> %b, ptr %x
+ store <2 x i64> splat (i64 -1), ptr %x
 ret void
 }

@@ -364,9 +340,7 @@ define void @splat_allones_v32i8(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, -1
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <32 x i8> poison, i8 -1, i32 0
- %b = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> zeroinitializer
- store <32 x i8> %b, ptr %x
+ store <32 x i8> splat (i8 -1), ptr %x
 ret void
 }

@@ -377,9 +351,7 @@ define void @splat_allones_v16i16(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, -1
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <16 x i16> poison, i16 -1, i32 0
- %b = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
- store <16 x i16> %b, ptr %x
+ store <16 x i16> splat (i16 -1), ptr %x
 ret void
 }

@@ -390,9 +362,7 @@ define void @splat_allones_v8i32(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, -1
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <8 x i32> poison, i32 -1, i32 0
- %b = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
- store <8 x i32> %b, ptr %x
+ store <8 x i32> splat (i32 -1), ptr %x
 ret void
 }

@@ -403,9 +373,7 @@ define void @splat_allones_v4i64(ptr %x) {
 ; CHECK-NEXT: vmv.v.i v8, -1
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <4 x i64> poison, i64 -1, i32 0
- %b = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> zeroinitializer
- store <4 x i64> %b, ptr %x
+ store <4 x i64> splat (i64 -1), ptr %x
 ret void
 }

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 175b110538ffb..03e99baf91c08 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -3935,9 +3935,7 @@ define void @add_vi_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 -1, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = add <16 x i8> %a, %c
+ %d = add <16 x i8> %a, splat (i8 -1)
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -3951,9 +3949,7 @@ define void @add_vi_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 -1, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = add <8 x i16> %a, %c
+ %d = add <8 x i16> %a, splat (i16 -1)
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -3967,9 +3963,7 @@ define void @add_vi_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 -1, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = add <4 x i32> %a, %c
+ %d = add <4 x i32> %a, splat (i32 -1)
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -3983,9 +3977,7 @@ define void @add_vi_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 -1, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = add <2 x i64> %a, %c
+ %d = add <2 x i64> %a, splat (i64 -1)
 store <2 x i64> %d, ptr %x
 ret void
 }
@@ -3999,9 +3991,7 @@ define void @add_iv_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 1, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = add <16 x i8> %c, %a
+ %d = add <16 x i8> splat (i8 1), %a
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -4015,9 +4005,7 @@ define void @add_iv_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 1, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = add <8 x i16> %c, %a
+ %d = add <8 x i16> splat (i16 1), %a
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -4031,9 +4019,7 @@ define void @add_iv_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 1, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = add <4 x i32> %c, %a
+ %d = add <4 x i32> splat (i32 1), %a
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -4047,9 +4033,7 @@ define void @add_iv_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 1, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = add <2 x i64> %c, %a
+ %d = add <2 x i64> splat (i64 1), %a
 store <2 x i64> %d, ptr %x
 ret void
 }
@@ -4160,9 +4144,7 @@ define void @sub_vi_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 -1, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = sub <16 x i8> %a, %c
+ %d = sub <16 x i8> %a, splat (i8 -1)
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -4177,9 +4159,7 @@ define void @sub_vi_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 -1, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = sub <8 x i16> %a, %c
+ %d = sub <8 x i16> %a, splat (i16 -1)
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -4194,9 +4174,7 @@ define void @sub_vi_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 -1, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = sub <4 x i32> %a, %c
+ %d = sub <4 x i32> %a, splat (i32 -1)
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -4211,9 +4189,7 @@ define void @sub_vi_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 -1, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = sub <2 x i64> %a, %c
+ %d = sub <2 x i64> %a, splat (i64 -1)
 store <2 x i64> %d, ptr %x
 ret void
 }
@@ -4227,9 +4203,7 @@ define void @sub_iv_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 1, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = sub <16 x i8> %c, %a
+ %d = sub <16 x i8> splat (i8 1), %a
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -4243,9 +4217,7 @@ define void @sub_iv_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 1, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = sub <8 x i16> %c, %a
+ %d = sub <8 x i16> splat (i16 1), %a
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -4259,9 +4231,7 @@ define void @sub_iv_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 1, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = sub <4 x i32> %c, %a
+ %d = sub <4 x i32> splat (i32 1), %a
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -4275,9 +4245,7 @@ define void @sub_iv_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 1, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = sub <2 x i64> %c, %a
+ %d = sub <2 x i64> splat (i64 1), %a
 store <2 x i64> %d, ptr %x
 ret void
 }
@@ -4483,9 +4451,7 @@ define void @and_vi_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 -2, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = and <16 x i8> %a, %c
+ %d = and <16 x i8> %a, splat (i8 -2)
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -4499,9 +4465,7 @@ define void @and_vi_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 -2, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = and <8 x i16> %a, %c
+ %d = and <8 x i16> %a, splat (i16 -2)
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -4515,9 +4479,7 @@ define void @and_vi_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 -2, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = and <4 x i32> %a, %c
+ %d = and <4 x i32> %a, splat (i32 -2)
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -4531,9 +4493,7 @@ define void @and_vi_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 -2, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = and <2 x i64> %a, %c
+ %d = and <2 x i64> %a, splat (i64 -2)
 store <2 x i64> %d, ptr %x
 ret void
 }
@@ -4547,9 +4507,7 @@ define void @and_iv_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 1, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = and <16 x i8> %c, %a
+ %d = and <16 x i8> splat (i8 1), %a
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -4563,9 +4521,7 @@ define void @and_iv_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 1, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = and <8 x i16> %c, %a
+ %d = and <8 x i16> splat (i16 1), %a
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -4579,9 +4535,7 @@ define void @and_iv_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 1, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = and <4 x i32> %c, %a
+ %d = and <4 x i32> splat (i32 1), %a
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -4595,9 +4549,7 @@ define void @and_iv_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 1, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = and <2 x i64> %c, %a
+ %d = and <2 x i64> splat (i64 1), %a
 store <2 x i64> %d, ptr %x
 ret void
 }
@@ -4707,9 +4659,7 @@ define void @or_vi_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 -2, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = or <16 x i8> %a, %c
+ %d = or <16 x i8> %a, splat (i8 -2)
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -4723,9 +4673,7 @@ define void @or_vi_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 -2, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = or <8 x i16> %a, %c
+ %d = or <8 x i16> %a, splat (i16 -2)
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -4739,9 +4687,7 @@ define void @or_vi_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 -2, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = or <4 x i32> %a, %c
+ %d = or <4 x i32> %a, splat (i32 -2)
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -4755,9 +4701,7 @@ define void @or_vi_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 -2, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = or <2 x i64> %a, %c
+ %d = or <2 x i64> %a, splat (i64 -2)
 store <2 x i64> %d, ptr %x
 ret void
 }
@@ -4771,9 +4715,7 @@ define void @or_iv_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 1, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = or <16 x i8> %c, %a
+ %d = or <16 x i8> splat (i8 1), %a
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -4787,9 +4729,7 @@ define void @or_iv_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 1, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = or <8 x i16> %c, %a
+ %d = or <8 x i16> splat (i16 1), %a
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -4803,9 +4743,7 @@ define void @or_iv_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 1, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = or <4 x i32> %c, %a
+ %d = or <4 x i32> splat (i32 1), %a
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -4819,9 +4757,7 @@ define void @or_iv_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 1, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = or <2 x i64> %c, %a
+ %d = or <2 x i64> splat (i64 1), %a
 store <2 x i64> %d, ptr %x
 ret void
 }
@@ -4931,9 +4867,7 @@ define void @xor_vi_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 -1, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = xor <16 x i8> %a, %c
+ %d = xor <16 x i8> %a, splat (i8 -1)
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -4947,9 +4881,7 @@ define void @xor_vi_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 -1, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = xor <8 x i16> %a, %c
+ %d = xor <8 x i16> %a, splat (i16 -1)
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -4963,9 +4895,7 @@ define void @xor_vi_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 -1, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = xor <4 x i32> %a, %c
+ %d = xor <4 x i32> %a, splat (i32 -1)
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -4979,9 +4909,7 @@ define void @xor_vi_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 -1, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = xor <2 x i64> %a, %c
+ %d = xor <2 x i64> %a, splat (i64 -1)
 store <2 x i64> %d, ptr %x
 ret void
 }
@@ -4995,9 +4923,7 @@ define void @xor_iv_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 1, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = xor <16 x i8> %c, %a
+ %d = xor <16 x i8> splat (i8 1), %a
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -5011,9 +4937,7 @@ define void @xor_iv_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 1, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = xor <8 x i16> %c, %a
+ %d = xor <8 x i16> splat (i16 1), %a
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -5027,9 +4951,7 @@ define void @xor_iv_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 1, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = xor <4 x i32> %c, %a
+ %d = xor <4 x i32> splat (i32 1), %a
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -5043,9 +4965,7 @@ define void @xor_iv_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 1, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = xor <2 x i64> %c, %a
+ %d = xor <2 x i64> splat (i64 1), %a
 store <2 x i64> %d, ptr %x
 ret void
 }
@@ -5155,9 +5075,7 @@ define void @lshr_vi_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 7, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = lshr <16 x i8> %a, %c
+ %d = lshr <16 x i8> %a, splat (i8 7)
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -5171,9 +5089,7 @@ define void @lshr_vi_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 15, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = lshr <8 x i16> %a, %c
+ %d = lshr <8 x i16> %a, splat (i16 15)
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -5187,9 +5103,7 @@ define void @lshr_vi_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 31, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = lshr <4 x i32> %a, %c
+ %d = lshr <4 x i32> %a, splat (i32 31)
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -5203,9 +5117,7 @@ define void @lshr_vi_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 31, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = lshr <2 x i64> %a, %c
+ %d = lshr <2 x i64> %a, splat (i64 31)
 store <2 x i64> %d, ptr %x
 ret void
 }
@@ -5267,9 +5179,7 @@ define void @ashr_vi_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 7, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = ashr <16 x i8> %a, %c
+ %d = ashr <16 x i8> %a, splat (i8 7)
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -5283,9 +5193,7 @@ define void @ashr_vi_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 15, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = ashr <8 x i16> %a, %c
+ %d = ashr <8 x i16> %a, splat (i16 15)
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -5299,9 +5207,7 @@ define void @ashr_vi_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 31, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = ashr <4 x i32> %a, %c
+ %d = ashr <4 x i32> %a, splat (i32 31)
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -5315,9 +5221,7 @@ define void @ashr_vi_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 31, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = ashr <2 x i64> %a, %c
+ %d = ashr <2 x i64> %a, splat (i64 31)
 store <2 x i64> %d, ptr %x
 ret void
 }
@@ -5379,9 +5283,7 @@ define void @shl_vi_v16i8(ptr %x) {
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <16 x i8>, ptr %x
- %b = insertelement <16 x i8> poison, i8 7, i32 0
- %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
- %d = shl <16 x i8> %a, %c
+ %d = shl <16 x i8> %a, splat (i8 7)
 store <16 x i8> %d, ptr %x
 ret void
 }
@@ -5395,9 +5297,7 @@ define void @shl_vi_v8i16(ptr %x) {
 ; CHECK-NEXT: vse16.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <8 x i16>, ptr %x
- %b = insertelement <8 x i16> poison, i16 15, i32 0
- %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
- %d = shl <8 x i16> %a, %c
+ %d = shl <8 x i16> %a, splat (i16 15)
 store <8 x i16> %d, ptr %x
 ret void
 }
@@ -5411,9 +5311,7 @@ define void @shl_vi_v4i32(ptr %x) {
 ; CHECK-NEXT: vse32.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <4 x i32>, ptr %x
- %b = insertelement <4 x i32> poison, i32 31, i32 0
- %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
- %d = shl <4 x i32> %a, %c
+ %d = shl <4 x i32> %a, splat (i32 31)
 store <4 x i32> %d, ptr %x
 ret void
 }
@@ -5427,9 +5325,7 @@ define void @shl_vi_v2i64(ptr %x) {
 ; CHECK-NEXT: vse64.v v8, (a0)
 ; CHECK-NEXT: ret
 %a = load <2 x i64>, ptr %x
- %b = insertelement <2 x i64> poison, i64 31, i32 0
- %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
- %d = shl <2 x i64> %a, %c
+ %d = shl <2 x i64> %a, splat (i64 31)
 store <2 x i64> %d, ptr %x
 ret void
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
index 08dd1c79f24c9..1c920e42f7d41 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint-vp.ll
@@ -126,7 +126,25 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %evl) {
 }
 declare <8 x iXLen> @llvm.vp.lrint.v8iXLen.v8f32(<8 x float>, <8 x i1>, i32)

-define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %evl) {
+define <16 x iXLen> @lrint_v16f32(<16 x float> %x, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_v16f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v16f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v16f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v16, v8, v0.t
+; RV64-i64-NEXT: vmv8r.v v8, v16
+; RV64-i64-NEXT: ret
 %a = call <16 x iXLen> @llvm.vp.lrint.v16iXLen.v16f32(<16 x float> %x, <16 x i1> %m, i32 %evl)
 ret <16 x iXLen> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
index 224f5066138cd..35baa6808db60 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll
@@ -380,7 +380,246 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)

-define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) {
+define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
+; RV32-LABEL: lrint_v16f32:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -192
+; RV32-NEXT: .cfi_def_cfa_offset 192
+; RV32-NEXT: sw ra, 188(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 184(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 192
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: flw fa5, 60(sp)
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 124(sp)
+; RV32-NEXT: flw fa5, 56(sp)
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 120(sp)
+; RV32-NEXT: flw fa5, 52(sp)
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 116(sp)
+; RV32-NEXT: flw fa5, 48(sp)
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 112(sp)
+; RV32-NEXT: flw fa5, 44(sp)
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 108(sp)
+; RV32-NEXT: flw fa5, 40(sp)
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 104(sp)
+; RV32-NEXT: flw fa5, 36(sp)
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 100(sp)
+; RV32-NEXT: flw fa5, 32(sp)
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 96(sp)
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 64(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 3
+; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 76(sp)
+; RV32-NEXT: vslidedown.vi v10, v8, 2
+; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 72(sp)
+; RV32-NEXT: vslidedown.vi v10, v8, 1
+; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 68(sp)
+; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 7
+; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 92(sp)
+; RV32-NEXT: vslidedown.vi v10, v8, 6
+; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 88(sp)
+; RV32-NEXT: vslidedown.vi v10, v8, 5
+; RV32-NEXT: vfmv.f.s fa5, v10
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 84(sp)
+; RV32-NEXT: vslidedown.vi v8, v8, 4
+; RV32-NEXT: vfmv.f.s fa5, v8
+; RV32-NEXT: fcvt.w.s a0, fa5
+; RV32-NEXT: sw a0, 80(sp)
+; RV32-NEXT: addi a0, sp, 64
+; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: addi sp, s0, -192
+; RV32-NEXT: lw ra, 188(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 184(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 192
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_v16f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: addi sp, sp, -192
+; RV64-i32-NEXT: .cfi_def_cfa_offset 192
+; RV64-i32-NEXT: sd ra, 184(sp) # 8-byte Folded Spill
+; RV64-i32-NEXT: sd s0, 176(sp) # 8-byte Folded Spill
+; RV64-i32-NEXT: .cfi_offset ra, -8
+; RV64-i32-NEXT: .cfi_offset s0, -16
+; RV64-i32-NEXT: addi s0, sp, 192
+; RV64-i32-NEXT: .cfi_def_cfa s0, 0
+; RV64-i32-NEXT: andi sp, sp, -64
+; RV64-i32-NEXT: mv a0, sp
+; RV64-i32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV64-i32-NEXT: vse32.v v8, (a0)
+; RV64-i32-NEXT: flw fa5, 60(sp)
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 124(sp)
+; RV64-i32-NEXT: flw fa5, 56(sp)
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 120(sp)
+; RV64-i32-NEXT: flw fa5, 52(sp)
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 116(sp)
+; RV64-i32-NEXT: flw fa5, 48(sp)
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 112(sp)
+; RV64-i32-NEXT: flw fa5, 44(sp)
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 108(sp)
+; RV64-i32-NEXT: flw fa5, 40(sp)
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 104(sp)
+; RV64-i32-NEXT: flw fa5, 36(sp)
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 100(sp)
+; RV64-i32-NEXT: flw fa5, 32(sp)
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 96(sp)
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 64(sp)
+; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 3
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 76(sp)
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 2
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 72(sp)
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 1
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 68(sp)
+; RV64-i32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 7
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 92(sp)
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 6
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 88(sp)
+; RV64-i32-NEXT: vslidedown.vi v10, v8, 5
+; RV64-i32-NEXT: vfmv.f.s fa5, v10
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 84(sp)
+; RV64-i32-NEXT: vslidedown.vi v8, v8, 4
+; RV64-i32-NEXT: vfmv.f.s fa5, v8
+; RV64-i32-NEXT: fcvt.l.s a0, fa5
+; RV64-i32-NEXT: sw a0, 80(sp)
+; RV64-i32-NEXT: addi a0, sp, 64
+; RV64-i32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV64-i32-NEXT: vle32.v v8, (a0)
+; RV64-i32-NEXT: addi sp, s0, -192
+; RV64-i32-NEXT: ld ra, 184(sp) # 8-byte Folded Reload
+; RV64-i32-NEXT: ld s0, 176(sp) # 8-byte Folded Reload
+; RV64-i32-NEXT: addi sp, sp, 192
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_v16f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: addi sp, sp, -384
+; RV64-i64-NEXT: .cfi_def_cfa_offset 384
+; RV64-i64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
+; RV64-i64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
+; RV64-i64-NEXT: .cfi_offset ra, -8
+; RV64-i64-NEXT: .cfi_offset s0, -16
+; RV64-i64-NEXT: addi s0, sp, 384
+; RV64-i64-NEXT: .cfi_def_cfa s0, 0
+; RV64-i64-NEXT: andi sp, sp, -128
+; RV64-i64-NEXT: addi a0, sp, 64
+; RV64-i64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; RV64-i64-NEXT: vse32.v v8, (a0)
+; RV64-i64-NEXT: flw fa5, 124(sp)
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 248(sp)
+; RV64-i64-NEXT: flw fa5, 120(sp)
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 240(sp)
+; RV64-i64-NEXT: flw fa5, 116(sp)
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 232(sp)
+; RV64-i64-NEXT: flw fa5, 112(sp)
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 224(sp)
+; RV64-i64-NEXT: flw fa5, 108(sp)
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 216(sp)
+; RV64-i64-NEXT: flw fa5, 104(sp)
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 208(sp)
+; RV64-i64-NEXT: flw fa5, 100(sp)
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 200(sp)
+; RV64-i64-NEXT: flw fa5, 96(sp)
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 192(sp)
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 128(sp)
+; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 3
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 152(sp)
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 2
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 144(sp)
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 1
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 136(sp)
+; RV64-i64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 7
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 184(sp)
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 6
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 176(sp)
+; RV64-i64-NEXT: vslidedown.vi v10, v8, 5
+; RV64-i64-NEXT: vfmv.f.s fa5, v10
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 168(sp)
+; RV64-i64-NEXT: vslidedown.vi v8, v8, 4
+; RV64-i64-NEXT: vfmv.f.s fa5, v8
+; RV64-i64-NEXT: fcvt.l.s a0, fa5
+; RV64-i64-NEXT: sd a0, 160(sp)
+; RV64-i64-NEXT: addi a0, sp, 128
+; RV64-i64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; RV64-i64-NEXT: vle64.v v8, (a0)
+; RV64-i64-NEXT: addi sp, s0, -384
+; RV64-i64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
+; RV64-i64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
+; RV64-i64-NEXT: addi sp, sp, 384
+; RV64-i64-NEXT: ret
 %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
 ret <16 x iXLen> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index a09ab3ee0252a..9fbc22221f99b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -518,55 +518,20 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
 ;
 ; RV64ZVE32F-LABEL: mgather_truemask_v4i8:
 ; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmset.m v9
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: beqz zero, .LBB9_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB9_6
-; RV64ZVE32F-NEXT: .LBB9_2: # %else2
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: bnez a2, .LBB9_7
-; RV64ZVE32F-NEXT: .LBB9_3: # %else5
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: bnez a1, .LBB9_8
-; RV64ZVE32F-NEXT: .LBB9_4: # %else8
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB9_5: # %cond.load
-; RV64ZVE32F-NEXT: ld a2, 0(a0)
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB9_2
-; RV64ZVE32F-NEXT: .LBB9_6: # %cond.load1
-; RV64ZVE32F-NEXT: ld a2, 8(a0)
-; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB9_3
-; RV64ZVE32F-NEXT: .LBB9_7: # %cond.load4
+; RV64ZVE32F-NEXT: ld a1, 8(a0)
 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-NEXT: ld a3, 24(a0)
+; RV64ZVE32F-NEXT: ld a0, 0(a0)
+; RV64ZVE32F-NEXT: lbu a1, 0(a1)
 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB9_4
-; RV64ZVE32F-NEXT: .LBB9_8: # %cond.load7
-; RV64ZVE32F-NEXT: ld a0, 24(a0)
-; RV64ZVE32F-NEXT: lbu a0, 0(a0)
+; RV64ZVE32F-NEXT: lbu a3, 0(a3)
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a0
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
+; RV64ZVE32F-NEXT: vlse8.v v8, (a0), zero
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
 ; RV64ZVE32F-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %mtrue, <4 x i8> %passthru)
+ %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1), <4 x i8> %passthru)
 ret <4 x i8> %v
 }

@@ -1242,55 +1207,20 @@ define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
 ;
 ; RV64ZVE32F-LABEL: mgather_truemask_v4i16:
 ; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmset.m v9
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: beqz zero, .LBB20_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB20_6
-; RV64ZVE32F-NEXT: .LBB20_2: # %else2
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: bnez a2, .LBB20_7
-; RV64ZVE32F-NEXT: .LBB20_3: # %else5
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: bnez a1, .LBB20_8
-; RV64ZVE32F-NEXT: .LBB20_4: # %else8
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB20_5: # %cond.load
-; RV64ZVE32F-NEXT: ld a2, 0(a0)
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB20_2
-; RV64ZVE32F-NEXT: .LBB20_6: # %cond.load1
-; RV64ZVE32F-NEXT: ld a2, 8(a0)
-; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB20_3
-; RV64ZVE32F-NEXT: .LBB20_7: # %cond.load4
+; RV64ZVE32F-NEXT: ld a1, 8(a0)
 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-NEXT: ld a3, 24(a0)
+; RV64ZVE32F-NEXT: ld a0, 0(a0)
+; RV64ZVE32F-NEXT: lh a1, 0(a1)
 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB20_4
-; RV64ZVE32F-NEXT: .LBB20_8: # %cond.load7
-; RV64ZVE32F-NEXT: ld a0, 24(a0)
-; RV64ZVE32F-NEXT: lh a0, 0(a0)
+; RV64ZVE32F-NEXT: lh a3, 0(a3)
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a0
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
+; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
 ; RV64ZVE32F-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue, <4 x i16> %passthru)
+ %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x i16> %passthru)
 ret <4 x i16> %v
 }

@@ -2326,55 +2256,20 @@ define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) {
 ;
 ; RV64ZVE32F-LABEL: mgather_truemask_v4i32:
 ; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmset.m v9
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: beqz zero, .LBB32_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB32_6
-; RV64ZVE32F-NEXT: .LBB32_2: # %else2
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: bnez a2, .LBB32_7
-; RV64ZVE32F-NEXT: .LBB32_3: # %else5
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: bnez a1, .LBB32_8
-; RV64ZVE32F-NEXT: .LBB32_4: # %else8
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB32_5: # %cond.load
-; RV64ZVE32F-NEXT: ld a2, 0(a0)
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a2
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB32_2
-; RV64ZVE32F-NEXT: .LBB32_6: # %cond.load1
-; RV64ZVE32F-NEXT: ld a2, 8(a0)
-; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB32_3
-; RV64ZVE32F-NEXT: .LBB32_7: # %cond.load4
+; RV64ZVE32F-NEXT: ld a1, 8(a0)
 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
+; RV64ZVE32F-NEXT: ld a3, 24(a0)
+; RV64ZVE32F-NEXT: ld a0, 0(a0)
+; RV64ZVE32F-NEXT: lw a1, 0(a1)
 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB32_4
-; RV64ZVE32F-NEXT: .LBB32_8: # %cond.load7
-; RV64ZVE32F-NEXT: ld a0, 24(a0)
-; RV64ZVE32F-NEXT: lw a0, 0(a0)
+; RV64ZVE32F-NEXT: lw a3, 0(a3)
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a0
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
+; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3
 ; RV64ZVE32F-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue, <4 x i32> %passthru)
+ %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x i32> %passthru)
 ret <4 x i32> %v
 }

@@ -3839,117 +3734,48 @@ define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
 ;
 ; RV32ZVE32F-LABEL: mgather_truemask_v4i64:
 ; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32ZVE32F-NEXT: vmset.m v9
-; RV32ZVE32F-NEXT: vmv.x.s a6, v9
-; RV32ZVE32F-NEXT: bnez zero, .LBB45_5
-; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
-; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a3, v8
-; RV32ZVE32F-NEXT: lw a2, 4(a3)
-; RV32ZVE32F-NEXT: lw a3, 0(a3)
-; RV32ZVE32F-NEXT: andi a4, a6, 2
-; RV32ZVE32F-NEXT: bnez a4, .LBB45_6
-; RV32ZVE32F-NEXT: .LBB45_2:
-; RV32ZVE32F-NEXT: lw a4, 12(a1)
-; RV32ZVE32F-NEXT: lw a5, 8(a1)
-; RV32ZVE32F-NEXT: andi a7, a6, 4
-; RV32ZVE32F-NEXT: bnez a7, .LBB45_7
-; RV32ZVE32F-NEXT: .LBB45_3:
-; RV32ZVE32F-NEXT: lw a7, 20(a1)
-; RV32ZVE32F-NEXT: lw t0, 16(a1)
-; RV32ZVE32F-NEXT: andi a6, a6, 8
-; RV32ZVE32F-NEXT: bnez a6, .LBB45_8
-; RV32ZVE32F-NEXT: .LBB45_4:
-; RV32ZVE32F-NEXT: lw a6, 28(a1)
-; RV32ZVE32F-NEXT: lw a1, 24(a1)
-; RV32ZVE32F-NEXT: j .LBB45_9
-; RV32ZVE32F-NEXT: .LBB45_5:
-; RV32ZVE32F-NEXT: lw a2, 4(a1)
-; RV32ZVE32F-NEXT: lw a3, 0(a1)
-; RV32ZVE32F-NEXT: andi a4, a6, 2
-; RV32ZVE32F-NEXT: beqz a4, .LBB45_2
-; RV32ZVE32F-NEXT: .LBB45_6: # %cond.load1
 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-NEXT: vmv.x.s a1, v8
+; RV32ZVE32F-NEXT: lw a2, 0(a1)
+; RV32ZVE32F-NEXT: lw a1, 4(a1)
 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV32ZVE32F-NEXT: vmv.x.s a5, v9
-; RV32ZVE32F-NEXT: lw a4, 4(a5)
-; RV32ZVE32F-NEXT: lw a5, 0(a5)
-; RV32ZVE32F-NEXT: andi a7, a6, 4
-; RV32ZVE32F-NEXT: beqz a7, .LBB45_3
-; RV32ZVE32F-NEXT: .LBB45_7: # %cond.load4
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-NEXT: vmv.x.s a3, v9
+; RV32ZVE32F-NEXT: lw a4, 0(a3)
+; RV32ZVE32F-NEXT: lw a3, 4(a3)
 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV32ZVE32F-NEXT: vmv.x.s t0, v9
-; RV32ZVE32F-NEXT: lw a7, 4(t0)
-; RV32ZVE32F-NEXT: lw t0, 0(t0)
-; RV32ZVE32F-NEXT: andi a6, a6, 8
-; RV32ZVE32F-NEXT: beqz a6, .LBB45_4
-; RV32ZVE32F-NEXT: .LBB45_8: # %cond.load7
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-NEXT: vmv.x.s a5, v9
+; RV32ZVE32F-NEXT: lw a6, 0(a5)
+; RV32ZVE32F-NEXT: lw a5, 4(a5)
 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
-; RV32ZVE32F-NEXT: vmv.x.s a1, v8
-; RV32ZVE32F-NEXT: lw a6, 4(a1)
-; RV32ZVE32F-NEXT: lw a1, 0(a1)
-; RV32ZVE32F-NEXT: .LBB45_9: # %else8
-; RV32ZVE32F-NEXT: sw a3, 0(a0)
-; RV32ZVE32F-NEXT: sw a2, 4(a0)
-; RV32ZVE32F-NEXT: sw a5, 8(a0)
-; RV32ZVE32F-NEXT: sw a4, 12(a0)
-; RV32ZVE32F-NEXT: sw t0, 16(a0)
-; RV32ZVE32F-NEXT: sw a7, 20(a0)
-; RV32ZVE32F-NEXT: sw a1, 24(a0)
-; RV32ZVE32F-NEXT: sw a6, 28(a0)
+; RV32ZVE32F-NEXT: vmv.x.s a7, v8
+; RV32ZVE32F-NEXT: lw t0, 4(a7)
+; RV32ZVE32F-NEXT: lw a7, 0(a7)
+; RV32ZVE32F-NEXT: sw a1, 4(a0)
+; RV32ZVE32F-NEXT: sw a2, 0(a0)
+; RV32ZVE32F-NEXT: sw t0, 28(a0)
+; RV32ZVE32F-NEXT: sw a7, 24(a0)
+; RV32ZVE32F-NEXT: sw a5, 20(a0)
+; RV32ZVE32F-NEXT: sw a6, 16(a0)
+; RV32ZVE32F-NEXT: sw a3, 12(a0)
+; RV32ZVE32F-NEXT: sw a4, 8(a0)
 ; RV32ZVE32F-NEXT: ret
 ;
 ; RV64ZVE32F-LABEL: mgather_truemask_v4i64:
 ; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmset.m v8
-; RV64ZVE32F-NEXT: vmv.x.s a5, v8
-; RV64ZVE32F-NEXT: bnez zero, .LBB45_5
-; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
-; RV64ZVE32F-NEXT: ld a3, 0(a1)
-; RV64ZVE32F-NEXT: ld a3, 0(a3)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: bnez a4, .LBB45_6
-; RV64ZVE32F-NEXT: .LBB45_2:
-; RV64ZVE32F-NEXT: ld a4, 8(a2)
-; RV64ZVE32F-NEXT: andi a6, a5, 4
-; RV64ZVE32F-NEXT: bnez a6, .LBB45_7
-; RV64ZVE32F-NEXT: .LBB45_3:
-; RV64ZVE32F-NEXT: ld a6, 16(a2)
-; RV64ZVE32F-NEXT: andi a5, a5, 8
-; RV64ZVE32F-NEXT: bnez a5, .LBB45_8
-; RV64ZVE32F-NEXT: .LBB45_4:
-; RV64ZVE32F-NEXT: ld a1, 24(a2)
-; RV64ZVE32F-NEXT: j .LBB45_9
-; RV64ZVE32F-NEXT: .LBB45_5:
-; RV64ZVE32F-NEXT: ld a3, 0(a2)
-; RV64ZVE32F-NEXT: andi a4, a5, 2
-; RV64ZVE32F-NEXT: beqz a4, .LBB45_2
-; RV64ZVE32F-NEXT: .LBB45_6: # %cond.load1
+; RV64ZVE32F-NEXT: ld a2, 24(a1)
+; RV64ZVE32F-NEXT: ld a3, 16(a1)
 ; RV64ZVE32F-NEXT: ld a4, 8(a1)
+; RV64ZVE32F-NEXT: ld a1, 0(a1)
+; RV64ZVE32F-NEXT: ld a2, 0(a2)
+; RV64ZVE32F-NEXT: ld a3, 0(a3)
 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
-; RV64ZVE32F-NEXT: andi a6, a5, 4
-; RV64ZVE32F-NEXT: beqz a6, .LBB45_3
-; RV64ZVE32F-NEXT: .LBB45_7: # %cond.load4
-; RV64ZVE32F-NEXT: ld a6, 16(a1)
-; RV64ZVE32F-NEXT: ld a6, 0(a6)
-; RV64ZVE32F-NEXT: andi a5, a5, 8
-; RV64ZVE32F-NEXT: beqz a5, .LBB45_4
-; RV64ZVE32F-NEXT: .LBB45_8: # %cond.load7
-; RV64ZVE32F-NEXT: ld a1, 24(a1)
 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
-; RV64ZVE32F-NEXT: .LBB45_9: # %else8
-; RV64ZVE32F-NEXT: sd a3, 0(a0)
+; RV64ZVE32F-NEXT: sd a2, 24(a0)
+; RV64ZVE32F-NEXT: sd a3, 16(a0)
 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
-; RV64ZVE32F-NEXT: sd a6, 16(a0)
-; RV64ZVE32F-NEXT: sd a1, 24(a0)
+; RV64ZVE32F-NEXT: sd a1, 0(a0)
 ; RV64ZVE32F-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue, <4 x i64> %passthru)
+ %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x i64> %passthru)
 ret <4 x i64> %v
 }

@@ -7190,55 +7016,20 @@ define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru)
 ;
 ; RV64ZVE32F-LABEL: mgather_truemask_v4f16:
 ; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmset.m v9
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: beqz zero, .LBB61_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB61_6
-; RV64ZVE32F-NEXT: .LBB61_2: # %else2
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: bnez a2, .LBB61_7
-; RV64ZVE32F-NEXT: .LBB61_3: # %else5
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: bnez a1, .LBB61_8
-; RV64ZVE32F-NEXT: .LBB61_4: # %else8
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB61_5: # %cond.load
-; RV64ZVE32F-NEXT: ld a2, 0(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB61_2
-; RV64ZVE32F-NEXT: .LBB61_6: # %cond.load1
-; RV64ZVE32F-NEXT: ld a2, 8(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB61_3
-; RV64ZVE32F-NEXT: .LBB61_7: # %cond.load4
+; RV64ZVE32F-NEXT: ld a1, 8(a0)
 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB61_4
-; RV64ZVE32F-NEXT: .LBB61_8: # %cond.load7
-; RV64ZVE32F-NEXT: ld a0, 24(a0)
-; RV64ZVE32F-NEXT: flh fa5, 0(a0)
+; RV64ZVE32F-NEXT: ld a3, 24(a0)
+; RV64ZVE32F-NEXT: ld a0, 0(a0)
+; RV64ZVE32F-NEXT: flh fa5, 0(a1)
+; RV64ZVE32F-NEXT: flh fa4, 0(a2)
+; RV64ZVE32F-NEXT: flh fa3, 0(a3)
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
+; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
+; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa5
+; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4
+; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3
 ; RV64ZVE32F-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue, <4 x half> %passthru)
+ %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x half> %passthru)
 ret <4 x half> %v
 }

@@ -8148,55 +7939,20 @@ define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthr
 ;
 ; RV64ZVE32F-LABEL: mgather_truemask_v4f32:
 ; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmset.m v9
-; RV64ZVE32F-NEXT: vmv.x.s a1, v9
-; RV64ZVE32F-NEXT: beqz zero, .LBB71_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB71_6
-; RV64ZVE32F-NEXT: .LBB71_2: # %else2
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: bnez a2, .LBB71_7
-; RV64ZVE32F-NEXT: .LBB71_3: # %else5
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: bnez a1, .LBB71_8
-; RV64ZVE32F-NEXT: .LBB71_4: # %else8
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB71_5: # %cond.load
-; RV64ZVE32F-NEXT: ld a2, 0(a0)
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB71_2
-; RV64ZVE32F-NEXT: .LBB71_6: # %cond.load1
-; RV64ZVE32F-NEXT: ld a2, 8(a0)
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB71_3
-; RV64ZVE32F-NEXT: .LBB71_7: # %cond.load4
+; RV64ZVE32F-NEXT: ld a1, 8(a0)
 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
-; RV64ZVE32F-NEXT: flw fa5, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB71_4
-; RV64ZVE32F-NEXT: .LBB71_8: # %cond.load7
-; RV64ZVE32F-NEXT: ld a0, 24(a0)
-; RV64ZVE32F-NEXT: flw fa5, 0(a0)
+; RV64ZVE32F-NEXT: ld a3, 24(a0)
+; RV64ZVE32F-NEXT: ld a0, 0(a0)
+; RV64ZVE32F-NEXT: flw fa5, 0(a1)
+; RV64ZVE32F-NEXT: flw fa4, 0(a2)
+; RV64ZVE32F-NEXT: flw fa3, 0(a3)
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
+; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
+; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa5
+; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa4
+; RV64ZVE32F-NEXT: vfslide1down.vf v8, v8, fa3
 ; RV64ZVE32F-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue, <4 x float> %passthru)
+ %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1), <4 x float> %passthru)
 ret <4 x float> %v
 }

@@ -9627,95 +9383,40 @@ define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passt
 ;
 ; RV32ZVE32F-LABEL: mgather_truemask_v4f64:
 ; RV32ZVE32F: # %bb.0:
-; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32ZVE32F-NEXT: vmset.m v9
-; RV32ZVE32F-NEXT: vmv.x.s a1, v9
-; RV32ZVE32F-NEXT: beqz zero, .LBB84_6
-; RV32ZVE32F-NEXT: # %bb.1: # %else
-; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: bnez a2, .LBB84_7
-; RV32ZVE32F-NEXT: .LBB84_2: # %else2
-; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: bnez a2, .LBB84_8
-; RV32ZVE32F-NEXT: .LBB84_3: # %else5
-; RV32ZVE32F-NEXT: andi a1, a1, 8
-; RV32ZVE32F-NEXT: beqz a1, .LBB84_5
-; RV32ZVE32F-NEXT: .LBB84_4: # %cond.load7
 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; RV32ZVE32F-NEXT: vmv.x.s a1, v8
+; RV32ZVE32F-NEXT: fld fa5, 0(a1)
+; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
+; RV32ZVE32F-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
+; RV32ZVE32F-NEXT: fld fa4, 0(a1)
 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
-; RV32ZVE32F-NEXT: .LBB84_5: # %else8
-; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
-; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
+; RV32ZVE32F-NEXT: vmv.x.s a1, v9
+; RV32ZVE32F-NEXT: fld fa2, 0(a1)
+; RV32ZVE32F-NEXT: fsd fa5, 0(a0)
 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
+; RV32ZVE32F-NEXT: fsd fa4, 16(a0)
+; RV32ZVE32F-NEXT: fsd fa2, 8(a0)
 ; RV32ZVE32F-NEXT: ret
-; RV32ZVE32F-NEXT: .LBB84_6: # %cond.load
-; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32ZVE32F-NEXT: vmv.x.s a2, v8
-; RV32ZVE32F-NEXT: fld fa0, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 2
-; RV32ZVE32F-NEXT: beqz a2, .LBB84_2
-; RV32ZVE32F-NEXT: .LBB84_7: # %cond.load1
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV32ZVE32F-NEXT: vmv.x.s a2, v9
-; RV32ZVE32F-NEXT: fld fa1, 0(a2)
-; RV32ZVE32F-NEXT: andi a2, a1, 4
-; RV32ZVE32F-NEXT: beqz a2, .LBB84_3
-; RV32ZVE32F-NEXT: .LBB84_8: # %cond.load4
-; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; RV32ZVE32F-NEXT: vmv.x.s a2, v9
-; RV32ZVE32F-NEXT: fld fa2, 0(a2)
-; RV32ZVE32F-NEXT: andi a1, a1, 8
-; RV32ZVE32F-NEXT: bnez a1, .LBB84_4
-; RV32ZVE32F-NEXT: j .LBB84_5
 ;
 ; RV64ZVE32F-LABEL: mgather_truemask_v4f64:
 ; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmset.m v8
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: beqz zero, .LBB84_6
-; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB84_7
-; RV64ZVE32F-NEXT: .LBB84_2: # %else2
-; RV64ZVE32F-NEXT: andi a3, a2, 4
-; RV64ZVE32F-NEXT: bnez a3, .LBB84_8
-; RV64ZVE32F-NEXT: .LBB84_3: # %else5
-; RV64ZVE32F-NEXT: andi a2, a2, 8
-; RV64ZVE32F-NEXT: beqz a2, .LBB84_5
-; RV64ZVE32F-NEXT: .LBB84_4: # %cond.load7
-; RV64ZVE32F-NEXT: ld a1, 24(a1)
-; RV64ZVE32F-NEXT: fld fa3, 0(a1)
-; RV64ZVE32F-NEXT: .LBB84_5: # %else8
-; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
-; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
-; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
-; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB84_6: # %cond.load
-; RV64ZVE32F-NEXT: ld a3, 0(a1)
-; RV64ZVE32F-NEXT: fld fa0, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB84_2
-; RV64ZVE32F-NEXT: .LBB84_7: # %cond.load1
-; RV64ZVE32F-NEXT: ld a3, 8(a1)
-; RV64ZVE32F-NEXT: fld fa1, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 4
-; RV64ZVE32F-NEXT: beqz a3, .LBB84_3
-; RV64ZVE32F-NEXT: .LBB84_8: # %cond.load4
+; RV64ZVE32F-NEXT: ld a2, 24(a1)
 ; RV64ZVE32F-NEXT: ld a3, 16(a1)
-; RV64ZVE32F-NEXT: fld fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a2, a2, 8
-; RV64ZVE32F-NEXT: bnez a2, .LBB84_4
-; RV64ZVE32F-NEXT: j .LBB84_5
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue, <4 x double> %passthru)
+; RV64ZVE32F-NEXT: ld a4, 8(a1)
+; RV64ZVE32F-NEXT: ld a1, 0(a1)
+; RV64ZVE32F-NEXT: fld fa5, 0(a2)
+; RV64ZVE32F-NEXT: fld fa4, 0(a3)
+; RV64ZVE32F-NEXT: fld fa3, 0(a4)
+; RV64ZVE32F-NEXT: fld fa2, 0(a1)
+; RV64ZVE32F-NEXT: fsd fa5, 24(a0)
+; RV64ZVE32F-NEXT: fsd fa4, 16(a0)
+; RV64ZVE32F-NEXT: fsd fa3, 8(a0)
+; RV64ZVE32F-NEXT: fsd fa2, 0(a0)
+; RV64ZVE32F-NEXT: ret
+ %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1), <4 x double> %passthru)
 ret <4 x double> %v
 }

@@ -12850,10 +12551,8 @@ define <4 x i32> @mgather_broadcast_load_unmasked(ptr %base) {
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vlse32.v v8, (a0), zero
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
+ %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
 ret <4 x i32> %v
 }

@@ -12864,11 +12563,9 @@ define <4 x i32> @mgather_broadcast_load_unmasked2(ptr %base) {
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vlse32.v v8, (a0), zero
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
 %ptrhead = insertelement <4 x ptr> poison, ptr %base, i32 0
 %ptrs = shufflevector <4 x ptr> %ptrhead, <4 x ptr> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
+ %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
 ret <4 x i32> %v
 }

@@ -12878,8 +12575,6 @@ define <4 x i32> @mgather_broadcast_load_masked(ptr %base, <4 x i1> %m) {
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vlse32.v v8, (a0), zero, v0.t
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> poison)
 ret <4 x i32> %v

@@ -12891,10 +12586,8 @@ define <4 x i32> @mgather_unit_stride_load(ptr %base) {
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
+ %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
 ret <4 x i32> %v
 }

@@ -12905,10 +12598,8 @@ define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
+ %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
 ret <4 x i32> %v
 }

@@ -12918,10 +12609,8 @@ define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> <i8 0, i8 1, i8 2, i8 3>
- %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
+ %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
 ret <4 x i32> %v
 }

@@ -12931,10 +12620,8 @@ define <4 x i32> @mgather_unit_stride_load_wide_idx(ptr %base) {
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i128> <i128 0, i128 1, i128 2, i128 3>
- %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
+ %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
 ret <4 x i32> %v
 }

@@ -12965,51 +12652,15 @@ define <4 x i32> @mgather_narrow_edge_case(ptr %base) {
 ;
 ; RV64ZVE32F-LABEL: mgather_narrow_edge_case:
 ; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64ZVE32F-NEXT: vmset.m v8
-; RV64ZVE32F-NEXT: vmv.x.s a1, v8
-; RV64ZVE32F-NEXT: # implicit-def: $v8
-; RV64ZVE32F-NEXT: beqz zero, .LBB106_5
-; RV64ZVE32F-NEXT: # %bb.1: # %else
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB106_6
-; RV64ZVE32F-NEXT: .LBB106_2: # %else2
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: bnez a2, .LBB106_7
-; RV64ZVE32F-NEXT: .LBB106_3: # %else5
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: bnez a1, .LBB106_8
-; RV64ZVE32F-NEXT: .LBB106_4: # %else8
-; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB106_5: # %cond.load
-; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
-; RV64ZVE32F-NEXT: andi a2, a1, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB106_2
-; RV64ZVE32F-NEXT: .LBB106_6: # %cond.load1
-; RV64ZVE32F-NEXT: lw a2, -512(a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
-; RV64ZVE32F-NEXT: andi a2, a1, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB106_3
-; RV64ZVE32F-NEXT: .LBB106_7: # %cond.load4
-; RV64ZVE32F-NEXT: lw a2, 0(a0)
-; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a2
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
-; RV64ZVE32F-NEXT: andi a1, a1, 8
-; RV64ZVE32F-NEXT: beqz a1, .LBB106_4
-; RV64ZVE32F-NEXT: .LBB106_8: # %cond.load7
-; RV64ZVE32F-NEXT: lw a0, -512(a0)
+; RV64ZVE32F-NEXT: addi a1, a0, -512
+; RV64ZVE32F-NEXT: lw a0, 0(a0)
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.s.x v9, a0
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
+; RV64ZVE32F-NEXT: vlse32.v v8, (a1), zero
+; RV64ZVE32F-NEXT: vmv.v.i v0, 5
+; RV64ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0
 ; RV64ZVE32F-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> <i8 0, i8 128, i8 0, i8 128>
- %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
+ %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
 ret <4 x i32> %v
 }

@@ -13017,404 +12668,198 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) {
 ; RV32-LABEL: mgather_strided_unaligned:
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vmset.m v8
-; RV32-NEXT: vid.v v10
-; RV32-NEXT: vsll.vi v10, v10, 2
-; RV32-NEXT: vadd.vx v10, v10, a0
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vid.v v8
+; RV32-NEXT: vsll.vi v8, v8, 2
+; RV32-NEXT: vadd.vx v8, v8, a0
 ; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: # implicit-def: $v8
-; RV32-NEXT: beqz zero, .LBB107_9
-; RV32-NEXT: # %bb.1: # %else
-; RV32-NEXT: andi a1, a0, 2
-; RV32-NEXT: bnez a1, .LBB107_10
-; RV32-NEXT: .LBB107_2: # %else2
-; RV32-NEXT: andi a1, a0, 4
-; RV32-NEXT: bnez a1, .LBB107_11
-; RV32-NEXT: .LBB107_3: # %else5
-; RV32-NEXT: andi a1, a0, 8
-; RV32-NEXT: bnez a1, .LBB107_12
-; RV32-NEXT: .LBB107_4: # %else8
-; RV32-NEXT: andi a1, a0, 16
-; RV32-NEXT: bnez a1, .LBB107_13
-; RV32-NEXT: .LBB107_5: # %else11
-; RV32-NEXT: andi a1, a0, 32
RV32-NEXT: bnez a1, .LBB107_14 -; RV32-NEXT: .LBB107_6: # %else14 -; RV32-NEXT: andi a1, a0, 64 -; RV32-NEXT: bnez a1, .LBB107_15 -; RV32-NEXT: .LBB107_7: # %else17 -; RV32-NEXT: andi a0, a0, -128 -; RV32-NEXT: bnez a0, .LBB107_16 -; RV32-NEXT: .LBB107_8: # %else20 -; RV32-NEXT: ret -; RV32-NEXT: .LBB107_9: # %cond.load -; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.x.s a1, v10 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 -; RV32-NEXT: andi a1, a0, 2 -; RV32-NEXT: beqz a1, .LBB107_2 -; RV32-NEXT: .LBB107_10: # %cond.load1 +; RV32-NEXT: lbu a1, 0(a0) +; RV32-NEXT: lbu a0, 1(a0) ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v10, 1 -; RV32-NEXT: vmv.x.s a1, v9 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v9, 1 -; RV32-NEXT: andi a1, a0, 4 -; RV32-NEXT: beqz a1, .LBB107_3 -; RV32-NEXT: .LBB107_11: # %cond.load4 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v10, 2 -; RV32-NEXT: vmv.x.s a1, v9 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v9, 2 -; RV32-NEXT: andi a1, a0, 8 -; RV32-NEXT: beqz a1, .LBB107_4 -; RV32-NEXT: .LBB107_12: # %cond.load7 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v9, v10, 3 -; RV32-NEXT: vmv.x.s a1, v9 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v9, 3 -; RV32-NEXT: andi a1, a0, 16 -; RV32-NEXT: beqz a1, .LBB107_5 -; RV32-NEXT: .LBB107_13: # %cond.load10 -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v12, v10, 4 -; RV32-NEXT: vmv.x.s a1, v12 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v9, 4 -; RV32-NEXT: andi a1, a0, 32 -; RV32-NEXT: beqz a1, .LBB107_6 -; RV32-NEXT: .LBB107_14: # %cond.load13 -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v12, v10, 5 -; RV32-NEXT: vmv.x.s a1, v12 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v9, 5 -; RV32-NEXT: andi a1, a0, 64 -; RV32-NEXT: beqz a1, .LBB107_7 -; RV32-NEXT: .LBB107_15: # %cond.load16 -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v12, v10, 6 -; RV32-NEXT: vmv.x.s a1, v12 -; RV32-NEXT: lbu a2, 1(a1) -; RV32-NEXT: lbu a1, 0(a1) -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a1, a2, a1 -; RV32-NEXT: vmv.s.x v9, a1 -; RV32-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v9, 6 -; RV32-NEXT: andi a0, a0, -128 -; RV32-NEXT: beqz a0, .LBB107_8 -; RV32-NEXT: .LBB107_16: # %cond.load19 -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v10, 7 -; RV32-NEXT: vmv.x.s a0, v10 -; 
RV32-NEXT: lbu a1, 1(a0) -; RV32-NEXT: lbu a0, 0(a0) +; RV32-NEXT: vslidedown.vi v10, v8, 1 +; RV32-NEXT: vmv.x.s a2, v10 +; RV32-NEXT: lbu a3, 1(a2) +; RV32-NEXT: lbu a2, 0(a2) +; RV32-NEXT: slli a0, a0, 8 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: slli a3, a3, 8 +; RV32-NEXT: or a2, a3, a2 +; RV32-NEXT: vslidedown.vi v10, v8, 2 +; RV32-NEXT: vmv.x.s a1, v10 +; RV32-NEXT: lbu a3, 0(a1) +; RV32-NEXT: lbu a1, 1(a1) +; RV32-NEXT: vslidedown.vi v10, v8, 3 +; RV32-NEXT: vmv.x.s a4, v10 +; RV32-NEXT: lbu a5, 1(a4) +; RV32-NEXT: lbu a4, 0(a4) ; RV32-NEXT: slli a1, a1, 8 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: vmv.s.x v9, a0 -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vslideup.vi v8, v9, 7 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: slli a5, a5, 8 +; RV32-NEXT: or a4, a5, a4 +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v10, v8, 4 +; RV32-NEXT: vmv.x.s a3, v10 +; RV32-NEXT: lbu a5, 0(a3) +; RV32-NEXT: lbu a3, 1(a3) +; RV32-NEXT: vslidedown.vi v10, v8, 5 +; RV32-NEXT: vmv.x.s a6, v10 +; RV32-NEXT: lbu a7, 1(a6) +; RV32-NEXT: lbu a6, 0(a6) +; RV32-NEXT: slli a3, a3, 8 +; RV32-NEXT: or a3, a3, a5 +; RV32-NEXT: slli a7, a7, 8 +; RV32-NEXT: or a5, a7, a6 +; RV32-NEXT: vslidedown.vi v10, v8, 6 +; RV32-NEXT: vmv.x.s a6, v10 +; RV32-NEXT: lbu a7, 0(a6) +; RV32-NEXT: lbu a6, 1(a6) +; RV32-NEXT: vslidedown.vi v8, v8, 7 +; RV32-NEXT: vmv.x.s t0, v8 +; RV32-NEXT: lbu t1, 1(t0) +; RV32-NEXT: lbu t0, 0(t0) +; RV32-NEXT: slli a6, a6, 8 +; RV32-NEXT: or a6, a6, a7 +; RV32-NEXT: slli t1, t1, 8 +; RV32-NEXT: or a7, t1, t0 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV32-NEXT: vmv.v.x v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: vslide1down.vx v9, v8, a4 +; RV32-NEXT: vmv.v.x v8, a3 +; RV32-NEXT: vslide1down.vx v8, v8, a5 +; RV32-NEXT: vslide1down.vx v8, v8, a6 +; RV32-NEXT: vmv.v.i v0, 15 +; RV32-NEXT: vslide1down.vx v8, v8, a7 +; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_strided_unaligned: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vmset.m v8 -; RV64V-NEXT: vid.v v12 -; RV64V-NEXT: vsll.vi v12, v12, 2 -; RV64V-NEXT: vadd.vx v12, v12, a0 -; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV64V-NEXT: vmv.x.s a0, v8 -; RV64V-NEXT: # implicit-def: $v8 -; RV64V-NEXT: beqz zero, .LBB107_11 -; RV64V-NEXT: # %bb.1: # %else -; RV64V-NEXT: andi a1, a0, 2 -; RV64V-NEXT: bnez a1, .LBB107_12 -; RV64V-NEXT: .LBB107_2: # %else2 -; RV64V-NEXT: andi a1, a0, 4 -; RV64V-NEXT: bnez a1, .LBB107_13 -; RV64V-NEXT: .LBB107_3: # %else5 -; RV64V-NEXT: andi a1, a0, 8 -; RV64V-NEXT: beqz a1, .LBB107_5 -; RV64V-NEXT: .LBB107_4: # %cond.load7 -; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64V-NEXT: vslidedown.vi v10, v12, 3 -; RV64V-NEXT: vmv.x.s a1, v10 -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vmv.s.x v9, a1 -; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64V-NEXT: vslideup.vi v8, v9, 3 -; RV64V-NEXT: .LBB107_5: # %else8 -; RV64V-NEXT: addi sp, sp, -320 -; RV64V-NEXT: .cfi_def_cfa_offset 320 -; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill -; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill +; RV64V-NEXT: addi sp, sp, -128 +; RV64V-NEXT: .cfi_def_cfa_offset 128 +; RV64V-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RV64V-NEXT: sd s0, 112(sp) # 8-byte Folded Spill ; RV64V-NEXT: .cfi_offset ra, -8 ; RV64V-NEXT: .cfi_offset s0, -16 -; RV64V-NEXT: 
addi s0, sp, 320 +; RV64V-NEXT: addi s0, sp, 128 ; RV64V-NEXT: .cfi_def_cfa s0, 0 ; RV64V-NEXT: andi sp, sp, -64 -; RV64V-NEXT: andi a1, a0, 16 -; RV64V-NEXT: bnez a1, .LBB107_14 -; RV64V-NEXT: # %bb.6: # %else11 -; RV64V-NEXT: andi a1, a0, 32 -; RV64V-NEXT: bnez a1, .LBB107_15 -; RV64V-NEXT: .LBB107_7: # %else14 -; RV64V-NEXT: andi a1, a0, 64 -; RV64V-NEXT: bnez a1, .LBB107_16 -; RV64V-NEXT: .LBB107_8: # %else17 -; RV64V-NEXT: andi a0, a0, -128 -; RV64V-NEXT: beqz a0, .LBB107_10 -; RV64V-NEXT: .LBB107_9: # %cond.load19 -; RV64V-NEXT: mv a0, sp ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vse64.v v12, (a0) -; RV64V-NEXT: ld a0, 56(sp) -; RV64V-NEXT: lbu a1, 1(a0) -; RV64V-NEXT: lbu a0, 0(a0) -; RV64V-NEXT: slli a1, a1, 8 -; RV64V-NEXT: or a0, a1, a0 -; RV64V-NEXT: vmv.s.x v9, a0 -; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64V-NEXT: vslideup.vi v8, v9, 7 -; RV64V-NEXT: .LBB107_10: # %else20 -; RV64V-NEXT: addi sp, s0, -320 -; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload -; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload -; RV64V-NEXT: addi sp, sp, 320 -; RV64V-NEXT: ret -; RV64V-NEXT: .LBB107_11: # %cond.load -; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV64V-NEXT: vmv.x.s a1, v12 -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64V-NEXT: vmv.v.x v8, a1 -; RV64V-NEXT: andi a1, a0, 2 -; RV64V-NEXT: beqz a1, .LBB107_2 -; RV64V-NEXT: .LBB107_12: # %cond.load1 +; RV64V-NEXT: vid.v v8 +; RV64V-NEXT: vsll.vi v8, v8, 2 +; RV64V-NEXT: vadd.vx v8, v8, a0 +; RV64V-NEXT: vmv.x.s a0, v8 +; RV64V-NEXT: lbu a1, 0(a0) +; RV64V-NEXT: lbu a0, 1(a0) ; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64V-NEXT: vslidedown.vi v9, v12, 1 -; RV64V-NEXT: vmv.x.s a1, v9 -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vmv.s.x v9, a1 -; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64V-NEXT: vslideup.vi v8, v9, 1 -; RV64V-NEXT: andi a1, a0, 4 -; RV64V-NEXT: beqz a1, .LBB107_3 -; RV64V-NEXT: .LBB107_13: # %cond.load4 +; RV64V-NEXT: vslidedown.vi v12, v8, 1 +; RV64V-NEXT: vmv.x.s a2, v12 +; RV64V-NEXT: lbu a3, 1(a2) +; RV64V-NEXT: lbu a2, 0(a2) +; RV64V-NEXT: slli a0, a0, 8 +; RV64V-NEXT: or a0, a0, a1 +; RV64V-NEXT: slli a1, a3, 8 +; RV64V-NEXT: or a1, a1, a2 ; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64V-NEXT: vslidedown.vi v10, v12, 2 -; RV64V-NEXT: vmv.x.s a1, v10 -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vmv.s.x v9, a1 -; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64V-NEXT: vslideup.vi v8, v9, 2 -; RV64V-NEXT: andi a1, a0, 8 -; RV64V-NEXT: bnez a1, .LBB107_4 -; RV64V-NEXT: j .LBB107_5 -; RV64V-NEXT: .LBB107_14: # %cond.load10 -; RV64V-NEXT: addi a1, sp, 192 -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vse64.v v12, (a1) -; RV64V-NEXT: ld a1, 224(sp) -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vmv.s.x v9, a1 -; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64V-NEXT: vslideup.vi v8, v9, 4 -; RV64V-NEXT: andi a1, a0, 32 -; RV64V-NEXT: beqz a1, .LBB107_7 -; RV64V-NEXT: .LBB107_15: # %cond.load13 -; RV64V-NEXT: addi a1, sp, 128 -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vse64.v v12, (a1) -; RV64V-NEXT: ld a1, 168(sp) -; 
RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) -; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vmv.s.x v9, a1 -; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64V-NEXT: vslideup.vi v8, v9, 5 -; RV64V-NEXT: andi a1, a0, 64 -; RV64V-NEXT: beqz a1, .LBB107_8 -; RV64V-NEXT: .LBB107_16: # %cond.load16 -; RV64V-NEXT: addi a1, sp, 64 -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vse64.v v12, (a1) -; RV64V-NEXT: ld a1, 112(sp) -; RV64V-NEXT: lbu a2, 1(a1) -; RV64V-NEXT: lbu a1, 0(a1) +; RV64V-NEXT: vslidedown.vi v12, v8, 2 +; RV64V-NEXT: vmv.x.s a2, v12 +; RV64V-NEXT: lbu a3, 0(a2) +; RV64V-NEXT: lbu a2, 1(a2) +; RV64V-NEXT: vslidedown.vi v12, v8, 3 +; RV64V-NEXT: vmv.x.s a4, v12 +; RV64V-NEXT: lbu a5, 0(a4) +; RV64V-NEXT: lbu a4, 1(a4) +; RV64V-NEXT: mv a6, sp +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64V-NEXT: vse64.v v8, (a6) +; RV64V-NEXT: ld a6, 32(sp) ; RV64V-NEXT: slli a2, a2, 8 -; RV64V-NEXT: or a1, a2, a1 -; RV64V-NEXT: vmv.s.x v9, a1 -; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64V-NEXT: vslideup.vi v8, v9, 6 -; RV64V-NEXT: andi a0, a0, -128 -; RV64V-NEXT: bnez a0, .LBB107_9 -; RV64V-NEXT: j .LBB107_10 +; RV64V-NEXT: or a2, a2, a3 +; RV64V-NEXT: slli a4, a4, 8 +; RV64V-NEXT: lbu a3, 1(a6) +; RV64V-NEXT: ld a7, 40(sp) +; RV64V-NEXT: lbu a6, 0(a6) +; RV64V-NEXT: or a4, a4, a5 +; RV64V-NEXT: slli a3, a3, 8 +; RV64V-NEXT: lbu a5, 1(a7) +; RV64V-NEXT: or a3, a3, a6 +; RV64V-NEXT: lbu a6, 0(a7) +; RV64V-NEXT: ld a7, 48(sp) +; RV64V-NEXT: slli a5, a5, 8 +; RV64V-NEXT: ld t0, 56(sp) +; RV64V-NEXT: or a5, a5, a6 +; RV64V-NEXT: lbu a6, 1(a7) +; RV64V-NEXT: lbu a7, 0(a7) +; RV64V-NEXT: lbu t1, 1(t0) +; RV64V-NEXT: lbu t0, 0(t0) +; RV64V-NEXT: slli a6, a6, 8 +; RV64V-NEXT: or a6, a6, a7 +; RV64V-NEXT: slli t1, t1, 8 +; RV64V-NEXT: or a7, t1, t0 +; RV64V-NEXT: vmv.v.x v8, a0 +; RV64V-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-NEXT: vslide1down.vx v8, v8, a2 +; RV64V-NEXT: vslide1down.vx v9, v8, a4 +; RV64V-NEXT: vmv.v.x v8, a3 +; RV64V-NEXT: vslide1down.vx v8, v8, a5 +; RV64V-NEXT: vslide1down.vx v8, v8, a6 +; RV64V-NEXT: vmv.v.i v0, 15 +; RV64V-NEXT: vslide1down.vx v8, v8, a7 +; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t +; RV64V-NEXT: addi sp, s0, -128 +; RV64V-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RV64V-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; RV64V-NEXT: addi sp, sp, 128 +; RV64V-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_strided_unaligned: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB107_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB107_10 -; RV64ZVE32F-NEXT: .LBB107_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB107_11 -; RV64ZVE32F-NEXT: .LBB107_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB107_12 -; RV64ZVE32F-NEXT: .LBB107_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB107_13 -; RV64ZVE32F-NEXT: .LBB107_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB107_14 -; RV64ZVE32F-NEXT: .LBB107_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB107_15 -; RV64ZVE32F-NEXT: .LBB107_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB107_16 -; RV64ZVE32F-NEXT: .LBB107_8: # %else20 -; 
RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB107_9: # %cond.load -; RV64ZVE32F-NEXT: lbu a2, 1(a0) -; RV64ZVE32F-NEXT: lbu a3, 0(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.v.x v8, a2 -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB107_2 -; RV64ZVE32F-NEXT: .LBB107_10: # %cond.load1 -; RV64ZVE32F-NEXT: lbu a2, 5(a0) -; RV64ZVE32F-NEXT: lbu a3, 4(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB107_3 -; RV64ZVE32F-NEXT: .LBB107_11: # %cond.load4 +; RV64ZVE32F-NEXT: lbu a1, 1(a0) +; RV64ZVE32F-NEXT: lbu a2, 0(a0) +; RV64ZVE32F-NEXT: lbu a3, 5(a0) +; RV64ZVE32F-NEXT: lbu a4, 4(a0) +; RV64ZVE32F-NEXT: slli a1, a1, 8 +; RV64ZVE32F-NEXT: or a1, a1, a2 +; RV64ZVE32F-NEXT: slli a3, a3, 8 +; RV64ZVE32F-NEXT: or a3, a3, a4 ; RV64ZVE32F-NEXT: lbu a2, 9(a0) -; RV64ZVE32F-NEXT: lbu a3, 8(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB107_4 -; RV64ZVE32F-NEXT: .LBB107_12: # %cond.load7 -; RV64ZVE32F-NEXT: lbu a2, 13(a0) -; RV64ZVE32F-NEXT: lbu a3, 12(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB107_5 -; RV64ZVE32F-NEXT: .LBB107_13: # %cond.load10 -; RV64ZVE32F-NEXT: lbu a2, 17(a0) -; RV64ZVE32F-NEXT: lbu a3, 16(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB107_6 -; RV64ZVE32F-NEXT: .LBB107_14: # %cond.load13 -; RV64ZVE32F-NEXT: lbu a2, 21(a0) -; RV64ZVE32F-NEXT: lbu a3, 20(a0) -; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB107_7 -; RV64ZVE32F-NEXT: .LBB107_15: # %cond.load16 -; RV64ZVE32F-NEXT: lbu a2, 25(a0) -; RV64ZVE32F-NEXT: lbu a3, 24(a0) +; RV64ZVE32F-NEXT: lbu a4, 8(a0) +; RV64ZVE32F-NEXT: lbu a5, 13(a0) +; RV64ZVE32F-NEXT: lbu a6, 12(a0) ; RV64ZVE32F-NEXT: slli a2, a2, 8 -; RV64ZVE32F-NEXT: or a2, a2, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB107_8 -; RV64ZVE32F-NEXT: .LBB107_16: # %cond.load19 -; RV64ZVE32F-NEXT: lbu a1, 29(a0) +; RV64ZVE32F-NEXT: or a2, a2, a4 +; RV64ZVE32F-NEXT: slli a5, a5, 8 +; RV64ZVE32F-NEXT: or a4, a5, a6 +; RV64ZVE32F-NEXT: lbu a5, 17(a0) +; RV64ZVE32F-NEXT: lbu a6, 16(a0) +; RV64ZVE32F-NEXT: lbu a7, 21(a0) +; RV64ZVE32F-NEXT: lbu t0, 20(a0) +; RV64ZVE32F-NEXT: slli a5, a5, 8 +; RV64ZVE32F-NEXT: or a5, a5, a6 +; 
RV64ZVE32F-NEXT: slli a7, a7, 8 +; RV64ZVE32F-NEXT: or a6, a7, t0 +; RV64ZVE32F-NEXT: lbu a7, 25(a0) +; RV64ZVE32F-NEXT: lbu t0, 24(a0) +; RV64ZVE32F-NEXT: lbu t1, 29(a0) ; RV64ZVE32F-NEXT: lbu a0, 28(a0) -; RV64ZVE32F-NEXT: slli a1, a1, 8 -; RV64ZVE32F-NEXT: or a0, a1, a0 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: slli a7, a7, 8 +; RV64ZVE32F-NEXT: or a7, a7, t0 +; RV64ZVE32F-NEXT: slli t1, t1, 8 +; RV64ZVE32F-NEXT: or a0, t1, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vmv.v.x v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v8, a4 +; RV64ZVE32F-NEXT: vmv.v.x v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -13436,91 +12881,27 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB108_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB108_10 -; RV64ZVE32F-NEXT: .LBB108_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB108_11 -; RV64ZVE32F-NEXT: .LBB108_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB108_12 -; RV64ZVE32F-NEXT: .LBB108_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB108_13 -; RV64ZVE32F-NEXT: .LBB108_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB108_14 -; RV64ZVE32F-NEXT: .LBB108_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB108_15 -; RV64ZVE32F-NEXT: .LBB108_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB108_16 -; RV64ZVE32F-NEXT: .LBB108_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB108_9: # %cond.load -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB108_2 -; RV64ZVE32F-NEXT: .LBB108_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB108_3 -; RV64ZVE32F-NEXT: .LBB108_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 2(a0) ; RV64ZVE32F-NEXT: lh a2, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB108_4 -; 
RV64ZVE32F-NEXT: .LBB108_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 10(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB108_5 -; RV64ZVE32F-NEXT: .LBB108_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 16(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB108_6 -; RV64ZVE32F-NEXT: .LBB108_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB108_7 -; RV64ZVE32F-NEXT: .LBB108_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 24(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB108_8 -; RV64ZVE32F-NEXT: .LBB108_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 26(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 10(a0) +; RV64ZVE32F-NEXT: lh a4, 18(a0) +; RV64ZVE32F-NEXT: lh a5, 24(a0) +; RV64ZVE32F-NEXT: lh a6, 26(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 16 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -13544,92 +12925,28 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_strided_2xSEW_with_offset: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB109_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB109_10 -; RV64ZVE32F-NEXT: .LBB109_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB109_11 -; RV64ZVE32F-NEXT: .LBB109_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB109_12 -; RV64ZVE32F-NEXT: .LBB109_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB109_13 -; RV64ZVE32F-NEXT: .LBB109_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB109_14 -; RV64ZVE32F-NEXT: .LBB109_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: 
bnez a2, .LBB109_15 -; RV64ZVE32F-NEXT: .LBB109_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB109_16 -; RV64ZVE32F-NEXT: .LBB109_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB109_9: # %cond.load -; RV64ZVE32F-NEXT: addi a2, a0, 4 -; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB109_2 -; RV64ZVE32F-NEXT: .LBB109_10: # %cond.load1 +; RV64ZVE32F-NEXT: addi a1, a0, 4 ; RV64ZVE32F-NEXT: lh a2, 6(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB109_3 -; RV64ZVE32F-NEXT: .LBB109_11: # %cond.load4 -; RV64ZVE32F-NEXT: lh a2, 12(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB109_4 -; RV64ZVE32F-NEXT: .LBB109_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 14(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB109_5 -; RV64ZVE32F-NEXT: .LBB109_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB109_6 -; RV64ZVE32F-NEXT: .LBB109_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 22(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB109_7 -; RV64ZVE32F-NEXT: .LBB109_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 28(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB109_8 -; RV64ZVE32F-NEXT: .LBB109_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 30(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 12(a0) +; RV64ZVE32F-NEXT: lh a4, 14(a0) +; RV64ZVE32F-NEXT: lh a5, 22(a0) +; RV64ZVE32F-NEXT: lh a6, 28(a0) +; RV64ZVE32F-NEXT: lh a7, 30(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero +; RV64ZVE32F-NEXT: addi a0, a0, 20 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> 
splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -13653,92 +12970,28 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_reverse_unit_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB110_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB110_10 -; RV64ZVE32F-NEXT: .LBB110_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB110_11 -; RV64ZVE32F-NEXT: .LBB110_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB110_12 -; RV64ZVE32F-NEXT: .LBB110_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB110_13 -; RV64ZVE32F-NEXT: .LBB110_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB110_14 -; RV64ZVE32F-NEXT: .LBB110_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB110_15 -; RV64ZVE32F-NEXT: .LBB110_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB110_16 -; RV64ZVE32F-NEXT: .LBB110_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB110_9: # %cond.load -; RV64ZVE32F-NEXT: addi a2, a0, 28 -; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB110_2 -; RV64ZVE32F-NEXT: .LBB110_10: # %cond.load1 +; RV64ZVE32F-NEXT: addi a1, a0, 28 ; RV64ZVE32F-NEXT: lh a2, 30(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB110_3 -; RV64ZVE32F-NEXT: .LBB110_11: # %cond.load4 -; RV64ZVE32F-NEXT: lh a2, 24(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB110_4 -; RV64ZVE32F-NEXT: .LBB110_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 26(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB110_5 -; RV64ZVE32F-NEXT: .LBB110_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB110_6 -; RV64ZVE32F-NEXT: .LBB110_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 22(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB110_7 -; RV64ZVE32F-NEXT: .LBB110_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 16(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB110_8 -; RV64ZVE32F-NEXT: .LBB110_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 
24(a0) +; RV64ZVE32F-NEXT: lh a4, 26(a0) +; RV64ZVE32F-NEXT: lh a5, 22(a0) +; RV64ZVE32F-NEXT: lh a6, 16(a0) +; RV64ZVE32F-NEXT: lh a7, 18(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero +; RV64ZVE32F-NEXT: addi a0, a0, 20 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -13762,92 +13015,28 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB111_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB111_10 -; RV64ZVE32F-NEXT: .LBB111_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB111_11 -; RV64ZVE32F-NEXT: .LBB111_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB111_12 -; RV64ZVE32F-NEXT: .LBB111_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB111_13 -; RV64ZVE32F-NEXT: .LBB111_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB111_14 -; RV64ZVE32F-NEXT: .LBB111_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB111_15 -; RV64ZVE32F-NEXT: .LBB111_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB111_16 -; RV64ZVE32F-NEXT: .LBB111_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB111_9: # %cond.load -; RV64ZVE32F-NEXT: addi a2, a0, 28 -; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB111_2 -; RV64ZVE32F-NEXT: .LBB111_10: # %cond.load1 +; RV64ZVE32F-NEXT: addi a1, a0, 28 ; RV64ZVE32F-NEXT: lh a2, 30(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB111_3 -; RV64ZVE32F-NEXT: .LBB111_11: # %cond.load4 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB111_4 -; RV64ZVE32F-NEXT: .LBB111_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 22(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB111_5 -; 
RV64ZVE32F-NEXT: .LBB111_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 12(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB111_6 -; RV64ZVE32F-NEXT: .LBB111_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 14(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB111_7 -; RV64ZVE32F-NEXT: .LBB111_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB111_8 -; RV64ZVE32F-NEXT: .LBB111_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 20(a0) +; RV64ZVE32F-NEXT: lh a4, 22(a0) +; RV64ZVE32F-NEXT: lh a5, 14(a0) +; RV64ZVE32F-NEXT: lh a6, 4(a0) +; RV64ZVE32F-NEXT: lh a7, 6(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero +; RV64ZVE32F-NEXT: addi a0, a0, 12 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -13870,91 +13059,27 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB112_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB112_10 -; RV64ZVE32F-NEXT: .LBB112_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB112_11 -; RV64ZVE32F-NEXT: .LBB112_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB112_12 -; RV64ZVE32F-NEXT: .LBB112_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB112_13 -; RV64ZVE32F-NEXT: .LBB112_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB112_14 -; RV64ZVE32F-NEXT: .LBB112_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB112_15 -; RV64ZVE32F-NEXT: .LBB112_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB112_16 -; RV64ZVE32F-NEXT: .LBB112_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB112_9: # %cond.load -; RV64ZVE32F-NEXT: vlse16.v v8, 
(a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB112_2 -; RV64ZVE32F-NEXT: .LBB112_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB112_3 -; RV64ZVE32F-NEXT: .LBB112_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 2(a0) ; RV64ZVE32F-NEXT: lh a2, 16(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB112_4 -; RV64ZVE32F-NEXT: .LBB112_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB112_5 -; RV64ZVE32F-NEXT: .LBB112_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB112_6 -; RV64ZVE32F-NEXT: .LBB112_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 10(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB112_7 -; RV64ZVE32F-NEXT: .LBB112_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB112_8 -; RV64ZVE32F-NEXT: .LBB112_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 18(a0) +; RV64ZVE32F-NEXT: lh a4, 10(a0) +; RV64ZVE32F-NEXT: lh a5, 4(a0) +; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 8 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -13980,91 +13105,27 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: 
# implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB113_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB113_10 -; RV64ZVE32F-NEXT: .LBB113_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB113_11 -; RV64ZVE32F-NEXT: .LBB113_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB113_12 -; RV64ZVE32F-NEXT: .LBB113_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB113_13 -; RV64ZVE32F-NEXT: .LBB113_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB113_14 -; RV64ZVE32F-NEXT: .LBB113_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB113_15 -; RV64ZVE32F-NEXT: .LBB113_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB113_16 -; RV64ZVE32F-NEXT: .LBB113_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB113_9: # %cond.load -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB113_2 -; RV64ZVE32F-NEXT: .LBB113_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB113_3 -; RV64ZVE32F-NEXT: .LBB113_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 2(a0) ; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB113_4 -; RV64ZVE32F-NEXT: .LBB113_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB113_5 -; RV64ZVE32F-NEXT: .LBB113_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB113_6 -; RV64ZVE32F-NEXT: .LBB113_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 10(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB113_7 -; RV64ZVE32F-NEXT: .LBB113_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB113_8 -; RV64ZVE32F-NEXT: .LBB113_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 20(a0) +; RV64ZVE32F-NEXT: lh a4, 10(a0) +; RV64ZVE32F-NEXT: lh a5, 4(a0) +; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 8 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx 
v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -14091,92 +13152,27 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned2: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB114_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB114_10 -; RV64ZVE32F-NEXT: .LBB114_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB114_11 -; RV64ZVE32F-NEXT: .LBB114_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB114_12 -; RV64ZVE32F-NEXT: .LBB114_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB114_13 -; RV64ZVE32F-NEXT: .LBB114_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB114_14 -; RV64ZVE32F-NEXT: .LBB114_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB114_15 -; RV64ZVE32F-NEXT: .LBB114_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB114_16 -; RV64ZVE32F-NEXT: .LBB114_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB114_9: # %cond.load -; RV64ZVE32F-NEXT: addi a2, a0, 2 -; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB114_2 -; RV64ZVE32F-NEXT: .LBB114_10: # %cond.load1 +; RV64ZVE32F-NEXT: addi a1, a0, 2 ; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB114_3 -; RV64ZVE32F-NEXT: .LBB114_11: # %cond.load4 -; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB114_4 -; RV64ZVE32F-NEXT: .LBB114_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB114_5 -; RV64ZVE32F-NEXT: .LBB114_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB114_6 -; RV64ZVE32F-NEXT: .LBB114_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 10(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 
-; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB114_7 -; RV64ZVE32F-NEXT: .LBB114_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB114_8 -; RV64ZVE32F-NEXT: .LBB114_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 18(a0) +; RV64ZVE32F-NEXT: lh a4, 20(a0) +; RV64ZVE32F-NEXT: lh a5, 10(a0) +; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a1), zero +; RV64ZVE32F-NEXT: addi a0, a0, 8 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -14206,91 +13202,27 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_4xSEW: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB115_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB115_10 -; RV64ZVE32F-NEXT: .LBB115_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB115_11 -; RV64ZVE32F-NEXT: .LBB115_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB115_12 -; RV64ZVE32F-NEXT: .LBB115_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB115_13 -; RV64ZVE32F-NEXT: .LBB115_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB115_14 -; RV64ZVE32F-NEXT: .LBB115_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB115_15 -; RV64ZVE32F-NEXT: .LBB115_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB115_16 -; RV64ZVE32F-NEXT: .LBB115_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB115_9: # %cond.load -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB115_2 -; RV64ZVE32F-NEXT: .LBB115_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB115_3 -; RV64ZVE32F-NEXT: .LBB115_11: # %cond.load4 +; 
RV64ZVE32F-NEXT: lh a1, 2(a0) ; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB115_4 -; RV64ZVE32F-NEXT: .LBB115_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB115_5 -; RV64ZVE32F-NEXT: .LBB115_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 16(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB115_6 -; RV64ZVE32F-NEXT: .LBB115_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB115_7 -; RV64ZVE32F-NEXT: .LBB115_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB115_8 -; RV64ZVE32F-NEXT: .LBB115_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 22(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 6(a0) +; RV64ZVE32F-NEXT: lh a4, 18(a0) +; RV64ZVE32F-NEXT: lh a5, 20(a0) +; RV64ZVE32F-NEXT: lh a6, 22(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 16 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -14317,91 +13249,27 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_4xSEW_partial_align: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB116_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB116_10 -; RV64ZVE32F-NEXT: .LBB116_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB116_11 -; RV64ZVE32F-NEXT: .LBB116_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB116_12 -; RV64ZVE32F-NEXT: .LBB116_4: # %else8 -; RV64ZVE32F-NEXT: 
andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB116_13 -; RV64ZVE32F-NEXT: .LBB116_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB116_14 -; RV64ZVE32F-NEXT: .LBB116_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB116_15 -; RV64ZVE32F-NEXT: .LBB116_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB116_16 -; RV64ZVE32F-NEXT: .LBB116_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB116_9: # %cond.load -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB116_2 -; RV64ZVE32F-NEXT: .LBB116_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB116_3 -; RV64ZVE32F-NEXT: .LBB116_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 2(a0) ; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB116_4 -; RV64ZVE32F-NEXT: .LBB116_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB116_5 -; RV64ZVE32F-NEXT: .LBB116_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 16(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB116_6 -; RV64ZVE32F-NEXT: .LBB116_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 18(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB116_7 -; RV64ZVE32F-NEXT: .LBB116_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 20(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB116_8 -; RV64ZVE32F-NEXT: .LBB116_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 22(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 6(a0) +; RV64ZVE32F-NEXT: lh a4, 18(a0) +; RV64ZVE32F-NEXT: lh a5, 20(a0) +; RV64ZVE32F-NEXT: lh a6, 22(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 16 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> - %v 
= call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -14413,10 +13281,8 @@ define <8 x i16> @mgather_shuffle_reverse(ptr %base) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v8, (a0), a1 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -14439,92 +13305,27 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_shuffle_rotate: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB118_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB118_10 -; RV64ZVE32F-NEXT: .LBB118_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB118_11 -; RV64ZVE32F-NEXT: .LBB118_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB118_12 -; RV64ZVE32F-NEXT: .LBB118_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB118_13 -; RV64ZVE32F-NEXT: .LBB118_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB118_14 -; RV64ZVE32F-NEXT: .LBB118_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB118_15 -; RV64ZVE32F-NEXT: .LBB118_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB118_16 -; RV64ZVE32F-NEXT: .LBB118_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB118_9: # %cond.load -; RV64ZVE32F-NEXT: addi a2, a0, 8 -; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB118_2 -; RV64ZVE32F-NEXT: .LBB118_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 10(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB118_3 -; RV64ZVE32F-NEXT: .LBB118_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 10(a0) ; RV64ZVE32F-NEXT: lh a2, 12(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB118_4 -; RV64ZVE32F-NEXT: .LBB118_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 14(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB118_5 -; RV64ZVE32F-NEXT: .LBB118_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB118_6 -; 
RV64ZVE32F-NEXT: .LBB118_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB118_7 -; RV64ZVE32F-NEXT: .LBB118_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB118_8 -; RV64ZVE32F-NEXT: .LBB118_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 14(a0) +; RV64ZVE32F-NEXT: lh a4, 2(a0) +; RV64ZVE32F-NEXT: lh a5, 4(a0) +; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 8 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -14551,91 +13352,27 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_shuffle_vrgather: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a1, v8 -; RV64ZVE32F-NEXT: # implicit-def: $v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB119_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB119_10 -; RV64ZVE32F-NEXT: .LBB119_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB119_11 -; RV64ZVE32F-NEXT: .LBB119_3: # %else5 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB119_12 -; RV64ZVE32F-NEXT: .LBB119_4: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB119_13 -; RV64ZVE32F-NEXT: .LBB119_5: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB119_14 -; RV64ZVE32F-NEXT: .LBB119_6: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB119_15 -; RV64ZVE32F-NEXT: .LBB119_7: # %else17 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB119_16 -; RV64ZVE32F-NEXT: .LBB119_8: # %else20 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB119_9: # %cond.load -; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB119_2 -; RV64ZVE32F-NEXT: .LBB119_10: # %cond.load1 -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma -; 
RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB119_3 -; RV64ZVE32F-NEXT: .LBB119_11: # %cond.load4 +; RV64ZVE32F-NEXT: lh a1, 4(a0) ; RV64ZVE32F-NEXT: lh a2, 6(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB119_4 -; RV64ZVE32F-NEXT: .LBB119_12: # %cond.load7 -; RV64ZVE32F-NEXT: lh a2, 2(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB119_5 -; RV64ZVE32F-NEXT: .LBB119_13: # %cond.load10 -; RV64ZVE32F-NEXT: lh a2, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB119_6 -; RV64ZVE32F-NEXT: .LBB119_14: # %cond.load13 -; RV64ZVE32F-NEXT: lh a2, 10(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB119_7 -; RV64ZVE32F-NEXT: .LBB119_15: # %cond.load16 -; RV64ZVE32F-NEXT: lh a2, 12(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a2 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB119_8 -; RV64ZVE32F-NEXT: .LBB119_16: # %cond.load19 -; RV64ZVE32F-NEXT: lh a0, 14(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a0 -; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 +; RV64ZVE32F-NEXT: lh a3, 2(a0) +; RV64ZVE32F-NEXT: lh a4, 10(a0) +; RV64ZVE32F-NEXT: lh a5, 12(a0) +; RV64ZVE32F-NEXT: lh a6, 14(a0) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu +; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero +; RV64ZVE32F-NEXT: addi a0, a0, 8 +; RV64ZVE32F-NEXT: vlse16.v v9, (a0), zero +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison) + %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> splat (i1 true), <8 x i16> poison) ret <8 x i16> %v } @@ -15082,7 +13819,7 @@ define <32 x i64> @mgather_strided_split(ptr %base) { ; RV64ZVE32F-NEXT: addi sp, sp, 144 ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, <32 x i64> - %x = call <32 x i64> @llvm.masked.gather.v32i64.v32p0(<32 x ptr> %ptrs, i32 8, <32 x i1> shufflevector(<32 x i1> insertelement(<32 x i1> poison, i1 true, i32 0), <32 x i1> poison, <32 x i32> zeroinitializer), <32 x i64> poison) + %x = call <32 x i64> @llvm.masked.gather.v32i64.v32p0(<32 x ptr> %ptrs, i32 8, <32 x i1> splat (i1 true), <32 x i64> poison) ret <32 x i64> %x } @@ -15125,7 +13862,7 @@ define <4 x i32> 
@masked_gather_widen_sew_negative_stride(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr i32, ptr %base, <4 x i64> - %x = call <4 x i32> @llvm.masked.gather.v4i32.v32p0(<4 x ptr> %ptrs, i32 8, <4 x i1> shufflevector(<4 x i1> insertelement(<4 x i1> poison, i1 true, i32 0), <4 x i1> poison, <4 x i32> zeroinitializer), <4 x i32> poison) + %x = call <4 x i32> @llvm.masked.gather.v4i32.v32p0(<4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 true), <4 x i32> poison) ret <4 x i32> %x } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index b2ff471455631..aa815e18ac101 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -336,49 +336,19 @@ define void @mscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4i8: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB6_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB6_6 -; RV64ZVE32F-NEXT: .LBB6_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB6_7 -; RV64ZVE32F-NEXT: .LBB6_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB6_8 -; RV64ZVE32F-NEXT: .LBB6_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB6_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; RV64ZVE32F-NEXT: vse8.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB6_2 -; RV64ZVE32F-NEXT: .LBB6_6: # %cond.store1 +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 8(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vse8.v v8, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse8.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB6_3 -; RV64ZVE32F-NEXT: .LBB6_7: # %cond.store3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vse8.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse8.v v9, (a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB6_4 -; RV64ZVE32F-NEXT: .LBB6_8: # %cond.store5 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vse8.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV64ZVE32F-NEXT: vse8.v v8, (a1) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 1)) ret void } @@ -883,49 +853,19 @@ define void @mscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4i16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: beqz 
zero, .LBB15_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB15_6 -; RV64ZVE32F-NEXT: .LBB15_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB15_7 -; RV64ZVE32F-NEXT: .LBB15_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB15_8 -; RV64ZVE32F-NEXT: .LBB15_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB15_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB15_2 -; RV64ZVE32F-NEXT: .LBB15_6: # %cond.store1 +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 8(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vse16.v v8, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB15_3 -; RV64ZVE32F-NEXT: .LBB15_7: # %cond.store3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB15_4 -; RV64ZVE32F-NEXT: .LBB15_8: # %cond.store5 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV64ZVE32F-NEXT: vse16.v v8, (a1) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) ret void } @@ -1788,49 +1728,19 @@ define void @mscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4i32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB26_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB26_6 -; RV64ZVE32F-NEXT: .LBB26_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB26_7 -; RV64ZVE32F-NEXT: .LBB26_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB26_8 -; RV64ZVE32F-NEXT: .LBB26_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB26_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB26_2 -; RV64ZVE32F-NEXT: .LBB26_6: # %cond.store1 +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 8(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v8, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB26_3 -; RV64ZVE32F-NEXT: .LBB26_7: # %cond.store3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v9, 
v8, 2 -; RV64ZVE32F-NEXT: vse32.v v9, (a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB26_4 -; RV64ZVE32F-NEXT: .LBB26_8: # %cond.store5 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v8, (a1) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1)) ret void } @@ -3163,50 +3073,22 @@ define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) { ; RV32ZVE32F-NEXT: lw a2, 24(a0) ; RV32ZVE32F-NEXT: lw a3, 20(a0) ; RV32ZVE32F-NEXT: lw a4, 16(a0) -; RV32ZVE32F-NEXT: lw a7, 12(a0) +; RV32ZVE32F-NEXT: lw a5, 12(a0) ; RV32ZVE32F-NEXT: lw a6, 8(a0) -; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmset.m v9 -; RV32ZVE32F-NEXT: vmv.x.s a5, v9 -; RV32ZVE32F-NEXT: beqz zero, .LBB39_5 -; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, a5, 2 -; RV32ZVE32F-NEXT: bnez a0, .LBB39_6 -; RV32ZVE32F-NEXT: .LBB39_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, a5, 4 -; RV32ZVE32F-NEXT: bnez a0, .LBB39_7 -; RV32ZVE32F-NEXT: .LBB39_3: # %else4 -; RV32ZVE32F-NEXT: andi a5, a5, 8 -; RV32ZVE32F-NEXT: bnez a5, .LBB39_8 -; RV32ZVE32F-NEXT: .LBB39_4: # %else6 -; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB39_5: # %cond.store -; RV32ZVE32F-NEXT: lw t0, 4(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s t1, v8 -; RV32ZVE32F-NEXT: sw t0, 4(t1) -; RV32ZVE32F-NEXT: sw a0, 0(t1) -; RV32ZVE32F-NEXT: andi a0, a5, 2 -; RV32ZVE32F-NEXT: beqz a0, .LBB39_2 -; RV32ZVE32F-NEXT: .LBB39_6: # %cond.store1 +; RV32ZVE32F-NEXT: lw a7, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 4(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s t0, v8 +; RV32ZVE32F-NEXT: sw a7, 0(t0) +; RV32ZVE32F-NEXT: sw a0, 4(t0) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 -; RV32ZVE32F-NEXT: sw a7, 4(a0) ; RV32ZVE32F-NEXT: sw a6, 0(a0) -; RV32ZVE32F-NEXT: andi a0, a5, 4 -; RV32ZVE32F-NEXT: beqz a0, .LBB39_3 -; RV32ZVE32F-NEXT: .LBB39_7: # %cond.store3 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: sw a5, 4(a0) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v9 ; RV32ZVE32F-NEXT: sw a4, 0(a0) ; RV32ZVE32F-NEXT: sw a3, 4(a0) -; RV32ZVE32F-NEXT: andi a5, a5, 8 -; RV32ZVE32F-NEXT: beqz a5, .LBB39_4 -; RV32ZVE32F-NEXT: .LBB39_8: # %cond.store5 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: sw a2, 0(a0) @@ -3216,46 +3098,19 @@ define void @mscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4i64: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a2, 24(a1) -; RV64ZVE32F-NEXT: ld a4, 16(a1) -; RV64ZVE32F-NEXT: ld a7, 8(a1) -; RV64ZVE32F-NEXT: ld a3, 24(a0) -; RV64ZVE32F-NEXT: ld a5, 16(a0) -; RV64ZVE32F-NEXT: ld t0, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a6, v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB39_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a6, 2 -; RV64ZVE32F-NEXT: 
bnez a0, .LBB39_6 -; RV64ZVE32F-NEXT: .LBB39_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a6, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB39_7 -; RV64ZVE32F-NEXT: .LBB39_3: # %else4 -; RV64ZVE32F-NEXT: andi a0, a6, 8 -; RV64ZVE32F-NEXT: bnez a0, .LBB39_8 -; RV64ZVE32F-NEXT: .LBB39_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB39_5: # %cond.store +; RV64ZVE32F-NEXT: ld a3, 16(a1) +; RV64ZVE32F-NEXT: ld a4, 8(a1) ; RV64ZVE32F-NEXT: ld a1, 0(a1) -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: sd a0, 0(a1) -; RV64ZVE32F-NEXT: andi a0, a6, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB39_2 -; RV64ZVE32F-NEXT: .LBB39_6: # %cond.store1 -; RV64ZVE32F-NEXT: sd t0, 0(a7) -; RV64ZVE32F-NEXT: andi a0, a6, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB39_3 -; RV64ZVE32F-NEXT: .LBB39_7: # %cond.store3 -; RV64ZVE32F-NEXT: sd a5, 0(a4) -; RV64ZVE32F-NEXT: andi a0, a6, 8 -; RV64ZVE32F-NEXT: beqz a0, .LBB39_4 -; RV64ZVE32F-NEXT: .LBB39_8: # %cond.store5 -; RV64ZVE32F-NEXT: sd a3, 0(a2) +; RV64ZVE32F-NEXT: ld a5, 0(a0) +; RV64ZVE32F-NEXT: ld a6, 8(a0) +; RV64ZVE32F-NEXT: ld a7, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) +; RV64ZVE32F-NEXT: sd a5, 0(a1) +; RV64ZVE32F-NEXT: sd a6, 0(a4) +; RV64ZVE32F-NEXT: sd a7, 0(a3) +; RV64ZVE32F-NEXT: sd a0, 0(a2) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1)) ret void } @@ -6168,49 +6023,19 @@ define void @mscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4f16: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB55_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB55_6 -; RV64ZVE32F-NEXT: .LBB55_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB55_7 -; RV64ZVE32F-NEXT: .LBB55_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB55_8 -; RV64ZVE32F-NEXT: .LBB55_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB55_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB55_2 -; RV64ZVE32F-NEXT: .LBB55_6: # %cond.store1 +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 8(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vse16.v v8, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB55_3 -; RV64ZVE32F-NEXT: .LBB55_7: # %cond.store3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB55_4 -; RV64ZVE32F-NEXT: .LBB55_8: # %cond.store5 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; 
RV64ZVE32F-NEXT: vse16.v v8, (a1) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1)) ret void } @@ -7020,49 +6845,19 @@ define void @mscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs) { ; RV64ZVE32F-LABEL: mscatter_truemask_v4f32: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB65_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB65_6 -; RV64ZVE32F-NEXT: .LBB65_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB65_7 -; RV64ZVE32F-NEXT: .LBB65_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB65_8 -; RV64ZVE32F-NEXT: .LBB65_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB65_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v8, (a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB65_2 -; RV64ZVE32F-NEXT: .LBB65_6: # %cond.store1 +; RV64ZVE32F-NEXT: ld a2, 0(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 8(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v8, (a2) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v9, (a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB65_3 -; RV64ZVE32F-NEXT: .LBB65_7: # %cond.store3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v9, (a0) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse32.v v9, (a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB65_4 -; RV64ZVE32F-NEXT: .LBB65_8: # %cond.store5 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vse32.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV64ZVE32F-NEXT: vse32.v v8, (a1) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 1)) ret void } @@ -8368,43 +8163,15 @@ define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) { ; ; RV32ZVE32F-LABEL: mscatter_truemask_v4f64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmset.m v9 -; RV32ZVE32F-NEXT: vmv.x.s a0, v9 -; RV32ZVE32F-NEXT: beqz zero, .LBB78_5 -; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB78_6 -; RV32ZVE32F-NEXT: .LBB78_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB78_7 -; RV32ZVE32F-NEXT: .LBB78_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, a0, 8 -; RV32ZVE32F-NEXT: bnez a0, .LBB78_8 -; RV32ZVE32F-NEXT: .LBB78_4: # %else6 -; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB78_5: # %cond.store -; RV32ZVE32F-NEXT: 
vsetvli zero, zero, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: fsd fa0, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB78_2 -; RV32ZVE32F-NEXT: .LBB78_6: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa0, 0(a0) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v9 -; RV32ZVE32F-NEXT: fsd fa1, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB78_3 -; RV32ZVE32F-NEXT: .LBB78_7: # %cond.store3 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a0, v9 +; RV32ZVE32F-NEXT: fsd fa1, 0(a0) ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v9 -; RV32ZVE32F-NEXT: fsd fa2, 0(a1) -; RV32ZVE32F-NEXT: andi a0, a0, 8 -; RV32ZVE32F-NEXT: beqz a0, .LBB78_4 -; RV32ZVE32F-NEXT: .LBB78_8: # %cond.store5 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a0, v9 +; RV32ZVE32F-NEXT: fsd fa2, 0(a0) ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8 ; RV32ZVE32F-NEXT: fsd fa3, 0(a0) @@ -8412,43 +8179,16 @@ define void @mscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs) { ; ; RV64ZVE32F-LABEL: mscatter_truemask_v4f64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a1, 24(a0) -; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: ld a4, 8(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v8 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: beqz zero, .LBB78_5 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: bnez a0, .LBB78_6 -; RV64ZVE32F-NEXT: .LBB78_2: # %else2 -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: bnez a0, .LBB78_7 -; RV64ZVE32F-NEXT: .LBB78_3: # %else4 -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB78_8 -; RV64ZVE32F-NEXT: .LBB78_4: # %else6 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB78_5: # %cond.store -; RV64ZVE32F-NEXT: ld a0, 0(a0) -; RV64ZVE32F-NEXT: fsd fa0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a3, 2 -; RV64ZVE32F-NEXT: beqz a0, .LBB78_2 -; RV64ZVE32F-NEXT: .LBB78_6: # %cond.store1 -; RV64ZVE32F-NEXT: fsd fa1, 0(a4) -; RV64ZVE32F-NEXT: andi a0, a3, 4 -; RV64ZVE32F-NEXT: beqz a0, .LBB78_3 -; RV64ZVE32F-NEXT: .LBB78_7: # %cond.store3 -; RV64ZVE32F-NEXT: fsd fa2, 0(a2) -; RV64ZVE32F-NEXT: andi a3, a3, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB78_4 -; RV64ZVE32F-NEXT: .LBB78_8: # %cond.store5 -; RV64ZVE32F-NEXT: fsd fa3, 0(a1) +; RV64ZVE32F-NEXT: ld a1, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 8(a0) +; RV64ZVE32F-NEXT: ld a3, 16(a0) +; RV64ZVE32F-NEXT: ld a0, 24(a0) +; RV64ZVE32F-NEXT: fsd fa0, 0(a1) +; RV64ZVE32F-NEXT: fsd fa1, 0(a2) +; RV64ZVE32F-NEXT: fsd fa2, 0(a3) +; RV64ZVE32F-NEXT: fsd fa3, 0(a0) ; RV64ZVE32F-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue) + call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, i32 8, <4 x i1> splat (i1 1)) ret void } @@ -11344,10 +11084,8 @@ define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer 
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %allones) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true)) ret void } @@ -11358,10 +11096,8 @@ define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %allones) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true)) ret void } @@ -11373,10 +11109,8 @@ define void @mscatter_shuffle_reverse(<8 x i16> %val, ptr %base) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vsse16.v v8, (a0), a1 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %allones) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true)) ret void } @@ -11399,89 +11133,31 @@ define void @mscatter_shuffle_rotate(<8 x i16> %val, ptr %base) { ; ; RV64ZVE32F-LABEL: mscatter_shuffle_rotate: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64ZVE32F-NEXT: vmset.m v9 -; RV64ZVE32F-NEXT: vmv.x.s a1, v9 -; RV64ZVE32F-NEXT: beqz zero, .LBB96_9 -; RV64ZVE32F-NEXT: # %bb.1: # %else -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB96_10 -; RV64ZVE32F-NEXT: .LBB96_2: # %else2 -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB96_11 -; RV64ZVE32F-NEXT: .LBB96_3: # %else4 -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: bnez a2, .LBB96_12 -; RV64ZVE32F-NEXT: .LBB96_4: # %else6 -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB96_13 -; RV64ZVE32F-NEXT: .LBB96_5: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: bnez a2, .LBB96_14 -; RV64ZVE32F-NEXT: .LBB96_6: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: bnez a2, .LBB96_15 -; RV64ZVE32F-NEXT: .LBB96_7: # %else12 -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: bnez a1, .LBB96_16 -; RV64ZVE32F-NEXT: .LBB96_8: # %else14 -; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB96_9: # %cond.store -; RV64ZVE32F-NEXT: addi a2, a0, 8 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vse16.v v8, (a2) -; RV64ZVE32F-NEXT: andi a2, a1, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB96_2 -; RV64ZVE32F-NEXT: .LBB96_10: # %cond.store1 -; RV64ZVE32F-NEXT: addi a2, a0, 10 +; RV64ZVE32F-NEXT: addi a1, a0, 6 +; RV64ZVE32F-NEXT: addi a2, a0, 4 +; RV64ZVE32F-NEXT: addi a3, a0, 2 +; RV64ZVE32F-NEXT: addi a4, a0, 14 +; RV64ZVE32F-NEXT: addi a5, a0, 12 +; RV64ZVE32F-NEXT: addi a6, a0, 10 +; RV64ZVE32F-NEXT: addi a7, a0, 8 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vse16.v v8, (a7) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a2, a1, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB96_3 -; RV64ZVE32F-NEXT: .LBB96_11: # 
%cond.store3 -; RV64ZVE32F-NEXT: addi a2, a0, 12 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a6) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a2, a1, 8 -; RV64ZVE32F-NEXT: beqz a2, .LBB96_4 -; RV64ZVE32F-NEXT: .LBB96_12: # %cond.store5 -; RV64ZVE32F-NEXT: addi a2, a0, 14 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a5) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 3 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: beqz a2, .LBB96_5 -; RV64ZVE32F-NEXT: .LBB96_13: # %cond.store7 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a4) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 ; RV64ZVE32F-NEXT: vse16.v v9, (a0) -; RV64ZVE32F-NEXT: andi a2, a1, 32 -; RV64ZVE32F-NEXT: beqz a2, .LBB96_6 -; RV64ZVE32F-NEXT: .LBB96_14: # %cond.store9 -; RV64ZVE32F-NEXT: addi a2, a0, 2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 -; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a2, a1, 64 -; RV64ZVE32F-NEXT: beqz a2, .LBB96_7 -; RV64ZVE32F-NEXT: .LBB96_15: # %cond.store11 -; RV64ZVE32F-NEXT: addi a2, a0, 4 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; RV64ZVE32F-NEXT: vse16.v v9, (a3) ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 6 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) -; RV64ZVE32F-NEXT: andi a1, a1, -128 -; RV64ZVE32F-NEXT: beqz a1, .LBB96_8 -; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13 -; RV64ZVE32F-NEXT: addi a0, a0, 6 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 -; RV64ZVE32F-NEXT: vse16.v v8, (a0) +; RV64ZVE32F-NEXT: vse16.v v8, (a1) ; RV64ZVE32F-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i16 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> - call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> %allones) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, i32 2, <8 x i1> splat (i1 true)) ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll index 648fb785cf151..19f3d3ce19fa4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll @@ -42,9 +42,7 @@ define <2 x half> @vp_nearbyint_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -86,9 +84,7 @@ define <4 x half> @vp_nearbyint_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x half> 
@llvm.vp.nearbyint.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -130,9 +126,7 @@ define <8 x half> @vp_nearbyint_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -176,9 +170,7 @@ define <16 x half> @vp_nearbyint_v16f16_unmasked(<16 x half> %va, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -220,9 +212,7 @@ define <2 x float> @vp_nearbyint_v2f32_unmasked(<2 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -264,9 +254,7 @@ define <4 x float> @vp_nearbyint_v4f32_unmasked(<4 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -310,9 +298,7 @@ define <8 x float> @vp_nearbyint_v8f32_unmasked(<8 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -356,9 +342,7 @@ define <16 x float> @vp_nearbyint_v16f32_unmasked(<16 x float> %va, i32 zeroext ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -400,9 +384,7 @@ define <2 x double> @vp_nearbyint_v2f64_unmasked(<2 x double> %va, i32 
zeroext % ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -446,9 +428,7 @@ define <4 x double> @vp_nearbyint_v4f64_unmasked(<4 x double> %va, i32 zeroext % ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -492,9 +472,7 @@ define <8 x double> @vp_nearbyint_v8f64_unmasked(<8 x double> %va, i32 zeroext % ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -538,9 +516,7 @@ define <15 x double> @vp_nearbyint_v15f64_unmasked(<15 x double> %va, i32 zeroex ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <15 x i1> poison, i1 true, i32 0 - %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer - %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) + %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -584,9 +560,7 @@ define <16 x double> @vp_nearbyint_v16f64_unmasked(<16 x double> %va, i32 zeroex ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -690,8 +664,6 @@ define <32 x double> @vp_nearbyint_v32f64_unmasked(<32 x double> %va, i32 zeroex ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) + %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll index 09bc27bdd15e8..016be04ffc9b9 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll @@ -14,9 +14,7 @@ define <8 x i32> @vpmerge_vpadd(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 true, i32 0 - %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl) + %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> splat (i1 true), i32 %vl) %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl) ret <8 x i32> %b } @@ -31,10 +29,8 @@ define <8 x i32> @vpmerge_vpadd2(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 true, i32 0 - %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl) - %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> %mask, i32 %vl) + %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> splat (i1 true), i32 %vl) + %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> splat (i1 true), i32 %vl) %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl) ret <8 x i32> %b } @@ -46,10 +42,8 @@ define <8 x i32> @vpmerge_vpadd3(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma ; CHECK-NEXT: vadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 true, i32 0 - %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl) - %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %mask, <8 x i32> %a, <8 x i32> %passthru, i32 %vl) + %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> splat (i1 true), i32 %vl) + %b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> splat (i1 true), <8 x i32> %a, <8 x i32> %passthru, i32 %vl) ret <8 x i32> %b } @@ -61,9 +55,7 @@ define <8 x float> @vpmerge_vpfadd(<8 x float> %passthru, <8 x float> %x, <8 x f ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 true, i32 0 - %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %a = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %x, <8 x float> %y, <8 x i1> %mask, i32 %vl) + %a = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %x, <8 x float> %y, <8 x i1> splat (i1 true), i32 %vl) %b = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl) ret <8 x float> %b } @@ -76,9 +68,7 @@ define <8 x i16> @vpmerge_vpfptosi(<8 x i16> %passthru, <8 x float> %x, <8 x i1> ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 true, i32 0 - %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %a = call <8 x i16> 
@llvm.vp.fptosi.v8i16.v8f32(<8 x float> %x, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float> %x, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %a, <8 x i16> %passthru, i32 %vl)
ret <8 x i16> %b
}
@@ -91,9 +81,7 @@ define <8 x float> @vpmerge_vpsitofp(<8 x float> %passthru, <8 x i64> %x, <8 x i
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> %x, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> %x, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
ret <8 x float> %b
}
@@ -106,9 +94,7 @@ define <8 x i32> @vpmerge_vpzext(<8 x i32> %passthru, <8 x i8> %x, <8 x i1> %m,
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT: vzext.vf4 v8, v9, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x i32> @llvm.vp.zext.v8i32.v8i8(<8 x i8> %x, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x i32> @llvm.vp.zext.v8i32.v8i8(<8 x i8> %x, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
ret <8 x i32> %b
}
@@ -121,9 +107,7 @@ define <8 x i32> @vpmerge_vptrunc(<8 x i32> %passthru, <8 x i64> %x, <8 x i1> %m
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> %x, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> %x, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
ret <8 x i32> %b
}
@@ -136,9 +120,7 @@ define <8 x double> @vpmerge_vpfpext(<8 x double> %passthru, <8 x float> %x, <8
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> %x, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> %x, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %a, <8 x double> %passthru, i32 %vl)
ret <8 x double> %b
}
@@ -151,9 +133,7 @@ define <8 x float> @vpmerge_vpfptrunc(<8 x float> %passthru, <8 x double> %x, <8
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> %x, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> %x, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
ret <8 x float> %b
}
@@ -167,9 +147,7 @@ define <8 x i32> @vpmerge_vpload(<8 x i32> %passthru, ptr %p, <8 x i1> %m, i32 z
; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu
; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
ret <8 x i32> %b
}
@@ -183,10 +161,8 @@ define <8 x i32> @vpmerge_vpload2(<8 x i32> %passthru, ptr %p, <8 x i32> %x, <8
; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu
; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> %mask, i32 %vl)
- %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> %mask, i32 %vl)
+ %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> splat (i1 true), i32 %vl)
+ %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
ret <8 x i32> %b
}
@@ -203,9 +179,7 @@ define <8 x i32> @vpselect_vpadd(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
ret <8 x i32> %b
}
@@ -218,10 +192,8 @@ define <8 x i32> @vpselect_vpadd2(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %
; CHECK-NEXT: vmseq.vv v0, v9, v10
; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl)
- %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> %mask, i32 %vl)
+ %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> splat (i1 true), i32 %vl)
+ %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
ret <8 x i32> %b
}
@@ -233,10 +205,8 @@ define <8 x i32> @vpselect_vpadd3(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vadd.vv v8, v9, v10
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl)
- %b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %mask, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+ %a = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> splat (i1 true), i32 %vl)
+ %b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
ret <8 x i32> %b
}
@@ -247,9 +217,7 @@ define <8 x float> @vpselect_vpfadd(<8 x float> %passthru, <8 x float> %x, <8 x
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %x, <8 x float> %y, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %x, <8 x float> %y, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
ret <8 x float> %b
}
@@ -261,9 +229,7 @@ define <8 x i16> @vpselect_vpfptosi(<8 x i16> %passthru, <8 x float> %x, <8 x i1
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float> %x, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x i16> @llvm.vp.fptosi.v8i16.v8f32(<8 x float> %x, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x i16> @llvm.vp.select.v8i16(<8 x i1> %m, <8 x i16> %a, <8 x i16> %passthru, i32 %vl)
ret <8 x i16> %b
}
@@ -275,9 +241,7 @@ define <8 x float> @vpselect_vpsitofp(<8 x float> %passthru, <8 x i64> %x, <8 x
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> %x, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x float> @llvm.vp.sitofp.v8f32.v8i64(<8 x i64> %x, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
ret <8 x float> %b
}
@@ -289,9 +253,7 @@ define <8 x i32> @vpselect_vpzext(<8 x i32> %passthru, <8 x i8> %x, <8 x i1> %m,
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vzext.vf4 v8, v9, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x i32> @llvm.vp.zext.v8i32.v8i8(<8 x i8> %x, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x i32> @llvm.vp.zext.v8i32.v8i8(<8 x i8> %x, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
ret <8 x i32> %b
}
@@ -303,9 +265,7 @@ define <8 x i32> @vpselect_vptrunc(<8 x i32> %passthru, <8 x i64> %x, <8 x i1> %
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vnsrl.wi v8, v10, 0, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> %x, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x i32> @llvm.vp.trunc.v8i32.v8i64(<8 x i64> %x, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
ret <8 x i32> %b
}
@@ -317,9 +277,7 @@ define <8 x double> @vpselect_vpfpext(<8 x double> %passthru, <8 x float> %x, <8
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> %x, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x double> @llvm.vp.fpext.v8f64.v8f32(<8 x float> %x, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %a, <8 x double> %passthru, i32 %vl)
ret <8 x double> %b
}
@@ -331,9 +289,7 @@ define <8 x float> @vpselect_vpfptrunc(<8 x float> %passthru, <8 x double> %x, <
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> %x, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x float> @llvm.vp.fptrunc.v8f32.v8f64(<8 x double> %x, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
ret <8 x float> %b
}
@@ -345,9 +301,7 @@ define <8 x i32> @vpselect_vpload(<8 x i32> %passthru, ptr %p, <8 x i1> %m, i32
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> %mask, i32 %vl)
+ %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
ret <8 x i32> %b
}
@@ -360,10 +314,8 @@ define <8 x i32> @vpselect_vpload2(<8 x i32> %passthru, ptr %p, <8 x i32> %x, <8
; CHECK-NEXT: vmseq.vv v0, v9, v10
; CHECK-NEXT: vle32.v v8, (a0), v0.t
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 true, i32 0
- %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> %mask, i32 %vl)
- %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> %mask, i32 %vl)
+ %a = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %p, <8 x i1> splat (i1 true), i32 %vl)
+ %m = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> splat (i1 true), i32 %vl)
%b = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
ret <8 x i32> %b
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
index 3e0fb3009c6b1..920d0d5fe7ba7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
@@ -38,9 +38,7 @@ define <2 x half> @vp_rint_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.rint.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.rint.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x half> %v
}
@@ -78,9 +76,7 @@ define <4 x half> @vp_rint_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.rint.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.rint.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x half> %v
}
@@ -118,9 +114,7 @@ define <8 x half> @vp_rint_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.rint.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.rint.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x half> %v
}
@@ -160,9 +154,7 @@ define <16 x half> @vp_rint_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.rint.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x half> %v
}
@@ -200,9 +192,7 @@ define <2 x float> @vp_rint_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x float> %v
}
@@ -240,9 +230,7 @@ define <4 x float> @vp_rint_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x float> %v
}
@@ -282,9 +270,7 @@ define <8 x float> @vp_rint_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x float> %v
}
@@ -324,9 +310,7 @@ define <16 x float> @vp_rint_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl)
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x float> %v
}
@@ -364,9 +348,7 @@ define <2 x double> @vp_rint_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl)
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x double> %v
}
@@ -406,9 +388,7 @@ define <4 x double> @vp_rint_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl)
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x double> %v
}
@@ -448,9 +428,7 @@ define <8 x double> @vp_rint_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl)
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x double> %v
}
@@ -490,9 +468,7 @@ define <15 x double> @vp_rint_v15f64_unmasked(<15 x double> %va, i32 zeroext %ev
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
+ %v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl)
ret <15 x double> %v
}
@@ -532,9 +508,7 @@ define <16 x double> @vp_rint_v16f64_unmasked(<16 x double> %va, i32 zeroext %ev
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x double> %v
}
@@ -639,8 +613,6 @@ define <32 x double> @vp_rint_v32f64_unmasked(<32 x double> %va, i32 zeroext %ev
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
+ %v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)
ret <32 x double> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
index 96a72d0ddd18b..6f045349423c7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
@@ -86,9 +86,7 @@ define <2 x half> @vp_round_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.round.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.round.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x half> %v
}
@@ -170,9 +168,7 @@ define <4 x half> @vp_round_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.round.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.round.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x half> %v
}
@@ -256,9 +252,7 @@ define <8 x half> @vp_round_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.round.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.round.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x half> %v
}
@@ -344,9 +338,7 @@ define <16 x half> @vp_round_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.round.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.round.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x half> %v
}
@@ -388,9 +380,7 @@ define <2 x float> @vp_round_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.round.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.round.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x float> %v
}
@@ -432,9 +422,7 @@ define <4 x float> @vp_round_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.round.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.round.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x float> %v
}
@@ -478,9 +466,7 @@ define <8 x float> @vp_round_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.round.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.round.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x float> %v
}
@@ -524,9 +510,7 @@ define <16 x float> @vp_round_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.round.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.round.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x float> %v
}
@@ -568,9 +552,7 @@ define <2 x double> @vp_round_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl)
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.round.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.round.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x double> %v
}
@@ -614,9 +596,7 @@ define <4 x double> @vp_round_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl)
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.round.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x double> %v
}
@@ -660,9 +640,7 @@ define <8 x double> @vp_round_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl)
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x double> @llvm.vp.round.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x double> %v
}
@@ -706,9 +684,7 @@ define <15 x double> @vp_round_v15f64_unmasked(<15 x double> %va, i32 zeroext %e
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
+ %v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl)
ret <15 x double> %v
}
@@ -752,9 +728,7 @@ define <16 x double> @vp_round_v16f64_unmasked(<16 x double> %va, i32 zeroext %e
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x double> %v
}
@@ -867,8 +841,6 @@ define <32 x double> @vp_round_v32f64_unmasked(<32 x double> %va, i32 zeroext %e
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
+ %v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)
ret <32 x double> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
index 74a43f09542af..738d7e37c50bd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
@@ -86,9 +86,7 @@ define <2 x half> @vp_roundeven_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.roundeven.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x half> %v
}
@@ -170,9 +168,7 @@ define <4 x half> @vp_roundeven_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.roundeven.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x half> %v
}
@@ -256,9 +252,7 @@ define <8 x half> @vp_roundeven_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.roundeven.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x half> %v
}
@@ -344,9 +338,7 @@ define <16 x half> @vp_roundeven_v16f16_unmasked(<16 x half> %va, i32 zeroext %e
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.roundeven.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x half> %v
}
@@ -388,9 +380,7 @@ define <2 x float> @vp_roundeven_v2f32_unmasked(<2 x float> %va, i32 zeroext %ev
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x float> %v
}
@@ -432,9 +422,7 @@ define <4 x float> @vp_roundeven_v4f32_unmasked(<4 x float> %va, i32 zeroext %ev
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x float> %v
}
@@ -478,9 +466,7 @@ define <8 x float> @vp_roundeven_v8f32_unmasked(<8 x float> %va, i32 zeroext %ev
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x float> %v
}
@@ -524,9 +510,7 @@ define <16 x float> @vp_roundeven_v16f32_unmasked(<16 x float> %va, i32 zeroext
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x float> %v
}
@@ -568,9 +552,7 @@ define <2 x double> @vp_roundeven_v2f64_unmasked(<2 x double> %va, i32 zeroext %
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x double> %v
}
@@ -614,9 +596,7 @@ define <4 x double> @vp_roundeven_v4f64_unmasked(<4 x double> %va, i32 zeroext %
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x double> %v
}
@@ -660,9 +640,7 @@ define <8 x double> @vp_roundeven_v8f64_unmasked(<8 x double> %va, i32 zeroext %
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x double> %v
}
@@ -706,9 +684,7 @@ define <15 x double> @vp_roundeven_v15f64_unmasked(<15 x double> %va, i32 zeroex
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
+ %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl)
ret <15 x double> %v
}
@@ -752,9 +728,7 @@ define <16 x double> @vp_roundeven_v16f64_unmasked(<16 x double> %va, i32 zeroex
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x double> %v
}
@@ -867,8 +841,6 @@ define <32 x double> @vp_roundeven_v32f64_unmasked(<32 x double> %va, i32 zeroex
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
+ %v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)
ret <32 x double> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
index 91de65c79bb70..6f5b7875266b1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll
@@ -86,9 +86,7 @@ define <2 x half> @vp_roundtozero_v2f16_unmasked(<2 x half> %va, i32 zeroext %ev
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x half> %v
}
@@ -170,9 +168,7 @@ define <4 x half> @vp_roundtozero_v4f16_unmasked(<4 x half> %va, i32 zeroext %ev
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x half> %v
}
@@ -256,9 +252,7 @@ define <8 x half> @vp_roundtozero_v8f16_unmasked(<8 x half> %va, i32 zeroext %ev
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
; ZVFHMIN-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x half> %v
}
@@ -344,9 +338,7 @@ define <16 x half> @vp_roundtozero_v16f16_unmasked(<16 x half> %va, i32 zeroext
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.roundtozero.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x half> %v
}
@@ -388,9 +380,7 @@ define <2 x float> @vp_roundtozero_v2f32_unmasked(<2 x float> %va, i32 zeroext %
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x float> %v
}
@@ -432,9 +422,7 @@ define <4 x float> @vp_roundtozero_v4f32_unmasked(<4 x float> %va, i32 zeroext %
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x float> %v
}
@@ -478,9 +466,7 @@ define <8 x float> @vp_roundtozero_v8f32_unmasked(<8 x float> %va, i32 zeroext %
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x float> %v
}
@@ -524,9 +510,7 @@ define <16 x float> @vp_roundtozero_v16f32_unmasked(<16 x float> %va, i32 zeroex
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x float> %v
}
@@ -568,9 +552,7 @@ define <2 x double> @vp_roundtozero_v2f64_unmasked(<2 x double> %va, i32 zeroext
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x double> %v
}
@@ -614,9 +596,7 @@ define <4 x double> @vp_roundtozero_v4f64_unmasked(<4 x double> %va, i32 zeroext
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x double> %v
}
@@ -660,9 +640,7 @@ define <8 x double> @vp_roundtozero_v8f64_unmasked(<8 x double> %va, i32 zeroext
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x double> %v
}
@@ -706,9 +684,7 @@ define <15 x double> @vp_roundtozero_v15f64_unmasked(<15 x double> %va, i32 zero
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
+ %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl)
ret <15 x double> %v
}
@@ -752,9 +728,7 @@ define <16 x double> @vp_roundtozero_v16f64_unmasked(<16 x double> %va, i32 zero
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x double> %v
}
@@ -867,8 +841,6 @@ define <32 x double> @vp_roundtozero_v32f64_unmasked(<32 x double> %va, i32 zero
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
+ %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)
ret <32 x double> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
index 8cf069e66e8f2..981715bd2b998 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll
@@ -135,9 +135,7 @@ define <8 x i1> @icmp_eq_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> %vb, metadata !"eq", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), metadata !"eq", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -147,9 +145,7 @@ define <8 x i1> @icmp_eq_vi_swap_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %ev
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %vb, <8 x i8> %va, metadata !"eq", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> splat (i8 4), <8 x i8> %va, metadata !"eq", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -193,9 +189,7 @@ define <8 x i1> @icmp_ne_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> %vb, metadata !"ne", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), metadata !"ne", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -205,9 +199,7 @@ define <8 x i1> @icmp_ne_vi_swap_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %ev
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %vb, <8 x i8> %va, metadata !"ne", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> splat (i8 4), <8 x i8> %va, metadata !"ne", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -251,9 +243,7 @@ define <8 x i1> @icmp_ugt_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), metadata !"ugt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -263,9 +253,7 @@ define <8 x i1> @icmp_ugt_vi_swap_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %e
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %vb, <8 x i8> %va, metadata !"ugt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> splat (i8 4), <8 x i8> %va, metadata !"ugt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -311,9 +299,7 @@ define <8 x i1> @icmp_uge_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsgtu.vi v0, v8, 3, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> %vb, metadata !"uge", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), metadata !"uge", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -323,9 +309,7 @@ define <8 x i1> @icmp_uge_vi_swap_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %e
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsleu.vi v0, v8, 4, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %vb, <8 x i8> %va, metadata !"uge", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> splat (i8 4), <8 x i8> %va, metadata !"uge", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -369,9 +353,7 @@ define <8 x i1> @icmp_ult_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> %vb, metadata !"ult", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), metadata !"ult", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -381,9 +363,7 @@ define <8 x i1> @icmp_ult_vi_swap_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %e
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %vb, <8 x i8> %va, metadata !"ult", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> splat (i8 4), <8 x i8> %va, metadata !"ult", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -427,9 +407,7 @@ define <8 x i1> @icmp_sgt_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> %vb, metadata !"sgt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), metadata !"sgt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -439,9 +417,7 @@ define <8 x i1> @icmp_sgt_vi_swap_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %e
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %vb, <8 x i8> %va, metadata !"sgt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> splat (i8 4), <8 x i8> %va, metadata !"sgt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -487,9 +463,7 @@ define <8 x i1> @icmp_sge_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> %vb, metadata !"sge", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), metadata !"sge", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -499,9 +473,7 @@ define <8 x i1> @icmp_sge_vi_swap_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %e
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %vb, <8 x i8> %va, metadata !"sge", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> splat (i8 4), <8 x i8> %va, metadata !"sge", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -545,9 +517,7 @@ define <8 x i1> @icmp_slt_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> %vb, metadata !"slt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), metadata !"slt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -557,9 +527,7 @@ define <8 x i1> @icmp_slt_vi_swap_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %e
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %vb, <8 x i8> %va, metadata !"slt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> splat (i8 4), <8 x i8> %va, metadata !"slt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -605,9 +573,7 @@ define <8 x i1> @icmp_sle_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> %vb, metadata !"sle", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), metadata !"sle", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -617,9 +583,7 @@ define <8 x i1> @icmp_sle_vi_swap_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %e
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 4, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> %vb, <8 x i8> %va, metadata !"sle", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i8(<8 x i8> splat (i8 4), <8 x i8> %va, metadata !"sle", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -783,9 +747,7 @@ define <8 x i1> @icmp_eq_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vmseq.vi v10, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"eq", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"eq", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -796,9 +758,7 @@ define <8 x i1> @icmp_eq_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %
; CHECK-NEXT: vmseq.vi v10, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %vb, <8 x i32> %va, metadata !"eq", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"eq", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -846,9 +806,7 @@ define <8 x i1> @icmp_ne_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vmsne.vi v10, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ne", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ne", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -859,9 +817,7 @@ define <8 x i1> @icmp_ne_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %
; CHECK-NEXT: vmsne.vi v10, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %vb, <8 x i32> %va, metadata !"ne", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ne", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -909,9 +865,7 @@ define <8 x i1> @icmp_ugt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vmsgtu.vi v10, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ugt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -922,9 +876,7 @@ define <8 x i1> @icmp_ugt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
; CHECK-NEXT: vmsleu.vi v10, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %vb, <8 x i32> %va, metadata !"ugt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ugt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -974,9 +926,7 @@ define <8 x i1> @icmp_uge_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vmsgtu.vi v10, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"uge", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"uge", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -987,9 +937,7 @@ define <8 x i1> @icmp_uge_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
; CHECK-NEXT: vmsleu.vi v10, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %vb, <8 x i32> %va, metadata !"uge", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"uge", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1037,9 +985,7 @@ define <8 x i1> @icmp_ult_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vmsleu.vi v10, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ult", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ult", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1050,9 +996,7 @@ define <8 x i1> @icmp_ult_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
; CHECK-NEXT: vmsgtu.vi v10, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %vb, <8 x i32> %va, metadata !"ult", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ult", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1100,9 +1044,7 @@ define <8 x i1> @icmp_sgt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vmsgt.vi v10, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sgt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sgt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1113,9 +1055,7 @@ define <8 x i1> @icmp_sgt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
; CHECK-NEXT: vmsle.vi v10, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %vb, <8 x i32> %va, metadata !"sgt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sgt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1165,9 +1105,7 @@ define <8 x i1> @icmp_sge_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vmsgt.vi v10, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sge", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sge", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1178,9 +1116,7 @@ define <8 x i1> @icmp_sge_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
; CHECK-NEXT: vmsle.vi v10, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %vb, <8 x i32> %va, metadata !"sge", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sge", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1228,9 +1164,7 @@ define <8 x i1> @icmp_slt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vmsle.vi v10, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"slt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"slt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1241,9 +1175,7 @@ define <8 x i1> @icmp_slt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
; CHECK-NEXT: vmsgt.vi v10, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %vb, <8 x i32> %va, metadata !"slt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"slt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1293,9 +1225,7 @@ define <8 x i1> @icmp_sle_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vmsle.vi v10, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sle", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sle", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1306,9 +1236,7 @@ define <8 x i1> @icmp_sle_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext
; CHECK-NEXT: vmsgt.vi v10, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %vb, <8 x i32> %va, metadata !"sle", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sle", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1558,9 +1486,7 @@ define <8 x i1> @icmp_eq_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 4, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"eq", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"eq", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1571,9 +1497,7 @@ define <8 x i1> @icmp_eq_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %
; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 4, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %vb, <8 x i64> %va, metadata !"eq", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"eq", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1651,9 +1575,7 @@ define <8 x i1> @icmp_ne_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 4, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ne", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ne", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1664,9 +1586,7 @@ define <8 x i1> @icmp_ne_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %
; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 4, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %vb, <8 x i64> %va, metadata !"ne", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ne", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1744,9 +1664,7 @@ define <8 x i1> @icmp_ugt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 4, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ugt", <8 x i1> %m, i32 %evl)
ret <8 x i1> %v
}
@@ -1757,9 +1675,7 @@ define <8 x i1> @icmp_ugt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext
; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t
; CHECK-NEXT: vmv1r.v v0, v12
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 4, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %vb, <8 x i64> %va, metadata !"ugt", <8 x i1> %m, i32 %evl)
+ %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64
4), <8 x i64> %va, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v } @@ -1839,9 +1755,7 @@ define <8 x i1> @icmp_uge_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vmsgtu.vi v12, v8, 3, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"uge", <8 x i1> %m, i32 %evl) + %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v } @@ -1852,9 +1766,7 @@ define <8 x i1> @icmp_uge_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vmsleu.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %vb, <8 x i64> %va, metadata !"uge", <8 x i1> %m, i32 %evl) + %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v } @@ -1932,9 +1844,7 @@ define <8 x i1> @icmp_ult_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ult", <8 x i1> %m, i32 %evl) + %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v } @@ -1945,9 +1855,7 @@ define <8 x i1> @icmp_ult_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %vb, <8 x i64> %va, metadata !"ult", <8 x i1> %m, i32 %evl) + %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v } @@ -2025,9 +1933,7 @@ define <8 x i1> @icmp_sgt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sgt", <8 x i1> %m, i32 %evl) + %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v } @@ -2038,9 +1944,7 @@ define <8 x i1> @icmp_sgt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %vb, <8 x i64> %va, metadata !"sgt", <8 x i1> %m, i32 %evl) + %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata 
!"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v } @@ -2120,9 +2024,7 @@ define <8 x i1> @icmp_sge_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sge", <8 x i1> %m, i32 %evl) + %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v } @@ -2133,9 +2035,7 @@ define <8 x i1> @icmp_sge_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %vb, <8 x i64> %va, metadata !"sge", <8 x i1> %m, i32 %evl) + %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v } @@ -2213,9 +2113,7 @@ define <8 x i1> @icmp_slt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"slt", <8 x i1> %m, i32 %evl) + %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v } @@ -2226,9 +2124,7 @@ define <8 x i1> @icmp_slt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %vb, <8 x i64> %va, metadata !"slt", <8 x i1> %m, i32 %evl) + %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v } @@ -2308,9 +2204,7 @@ define <8 x i1> @icmp_sle_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sle", <8 x i1> %m, i32 %evl) + %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v } @@ -2321,8 +2215,6 @@ define <8 x i1> @icmp_sle_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %vb, <8 x i64> %va, metadata !"sle", <8 x i1> %m, i32 %evl) + %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sle", <8 x i1> %m, i32 %evl) ret 
<8 x i1> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp-mask.ll index c2c321bf91fbc..bd9b66997ff8d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp-mask.ll @@ -22,7 +22,7 @@ define <4 x i16> @vsext_v4i16_v4i1_unmasked(<4 x i1> %va, i32 zeroext %evl) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: ret - %v = call <4 x i16> @llvm.vp.sext.v4i16.v4i1(<4 x i1> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i16> @llvm.vp.sext.v4i16.v4i1(<4 x i1> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -46,7 +46,7 @@ define <4 x i32> @vsext_v4i32_v4i1_unmasked(<4 x i1> %va, i32 zeroext %evl) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: ret - %v = call <4 x i32> @llvm.vp.sext.v4i32.v4i1(<4 x i1> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i32> @llvm.vp.sext.v4i32.v4i1(<4 x i1> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -70,6 +70,6 @@ define <4 x i64> @vsext_v4i64_v4i1_unmasked(<4 x i1> %va, i32 zeroext %evl) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: ret - %v = call <4 x i64> @llvm.vp.sext.v4i64.v4i1(<4 x i1> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.sext.v4i64.v4i1(<4 x i1> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll index b1d3f5c3a3a6b..52596d8892411 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll @@ -22,7 +22,7 @@ define <4 x i16> @vsext_v4i16_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsext.vf2 v9, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret - %v = call <4 x i16> @llvm.vp.sext.v4i16.v4i8(<4 x i8> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i16> @llvm.vp.sext.v4i16.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -46,7 +46,7 @@ define <4 x i32> @vsext_v4i32_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsext.vf4 v9, v8 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %v = call <4 x i32> @llvm.vp.sext.v4i32.v4i8(<4 x i8> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i32> @llvm.vp.sext.v4i32.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -70,7 +70,7 @@ define <4 x i64> @vsext_v4i64_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsext.vf8 v10, v8 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x i64> @llvm.vp.sext.v4i64.v4i8(<4 x i8> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.sext.v4i64.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -94,7 +94,7 
@@ define <4 x i32> @vsext_v4i32_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsext.vf2 v9, v8 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %v = call <4 x i32> @llvm.vp.sext.v4i32.v4i16(<4 x i16> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i32> @llvm.vp.sext.v4i32.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -118,7 +118,7 @@ define <4 x i64> @vsext_v4i64_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsext.vf4 v10, v8 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x i64> @llvm.vp.sext.v4i64.v4i16(<4 x i16> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.sext.v4i64.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -142,7 +142,7 @@ define <4 x i64> @vsext_v4i64_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsext.vf2 v10, v8 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.sext.v4i64.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -197,7 +197,7 @@ define <32 x i64> @vsext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl ; CHECK-NEXT: vsext.vf2 v16, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %v = call <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) + %v = call <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp-mask.ll index 60469eb466b0b..67c045cc2b189 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp-mask.ll @@ -25,7 +25,7 @@ define <4 x half> @vsitofp_v4f16_v4i1_unmasked(<4 x i1> %va, i32 zeroext %evl) { ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8 ; CHECK-NEXT: ret - %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i1(<4 x i1> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i1(<4 x i1> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -52,7 +52,7 @@ define <4 x float> @vsitofp_v4f32_v4i1_unmasked(<4 x i1> %va, i32 zeroext %evl) ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8 ; CHECK-NEXT: ret - %v = call <4 x float> @llvm.vp.sitofp.v4f32.v4i1(<4 x i1> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x float> @llvm.vp.sitofp.v4f32.v4i1(<4 x i1> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -79,6 +79,6 @@ define <4 x double> @vsitofp_v4f64_v4i1_unmasked(<4 x i1> %va, i32 zeroext %evl) ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8 ; CHECK-NEXT: ret - %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i1(<4 
x i1> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i1(<4 x i1> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll index a6e55fc353252..5e93fdfc7a652 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll @@ -73,7 +73,7 @@ define <4 x half> @vsitofp_v4f16_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i8(<4 x i8> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -111,7 +111,7 @@ define <4 x half> @vsitofp_v4f16_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i16(<4 x i16> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -151,7 +151,7 @@ define <4 x half> @vsitofp_v4f16_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i32(<4 x i32> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -193,7 +193,7 @@ define <4 x half> @vsitofp_v4f16_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i64(<4 x i64> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x half> @llvm.vp.sitofp.v4f16.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -217,7 +217,7 @@ define <4 x float> @vsitofp_v4f32_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) ; CHECK-NEXT: vsext.vf2 v9, v8 ; CHECK-NEXT: vfwcvt.f.x.v v8, v9 ; CHECK-NEXT: ret - %v = call <4 x float> @llvm.vp.sitofp.v4f32.v4i8(<4 x i8> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x float> @llvm.vp.sitofp.v4f32.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -241,7 +241,7 @@ define <4 x float> @vsitofp_v4f32_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl ; CHECK-NEXT: vfwcvt.f.x.v v9, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret - %v = call <4 x float> @llvm.vp.sitofp.v4f32.v4i16(<4 x i16> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> 
zeroinitializer), i32 %evl) + %v = call <4 x float> @llvm.vp.sitofp.v4f32.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -263,7 +263,7 @@ define <4 x float> @vsitofp_v4f32_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.f.x.v v8, v8 ; CHECK-NEXT: ret - %v = call <4 x float> @llvm.vp.sitofp.v4f32.v4i32(<4 x i32> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x float> @llvm.vp.sitofp.v4f32.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -287,7 +287,7 @@ define <4 x float> @vsitofp_v4f32_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl ; CHECK-NEXT: vfncvt.f.x.w v10, v8 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x float> @llvm.vp.sitofp.v4f32.v4i64(<4 x i64> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x float> @llvm.vp.sitofp.v4f32.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -311,7 +311,7 @@ define <4 x double> @vsitofp_v4f64_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) ; CHECK-NEXT: vsext.vf4 v10, v8 ; CHECK-NEXT: vfwcvt.f.x.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i8(<4 x i8> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -335,7 +335,7 @@ define <4 x double> @vsitofp_v4f64_v4i16_unmasked(<4 x i16> %va, i32 zeroext %ev ; CHECK-NEXT: vsext.vf2 v10, v8 ; CHECK-NEXT: vfwcvt.f.x.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i16(<4 x i16> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -359,7 +359,7 @@ define <4 x double> @vsitofp_v4f64_v4i32_unmasked(<4 x i32> %va, i32 zeroext %ev ; CHECK-NEXT: vfwcvt.f.x.v v10, v8 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -381,7 +381,7 @@ define <4 x double> @vsitofp_v4f64_v4i64_unmasked(<4 x i64> %va, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.f.x.v v8, v8 ; CHECK-NEXT: ret - %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x double> @llvm.vp.sitofp.v4f64.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -430,6 +430,6 @@ define <32 x double> @vsitofp_v32f64_v32i64_unmasked(<32 x i64> %va, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.x.v v16, v16 ; CHECK-NEXT: ret - %v = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> shufflevector 
(<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) + %v = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index 2ae058128eaa0..6a8d2008de74d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -78,9 +78,7 @@ define <4 x i8> @strided_vpload_v4i8_allones_mask(ptr %ptr, i32 signext %stride, ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma ; CHECK-NEXT: vlse8.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement <4 x i1> poison, i1 true, i32 0 - %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer - %load = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %b, i32 %evl) + %load = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i32(ptr %ptr, i32 %stride, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %load } @@ -158,9 +156,7 @@ define <8 x i16> @strided_vpload_v8i16_allones_mask(ptr %ptr, i32 signext %strid ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlse16.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement <8 x i1> poison, i1 true, i32 0 - %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer - %load = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %b, i32 %evl) + %load = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %load } @@ -216,9 +212,7 @@ define <8 x i32> @strided_vpload_v8i32_allones_mask(ptr %ptr, i32 signext %strid ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; CHECK-NEXT: vlse32.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement <8 x i1> poison, i1 true, i32 0 - %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer - %load = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %b, i32 %evl) + %load = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i32(ptr %ptr, i32 %stride, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %load } @@ -262,9 +256,7 @@ define <4 x i64> @strided_vpload_v4i64_allones_mask(ptr %ptr, i32 signext %strid ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; CHECK-NEXT: vlse64.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement <4 x i1> poison, i1 true, i32 0 - %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer - %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %b, i32 %evl) + %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 %stride, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %load } @@ -298,9 +290,7 @@ define <2 x half> @strided_vpload_v2f16_allones_mask(ptr %ptr, i32 signext %stri ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma ; CHECK-NEXT: vlse16.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement <2 x i1> poison, i1 true, i32 0 - %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer - %load = call <2 x half> @llvm.experimental.vp.strided.load.v2f16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %b, i32 %evl) + %load = call <2 x half> @llvm.experimental.vp.strided.load.v2f16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> splat (i1 true), i32 
@@ -390,9 +380,7 @@ define <8 x float> @strided_vpload_v8f32_allones_mask(ptr %ptr, i32 signext %str
 ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
 ; CHECK-NEXT: vlse32.v v8, (a0), a1
 ; CHECK-NEXT: ret
- %a = insertelement <8 x i1> poison, i1 true, i32 0
- %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
- %load = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %b, i32 %evl)
+ %load = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i32(ptr %ptr, i32 %stride, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x float> %load
 }
@@ -437,9 +425,7 @@ define <4 x double> @strided_vpload_v4f64_allones_mask(ptr %ptr, i32 signext %st
 ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
 ; CHECK-NEXT: vlse64.v v8, (a0), a1
 ; CHECK-NEXT: ret
- %a = insertelement <4 x i1> poison, i1 true, i32 0
- %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
- %load = call <4 x double> @llvm.experimental.vp.strided.load.v4f64.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %b, i32 %evl)
+ %load = call <4 x double> @llvm.experimental.vp.strided.load.v4f64.p0.i32(ptr %ptr, i32 %stride, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %load
 }
@@ -472,9 +458,7 @@ define <3 x double> @strided_vpload_v3f64_allones_mask(ptr %ptr, i32 signext %st
 ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
 ; CHECK-NEXT: vlse64.v v8, (a0), a1
 ; CHECK-NEXT: ret
- %one = insertelement <3 x i1> poison, i1 true, i32 0
- %allones = shufflevector <3 x i1> %one, <3 x i1> poison, <3 x i32> zeroinitializer
- %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0.i32(ptr %ptr, i32 %stride, <3 x i1> %allones, i32 %evl)
+ %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0.i32(ptr %ptr, i32 %stride, <3 x i1> splat (i1 true), i32 %evl)
 ret <3 x double> %v
 }
@@ -530,9 +514,7 @@ define <32 x double> @strided_vpload_v32f64_allones_mask(ptr %ptr, i32 signext %
 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
 ; CHECK-NEXT: vlse64.v v8, (a0), a1
 ; CHECK-NEXT: ret
- %one = insertelement <32 x i1> poison, i1 true, i32 0
- %allones = shufflevector <32 x i1> %one, <32 x i1> poison, <32 x i32> zeroinitializer
- %load = call <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr %ptr, i32 %stride, <32 x i1> %allones, i32 %evl)
+ %load = call <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr %ptr, i32 %stride, <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x double> %load
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll
index 6c4960bd40784..dee422a4c17d1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll
@@ -376,9 +376,7 @@ define void @strided_vpstore_v2i8_allones_mask(<2 x i8> %val, ptr %ptr, i32 sign
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
 ; CHECK-NEXT: vsse8.v v8, (a0), a1
 ; CHECK-NEXT: ret
- %a = insertelement <2 x i1> poison, i1 true, i32 0
- %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
- call void @llvm.experimental.vp.strided.store.v2i8.p0.i32(<2 x i8> %val, ptr %ptr, i32 %stride, <2 x i1> %b, i32 %evl)
+ call void @llvm.experimental.vp.strided.store.v2i8.p0.i32(<2 x i8> %val, ptr %ptr, i32 %stride, <2 x i1> splat (i1 true), i32 %evl)
 ret void
 }
@@ -399,9 +397,7 @@ define void @strided_vpstore_v3f32_allones_mask(<3 x float> %v, ptr %ptr, i32 si
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT: vsse32.v v8, (a0), a1
 ; CHECK-NEXT: ret
- %one = insertelement <3 x i1> poison, i1 true, i32 0
- %allones = shufflevector <3 x i1> %one, <3 x i1> poison, <3 x i32> zeroinitializer
- call void @llvm.experimental.vp.strided.store.v3f32.p0.i32(<3 x float> %v, ptr %ptr, i32 %stride, <3 x i1> %allones, i32 %evl)
+ call void @llvm.experimental.vp.strided.store.v3f32.p0.i32(<3 x float> %v, ptr %ptr, i32 %stride, <3 x i1> splat (i1 true), i32 %evl)
 ret void
 }
@@ -454,9 +450,7 @@ define void @strided_store_v32f64_allones_mask(<32 x double> %v, ptr %ptr, i32 s
 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; CHECK-NEXT: vsse64.v v16, (a0), a1
 ; CHECK-NEXT: ret
- %one = insertelement <32 x i1> poison, i1 true, i32 0
- %allones = shufflevector <32 x i1> %one, <32 x i1> poison, <32 x i32> zeroinitializer
- call void @llvm.experimental.vp.strided.store.v32f64.p0.i32(<32 x double> %v, ptr %ptr, i32 %stride, <32 x i1> %allones, i32 %evl)
+ call void @llvm.experimental.vp.strided.store.v32f64.p0.i32(<32 x double> %v, ptr %ptr, i32 %stride, <32 x i1> splat (i1 true), i32 %evl)
 ret void
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp-mask.ll
index 41f956bec9030..adfb26cd31060 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp-mask.ll
@@ -25,7 +25,7 @@ define <4 x half> @vuitofp_v4f16_v4i1_unmasked(<4 x i1> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8
 ; CHECK-NEXT: ret
- %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i1(<4 x i1> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i1(<4 x i1> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -52,7 +52,7 @@ define <4 x float> @vuitofp_v4f32_v4i1_unmasked(<4 x i1> %va, i32 zeroext %evl)
 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8
 ; CHECK-NEXT: ret
- %v = call <4 x float> @llvm.vp.uitofp.v4f32.v4i1(<4 x i1> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x float> @llvm.vp.uitofp.v4f32.v4i1(<4 x i1> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -79,6 +79,6 @@ define <4 x double> @vuitofp_v4f64_v4i1_unmasked(<4 x i1> %va, i32 zeroext %evl)
 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8
 ; CHECK-NEXT: ret
- %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i1(<4 x i1> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i1(<4 x i1> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll
index 9047400427408..698c48bc55650 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll
@@ -73,7 +73,7 @@ define <4 x half> @vuitofp_v4f16_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT: ret
- %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i8(<4 x i8> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -111,7 +111,7 @@ define <4 x half> @vuitofp_v4f16_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl)
 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i16(<4 x i16> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -151,7 +151,7 @@ define <4 x half> @vuitofp_v4f16_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl)
 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i32(<4 x i32> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -193,7 +193,7 @@ define <4 x half> @vuitofp_v4f16_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl)
 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT: ret
- %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i64(<4 x i64> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x half> @llvm.vp.uitofp.v4f16.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -217,7 +217,7 @@ define <4 x float> @vuitofp_v4f32_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl)
 ; CHECK-NEXT: vzext.vf2 v9, v8
 ; CHECK-NEXT: vfwcvt.f.xu.v v8, v9
 ; CHECK-NEXT: ret
- %v = call <4 x float> @llvm.vp.uitofp.v4f32.v4i8(<4 x i8> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x float> @llvm.vp.uitofp.v4f32.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -241,7 +241,7 @@ define <4 x float> @vuitofp_v4f32_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl
 ; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: ret
- %v = call <4 x float> @llvm.vp.uitofp.v4f32.v4i16(<4 x i16> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x float> @llvm.vp.uitofp.v4f32.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -263,7 +263,7 @@ define <4 x float> @vuitofp_v4f32_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8
 ; CHECK-NEXT: ret
- %v = call <4 x float> @llvm.vp.uitofp.v4f32.v4i32(<4 x i32> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x float> @llvm.vp.uitofp.v4f32.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -287,7 +287,7 @@ define <4 x float> @vuitofp_v4f32_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl
 ; CHECK-NEXT: vfncvt.f.xu.w v10, v8
 ; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
- %v = call <4 x float> @llvm.vp.uitofp.v4f32.v4i64(<4 x i64> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x float> @llvm.vp.uitofp.v4f32.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -311,7 +311,7 @@ define <4 x double> @vuitofp_v4f64_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl)
 ; CHECK-NEXT: vzext.vf4 v10, v8
 ; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT: ret
- %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i8(<4 x i8> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
@@ -335,7 +335,7 @@ define <4 x double> @vuitofp_v4f64_v4i16_unmasked(<4 x i16> %va, i32 zeroext %ev
 ; CHECK-NEXT: vzext.vf2 v10, v8
 ; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
 ; CHECK-NEXT: ret
- %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i16(<4 x i16> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
@@ -359,7 +359,7 @@ define <4 x double> @vuitofp_v4f64_v4i32_unmasked(<4 x i32> %va, i32 zeroext %ev
 ; CHECK-NEXT: vfwcvt.f.xu.v v10, v8
 ; CHECK-NEXT: vmv2r.v v8, v10
 ; CHECK-NEXT: ret
- %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i32(<4 x i32> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
@@ -381,7 +381,7 @@ define <4 x double> @vuitofp_v4f64_v4i64_unmasked(<4 x i64> %va, i32 zeroext %ev
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8
 ; CHECK-NEXT: ret
- %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+ %v = call <4 x double> @llvm.vp.uitofp.v4f64.v4i64(<4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
@@ -430,6 +430,6 @@ define <32 x double> @vuitofp_v32f64_v32i64_unmasked(<32 x i64> %va, i32 zeroext
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfcvt.f.xu.v v16, v16
 ; CHECK-NEXT: ret
- %v = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
+ %v = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x double> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
index 954edf872aff8..70b547759938f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll
@@ -29,9 +29,7 @@ define <8 x i8> @vaaddu_vx_v8i8_floor(<8 x i8> %x, i8 %y) {
 %ysplat = shufflevector <8 x i8> %yhead, <8 x i8> poison, <8 x i32> zeroinitializer
 %yzv = zext <8 x i8> %ysplat to <8 x i16>
 %add = add nuw nsw <8 x i16> %xzv, %yzv
- %one = insertelement <8 x i16> poison, i16 1, i32 0
- %splat = shufflevector <8 x i16> %one, <8 x i16> poison, <8 x i32> zeroinitializer
- %div = lshr <8 x i16> %add, %splat
+ %div = lshr <8 x i16> %add, splat (i16 1)
 %ret = trunc <8 x i16> %div to <8 x i8>
 ret <8 x i8> %ret
 }
@@ -109,9 +107,7 @@ define <8 x i16> @vaaddu_vx_v8i16_floor(<8 x i16> %x, i16 %y) {
 %ysplat = shufflevector <8 x i16> %yhead, <8 x i16> poison, <8 x i32> zeroinitializer
 %yzv = zext <8 x i16> %ysplat to <8 x i32>
 %add = add nuw nsw <8 x i32> %xzv, %yzv
- %one = insertelement <8 x i32> poison, i32 1, i32 0
- %splat = shufflevector <8 x i32> %one, <8 x i32> poison, <8 x i32> zeroinitializer
- %div = lshr <8 x i32> %add, %splat
+ %div = lshr <8 x i32> %add, splat (i32 1)
 %ret = trunc <8 x i32> %div to <8 x i16>
 ret <8 x i16> %ret
 }
@@ -143,9 +139,7 @@ define <8 x i32> @vaaddu_vx_v8i32_floor(<8 x i32> %x, i32 %y) {
 %ysplat = shufflevector <8 x i32> %yhead, <8 x i32> poison, <8 x i32> zeroinitializer
 %yzv = zext <8 x i32> %ysplat to <8 x i64>
 %add = add nuw nsw <8 x i64> %xzv, %yzv
- %one = insertelement <8 x i64> poison, i64 1, i64 0
- %splat = shufflevector <8 x i64> %one, <8 x i64> poison, <8 x i32> zeroinitializer
- %div = lshr <8 x i64> %add, %splat
+ %div = lshr <8 x i64> %add, splat (i64 1)
 %ret = trunc <8 x i64> %div to <8 x i32>
 ret <8 x i32> %ret
 }
@@ -212,9 +206,7 @@ define <8 x i64> @vaaddu_vx_v8i64_floor(<8 x i64> %x, i64 %y) {
 %ysplat = shufflevector <8 x i64> %yhead, <8 x i64> poison, <8 x i32> zeroinitializer
 %yzv = zext <8 x i64> %ysplat to <8 x i128>
 %add = add nuw nsw <8 x i128> %xzv, %yzv
- %one = insertelement <8 x i128> poison, i128 1, i128 0
- %splat = shufflevector <8 x i128> %one, <8 x i128> poison, <8 x i32> zeroinitializer
- %div = lshr <8 x i128> %add, %splat
+ %div = lshr <8 x i128> %add, splat (i128 1)
 %ret = trunc <8 x i128> %div to <8 x i64>
 ret <8 x i64> %ret
 }
@@ -248,9 +240,7 @@ define <8 x i8> @vaaddu_vx_v8i8_ceil(<8 x i8> %x, i8 %y) {
 %yzv = zext <8 x i8> %ysplat to <8 x i16>
 %add = add nuw nsw <8 x i16> %xzv, %yzv
 %add1 = add nuw nsw <8 x i16> %add,
- %one = insertelement <8 x i16> poison, i16 1, i32 0
- %splat = shufflevector <8 x i16> %one, <8 x i16> poison, <8 x i32> zeroinitializer
- %div = lshr <8 x i16> %add1, %splat
+ %div = lshr <8 x i16> %add1, splat (i16 1)
 %ret = trunc <8 x i16> %div to <8 x i8>
 ret <8 x i8> %ret
 }
@@ -359,9 +349,7 @@ define <8 x i16> @vaaddu_vx_v8i16_ceil(<8 x i16> %x, i16 %y) {
 %yzv = zext <8 x i16> %ysplat to <8 x i32>
 %add = add nuw nsw <8 x i32> %xzv, %yzv
 %add1 = add nuw nsw <8 x i32> %add,
- %one = insertelement <8 x i32> poison, i32 1, i32 0
- %splat = shufflevector <8 x i32> %one, <8 x i32> poison, <8 x i32> zeroinitializer
- %div = lshr <8 x i32> %add1, %splat
+ %div = lshr <8 x i32> %add1, splat (i32 1)
 %ret = trunc <8 x i32> %div to <8 x i16>
 ret <8 x i16> %ret
 }
@@ -395,9 +383,7 @@ define <8 x i32> @vaaddu_vx_v8i32_ceil(<8 x i32> %x, i32 %y) {
 %yzv = zext <8 x i32> %ysplat to <8 x i64>
 %add = add nuw nsw <8 x i64> %xzv, %yzv
 %add1 = add nuw nsw <8 x i64> %add,
- %one = insertelement <8 x i64> poison, i64 1, i64 0
- %splat = shufflevector <8 x i64> %one, <8 x i64> poison, <8 x i32> zeroinitializer
- %div = lshr <8 x i64> %add1, %splat
+ %div = lshr <8 x i64> %add1, splat (i64 1)
 %ret = trunc <8 x i64> %div to <8 x i32>
 ret <8 x i32> %ret
 }
@@ -467,9 +453,7 @@ define <8 x i64> @vaaddu_vx_v8i64_ceil(<8 x i64> %x, i64 %y) {
 %yzv = zext <8 x i64> %ysplat to <8 x i128>
 %add = add nuw nsw <8 x i128> %xzv, %yzv
 %add1 = add nuw nsw <8 x i128> %add,
- %one = insertelement <8 x i128> poison, i128 1, i128 0
- %splat = shufflevector <8 x i128> %one, <8 x i128> poison, <8 x i32> zeroinitializer
- %div = lshr <8 x i128> %add1, %splat
+ %div = lshr <8 x i128> %add1, splat (i128 1)
 %ret = trunc <8 x i128> %div to <8 x i64>
 ret <8 x i64> %ret
 }
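; A self-contained sketch of the vaaddu floor pattern exercised above with the
; new syntax (illustrative only; the function name is hypothetical and this is
; not part of the patch): widen both inputs, add, then shift right by a
; splat-of-one constant instead of a shufflevector-built vector of ones.
define <8 x i8> @vaaddu_floor_sketch(<8 x i8> %x, <8 x i8> %y) {
  %xz = zext <8 x i8> %x to <8 x i16>     ; widen to avoid overflow
  %yz = zext <8 x i8> %y to <8 x i16>
  %sum = add nuw nsw <8 x i16> %xz, %yz
  %avg = lshr <8 x i16> %sum, splat (i16 1) ; floor((x + y) / 2)
  %res = trunc <8 x i16> %avg to <8 x i8>
  ret <8 x i8> %res
}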
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
index e15253b67275c..2c62cbd583d00 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
@@ -34,9 +34,7 @@ define <2 x i8> @vadd_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }
@@ -60,9 +58,7 @@ define <2 x i8> @vadd_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }
@@ -72,9 +68,7 @@ define <2 x i8> @vadd_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> splat (i8 -1), <2 x i1> %m, i32 %evl)
 ret <2 x i8> %v
 }
@@ -84,11 +78,7 @@ define <2 x i8> @vadd_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %va, <2 x i8> splat (i8 -1), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }
@@ -110,9 +100,7 @@ define <4 x i8> @vadd_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }
@@ -148,9 +136,7 @@ define <4 x i8> @vadd_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }
@@ -160,9 +146,7 @@ define <4 x i8> @vadd_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> splat (i8 -1), <4 x i1> %m, i32 %evl)
 ret <4 x i8> %v
 }
@@ -172,11 +156,7 @@ define <4 x i8> @vadd_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %va, <4 x i8> splat (i8 -1), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }
@@ -198,9 +178,7 @@ define <5 x i8> @vadd_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <5 x i1> poison, i1 true, i32 0
- %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
- %v = call <5 x i8> @llvm.vp.add.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+ %v = call <5 x i8> @llvm.vp.add.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> splat (i1 true), i32 %evl)
 ret <5 x i8> %v
 }
@@ -224,9 +202,7 @@ define <5 x i8> @vadd_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
- %head = insertelement <5 x i1> poison, i1 true, i32 0
- %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
- %v = call <5 x i8> @llvm.vp.add.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ %v = call <5 x i8> @llvm.vp.add.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> splat (i1 true), i32 %evl)
 ret <5 x i8> %v
 }
@@ -236,9 +212,7 @@ define <5 x i8> @vadd_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
- %v = call <5 x i8> @llvm.vp.add.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ %v = call <5 x i8> @llvm.vp.add.v5i8(<5 x i8> %va, <5 x i8> splat (i8 -1), <5 x i1> %m, i32 %evl)
 ret <5 x i8> %v
 }
@@ -248,11 +222,7 @@ define <5 x i8> @vadd_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
- %head = insertelement <5 x i1> poison, i1 true, i32 0
- %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
- %v = call <5 x i8> @llvm.vp.add.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ %v = call <5 x i8> @llvm.vp.add.v5i8(<5 x i8> %va, <5 x i8> splat (i8 -1), <5 x i1> splat (i1 true), i32 %evl)
 ret <5 x i8> %v
 }
@@ -274,9 +244,7 @@ define <8 x i8> @vadd_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }
@@ -300,9 +268,7 @@ define <8 x i8> @vadd_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }
@@ -312,9 +278,7 @@ define <8 x i8> @vadd_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> %m, i32 %evl)
 ret <8 x i8> %v
 }
@@ -324,11 +288,7 @@ define <8 x i8> @vadd_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.add.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }
@@ -350,9 +310,7 @@ define <16 x i8> @vadd_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }
@@ -376,9 +334,7 @@ define <16 x i8> @vadd_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl)
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }
@@ -388,9 +344,7 @@ define <16 x i8> @vadd_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> splat (i8 -1), <16 x i1> %m, i32 %evl)
 ret <16 x i8> %v
 }
@@ -400,11 +354,7 @@ define <16 x i8> @vadd_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.add.v16i8(<16 x i8> %va, <16 x i8> splat (i8 -1), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }
@@ -431,9 +381,7 @@ define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %ev
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
- %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl)
 ret <256 x i8> %v
 }
@@ -455,11 +403,7 @@ define <256 x i8> @vadd_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
 ; CHECK-NEXT: vadd.vi v16, v16, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
- %head = insertelement <256 x i1> poison, i1 true, i32 0
- %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer
- %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> splat (i1 true), i32 %evl)
 ret <256 x i8> %v
 }
@@ -476,9 +420,7 @@ define <256 x i8> @vadd_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) {
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
- %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
+ %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129)
 ret <256 x i8> %v
 }
@@ -491,9 +433,7 @@ define <256 x i8> @vadd_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
- %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
+ %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128)
 ret <256 x i8> %v
 }
@@ -515,9 +455,7 @@ define <2 x i16> @vadd_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }
@@ -541,9 +479,7 @@ define <2 x i16> @vadd_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }
@@ -553,9 +489,7 @@ define <2 x i16> @vadd_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> splat (i16 -1), <2 x i1> %m, i32 %evl)
 ret <2 x i16> %v
 }
@@ -565,11 +499,7 @@ define <2 x i16> @vadd_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.add.v2i16(<2 x i16> %va, <2 x i16> splat (i16 -1), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }
@@ -591,9 +521,7 @@ define <4 x i16> @vadd_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }
@@ -617,9 +545,7 @@ define <4 x i16> @vadd_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }
@@ -629,9 +555,7 @@ define <4 x i16> @vadd_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> splat (i16 -1), <4 x i1> %m, i32 %evl)
 ret <4 x i16> %v
 }
@@ -641,11 +565,7 @@ define <4 x i16> @vadd_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.add.v4i16(<4 x i16> %va, <4 x i16> splat (i16 -1), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }
@@ -667,9 +587,7 @@ define <8 x i16> @vadd_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }
@@ -693,9 +611,7 @@ define <8 x i16> @vadd_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }
@@ -705,9 +621,7 @@ define <8 x i16> @vadd_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16>
poison, i16 -1, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> splat (i16 -1), <8 x i1> %m, i32 %evl) ret <8 x i16> %v } @@ -717,11 +631,7 @@ define <8 x i16> @vadd_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.add.v8i16(<8 x i16> %va, <8 x i16> splat (i16 -1), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -743,9 +653,7 @@ define <16 x i16> @vadd_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -769,9 +677,7 @@ define <16 x i16> @vadd_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -781,9 +687,7 @@ define <16 x i16> @vadd_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> splat (i16 -1), <16 x i1> %m, i32 %evl) ret <16 x i16> %v } @@ -793,11 +697,7 @@ define <16 x i16> @vadd_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.add.v16i16(<16 x i16> %va, <16 x i16> splat (i16 -1), <16 x i1> splat (i1 true), i32 %evl) ret <16 x 
i16> %v } @@ -819,9 +719,7 @@ define <2 x i32> @vadd_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -845,9 +743,7 @@ define <2 x i32> @vadd_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -857,9 +753,7 @@ define <2 x i32> @vadd_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> splat (i32 -1), <2 x i1> %m, i32 %evl) ret <2 x i32> %v } @@ -869,11 +763,7 @@ define <2 x i32> @vadd_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %va, <2 x i32> splat (i32 -1), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -895,9 +785,7 @@ define <4 x i32> @vadd_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -921,9 +809,7 @@ define <4 x i32> @vadd_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.add.v4i32(<4 
x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -933,9 +819,7 @@ define <4 x i32> @vadd_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> splat (i32 -1), <4 x i1> %m, i32 %evl) ret <4 x i32> %v } @@ -945,11 +829,7 @@ define <4 x i32> @vadd_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %va, <4 x i32> splat (i32 -1), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -971,9 +851,7 @@ define <8 x i32> @vadd_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -997,9 +875,7 @@ define <8 x i32> @vadd_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -1009,9 +885,7 @@ define <8 x i32> @vadd_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> splat (i32 -1), <8 x i1> %m, i32 %evl) ret <8 x i32> %v } @@ -1021,11 +895,7 @@ define <8 x i32> @vadd_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> 
poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.add.v8i32(<8 x i32> %va, <8 x i32> splat (i32 -1), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -1047,9 +917,7 @@ define <16 x i32> @vadd_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -1073,9 +941,7 @@ define <16 x i32> @vadd_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -1085,9 +951,7 @@ define <16 x i32> @vadd_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> splat (i32 -1), <16 x i1> %m, i32 %evl) ret <16 x i32> %v } @@ -1097,11 +961,7 @@ define <16 x i32> @vadd_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.add.v16i32(<16 x i32> %va, <16 x i32> splat (i32 -1), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -1123,9 +983,7 @@ define <2 x i64> @vadd_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1177,9 +1035,7 @@ define <2 x i64> @vadd_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x 
i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1189,9 +1045,7 @@ define <2 x i64> @vadd_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> splat (i64 -1), <2 x i1> %m, i32 %evl) ret <2 x i64> %v } @@ -1201,11 +1055,7 @@ define <2 x i64> @vadd_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.add.v2i64(<2 x i64> %va, <2 x i64> splat (i64 -1), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1227,9 +1077,7 @@ define <4 x i64> @vadd_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1281,9 +1129,7 @@ define <4 x i64> @vadd_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1293,9 +1139,7 @@ define <4 x i64> @vadd_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> splat (i64 -1), <4 x i1> %m, i32 %evl) ret <4 x i64> %v } @@ -1305,11 +1149,7 @@ define <4 x i64> @vadd_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; 
CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> %va, <4 x i64> splat (i64 -1), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1331,9 +1171,7 @@ define <8 x i64> @vadd_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1385,9 +1223,7 @@ define <8 x i64> @vadd_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1397,9 +1233,7 @@ define <8 x i64> @vadd_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> splat (i64 -1), <8 x i1> %m, i32 %evl) ret <8 x i64> %v } @@ -1409,11 +1243,7 @@ define <8 x i64> @vadd_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.add.v8i64(<8 x i64> %va, <8 x i64> splat (i64 -1), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1435,9 +1265,7 @@ define <16 x i64> @vadd_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> 
@llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1489,9 +1317,7 @@ define <16 x i64> @vadd_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1501,9 +1327,7 @@ define <16 x i64> @vadd_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> splat (i64 -1), <16 x i1> %m, i32 %evl) ret <16 x i64> %v } @@ -1513,11 +1337,7 @@ define <16 x i64> @vadd_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.add.v16i64(<16 x i64> %va, <16 x i64> splat (i64 -1), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1529,16 +1349,16 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-LABEL: vadd_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB108_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1570,24 +1390,22 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vadd_vi_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; 
RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB109_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: addi a1, a0, -16 @@ -1615,11 +1433,7 @@ define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vadd.vi v16, v16, -1 ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } @@ -1640,9 +1454,7 @@ define <32 x i64> @vadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma ; RV64-NEXT: vadd.vi v8, v8, -1, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12) + %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) ret <32 x i64> %v } @@ -1671,8 +1483,6 @@ define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27) + %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll index 80b62a7a0aae6..507cf5cc6b80c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll @@ -34,9 +34,7 @@ define <2 x i8> @vand_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -72,9 +70,7 @@ define <2 x i8> @vand_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> 
@llvm.vp.and.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -86,9 +82,7 @@ define <2 x i8> @vand_vx_v2i8_unmasked_commute(<2 x i8> %va, i8 %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -98,9 +92,7 @@ define <2 x i8> @vand_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 4, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %va, <2 x i8> splat (i8 4), <2 x i1> %m, i32 %evl) ret <2 x i8> %v } @@ -110,11 +102,7 @@ define <2 x i8> @vand_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 4, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %va, <2 x i8> splat (i8 4), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -136,9 +124,7 @@ define <4 x i8> @vand_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.and.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.and.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -162,9 +148,7 @@ define <4 x i8> @vand_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.and.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.and.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -174,9 +158,7 @@ define <4 x i8> @vand_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 4, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> 
@llvm.vp.and.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.and.v4i8(<4 x i8> %va, <4 x i8> splat (i8 4), <4 x i1> %m, i32 %evl) ret <4 x i8> %v } @@ -186,11 +168,7 @@ define <4 x i8> @vand_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 4, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.and.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.and.v4i8(<4 x i8> %va, <4 x i8> splat (i8 4), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -212,9 +190,7 @@ define <8 x i8> @vand_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.and.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.and.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -238,9 +214,7 @@ define <8 x i8> @vand_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.and.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.and.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -250,9 +224,7 @@ define <8 x i8> @vand_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 4, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.and.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.and.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), <8 x i1> %m, i32 %evl) ret <8 x i8> %v } @@ -262,11 +234,7 @@ define <8 x i8> @vand_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 4, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.and.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.and.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -288,9 +256,7 @@ define <16 x i8> @vand_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> 
%head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.and.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.and.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -314,9 +280,7 @@ define <16 x i8> @vand_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.and.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.and.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -326,9 +290,7 @@ define <16 x i8> @vand_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 4, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.and.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.and.v16i8(<16 x i8> %va, <16 x i8> splat (i8 4), <16 x i1> %m, i32 %evl) ret <16 x i8> %v } @@ -338,11 +300,7 @@ define <16 x i8> @vand_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 4, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.and.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.and.v16i8(<16 x i8> %va, <16 x i8> splat (i8 4), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -364,9 +322,7 @@ define <2 x i16> @vand_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.and.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.and.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -390,9 +346,7 @@ define <2 x i16> @vand_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.and.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.and.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -402,9 +356,7 @@ define <2 x i16> @vand_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - 
%elt.head = insertelement <2 x i16> poison, i16 4, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.and.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.and.v2i16(<2 x i16> %va, <2 x i16> splat (i16 4), <2 x i1> %m, i32 %evl) ret <2 x i16> %v } @@ -414,11 +366,7 @@ define <2 x i16> @vand_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 4, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.and.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.and.v2i16(<2 x i16> %va, <2 x i16> splat (i16 4), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -440,9 +388,7 @@ define <4 x i16> @vand_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.and.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.and.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -466,9 +412,7 @@ define <4 x i16> @vand_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.and.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.and.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -478,9 +422,7 @@ define <4 x i16> @vand_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 4, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.and.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.and.v4i16(<4 x i16> %va, <4 x i16> splat (i16 4), <4 x i1> %m, i32 %evl) ret <4 x i16> %v } @@ -490,11 +432,7 @@ define <4 x i16> @vand_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 4, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.and.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.and.v4i16(<4 x i16> %va, <4 x i16> splat (i16 4), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -516,9 +454,7 @@ define <8 x i16> 
@vand_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.and.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.and.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -542,9 +478,7 @@ define <8 x i16> @vand_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.and.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.and.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -554,9 +488,7 @@ define <8 x i16> @vand_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 4, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.and.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.and.v8i16(<8 x i16> %va, <8 x i16> splat (i16 4), <8 x i1> %m, i32 %evl) ret <8 x i16> %v } @@ -566,11 +498,7 @@ define <8 x i16> @vand_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 4, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.and.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.and.v8i16(<8 x i16> %va, <8 x i16> splat (i16 4), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -592,9 +520,7 @@ define <16 x i16> @vand_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.and.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.and.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -618,9 +544,7 @@ define <16 x i16> @vand_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.and.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.and.v16i16(<16 x i16> %va, <16 x i16> 
%vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -630,9 +554,7 @@ define <16 x i16> @vand_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 4, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.and.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.and.v16i16(<16 x i16> %va, <16 x i16> splat (i16 4), <16 x i1> %m, i32 %evl) ret <16 x i16> %v } @@ -642,11 +564,7 @@ define <16 x i16> @vand_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 4, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.and.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.and.v16i16(<16 x i16> %va, <16 x i16> splat (i16 4), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -668,9 +586,7 @@ define <2 x i32> @vand_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.and.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.and.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -694,9 +610,7 @@ define <2 x i32> @vand_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.and.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.and.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -706,9 +620,7 @@ define <2 x i32> @vand_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 4, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.and.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.and.v2i32(<2 x i32> %va, <2 x i32> splat (i32 4), <2 x i1> %m, i32 %evl) ret <2 x i32> %v } @@ -718,11 +630,7 @@ define <2 x i32> @vand_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 4, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x 
i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.and.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.and.v2i32(<2 x i32> %va, <2 x i32> splat (i32 4), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }
@@ -744,9 +652,7 @@ define <4 x i32> @vand_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vand.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }
@@ -770,9 +676,7 @@ define <4 x i32> @vand_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }
@@ -782,9 +686,7 @@ define <4 x i32> @vand_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 4, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> %va, <4 x i32> splat (i32 4), <4 x i1> %m, i32 %evl)
 ret <4 x i32> %v
 }
@@ -794,11 +696,7 @@ define <4 x i32> @vand_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 4, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.and.v4i32(<4 x i32> %va, <4 x i32> splat (i32 4), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }
@@ -820,9 +718,7 @@ define <8 x i32> @vand_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vand.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }
@@ -846,9 +742,7 @@ define <8 x i32> @vand_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }
@@ -858,9 +752,7 @@ define <8 x i32> @vand_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), <8 x i1> %m, i32 %evl)
 ret <8 x i32> %v
 }
@@ -870,11 +762,7 @@ define <8 x i32> @vand_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 4, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.and.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }
@@ -896,9 +784,7 @@ define <16 x i32> @vand_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vand.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.and.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.and.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }
@@ -922,9 +808,7 @@ define <16 x i32> @vand_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.and.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.and.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }
@@ -934,9 +818,7 @@ define <16 x i32> @vand_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 4, i32 0
- %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.and.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.and.v16i32(<16 x i32> %va, <16 x i32> splat (i32 4), <16 x i1> %m, i32 %evl)
 ret <16 x i32> %v
 }
@@ -946,11 +828,7 @@ define <16 x i32> @vand_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 4, i32 0
- %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.and.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.and.v16i32(<16 x i32> %va, <16 x i32> splat (i32 4), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }
@@ -972,9 +850,7 @@ define <2 x i64> @vand_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vand.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.and.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.and.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }
@@ -1026,9 +902,7 @@ define <2 x i64> @vand_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
 ; RV64-NEXT: ret
 %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.and.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.and.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }
@@ -1038,9 +912,7 @@ define <2 x i64> @vand_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 4, i32 0
- %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.and.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.and.v2i64(<2 x i64> %va, <2 x i64> splat (i64 4), <2 x i1> %m, i32 %evl)
 ret <2 x i64> %v
 }
@@ -1050,11 +922,7 @@ define <2 x i64> @vand_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 4, i32 0
- %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.and.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.and.v2i64(<2 x i64> %va, <2 x i64> splat (i64 4), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }
@@ -1076,9 +944,7 @@ define <4 x i64> @vand_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vand.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.and.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.and.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }
@@ -1130,9 +996,7 @@ define <4 x i64> @vand_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
 ; RV64-NEXT: ret
 %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.and.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.and.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }
@@ -1142,9 +1006,7 @@ define <4 x i64> @vand_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 4, i32 0
- %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.and.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.and.v4i64(<4 x i64> %va, <4 x i64> splat (i64 4), <4 x i1> %m, i32 %evl)
 ret <4 x i64> %v
 }
@@ -1154,11 +1016,7 @@ define <4 x i64> @vand_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 4, i32 0
- %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.and.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.and.v4i64(<4 x i64> %va, <4 x i64> splat (i64 4), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }
@@ -1180,9 +1038,7 @@ define <8 x i64> @vand_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vand.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.and.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.and.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }
@@ -1234,9 +1090,7 @@ define <8 x i64> @vand_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
 ; RV64-NEXT: ret
 %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.and.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.and.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }
@@ -1246,9 +1100,7 @@ define <8 x i64> @vand_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 4, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.and.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.and.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), <8 x i1> %m, i32 %evl)
 ret <8 x i64> %v
 }
@@ -1258,11 +1110,7 @@ define <8 x i64> @vand_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 4, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.and.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.and.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }
@@ -1284,9 +1132,7 @@ define <11 x i64> @vand_vv_v11i64_unmasked(<11 x i64> %va, <11 x i64> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vand.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <11 x i1> poison, i1 true, i32 0
- %m = shufflevector <11 x i1> %head, <11 x i1> poison, <11 x i32> zeroinitializer
- %v = call <11 x i64> @llvm.vp.and.v11i64(<11 x i64> %va, <11 x i64> %b, <11 x i1> %m, i32 %evl)
+ %v = call <11 x i64> @llvm.vp.and.v11i64(<11 x i64> %va, <11 x i64> %b, <11 x i1> splat (i1 true), i32 %evl)
 ret <11 x i64> %v
 }
@@ -1338,9 +1184,7 @@ define <11 x i64> @vand_vx_v11i64_unmasked(<11 x i64> %va, i64 %b, i32 zeroext %
 ; RV64-NEXT: ret
 %elt.head = insertelement <11 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <11 x i64> %elt.head, <11 x i64> poison, <11 x i32> zeroinitializer
- %head = insertelement <11 x i1> poison, i1 true, i32 0
- %m = shufflevector <11 x i1> %head, <11 x i1> poison, <11 x i32> zeroinitializer
- %v = call <11 x i64> @llvm.vp.and.v11i64(<11 x i64> %va, <11 x i64> %vb, <11 x i1> %m, i32 %evl)
+ %v = call <11 x i64> @llvm.vp.and.v11i64(<11 x i64> %va, <11 x i64> %vb, <11 x i1> splat (i1 true), i32 %evl)
 ret <11 x i64> %v
 }
@@ -1350,9 +1194,7 @@ define <11 x i64> @vand_vi_v11i64(<11 x i64> %va, <11 x i1> %m, i32 zeroext %evl
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <11 x i64> poison, i64 4, i32 0
- %vb = shufflevector <11 x i64> %elt.head, <11 x i64> poison, <11 x i32> zeroinitializer
- %v = call <11 x i64> @llvm.vp.and.v11i64(<11 x i64> %va, <11 x i64> %vb, <11 x i1> %m, i32 %evl)
+ %v = call <11 x i64> @llvm.vp.and.v11i64(<11 x i64> %va, <11 x i64> splat (i64 4), <11 x i1> %m, i32 %evl)
 ret <11 x i64> %v
 }
@@ -1362,11 +1204,7 @@ define <11 x i64> @vand_vi_v11i64_unmasked(<11 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vand.vi v8, v8, 4
 ; CHECK-NEXT: ret
- %elt.head = insertelement <11 x i64> poison, i64 4, i32 0
- %vb = shufflevector <11 x i64> %elt.head, <11 x i64> poison, <11 x i32> zeroinitializer
- %head = insertelement <11 x i1> poison, i1 true, i32 0
- %m = shufflevector <11 x i1> %head, <11 x i1> poison, <11 x i32> zeroinitializer
- %v = call <11 x i64> @llvm.vp.and.v11i64(<11 x i64> %va, <11 x i64> %vb, <11 x i1> %m, i32 %evl)
+ %v = call <11 x i64> @llvm.vp.and.v11i64(<11 x i64> %va, <11 x i64> splat (i64 4), <11 x i1> splat (i1 true), i32 %evl)
 ret <11 x i64> %v
 }
@@ -1388,9 +1226,7 @@ define <16 x i64> @vand_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vand.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.and.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.and.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1442,9 +1278,7 @@ define <16 x i64> @vand_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.and.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.and.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1454,9 +1288,7 @@ define <16 x i64> @vand_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 4, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.and.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.and.v16i64(<16 x i64> %va, <16 x i64> splat (i64 4), <16 x i1> %m, i32 %evl) ret <16 x i64> %v } @@ -1466,10 +1298,6 @@ define <16 x i64> @vand_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 4, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.and.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.and.v16i64(<16 x i64> %va, <16 x i64> splat (i64 4), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll index f83968d54b2c9..01b07b4081e6d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -22,9 +22,7 @@ define <2 x half> @vfsgnj_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.copysign.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.copysign.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -46,9 +44,7 @@ define <4 x half> @vfsgnj_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x 
half> @llvm.vp.copysign.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.copysign.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -70,9 +66,7 @@ define <8 x half> @vfsgnj_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.copysign.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.copysign.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -94,9 +88,7 @@ define <16 x half> @vfsgnj_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.copysign.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.copysign.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -118,9 +110,7 @@ define <2 x float> @vfsgnj_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.copysign.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.copysign.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -142,9 +132,7 @@ define <4 x float> @vfsgnj_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.copysign.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.copysign.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -166,9 +154,7 @@ define <8 x float> @vfsgnj_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.copysign.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.copysign.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -190,9 +176,7 @@ define <16 x float> @vfsgnj_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %v ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.copysign.v16f32(<16 x float> %va, <16 x float> 
%vb, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.copysign.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -214,9 +198,7 @@ define <2 x double> @vfsgnj_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.copysign.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.copysign.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -238,9 +220,7 @@ define <4 x double> @vfsgnj_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.copysign.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.copysign.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -262,9 +242,7 @@ define <8 x double> @vfsgnj_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.copysign.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.copysign.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -286,9 +264,7 @@ define <15 x double> @vfsgnj_vv_v15f64_unmasked(<15 x double> %va, <15 x double> ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <15 x i1> poison, i1 true, i32 0 - %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer - %v = call <15 x double> @llvm.vp.copysign.v15f64(<15 x double> %va, <15 x double> %vb, <15 x i1> %m, i32 %evl) + %v = call <15 x double> @llvm.vp.copysign.v15f64(<15 x double> %va, <15 x double> %vb, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -310,9 +286,7 @@ define <16 x double> @vfsgnj_vv_v16f64_unmasked(<16 x double> %va, <16 x double> ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.copysign.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.copysign.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -384,8 +358,6 @@ define <32 x double> @vfsgnj_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v16, v16, v24 ; CHECK-NEXT: ret - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x double> @llvm.vp.copysign.v32f64(<32 x 
double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x double> @llvm.vp.copysign.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll index 435d3ff1746d1..29f8eaba90052 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll @@ -39,9 +39,7 @@ define <2 x i8> @vdiv_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.sdiv.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.sdiv.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -65,9 +63,7 @@ define <2 x i8> @vdiv_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.sdiv.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.sdiv.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -89,9 +85,7 @@ define <4 x i8> @vdiv_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.sdiv.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.sdiv.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -115,9 +109,7 @@ define <4 x i8> @vdiv_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.sdiv.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.sdiv.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -151,9 +143,7 @@ define <8 x i8> @vdiv_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.sdiv.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.sdiv.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -177,9 +167,7 @@ define <8 x i8> @vdiv_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x 
i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.sdiv.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.sdiv.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -201,9 +189,7 @@ define <16 x i8> @vdiv_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.sdiv.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.sdiv.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -227,9 +213,7 @@ define <16 x i8> @vdiv_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.sdiv.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.sdiv.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -251,9 +235,7 @@ define <2 x i16> @vdiv_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.sdiv.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.sdiv.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -277,9 +259,7 @@ define <2 x i16> @vdiv_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.sdiv.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.sdiv.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -301,9 +281,7 @@ define <4 x i16> @vdiv_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.sdiv.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.sdiv.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -327,9 +305,7 @@ define <4 x i16> @vdiv_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> 
zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.sdiv.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.sdiv.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -351,9 +327,7 @@ define <8 x i16> @vdiv_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.sdiv.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.sdiv.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -377,9 +351,7 @@ define <8 x i16> @vdiv_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.sdiv.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.sdiv.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -401,9 +373,7 @@ define <16 x i16> @vdiv_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.sdiv.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.sdiv.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -427,9 +397,7 @@ define <16 x i16> @vdiv_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.sdiv.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.sdiv.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -451,9 +419,7 @@ define <2 x i32> @vdiv_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.sdiv.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.sdiv.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -477,9 +443,7 @@ define <2 x i32> @vdiv_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, 
<2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.sdiv.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.sdiv.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -501,9 +465,7 @@ define <4 x i32> @vdiv_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -527,9 +489,7 @@ define <4 x i32> @vdiv_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -551,9 +511,7 @@ define <8 x i32> @vdiv_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -577,9 +535,7 @@ define <8 x i32> @vdiv_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.sdiv.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -601,9 +557,7 @@ define <16 x i32> @vdiv_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.sdiv.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.sdiv.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -627,9 +581,7 @@ define <16 x i32> @vdiv_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x 
i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.sdiv.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.sdiv.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -651,9 +603,7 @@ define <2 x i64> @vdiv_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.sdiv.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.sdiv.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -705,9 +655,7 @@ define <2 x i64> @vdiv_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.sdiv.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.sdiv.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -729,9 +677,7 @@ define <4 x i64> @vdiv_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.sdiv.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.sdiv.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -783,9 +729,7 @@ define <4 x i64> @vdiv_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.sdiv.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.sdiv.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -807,9 +751,7 @@ define <8 x i64> @vdiv_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.sdiv.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.sdiv.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -861,9 +803,7 @@ define <8 x i64> @vdiv_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> 
zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.sdiv.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.sdiv.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -885,9 +825,7 @@ define <16 x i64> @vdiv_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.sdiv.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.sdiv.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -939,8 +877,6 @@ define <16 x i64> @vdiv_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.sdiv.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.sdiv.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll index 4e78c5cde2faf..3f8eb0ff276b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll @@ -38,9 +38,7 @@ define <2 x i8> @vdivu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.udiv.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.udiv.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -64,9 +62,7 @@ define <2 x i8> @vdivu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.udiv.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.udiv.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -88,9 +84,7 @@ define <4 x i8> @vdivu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.udiv.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.udiv.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 
x i8> %v } @@ -114,9 +108,7 @@ define <4 x i8> @vdivu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.udiv.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.udiv.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -150,9 +142,7 @@ define <8 x i8> @vdivu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.udiv.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.udiv.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -176,9 +166,7 @@ define <8 x i8> @vdivu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.udiv.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.udiv.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -200,9 +188,7 @@ define <16 x i8> @vdivu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.udiv.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.udiv.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -226,9 +212,7 @@ define <16 x i8> @vdivu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.udiv.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.udiv.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -250,9 +234,7 @@ define <2 x i16> @vdivu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.udiv.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.udiv.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -276,9 +258,7 @@ define <2 x 
i16> @vdivu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.udiv.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.udiv.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -300,9 +280,7 @@ define <4 x i16> @vdivu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.udiv.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.udiv.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -326,9 +304,7 @@ define <4 x i16> @vdivu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.udiv.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.udiv.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -350,9 +326,7 @@ define <8 x i16> @vdivu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.udiv.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.udiv.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -376,9 +350,7 @@ define <8 x i16> @vdivu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.udiv.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.udiv.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -400,9 +372,7 @@ define <16 x i16> @vdivu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 z ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.udiv.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.udiv.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -426,9 +396,7 @@ define <16 x i16> 
@vdivu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.udiv.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.udiv.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -450,9 +418,7 @@ define <2 x i32> @vdivu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -476,9 +442,7 @@ define <2 x i32> @vdivu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.udiv.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -500,9 +464,7 @@ define <4 x i32> @vdivu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -526,9 +488,7 @@ define <4 x i32> @vdivu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -550,9 +510,7 @@ define <8 x i32> @vdivu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -576,9 +534,7 @@ define <8 x i32> 
@vdivu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.udiv.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -600,9 +556,7 @@ define <16 x i32> @vdivu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 z ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.udiv.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.udiv.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -626,9 +580,7 @@ define <16 x i32> @vdivu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.udiv.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.udiv.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -650,9 +602,7 @@ define <2 x i64> @vdivu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.udiv.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.udiv.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -704,9 +654,7 @@ define <2 x i64> @vdivu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.udiv.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.udiv.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -728,9 +676,7 @@ define <4 x i64> @vdivu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.udiv.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.udiv.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -782,9 +728,7 @@ define 
@@ -782,9 +728,7 @@ define <4 x i64> @vdivu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev
 ; RV64-NEXT: ret
 %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.udiv.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.udiv.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }
@@ -806,9 +750,7 @@ define <8 x i64> @vdivu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vdivu.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.udiv.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.udiv.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }
@@ -860,9 +802,7 @@ define <8 x i64> @vdivu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev
 ; RV64-NEXT: ret
 %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.udiv.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.udiv.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }
@@ -884,9 +824,7 @@ define <16 x i64> @vdivu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 z
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vdivu.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.udiv.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.udiv.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }
@@ -938,8 +876,6 @@ define <16 x i64> @vdivu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
 ; RV64-NEXT: ret
 %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.udiv.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.udiv.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
index a30f682d5cf1e..f32e2bbf37946 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
@@ -46,9 +46,7 @@ define <2 x half> @vfabs_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %v
 }
@@ -90,9 +88,7 @@ define <4 x half> @vfabs_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -134,9 +130,7 @@ define <8 x half> @vfabs_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x half> %v
 }
@@ -178,9 +172,7 @@ define <16 x half> @vfabs_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl)
 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x half> %v
 }
@@ -202,9 +194,7 @@ define <2 x float> @vfabs_vv_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vfabs.v v8, v8
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.fabs.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.fabs.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x float> %v
 }
@@ -226,9 +216,7 @@ define <4 x float> @vfabs_vv_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfabs.v v8, v8
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.fabs.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.fabs.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -250,9 +238,7 @@ define <8 x float> @vfabs_vv_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfabs.v v8, v8
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.fabs.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.fabs.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x float> %v
 }
@@ -274,9 +260,7 @@ define <16 x float> @vfabs_vv_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfabs.v v8, v8
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.fabs.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.fabs.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x float> %v
 }
@@ -298,9 +282,7 @@ define <2 x double> @vfabs_vv_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vfabs.v v8, v8
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fabs.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fabs.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x double> %v
 }
@@ -322,9 +304,7 @@ define <4 x double> @vfabs_vv_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vfabs.v v8, v8
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fabs.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fabs.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
@@ -346,9 +326,7 @@ define <8 x double> @vfabs_vv_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vfabs.v v8, v8
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fabs.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fabs.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x double> %v
 }
@@ -370,9 +348,7 @@ define <15 x double> @vfabs_vv_v15f64_unmasked(<15 x double> %va, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfabs.v v8, v8
 ; CHECK-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x double> @llvm.vp.fabs.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
+ %v = call <15 x double> @llvm.vp.fabs.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl)
 ret <15 x double> %v
 }
@@ -394,9 +370,7 @@ define <16 x double> @vfabs_vv_v16f64_unmasked(<16 x double> %va, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfabs.v v8, v8
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x double> @llvm.vp.fabs.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
+ %v = call <16 x double> @llvm.vp.fabs.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x double> %v
 }
@@ -445,8 +419,6 @@ define <32 x double> @vfabs_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfabs.v v16, v16
 ; CHECK-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x double> @llvm.vp.fabs.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
+ %v = call <32 x double> @llvm.vp.fabs.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x double> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
index fa01057b21203..f023c760f14a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
@@ -48,9 +48,7 @@ define <2 x half> @vfadd_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, i32 ze
 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %v
 }
@@ -106,9 +104,7 @@ define <2 x half> @vfadd_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <2 x half> poison, half %b, i32 0
 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.fadd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %v
 }
@@ -175,9 +171,7 @@ define <4 x half> @vfadd_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, i32 ze
 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -233,9 +227,7 @@ define <4 x half> @vfadd_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <4 x half> poison, half %b, i32 0
 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.fadd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -279,9 +271,7 @@ define <8 x half> @vfadd_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, i32 ze
 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x half> %v
 }
@@ -337,9 +327,7 @@ define <8 x half> @vfadd_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <8 x half> poison, half %b, i32 0
 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.fadd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x half> %v
 }
@@ -383,9 +371,7 @@ define <16 x half> @vfadd_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, i3
 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x half> %v
 }
@@ -441,9 +427,7 @@ define <16 x half> @vfadd_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <16 x half> poison, half %b, i32 0
 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.fadd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x half> %v
 }
@@ -465,9 +449,7 @@ define <2 x float> @vfadd_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, i32
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vfadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x float> %v
 }
@@ -503,9 +485,7 @@ define <2 x float> @vfadd_vf_v2f32_unmasked(<2 x float> %va, float %b, i32 zeroe
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x float> poison, float %b, i32 0
 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x float> %v
 }
@@ -517,9 +497,7 @@ define <2 x float> @vfadd_vf_v2f32_unmasked_commute(<2 x float> %va, float %b, i
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x float> poison, float %b, i32 0
 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %vb, <2 x float> %va, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %vb, <2 x float> %va, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x float> %v
 }
@@ -541,9 +519,7 @@ define <4 x float> @vfadd_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, i32
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -567,9 +543,7 @@ define <4 x float> @vfadd_vf_v4f32_unmasked(<4 x float> %va, float %b, i32 zeroe
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x float> poison, float %b, i32 0
 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -591,9 +565,7 @@ define <8 x float> @vfadd_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, i32
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfadd.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x float> %v
 }
@@ -617,9 +589,7 @@ define <8 x float> @vfadd_vf_v8f32_unmasked(<8 x float> %va, float %b, i32 zeroe
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x float> poison, float %b, i32 0
 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.fadd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x float> %v
 }
@@ -641,9 +611,7 @@ define <16 x float> @vfadd_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b,
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfadd.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x float> %v
 }
@@ -667,9 +635,7 @@ define <16 x float> @vfadd_vf_v16f32_unmasked(<16 x float> %va, float %b, i32 ze
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x float> poison, float %b, i32 0
 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.fadd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x float> %v
 }
@@ -691,9 +657,7 @@ define <2 x double> @vfadd_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b,
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vfadd.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x double> %v
 }
@@ -717,9 +681,7 @@ define <2 x double> @vfadd_vf_v2f64_unmasked(<2 x double> %va, double %b, i32 ze
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x double> poison, double %b, i32 0
 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fadd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x double> %v
 }
@@ -741,9 +703,7 @@ define <4 x double> @vfadd_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b,
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vfadd.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
@@ -767,9 +727,7 @@ define <4 x double> @vfadd_vf_v4f64_unmasked(<4 x double> %va, double %b, i32 ze
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x double> poison, double %b, i32 0
 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fadd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
@@ -791,9 +749,7 @@ define <8 x double> @vfadd_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b,
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vfadd.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x double> %v
 }
@@ -817,9 +773,7 @@ define <8 x double> @vfadd_vf_v8f64_unmasked(<8 x double> %va, double %b, i32 ze
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fadd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x double> %v
 }
@@ -841,9 +795,7 @@ define <16 x double> @vfadd_vv_v16f64_unmasked(<16 x double> %va, <16 x double>
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfadd.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x double> %v
 }
@@ -867,8 +819,6 @@ define <16 x double> @vfadd_vf_v16f64_unmasked(<16 x double> %va, double %b, i32
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x double> poison, double %b, i32 0
 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x double> @llvm.vp.fadd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x double> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
index 8eefae291d343..09b9e7ce4c53d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll
@@ -26,9 +26,7 @@ define <2 x i1> @isnan_v2f16_unmasked(<2 x half> %x, i32 zeroext %evl) {
 ; CHECK-NEXT: vand.vx v8, v8, a0
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %1 = call <2 x i1> @llvm.vp.is.fpclass.v2f16(<2 x half> %x, i32 3, <2 x i1> %m, i32 %evl) ; nan
+ %1 = call <2 x i1> @llvm.vp.is.fpclass.v2f16(<2 x half> %x, i32 3, <2 x i1> splat (i1 true), i32 %evl) ; nan
 ret <2 x i1> %1
 }
@@ -54,9 +52,7 @@ define <2 x i1> @isnan_v2f32_unmasked(<2 x float> %x, i32 zeroext %evl) {
 ; CHECK-NEXT: vand.vx v8, v8, a0
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %1 = call <2 x i1> @llvm.vp.is.fpclass.v2f32(<2 x float> %x, i32 639, <2 x i1> %m, i32 %evl)
+ %1 = call <2 x i1> @llvm.vp.is.fpclass.v2f32(<2 x float> %x, i32 639, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i1> %1
 }
@@ -82,9 +78,7 @@ define <4 x i1> @isnan_v4f32_unmasked(<4 x float> %x, i32 zeroext %evl) {
 ; CHECK-NEXT: vand.vx v8, v8, a0
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %1 = call <4 x i1> @llvm.vp.is.fpclass.v4f32(<4 x float> %x, i32 3, <4 x i1> %m, i32 %evl) ; nan
+ %1 = call <4 x i1> @llvm.vp.is.fpclass.v4f32(<4 x float> %x, i32 3, <4 x i1> splat (i1 true), i32 %evl) ; nan
 ret <4 x i1> %1
 }
@@ -109,9 +103,7 @@ define <8 x i1> @isnan_v8f32_unmasked(<8 x float> %x, i32 zeroext %evl) {
 ; CHECK-NEXT: li a0, 512
 ; CHECK-NEXT: vmseq.vx v0, v8, a0
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f32(<8 x float> %x, i32 2, <8 x i1> %m, i32 %evl)
+ %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f32(<8 x float> %x, i32 2, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i1> %1
 }
@@ -136,9 +128,7 @@ define <16 x i1> @isnan_v16f32_unmasked(<16 x float> %x, i32 zeroext %evl) {
 ; CHECK-NEXT: li a0, 256
 ; CHECK-NEXT: vmseq.vx v0, v8, a0
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f32(<16 x float> %x, i32 1, <16 x i1> %m, i32 %evl)
+ %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f32(<16 x float> %x, i32 1, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i1> %1
 }
@@ -164,9 +154,7 @@ define <2 x i1> @isnormal_v2f64_unmasked(<2 x double> %x, i32 zeroext %evl) {
 ; CHECK-NEXT: vand.vx v8, v8, a0
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %1 = call <2 x i1> @llvm.vp.is.fpclass.v2f64(<2 x double> %x, i32 516, <2 x i1> %m, i32 %evl) ; 0x204 = "inf"
+ %1 = call <2 x i1> @llvm.vp.is.fpclass.v2f64(<2 x double> %x, i32 516, <2 x i1> splat (i1 true), i32 %evl) ; 0x204 = "inf"
 ret <2 x i1> %1
 }
@@ -191,9 +179,7 @@ define <4 x i1> @isposinf_v4f64_unmasked(<4 x double> %x, i32 zeroext %evl) {
 ; CHECK-NEXT: li a0, 128
 ; CHECK-NEXT: vmseq.vx v0, v8, a0
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %1 = call <4 x i1> @llvm.vp.is.fpclass.v4f64(<4 x double> %x, i32 512, <4 x i1> %m, i32 %evl) ; 0x200 = "+inf"
+ %1 = call <4 x i1> @llvm.vp.is.fpclass.v4f64(<4 x double> %x, i32 512, <4 x i1> splat (i1 true), i32 %evl) ; 0x200 = "+inf"
 ret <4 x i1> %1
 }
@@ -216,9 +202,7 @@ define <8 x i1> @isneginf_v8f64_unmasked(<8 x double> %x, i32 zeroext %evl) {
 ; CHECK-NEXT: vfclass.v v8, v8
 ; CHECK-NEXT: vmseq.vi v0, v8, 1
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f64(<8 x double> %x, i32 4, <8 x i1> %m, i32 %evl) ; "-inf"
+ %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f64(<8 x double> %x, i32 4, <8 x i1> splat (i1 true), i32 %evl) ; "-inf"
 ret <8 x i1> %1
 }
@@ -245,9 +229,7 @@ define <16 x i1> @isfinite_v16f64_unmasked(<16 x double> %x, i32 zeroext %evl) {
 ; CHECK-NEXT: vand.vx v8, v8, a0
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 504, <16 x i1> %m, i32 %evl) ; 0x1f8 = "finite"
+ %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 504, <16 x i1> splat (i1 true), i32 %evl) ; 0x1f8 = "finite"
 ret <16 x i1> %1
 }
@@ -273,9 +255,7 @@ define <16 x i1> @isnegfinite_v16f64_unmasked(<16 x double> %x, i32 zeroext %evl
 ; CHECK-NEXT: vand.vi v8, v8, 14
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 56, <16 x i1> %m, i32 %evl) ; 0x38 = "-finite"
+ %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 56, <16 x i1> splat (i1 true), i32 %evl) ; 0x38 = "-finite"
 ret <16 x i1> %1
 }
@@ -302,9 +282,7 @@ define <16 x i1> @isnotfinite_v16f64_unmasked(<16 x double> %x, i32 zeroext %evl
 ; CHECK-NEXT: vand.vx v8, v8, a0
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 519, <16 x i1> %m, i32 %evl) ; 0x207 = "inf|nan"
+ %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 519, <16 x i1> splat (i1 true), i32 %evl) ; 0x207 = "inf|nan"
 ret <16 x i1> %1
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
index 8c19e0bae81cf..9fb8377d5a5ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
@@ -48,9 +48,7 @@ define <2 x half> @vfdiv_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, i32 ze
 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %v
 }
@@ -106,9 +104,7 @@ define <2 x half> @vfdiv_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <2 x half> poison, half %b, i32 0
 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %v
 }
@@ -175,9 +171,7 @@ define <4 x half> @vfdiv_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, i32 ze
 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -233,9 +227,7 @@ define <4 x half> @vfdiv_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <4 x half> poison, half %b, i32 0
 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -279,9 +271,7 @@ define <8 x half> @vfdiv_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, i32 ze
 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x half> %v
 }
@@ -337,9 +327,7 @@ define <8 x half> @vfdiv_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <8 x half> poison, half %b, i32 0
 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x half> %v
 }
@@ -383,9 +371,7 @@ define <16 x half> @vfdiv_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, i3
 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x half> %v
 }
@@ -441,9 +427,7 @@ define <16 x half> @vfdiv_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <16 x half> poison, half %b, i32 0
 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x half> %v
 }
@@ -465,9 +449,7 @@ define <2 x float> @vfdiv_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, i32
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x float> %v
 }
@@ -491,9 +473,7 @@ define <2 x float> @vfdiv_vf_v2f32_unmasked(<2 x float> %va, float %b, i32 zeroe
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x float> poison, float %b, i32 0
 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x float> %v
 }
@@ -515,9 +495,7 @@ define <4 x float> @vfdiv_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, i32
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -541,9 +519,7 @@ define <4 x float> @vfdiv_vf_v4f32_unmasked(<4 x float> %va, float %b, i32 zeroe
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x float> poison, float %b, i32 0
 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -565,9 +541,7 @@ define <8 x float> @vfdiv_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, i32
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x float> %v
 }
@@ -591,9 +565,7 @@ define <8 x float> @vfdiv_vf_v8f32_unmasked(<8 x float> %va, float %b, i32 zeroe
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x float> poison, float %b, i32 0
 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x float> %v
 }
@@ -615,9 +587,7 @@ define <16 x float> @vfdiv_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b,
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x float> %v
 }
@@ -641,9 +611,7 @@ define <16 x float> @vfdiv_vf_v16f32_unmasked(<16 x float> %va, float %b, i32 ze
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x float> poison, float %b, i32 0
 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x float> %v
 }
@@ -665,9 +633,7 @@ define <2 x double> @vfdiv_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b,
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x double> %v
 }
@@ -691,9 +657,7 @@ define <2 x double> @vfdiv_vf_v2f64_unmasked(<2 x double> %va, double %b, i32 ze
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x double> poison, double %b, i32 0
 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x double> %v
 }
@@ -715,9 +679,7 @@ define <4 x double> @vfdiv_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b,
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
@@ -741,9 +703,7 @@ define <4 x double> @vfdiv_vf_v4f64_unmasked(<4 x double> %va, double %b, i32 ze
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x double> poison, double %b, i32 0
 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
@@ -765,9 +725,7 @@ define <8 x double> @vfdiv_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b,
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x double> %v
 }
@@ -791,9 +749,7 @@ define <8 x double> @vfdiv_vf_v8f64_unmasked(<8 x double> %va, double %b, i32 ze
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x double> %v
 }
@@ -815,9 +771,7 @@ define <16 x double> @vfdiv_vv_v16f64_unmasked(<16 x double> %va, <16 x double>
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x double> %v
 }
@@ -841,8 +795,6 @@ define <16 x double> @vfdiv_vf_v16f64_unmasked(<16 x double> %va, double %b, i32
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x double> poison, double %b, i32 0
 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x double> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
index d7b89ee054af8..0574773fb2fd9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll
@@ -51,9 +51,7 @@ define <2 x half> @vfma_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, <2 x ha
 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %v
 }
@@ -111,9 +109,7 @@ define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %v
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <2 x half> poison, half %b, i32 0
 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %v
 }
@@ -160,9 +156,7 @@ define <4 x half> @vfma_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, <4 x ha
 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -220,9 +214,7 @@ define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %v
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <4 x half> poison, half %b, i32 0
 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -269,9 +261,7 @@ define <8 x half> @vfma_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, <8 x ha
 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x half> %v
 }
@@ -329,9 +319,7 @@ define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %v
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <8 x half> poison, half %b, i32 0
 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x half> %v
 }
@@ -378,9 +366,7 @@ define <16 x half> @vfma_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, <16
 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x half> %v
 }
@@ -438,9 +424,7 @@ define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half
 ; ZVFHMIN-NEXT: ret
 %elt.head = insertelement <16 x half> poison, half %b, i32 0
 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x half> %v
 }
@@ -463,9 +447,7 @@ define <2 x float> @vfma_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, <2 x
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x float> %v
 }
@@ -489,9 +471,7 @@ define <2 x float> @vfma_vf_v2f32_unmasked(<2 x float> %va, float %b, <2 x float
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x float> poison, float %b, i32 0
 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x float> %v
 }
@@ -514,9 +494,7 @@ define <4 x float> @vfma_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, <4 x
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -540,9 +518,7 @@ define <4 x float> @vfma_vf_v4f32_unmasked(<4 x float> %va, float %b, <4 x float
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x float> poison, float %b, i32 0
 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -565,9 +541,7 @@ define <8 x float> @vfma_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, <8 x
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v10, v12
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x float> %v
 }
@@ -591,9 +565,7 @@ define <8 x float> @vfma_vf_v8f32_unmasked(<8 x float> %va, float %b, <8 x float
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x float> poison, float %b, i32 0
 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x float> %v
 }
@@ -616,9 +588,7 @@ define <16 x float> @vfma_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b,
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v12, v16
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x float> %v
 }
@@ -642,9 +612,7 @@ define <16 x float> @vfma_vf_v16f32_unmasked(<16 x float> %va, float %b, <16 x f
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x float> poison, float %b, i32 0
 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x float> %v
 }
@@ -667,9 +635,7 @@ define <2 x double> @vfma_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b, <
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x double> %v
 }
@@ -693,9 +659,7 @@ define <2 x double> @vfma_vf_v2f64_unmasked(<2 x double> %va, double %b, <2 x do
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x double> poison, double %b, i32 0
 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x double> %v
 }
@@ -718,9 +682,7 @@ define <4 x double> @vfma_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b, <
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v10, v12
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
@vfma_vf_v4f64_unmasked(<4 x double> %va, double %b, <4 x do ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -769,9 +729,7 @@ define <8 x double> @vfma_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b, < ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -795,9 +753,7 @@ define <8 x double> @vfma_vf_v8f64_unmasked(<8 x double> %va, double %b, <8 x do ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -824,9 +780,7 @@ define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> % ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement <15 x i1> poison, i1 true, i32 0 - %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer - %v = call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl) + %v = call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -853,9 +807,7 @@ define <16 x double> @vfma_vv_v16f64_unmasked(<16 x double> %va, <16 x double> % ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -879,9 +831,7 @@ define <16 x double> @vfma_vf_v16f64_unmasked(<16 x double> %va, double %b, <16 ; CHECK-NEXT: ret %elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = 
-  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl)
+  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x double> %v
 }

@@ -1032,8 +982,6 @@ define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
-  %head = insertelement <32 x i1> poison, i1 true, i32 0
-  %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
-  %v = call <32 x double> @llvm.vp.fma.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl)
+  %v = call <32 x double> @llvm.vp.fma.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> splat (i1 true), i32 %evl)
   ret <32 x double> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmacc-vp.ll
index 78b83046738c6..2d6e1fd02dee5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmacc-vp.ll
@@ -16,9 +16,7 @@ define <2 x half> @vfmacc_vv_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c,
 ; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %allones, i32 %evl)
+  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
   ret <2 x half> %u
 }
@@ -30,10 +28,8 @@ define <2 x half> @vfmacc_vv_v2f16_unmasked(<2 x half> %a, <2 x half> %b, <2 x h
 ; CHECK-NEXT: vfmacc.vv v10, v8, v9
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl)
+  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> splat (i1 -1), <2 x half> %v, <2 x half> %c, i32 %evl)
   ret <2 x half> %u
 }

@@ -46,9 +42,7 @@ define <2 x half> @vfmacc_vf_v2f16(<2 x half> %va, half %b, <2 x half> %c, <2 x
 ; CHECK-NEXT: ret
   %elt.head = insertelement <2 x half> poison, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %c, <2 x i1> %allones, i32 %evl)
+  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
   ret <2 x half> %u
 }
@@ -62,9 +56,7 @@ define <2 x half>
@vfmacc_vf_v2f16_commute(<2 x half> %va, half %b, <2 x half> % ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %va, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %va, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -78,10 +70,8 @@ define <2 x half> @vfmacc_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %c, <2 x i1> %allones, i32 %evl) - %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> splat (i1 -1), <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -92,9 +82,7 @@ define <2 x half> @vfmacc_vv_v2f16_ta(<2 x half> %a, <2 x half> %b, <2 x half> % ; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -108,9 +96,7 @@ define <2 x half> @vfmacc_vf_v2f16_ta(<2 x half> %va, half %b, <2 x half> %c, <2 ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -124,9 +110,7 @@ define <2 x half> @vfmacc_vf_v2f16_commute_ta(<2 x half> %va, half %b, <2 x half ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %va, <2 x half> %c, <2 x 
i1> %allones, i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %va, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -143,9 +127,7 @@ define <4 x half> @vfmacc_vv_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, ; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -157,10 +139,8 @@ define <4 x half> @vfmacc_vv_v4f16_unmasked(<4 x half> %a, <4 x half> %b, <4 x h ; CHECK-NEXT: vfmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %allones, i32 %evl) - %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> splat (i1 -1), <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -173,9 +153,7 @@ define <4 x half> @vfmacc_vf_v4f16(<4 x half> %va, half %b, <4 x half> %c, <4 x ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -189,9 +167,7 @@ define <4 x half> @vfmacc_vf_v4f16_commute(<4 x half> %va, half %b, <4 x half> % ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %va, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %va, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -205,10 +181,8 @@ define <4 x half> @vfmacc_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %splat = 
insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %c, <4 x i1> %allones, i32 %evl) - %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> splat (i1 -1), <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -219,9 +193,7 @@ define <4 x half> @vfmacc_vv_v4f16_ta(<4 x half> %a, <4 x half> %b, <4 x half> % ; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -235,9 +207,7 @@ define <4 x half> @vfmacc_vf_v4f16_ta(<4 x half> %va, half %b, <4 x half> %c, <4 ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -251,9 +221,7 @@ define <4 x half> @vfmacc_vf_v4f16_commute_ta(<4 x half> %va, half %b, <4 x half ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %va, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %va, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -270,9 +238,7 @@ define <8 x half> @vfmacc_vv_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, ; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) ret <8 x half> %u } @@ -284,10 +250,8 @@ define <8 x half> 
@vfmacc_vv_v8f16_unmasked(<8 x half> %a, <8 x half> %b, <8 x h ; CHECK-NEXT: vfmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %allones, i32 %evl) - %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> splat (i1 -1), <8 x half> %v, <8 x half> %c, i32 %evl) ret <8 x half> %u } @@ -300,9 +264,7 @@ define <8 x half> @vfmacc_vf_v8f16(<8 x half> %va, half %b, <8 x half> %c, <8 x ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) ret <8 x half> %u } @@ -316,9 +278,7 @@ define <8 x half> @vfmacc_vf_v8f16_commute(<8 x half> %va, half %b, <8 x half> % ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %va, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %va, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) ret <8 x half> %u } @@ -332,10 +292,8 @@ define <8 x half> @vfmacc_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %c, <8 x i1> %allones, i32 %evl) - %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> splat (i1 -1), <8 x half> %v, <8 x half> %c, i32 %evl) ret <8 x half> %u } @@ -346,9 +304,7 @@ define <8 x half> @vfmacc_vv_v8f16_ta(<8 x half> %a, <8 x half> %b, <8 x half> % ; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, 
<8 x half> %b, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) ret <8 x half> %u } @@ -362,9 +318,7 @@ define <8 x half> @vfmacc_vf_v8f16_ta(<8 x half> %va, half %b, <8 x half> %c, <8 ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) ret <8 x half> %u } @@ -378,9 +332,7 @@ define <8 x half> @vfmacc_vf_v8f16_commute_ta(<8 x half> %va, half %b, <8 x half ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %va, <8 x half> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %va, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl) ret <8 x half> %u } @@ -397,9 +349,7 @@ define <16 x half> @vfmacc_vv_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> ; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) ret <16 x half> %u } @@ -411,10 +361,8 @@ define <16 x half> @vfmacc_vv_v16f16_unmasked(<16 x half> %a, <16 x half> %b, <1 ; CHECK-NEXT: vfmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %allones, i32 %evl) - %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> splat (i1 -1), <16 x half> %v, <16 x half> %c, i32 %evl) ret <16 x half> %u } @@ -427,9 +375,7 @@ define <16 x half> @vfmacc_vf_v16f16(<16 x half> %va, half %b, <16 x half> %c, < ; CHECK-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = 
shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) ret <16 x half> %u } @@ -443,9 +389,7 @@ define <16 x half> @vfmacc_vf_v16f16_commute(<16 x half> %va, half %b, <16 x hal ; CHECK-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %va, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %va, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) ret <16 x half> %u } @@ -459,10 +403,8 @@ define <16 x half> @vfmacc_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x ha ; CHECK-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %c, <16 x i1> %allones, i32 %evl) - %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> splat (i1 -1), <16 x half> %v, <16 x half> %c, i32 %evl) ret <16 x half> %u } @@ -473,9 +415,7 @@ define <16 x half> @vfmacc_vv_v16f16_ta(<16 x half> %a, <16 x half> %b, <16 x ha ; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) ret <16 x half> %u } @@ -489,9 +429,7 @@ define <16 x half> @vfmacc_vf_v16f16_ta(<16 x half> %va, half %b, <16 x half> %c ; CHECK-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %c, <16 x i1> %allones, i32 %evl) 
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) ret <16 x half> %u } @@ -505,9 +443,7 @@ define <16 x half> @vfmacc_vf_v16f16_commute_ta(<16 x half> %va, half %b, <16 x ; CHECK-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %va, <16 x half> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %va, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) ret <16 x half> %u } @@ -524,9 +460,7 @@ define <32 x half> @vfmacc_vv_v32f16(<32 x half> %a, <32 x half> %b, <32 x half> ; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -538,10 +472,8 @@ define <32 x half> @vfmacc_vv_v32f16_unmasked(<32 x half> %a, <32 x half> %b, <3 ; CHECK-NEXT: vfmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %allones, i32 %evl) - %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> splat (i1 -1), <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -554,9 +486,7 @@ define <32 x half> @vfmacc_vf_v32f16(<32 x half> %va, half %b, <32 x half> %c, < ; CHECK-NEXT: ret %elt.head = insertelement <32 x half> poison, half %b, i32 0 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %va, <32 x half> %vb, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %va, <32 x half> %vb, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -570,9 +500,7 @@ define <32 x half> @vfmacc_vf_v32f16_commute(<32 x half> %va, half %b, <32 x hal ; CHECK-NEXT: ret %elt.head = insertelement <32 x half> poison, half %b, i32 0 %vb = 
shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %va, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %va, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -586,10 +514,8 @@ define <32 x half> @vfmacc_vf_v32f16_unmasked(<32 x half> %va, half %b, <32 x ha ; CHECK-NEXT: ret %elt.head = insertelement <32 x half> poison, half %b, i32 0 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %va, <32 x half> %vb, <32 x half> %c, <32 x i1> %allones, i32 %evl) - %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %va, <32 x half> %vb, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> splat (i1 -1), <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -600,9 +526,7 @@ define <32 x half> @vfmacc_vv_v32f16_ta(<32 x half> %a, <32 x half> %b, <32 x ha ; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -616,9 +540,7 @@ define <32 x half> @vfmacc_vf_v32f16_ta(<32 x half> %va, half %b, <32 x half> %c ; CHECK-NEXT: ret %elt.head = insertelement <32 x half> poison, half %b, i32 0 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %va, <32 x half> %vb, <32 x half> %c, <32 x i1> %allones, i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %va, <32 x half> %vb, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -632,9 +554,7 @@ define <32 x half> @vfmacc_vf_v32f16_commute_ta(<32 x half> %va, half %b, <32 x ; CHECK-NEXT: ret %elt.head = insertelement <32 x half> poison, half %b, i32 0 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %va, <32 x half> %c, <32 x i1> %allones, i32 %evl) 
+ %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %va, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -651,9 +571,7 @@ define <2 x float> @vfmacc_vv_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> ; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -665,10 +583,8 @@ define <2 x float> @vfmacc_vv_v2f32_unmasked(<2 x float> %a, <2 x float> %b, <2 ; CHECK-NEXT: vfmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %allones, i32 %evl) - %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> splat (i1 -1), <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -681,9 +597,7 @@ define <2 x float> @vfmacc_vf_v2f32(<2 x float> %va, float %b, <2 x float> %c, < ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -697,9 +611,7 @@ define <2 x float> @vfmacc_vf_v2f32_commute(<2 x float> %va, float %b, <2 x floa ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %va, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %va, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -713,10 +625,8 @@ define <2 x float> @vfmacc_vf_v2f32_unmasked(<2 x float> %va, float %b, <2 x flo ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> 
poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %c, <2 x i1> %allones, i32 %evl) - %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> splat (i1 -1), <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -727,9 +637,7 @@ define <2 x float> @vfmacc_vv_v2f32_ta(<2 x float> %a, <2 x float> %b, <2 x floa ; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -743,9 +651,7 @@ define <2 x float> @vfmacc_vf_v2f32_ta(<2 x float> %va, float %b, <2 x float> %c ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -759,9 +665,7 @@ define <2 x float> @vfmacc_vf_v2f32_commute_ta(<2 x float> %va, float %b, <2 x f ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %va, <2 x float> %c, <2 x i1> %allones, i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %va, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -778,9 +682,7 @@ define <4 x float> @vfmacc_vv_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> ; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> 
%m, <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -792,10 +694,8 @@ define <4 x float> @vfmacc_vv_v4f32_unmasked(<4 x float> %a, <4 x float> %b, <4 ; CHECK-NEXT: vfmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %allones, i32 %evl) - %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> splat (i1 -1), <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -808,9 +708,7 @@ define <4 x float> @vfmacc_vf_v4f32(<4 x float> %va, float %b, <4 x float> %c, < ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -824,9 +722,7 @@ define <4 x float> @vfmacc_vf_v4f32_commute(<4 x float> %va, float %b, <4 x floa ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %va, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %va, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -840,10 +736,8 @@ define <4 x float> @vfmacc_vf_v4f32_unmasked(<4 x float> %va, float %b, <4 x flo ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %c, <4 x i1> %allones, i32 %evl) - %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> splat (i1 -1), <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -854,9 +748,7 @@ define <4 x float> @vfmacc_vv_v4f32_ta(<4 x float> %a, <4 x float> %b, <4 x floa ; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement 
<4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -870,9 +762,7 @@ define <4 x float> @vfmacc_vf_v4f32_ta(<4 x float> %va, float %b, <4 x float> %c ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -886,9 +776,7 @@ define <4 x float> @vfmacc_vf_v4f32_commute_ta(<4 x float> %va, float %b, <4 x f ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %va, <4 x float> %c, <4 x i1> %allones, i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %va, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -905,9 +793,7 @@ define <8 x float> @vfmacc_vv_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> ; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -919,10 +805,8 @@ define <8 x float> @vfmacc_vv_v8f32_unmasked(<8 x float> %a, <8 x float> %b, <8 ; CHECK-NEXT: vfmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %allones, i32 %evl) - %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> splat (i1 -1), <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ 
-935,9 +819,7 @@ define <8 x float> @vfmacc_vf_v8f32(<8 x float> %va, float %b, <8 x float> %c, < ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -951,9 +833,7 @@ define <8 x float> @vfmacc_vf_v8f32_commute(<8 x float> %va, float %b, <8 x floa ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %va, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %va, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -967,10 +847,8 @@ define <8 x float> @vfmacc_vf_v8f32_unmasked(<8 x float> %va, float %b, <8 x flo ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %c, <8 x i1> %allones, i32 %evl) - %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> splat (i1 -1), <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -981,9 +859,7 @@ define <8 x float> @vfmacc_vv_v8f32_ta(<8 x float> %a, <8 x float> %b, <8 x floa ; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -997,9 +873,7 @@ define <8 x float> @vfmacc_vf_v8f32_ta(<8 x float> %va, float %b, <8 x float> %c ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> 
zeroinitializer - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -1013,9 +887,7 @@ define <8 x float> @vfmacc_vf_v8f32_commute_ta(<8 x float> %va, float %b, <8 x f ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %va, <8 x float> %c, <8 x i1> %allones, i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %va, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -1032,9 +904,7 @@ define <16 x float> @vfmacc_vv_v16f32(<16 x float> %a, <16 x float> %b, <16 x fl ; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1046,10 +916,8 @@ define <16 x float> @vfmacc_vv_v16f32_unmasked(<16 x float> %a, <16 x float> %b, ; CHECK-NEXT: vfmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %allones, i32 %evl) - %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> splat (i1 -1), <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1062,9 +930,7 @@ define <16 x float> @vfmacc_vf_v16f32(<16 x float> %va, float %b, <16 x float> % ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %c, <16 x i1> %allones, i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1078,9 +944,7 
@@ define <16 x float> @vfmacc_vf_v16f32_commute(<16 x float> %va, float %b, <16 x
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x float> poison, float %b, i32 0
 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %va, <16 x float> %c, <16 x i1> %allones, i32 %evl)
+ %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %va, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
 %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
 ret <16 x float> %u
 }
@@ -1094,10 +958,8 @@ define <16 x float> @vfmacc_vf_v16f32_unmasked(<16 x float> %va, float %b, <16 x
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x float> poison, float %b, i32 0
 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %c, <16 x i1> %allones, i32 %evl)
- %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl)
+ %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> splat (i1 -1), <16 x float> %v, <16 x float> %c, i32 %evl)
 ret <16 x float> %u
 }
@@ -1108,9 +970,7 @@ define <16 x float> @vfmacc_vv_v16f32_ta(<16 x float> %a, <16 x float> %b, <16 x
 ; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v16
 ; CHECK-NEXT: ret
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %allones, i32 %evl)
+ %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
 %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
 ret <16 x float> %u
 }
@@ -1124,9 +984,7 @@ define <16 x float> @vfmacc_vf_v16f32_ta(<16 x float> %va, float %b, <16 x float
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x float> poison, float %b, i32 0
 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %c, <16 x i1> %allones, i32 %evl)
+ %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
 %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
 ret <16 x float> %u
 }
@@ -1140,9 +998,7 @@ define <16 x float> @vfmacc_vf_v16f32_commute_ta(<16 x float> %va, float %b, <16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x float> poison, float %b, i32 0
 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %va, <16 x float> %c, <16 x i1> %allones, i32 %evl)
+ %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %va, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
 %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
 ret <16 x float> %u
 }
@@ -1159,9 +1015,7 @@ define <2 x double> @vfmacc_vv_v2f64(<2 x double> %a, <2 x double> %b, <2 x doub
 ; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %allones, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
 %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
 ret <2 x double> %u
 }
@@ -1173,10 +1027,8 @@ define <2 x double> @vfmacc_vv_v2f64_unmasked(<2 x double> %a, <2 x double> %b,
 ; CHECK-NEXT: vfmacc.vv v10, v8, v9
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %allones, i32 %evl)
- %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> splat (i1 -1), <2 x double> %v, <2 x double> %c, i32 %evl)
 ret <2 x double> %u
 }
@@ -1189,9 +1041,7 @@ define <2 x double> @vfmacc_vf_v2f64(<2 x double> %va, double %b, <2 x double> %
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x double> poison, double %b, i32 0
 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %c, <2 x i1> %allones, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
 %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
 ret <2 x double> %u
 }
@@ -1205,9 +1055,7 @@ define <2 x double> @vfmacc_vf_v2f64_commute(<2 x double> %va, double %b, <2 x d
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x double> poison, double %b, i32 0
 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %va, <2 x double> %c, <2 x i1> %allones, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %va, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
 %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
 ret <2 x double> %u
 }
@@ -1221,10 +1069,8 @@ define <2 x double> @vfmacc_vf_v2f64_unmasked(<2 x double> %va, double %b, <2 x
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x double> poison, double %b, i32 0
 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %c, <2 x i1> %allones, i32 %evl)
- %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> splat (i1 -1), <2 x double> %v, <2 x double> %c, i32 %evl)
 ret <2 x double> %u
 }
@@ -1235,9 +1081,7 @@ define <2 x double> @vfmacc_vv_v2f64_ta(<2 x double> %a, <2 x double> %b, <2 x d
 ; CHECK-NEXT: vfmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %allones, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
 %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
 ret <2 x double> %u
 }
@@ -1251,9 +1095,7 @@ define <2 x double> @vfmacc_vf_v2f64_ta(<2 x double> %va, double %b, <2 x double
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x double> poison, double %b, i32 0
 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %c, <2 x i1> %allones, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
 %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
 ret <2 x double> %u
 }
@@ -1267,9 +1109,7 @@ define <2 x double> @vfmacc_vf_v2f64_commute_ta(<2 x double> %va, double %b, <2
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x double> poison, double %b, i32 0
 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %va, <2 x double> %c, <2 x i1> %allones, i32 %evl)
+ %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %va, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
 %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
 ret <2 x double> %u
 }
@@ -1286,9 +1126,7 @@ define <4 x double> @vfmacc_vv_v4f64(<4 x double> %a, <4 x double> %b, <4 x doub
 ; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv2r.v v8, v12
 ; CHECK-NEXT: ret
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %allones, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
 %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
 ret <4 x double> %u
 }
@@ -1300,10 +1138,8 @@ define <4 x double> @vfmacc_vv_v4f64_unmasked(<4 x double> %a, <4 x double> %b,
 ; CHECK-NEXT: vfmacc.vv v12, v8, v10
 ; CHECK-NEXT: vmv2r.v v8, v12
 ; CHECK-NEXT: ret
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %allones, i32 %evl)
- %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> splat (i1 -1), <4 x double> %v, <4 x double> %c, i32 %evl)
 ret <4 x double> %u
 }
@@ -1316,9 +1152,7 @@ define <4 x double> @vfmacc_vf_v4f64(<4 x double> %va, double %b, <4 x double> %
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x double> poison, double %b, i32 0
 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %c, <4 x i1> %allones, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
 %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
 ret <4 x double> %u
 }
@@ -1332,9 +1166,7 @@ define <4 x double> @vfmacc_vf_v4f64_commute(<4 x double> %va, double %b, <4 x d
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x double> poison, double %b, i32 0
 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %va, <4 x double> %c, <4 x i1> %allones, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %va, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
 %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
 ret <4 x double> %u
 }
@@ -1348,10 +1180,8 @@ define <4 x double> @vfmacc_vf_v4f64_unmasked(<4 x double> %va, double %b, <4 x
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x double> poison, double %b, i32 0
 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %c, <4 x i1> %allones, i32 %evl)
- %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> splat (i1 -1), <4 x double> %v, <4 x double> %c, i32 %evl)
 ret <4 x double> %u
 }
@@ -1362,9 +1192,7 @@ define <4 x double> @vfmacc_vv_v4f64_ta(<4 x double> %a, <4 x double> %b, <4 x d
 ; CHECK-NEXT: vfmacc.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v12
 ; CHECK-NEXT: ret
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %allones, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
 %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
 ret <4 x double> %u
 }
@@ -1378,9 +1206,7 @@ define <4 x double> @vfmacc_vf_v4f64_ta(<4 x double> %va, double %b, <4 x double
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x double> poison, double %b, i32 0
 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %c, <4 x i1> %allones, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
 %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
 ret <4 x double> %u
 }
@@ -1394,9 +1220,7 @@ define <4 x double> @vfmacc_vf_v4f64_commute_ta(<4 x double> %va, double %b, <4
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x double> poison, double %b, i32 0
 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %va, <4 x double> %c, <4 x i1> %allones, i32 %evl)
+ %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %va, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
 %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
 ret <4 x double> %u
 }
@@ -1413,9 +1237,7 @@ define <8 x double> @vfmacc_vv_v8f64(<8 x double> %a, <8 x double> %b, <8 x doub
 ; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t
 ; CHECK-NEXT: vmv4r.v v8, v16
 ; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %allones, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
 %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
 ret <8 x double> %u
 }
@@ -1427,10 +1249,8 @@ define <8 x double> @vfmacc_vv_v8f64_unmasked(<8 x double> %a, <8 x double> %b,
 ; CHECK-NEXT: vfmacc.vv v16, v8, v12
 ; CHECK-NEXT: vmv4r.v v8, v16
 ; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %allones, i32 %evl)
- %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> splat (i1 -1), <8 x double> %v, <8 x double> %c, i32 %evl)
 ret <8 x double> %u
 }
@@ -1443,9 +1263,7 @@ define <8 x double> @vfmacc_vf_v8f64(<8 x double> %va, double %b, <8 x double> %
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %c, <8 x i1> %allones, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
 %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
 ret <8 x double> %u
 }
@@ -1459,9 +1277,7 @@ define <8 x double> @vfmacc_vf_v8f64_commute(<8 x double> %va, double %b, <8 x d
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %va, <8 x double> %c, <8 x i1> %allones, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %va, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
 %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
 ret <8 x double> %u
 }
@@ -1475,10 +1291,8 @@ define <8 x double> @vfmacc_vf_v8f64_unmasked(<8 x double> %va, double %b, <8 x
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %c, <8 x i1> %allones, i32 %evl)
- %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> splat (i1 -1), <8 x double> %v, <8 x double> %c, i32 %evl)
 ret <8 x double> %u
 }
@@ -1489,9 +1303,7 @@ define <8 x double> @vfmacc_vv_v8f64_ta(<8 x double> %a, <8 x double> %b, <8 x d
 ; CHECK-NEXT: vfmacc.vv v16, v8, v12, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v16
 ; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %allones, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
 %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
 ret <8 x double> %u
 }
@@ -1505,9 +1317,7 @@ define <8 x double> @vfmacc_vf_v8f64_ta(<8 x double> %va, double %b, <8 x double
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %c, <8 x i1> %allones, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
 %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
 ret <8 x double> %u
 }
@@ -1521,9 +1331,7 @@ define <8 x double> @vfmacc_vf_v8f64_commute_ta(<8 x double> %va, double %b, <8
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x double> poison, double %b, i32 0
 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %va, <8 x double> %c, <8 x i1> %allones, i32 %evl)
+ %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %va, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl)
 %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
 ret <8 x double> %u
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
index 86218ddb04bd6..ffa88e28d7dc8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
@@ -48,9 +48,7 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z
 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.maxnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.maxnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %v
 }
@@ -94,9 +92,7 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z
 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.maxnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.maxnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -140,9 +136,7 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z
 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.maxnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.maxnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x half> %v
 }
@@ -186,9 +180,7 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i
 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.maxnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.maxnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x half> %v
 }
@@ -210,9 +202,7 @@ define <2 x float> @vfmax_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i3
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.maxnum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.maxnum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x float> %v
 }
@@ -234,9 +224,7 @@ define <4 x float> @vfmax_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i3
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.maxnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.maxnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -258,9 +246,7 @@ define <8 x float> @vfmax_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.maxnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.maxnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x float> %v
 }
@@ -282,9 +268,7 @@ define <16 x float> @vfmax_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.maxnum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.maxnum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x float> %v
 }
@@ -306,9 +290,7 @@ define <2 x double> @vfmax_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb,
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.maxnum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.maxnum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x double> %v
 }
@@ -330,9 +312,7 @@ define <4 x double> @vfmax_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb,
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.maxnum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.maxnum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
@@ -354,9 +334,7 @@ define <8 x double> @vfmax_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb,
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.maxnum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x double> @llvm.vp.maxnum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x double> %v
 }
@@ -378,9 +356,7 @@ define <15 x double> @vfmax_vv_v15f64_unmasked(<15 x double> %va, <15 x double>
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x double> @llvm.vp.maxnum.v15f64(<15 x double> %va, <15 x double> %vb, <15 x i1> %m, i32 %evl)
+ %v = call <15 x double> @llvm.vp.maxnum.v15f64(<15 x double> %va, <15 x double> %vb, <15 x i1> splat (i1 true), i32 %evl)
 ret <15 x double> %v
 }
@@ -402,9 +378,7 @@ define <16 x double> @vfmax_vv_v16f64_unmasked(<16 x double> %va, <16 x double>
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfmax.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x double> @llvm.vp.maxnum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x double> @llvm.vp.maxnum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x double> %v
 }
@@ -476,8 +450,6 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfmax.vv v16, v16, v24
 ; CHECK-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x double> @llvm.vp.maxnum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl)
+ %v = call <32 x double> @llvm.vp.maxnum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x double> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
index 8b8049ea6c628..17f851e172f81 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
@@ -48,9 +48,7 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z
 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x half> @llvm.vp.minnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x half> @llvm.vp.minnum.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %v
 }
@@ -94,9 +92,7 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z
 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x half> @llvm.vp.minnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x half> @llvm.vp.minnum.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x half> %v
 }
@@ -140,9 +136,7 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z
 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x half> @llvm.vp.minnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x half> @llvm.vp.minnum.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x half> %v
 }
@@ -186,9 +180,7 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i
 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
 ; ZVFHMIN-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x half> @llvm.vp.minnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x half> @llvm.vp.minnum.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x half> %v
 }
@@ -210,9 +202,7 @@ define <2 x float> @vfmin_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i3
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vfmin.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x float> @llvm.vp.minnum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x float> @llvm.vp.minnum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x float> %v
 }
@@ -234,9 +224,7 @@ define <4 x float> @vfmin_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i3
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmin.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x float> @llvm.vp.minnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x float> @llvm.vp.minnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x float> %v
 }
@@ -258,9 +246,7 @@ define <8 x float> @vfmin_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfmin.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x float> @llvm.vp.minnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x float> @llvm.vp.minnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x float> %v
 }
@@ -282,9 +268,7 @@ define <16 x float> @vfmin_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfmin.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x float> @llvm.vp.minnum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x float> @llvm.vp.minnum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x float> %v
 }
@@ -306,9 +290,7 @@ define <2 x double> @vfmin_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb,
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vfmin.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x double> @llvm.vp.minnum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x double> @llvm.vp.minnum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x double> %v
 }
@@ -330,9 +312,7 @@ define <4 x double> @vfmin_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb,
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vfmin.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x double> @llvm.vp.minnum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x double> @llvm.vp.minnum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %v
 }
@@ -354,9 +334,7 @@ define <8 x double> @vfmin_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb,
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vfmin.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x double> @llvm.vp.minnum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x double> @llvm.vp.minnum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x double> %v
 }
@@ -378,9 +356,7 @@ define <15 x double> @vfmin_vv_v15f64_unmasked(<15 x double> %va, <15 x double>
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfmin.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <15 x i1> poison, i1 true, i32 0
- %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
- %v = call <15 x double> @llvm.vp.minnum.v15f64(<15 x double> %va, <15 x double> %vb, <15 x i1> %m, i32 %evl)
+ %v = call <15 x double> @llvm.vp.minnum.v15f64(<15 x double> %va, <15 x double> %vb, <15 x i1> splat (i1 true), i32 %evl)
 ret <15 x double> %v
 }
@@ -402,9 +378,7 @@ define <16 x double> @vfmin_vv_v16f64_unmasked(<16 x double> %va, <16 x double>
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfmin.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x double> @llvm.vp.minnum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x double> @llvm.vp.minnum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x double> %v
 }
@@ -476,8 +450,6 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfmin.vv v16, v16, v24
 ; CHECK-NEXT: ret
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x double> @llvm.vp.minnum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 %evl)
+ %v = call <32 x double> @llvm.vp.minnum.v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x double> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmsac-vp.ll
index 3960d061fd66e..fc6578225aa64 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmsac-vp.ll
@@ -16,10 +16,8 @@ define <2 x half> @vfmsac_vv_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c,
 ; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
+ %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
 ret <2 x half> %u
 }
@@ -31,11 +29,9 @@ define <2 x half> @vfmsac_vv_v2f16_unmasked(<2 x half> %a, <2 x half> %b, <2 x h
 ; CHECK-NEXT: vfmsac.vv v10, v8, v9
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
- %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl)
+ %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl)
+ %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> splat (i1 -1), <2 x half> %v, <2 x half> %c, i32 %evl)
 ret <2 x half> %u
 }
@@ -48,10 +44,8 @@ define <2 x half> @vfmsac_vf_v2f16(<2 x half> %a, half %b, <2 x half> %c, <2 x i
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x half> poison, half %b, i32 0
 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
+ %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %vb, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
 ret <2 x half> %u
 }
@@ -65,10 +59,8 @@ define <2 x half> @vfmsac_vf_v2f16_commute(<2 x half> %a, half %b, <2 x half> %c
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x half> poison, half %b, i32 0
 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %a, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
+ %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %a, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
 ret <2 x half> %u
 }
@@ -82,11 +74,9 @@ define <2 x half> @vfmsac_vf_v2f16_unmasked(<2 x half> %a, half %b, <2 x half> %
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x half> poison, half %b, i32 0
 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
- %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl)
+ %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %vb, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl)
+ %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> splat (i1 -1), <2 x half> %v, <2 x half> %c, i32 %evl)
 ret <2 x half> %u
 }
@@ -97,10 +87,8 @@ define <2 x half> @vfmsac_vv_v2f16_ta(<2 x half> %a, <2 x half> %b, <2 x half> %
 ; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
+ %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
 ret <2 x half> %u
 }
@@ -114,10 +102,8 @@ define <2 x half> @vfmsac_vf_v2f16_ta(<2 x half> %a, half %b, <2 x half> %c, <2
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x half> poison, half %b, i32 0
 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
+ %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %a, <2 x half> %vb, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
 ret <2 x half> %u
 }
@@ -131,10 +117,8 @@ define <2 x half> @vfmsac_vf_v2f16_commute_ta(<2 x half> %a, half %b, <2 x half>
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x half> poison, half %b, i32 0
 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %a, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
+ %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %a, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl)
 %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
 ret <2 x half> %u
 }
@@ -151,10 +135,8 @@ define <4 x half> @vfmsac_vv_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c,
 ; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
 ret <4 x half> %u
 }
@@ -166,11 +148,9 @@ define <4 x half> @vfmsac_vv_v4f16_unmasked(<4 x half> %a, <4 x half> %b, <4 x h
 ; CHECK-NEXT: vfmsac.vv v10, v8, v9
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
- %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
+ %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> splat (i1 -1), <4 x half> %v, <4 x half> %c, i32 %evl)
 ret <4 x half> %u
 }
@@ -183,10 +163,8 @@ define <4 x half> @vfmsac_vf_v4f16(<4 x half> %a, half %b, <4 x half> %c, <4 x i
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x half> poison, half %b, i32 0
 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %vb, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
 ret <4 x half> %u
 }
@@ -200,10 +178,8 @@ define <4 x half> @vfmsac_vf_v4f16_commute(<4 x half> %a, half %b, <4 x half> %c
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x half> poison, half %b, i32 0
 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %a, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %a, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
 ret <4 x half> %u
 }
@@ -217,11 +193,9 @@ define <4 x half> @vfmsac_vf_v4f16_unmasked(<4 x half> %a, half %b, <4 x half> %
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x half> poison, half %b, i32 0
 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
- %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %vb, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
+ %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> splat (i1 -1), <4 x half> %v, <4 x half> %c, i32 %evl)
 ret <4 x half> %u
 }
@@ -232,10 +206,8 @@ define <4 x half> @vfmsac_vv_v4f16_ta(<4 x half> %a, <4 x half> %b, <4 x half> %
 ; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
 ret <4 x half> %u
 }
@@ -249,10 +221,8 @@ define <4 x half> @vfmsac_vf_v4f16_ta(<4 x half> %a, half %b, <4 x half> %c, <4
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x half> poison, half %b, i32 0
 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %a, <4 x half> %vb, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
 ret <4 x half> %u
 }
@@ -266,10 +236,8 @@ define <4 x half> @vfmsac_vf_v4f16_commute_ta(<4 x half> %a, half %b, <4 x half>
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x half> poison, half %b, i32 0
 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %a, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %a, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
 %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
 ret <4 x half> %u
 }
@@ -286,10 +254,8 @@ define <8 x half> @vfmsac_vv_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c,
 ; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
 %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
 ret <8 x half> %u
 }
@@ -301,11 +267,9 @@ define <8 x half> @vfmsac_vv_v8f16_unmasked(<8 x half> %a, <8 x half> %b, <8 x h
 ; CHECK-NEXT: vfmsac.vv v10, v8, v9
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
- %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
+ %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> splat (i1 -1), <8 x half> %v, <8 x half> %c, i32 %evl)
 ret <8 x half> %u
 }
@@ -318,10 +282,8 @@ define <8 x half> @vfmsac_vf_v8f16(<8 x half> %a, half %b, <8 x half> %c, <8 x i
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x half> poison, half %b, i32 0
 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %vb, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
 %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
 ret <8 x half> %u
 }
@@ -335,10 +297,8 @@ define <8 x half> @vfmsac_vf_v8f16_commute(<8 x half> %a, half %b, <8 x half> %c
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x half> poison, half %b, i32 0
 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %a, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %a, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
 %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
 ret <8 x half> %u
 }
@@ -352,11 +312,9 @@ define <8 x half> @vfmsac_vf_v8f16_unmasked(<8 x half> %a, half %b, <8 x half> %
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x half> poison, half %b, i32 0
 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
- %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %vb, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
+ %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> splat (i1 -1), <8 x half> %v, <8 x half> %c, i32 %evl)
 ret <8 x half> %u
 }
@@ -367,10 +325,8 @@ define <8 x half> @vfmsac_vv_v8f16_ta(<8 x half> %a, <8 x half> %b, <8 x half> %
 ; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
 %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
 ret <8 x half> %u
 }
@@ -384,10 +340,8 @@ define <8 x half> @vfmsac_vf_v8f16_ta(<8 x half> %a, half %b, <8 x half> %c, <8
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x half> poison, half %b, i32 0
 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %a, <8 x half> %vb, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
 %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
 ret <8 x half> %u
 }
@@ -401,10 +355,8 @@ define <8 x half> @vfmsac_vf_v8f16_commute_ta(<8 x half> %a, half %b, <8 x half>
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x half> poison, half %b, i32 0
 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %a, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %a, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
 %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
 ret <8 x half> %u
 }
@@ -421,10 +373,8 @@ define <16 x half> @vfmsac_vv_v16f16(<16 x half> %a, <16 x half> %b, <16 x half>
 ; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv2r.v v8, v12
 ; CHECK-NEXT: ret
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
 %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
 ret <16 x half> %u
 }
@@ -436,11 +386,9 @@ define <16 x half> @vfmsac_vv_v16f16_unmasked(<16 x half> %a, <16 x half> %b, <1
 ; CHECK-NEXT: vfmsac.vv v12, v8, v10
 ; CHECK-NEXT: vmv2r.v v8, v12
 ; CHECK-NEXT: ret
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
- %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
+ %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> splat (i1 -1), <16 x half> %v, <16 x half> %c, i32 %evl)
 ret <16 x half> %u
 }
@@ -453,10 +401,8 @@ define <16 x half> @vfmsac_vf_v16f16(<16 x half> %a, half %b, <16 x half> %c, <1
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x half> poison, half %b, i32 0
 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %vb, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
 %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
 ret <16 x half> %u
 }
@@ -470,10 +416,8 @@ define <16 x half> @vfmsac_vf_v16f16_commute(<16 x half> %a, half %b, <16 x half
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x half> poison, half %b, i32 0
 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %a, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %a, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
 %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
 ret <16 x half> %u
 }
@@ -487,11 +431,9 @@ define <16 x half> @vfmsac_vf_v16f16_unmasked(<16 x half> %a, half %b, <16 x hal
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x half> poison, half %b, i32 0
 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
- %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %vb, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
+ %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> splat (i1 -1), <16 x half> %v, <16 x half> %c, i32 %evl)
 ret <16 x half> %u
 }
@@ -502,10 +444,8 @@ define <16 x half> @vfmsac_vv_v16f16_ta(<16 x half> %a, <16 x half> %b, <16 x ha
 ; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v12
 ; CHECK-NEXT: ret
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
 %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
 ret <16 x half> %u
 }
@@ -519,10 +459,8 @@ define <16 x half> @vfmsac_vf_v16f16_ta(<16 x half> %a, half %b, <16 x half> %c,
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x half> poison, half %b, i32 0
 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %a, <16 x half> %vb, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
 %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
 ret <16 x half> %u
 }
@@ -536,10 +474,8 @@ define <16 x half> @vfmsac_vf_v16f16_commute_ta(<16 x half> %a, half %b, <16 x h
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x half> poison, half %b, i32 0
 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %a, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %a, <16 x half>
%negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl) ret <16 x half> %u } @@ -556,10 +492,8 @@ define <32 x half> @vfmsac_vv_v32f16(<32 x half> %a, <32 x half> %b, <32 x half> ; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -571,11 +505,9 @@ define <32 x half> @vfmsac_vv_v32f16_unmasked(<32 x half> %a, <32 x half> %b, <3 ; CHECK-NEXT: vfmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl) - %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> splat (i1 -1), <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -588,10 +520,8 @@ define <32 x half> @vfmsac_vf_v32f16(<32 x half> %a, half %b, <32 x half> %c, <3 ; CHECK-NEXT: ret %elt.head = insertelement <32 x half> poison, half %b, i32 0 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %vb, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -605,10 +535,8 @@ define <32 x half> @vfmsac_vf_v32f16_commute(<32 x half> %a, half %b, <32 x half ; CHECK-NEXT: ret %elt.head = insertelement <32 x half> poison, half %b, i32 0 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x 
half> %c, <32 x i1> %allones, i32 %evl) - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %a, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %a, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -622,11 +550,9 @@ define <32 x half> @vfmsac_vf_v32f16_unmasked(<32 x half> %a, half %b, <32 x hal ; CHECK-NEXT: ret %elt.head = insertelement <32 x half> poison, half %b, i32 0 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl) - %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %vb, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl) + %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> splat (i1 -1), <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -637,10 +563,8 @@ define <32 x half> @vfmsac_vv_v32f16_ta(<32 x half> %a, <32 x half> %b, <32 x ha ; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -654,10 +578,8 @@ define <32 x half> @vfmsac_vf_v32f16_ta(<32 x half> %a, half %b, <32 x half> %c, ; CHECK-NEXT: ret %elt.head = insertelement <32 x half> poison, half %b, i32 0 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %a, <32 x half> %vb, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -671,10 +593,8 @@ define 
<32 x half> @vfmsac_vf_v32f16_commute_ta(<32 x half> %a, half %b, <32 x h ; CHECK-NEXT: ret %elt.head = insertelement <32 x half> poison, half %b, i32 0 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl) - %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %a, <32 x half> %negc, <32 x i1> %allones, i32 %evl) + %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl) + %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %a, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl) ret <32 x half> %u } @@ -691,10 +611,8 @@ define <2 x float> @vfmsac_vv_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> ; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -706,11 +624,9 @@ define <2 x float> @vfmsac_vv_v2f32_unmasked(<2 x float> %a, <2 x float> %b, <2 ; CHECK-NEXT: vfmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl) - %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> splat (i1 -1), <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -723,10 +639,8 @@ define <2 x float> @vfmsac_vf_v2f32(<2 x float> %a, float %b, <2 x float> %c, <2 ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x 
i1> splat (i1 -1), i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %vb, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -740,10 +654,8 @@ define <2 x float> @vfmsac_vf_v2f32_commute(<2 x float> %a, float %b, <2 x float ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %a, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %a, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -757,11 +669,9 @@ define <2 x float> @vfmsac_vf_v2f32_unmasked(<2 x float> %a, float %b, <2 x floa ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl) - %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %vb, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> splat (i1 -1), <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -772,10 +682,8 @@ define <2 x float> @vfmsac_vv_v2f32_ta(<2 x float> %a, <2 x float> %b, <2 x floa ; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -789,10 +697,8 @@ define <2 x float> @vfmsac_vf_v2f32_ta(<2 x float> %a, float %b, <2 x float> %c, ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 
-1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %a, <2 x float> %vb, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -806,10 +712,8 @@ define <2 x float> @vfmsac_vf_v2f32_commute_ta(<2 x float> %a, float %b, <2 x fl ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %a, <2 x float> %negc, <2 x i1> %allones, i32 %evl) + %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %a, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl) ret <2 x float> %u } @@ -826,10 +730,8 @@ define <4 x float> @vfmsac_vv_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> ; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -841,11 +743,9 @@ define <4 x float> @vfmsac_vv_v4f32_unmasked(<4 x float> %a, <4 x float> %b, <4 ; CHECK-NEXT: vfmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl) - %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> splat (i1 -1), <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -858,10 +758,8 @@ 
define <4 x float> @vfmsac_vf_v4f32(<4 x float> %a, float %b, <4 x float> %c, <4 ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %vb, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -875,10 +773,8 @@ define <4 x float> @vfmsac_vf_v4f32_commute(<4 x float> %a, float %b, <4 x float ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %a, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %a, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -892,11 +788,9 @@ define <4 x float> @vfmsac_vf_v4f32_unmasked(<4 x float> %a, float %b, <4 x floa ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl) - %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %vb, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> splat (i1 -1), <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -907,10 +801,8 @@ define <4 x float> @vfmsac_vv_v4f32_ta(<4 x float> %a, <4 x float> %b, <4 x floa ; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %negc, <4 x i1> 
%allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -924,10 +816,8 @@ define <4 x float> @vfmsac_vf_v4f32_ta(<4 x float> %a, float %b, <4 x float> %c, ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a, <4 x float> %vb, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -941,10 +831,8 @@ define <4 x float> @vfmsac_vf_v4f32_commute_ta(<4 x float> %a, float %b, <4 x fl ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %a, <4 x float> %negc, <4 x i1> %allones, i32 %evl) + %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %a, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl) ret <4 x float> %u } @@ -961,10 +849,8 @@ define <8 x float> @vfmsac_vv_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> ; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -976,11 +862,9 @@ define <8 x float> @vfmsac_vv_v8f32_unmasked(<8 x float> %a, <8 x float> %b, <8 ; CHECK-NEXT: vfmsac.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 
x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl) - %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> splat (i1 -1), <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -993,10 +877,8 @@ define <8 x float> @vfmsac_vf_v8f32(<8 x float> %a, float %b, <8 x float> %c, <8 ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %vb, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -1010,10 +892,8 @@ define <8 x float> @vfmsac_vf_v8f32_commute(<8 x float> %a, float %b, <8 x float ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %a, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %a, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -1027,11 +907,9 @@ define <8 x float> @vfmsac_vf_v8f32_unmasked(<8 x float> %a, float %b, <8 x floa ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl) - %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %vb, <8 x float> %negc, <8 x i1> splat 
(i1 -1), i32 %evl) + %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> splat (i1 -1), <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -1042,10 +920,8 @@ define <8 x float> @vfmsac_vv_v8f32_ta(<8 x float> %a, <8 x float> %b, <8 x floa ; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -1059,10 +935,8 @@ define <8 x float> @vfmsac_vf_v8f32_ta(<8 x float> %a, float %b, <8 x float> %c, ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %a, <8 x float> %vb, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -1076,10 +950,8 @@ define <8 x float> @vfmsac_vf_v8f32_commute_ta(<8 x float> %a, float %b, <8 x fl ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %a, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %a, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -1096,10 +968,8 @@ define <16 x float> @vfmsac_vv_v16f32(<16 x float> %a, <16 x float> %b, <16 x fl ; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, 
i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1111,11 +981,9 @@ define <16 x float> @vfmsac_vv_v16f32_unmasked(<16 x float> %a, <16 x float> %b, ; CHECK-NEXT: vfmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl) - %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> splat (i1 -1), <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1128,10 +996,8 @@ define <16 x float> @vfmsac_vf_v16f32(<16 x float> %a, float %b, <16 x float> %c ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %vb, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1145,10 +1011,8 @@ define <16 x float> @vfmsac_vf_v16f32_commute(<16 x float> %a, float %b, <16 x f ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %a, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %a, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1162,11 +1026,9 @@ define <16 x float> @vfmsac_vf_v16f32_unmasked(<16 x float> %a, float %b, <16 x ; CHECK-NEXT: ret 
%elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl) - %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %vb, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> splat (i1 -1), <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1177,10 +1039,8 @@ define <16 x float> @vfmsac_vv_v16f32_ta(<16 x float> %a, <16 x float> %b, <16 x ; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1194,10 +1054,8 @@ define <16 x float> @vfmsac_vf_v16f32_ta(<16 x float> %a, float %b, <16 x float> ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %a, <16 x float> %vb, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1211,10 +1069,8 @@ define <16 x float> @vfmsac_vf_v16f32_commute_ta(<16 x float> %a, float %b, <16 ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %a, <16 x 
float> %negc, <16 x i1> %allones, i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %a, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1231,10 +1087,8 @@ define <2 x double> @vfmsac_vv_v2f64(<2 x double> %a, <2 x double> %b, <2 x doub ; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1246,11 +1100,9 @@ define <2 x double> @vfmsac_vv_v2f64_unmasked(<2 x double> %a, <2 x double> %b, ; CHECK-NEXT: vfmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl) - %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> splat (i1 -1), <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1263,10 +1115,8 @@ define <2 x double> @vfmsac_vf_v2f64(<2 x double> %a, double %b, <2 x double> %c ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %vb, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1280,10 +1130,8 @@ define <2 x double> @vfmsac_vf_v2f64_commute(<2 x double> %a, double %b, <2 x do ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x 
double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %a, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %a, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1297,11 +1145,9 @@ define <2 x double> @vfmsac_vf_v2f64_unmasked(<2 x double> %a, double %b, <2 x d ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl) - %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %vb, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> splat (i1 -1), <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1312,10 +1158,8 @@ define <2 x double> @vfmsac_vv_v2f64_ta(<2 x double> %a, <2 x double> %b, <2 x d ; CHECK-NEXT: vfmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1329,10 +1173,8 @@ define <2 x double> @vfmsac_vf_v2f64_ta(<2 x double> %a, double %b, <2 x double> ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) 
+ %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %a, <2 x double> %vb, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1346,10 +1188,8 @@ define <2 x double> @vfmsac_vf_v2f64_commute_ta(<2 x double> %a, double %b, <2 x ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %a, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %a, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1366,10 +1206,8 @@ define <4 x double> @vfmsac_vv_v4f64(<4 x double> %a, <4 x double> %b, <4 x doub ; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1381,11 +1219,9 @@ define <4 x double> @vfmsac_vv_v4f64_unmasked(<4 x double> %a, <4 x double> %b, ; CHECK-NEXT: vfmsac.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl) - %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> splat (i1 -1), <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1398,10 +1234,8 @@ define <4 x double> @vfmsac_vf_v4f64(<4 x double> %a, double %b, <4 x double> %c ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 
x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %vb, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1415,10 +1249,8 @@ define <4 x double> @vfmsac_vf_v4f64_commute(<4 x double> %a, double %b, <4 x do ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %a, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %a, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1432,11 +1264,9 @@ define <4 x double> @vfmsac_vf_v4f64_unmasked(<4 x double> %a, double %b, <4 x d ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl) - %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %vb, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> splat (i1 -1), <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1447,10 +1277,8 @@ define <4 x double> @vfmsac_vv_v4f64_ta(<4 x double> %a, <4 x double> %b, <4 x d ; CHECK-NEXT: vfmsac.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x 
double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1464,10 +1292,8 @@ define <4 x double> @vfmsac_vf_v4f64_ta(<4 x double> %a, double %b, <4 x double> ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %a, <4 x double> %vb, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1481,10 +1307,8 @@ define <4 x double> @vfmsac_vf_v4f64_commute_ta(<4 x double> %a, double %b, <4 x ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %a, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %a, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1501,10 +1325,8 @@ define <8 x double> @vfmsac_vv_v8f64(<8 x double> %a, <8 x double> %b, <8 x doub ; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1516,11 +1338,9 @@ define <8 x double> @vfmsac_vv_v8f64_unmasked(<8 x double> %a, <8 x double> %b, ; CHECK-NEXT: vfmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %negc, <8 x i1> 
%allones, i32 %evl) - %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> splat (i1 -1), <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1533,10 +1353,8 @@ define <8 x double> @vfmsac_vf_v8f64(<8 x double> %a, double %b, <8 x double> %c ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %vb, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1550,10 +1368,8 @@ define <8 x double> @vfmsac_vf_v8f64_commute(<8 x double> %a, double %b, <8 x do ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %a, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %a, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1567,11 +1383,9 @@ define <8 x double> @vfmsac_vf_v8f64_unmasked(<8 x double> %a, double %b, <8 x d ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl) - %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %vb, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> splat (i1 -1), <8 x double> 
%v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1582,10 +1396,8 @@ define <8 x double> @vfmsac_vv_v8f64_ta(<8 x double> %a, <8 x double> %b, <8 x d ; CHECK-NEXT: vfmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1599,10 +1411,8 @@ define <8 x double> @vfmsac_vf_v8f64_ta(<8 x double> %a, double %b, <8 x double> ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %a, <8 x double> %vb, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1616,10 +1426,8 @@ define <8 x double> @vfmsac_vf_v8f64_commute_ta(<8 x double> %a, double %b, <8 x ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %a, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %a, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll index 4e06263f4e8b7..64ce0a12de8cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll @@ -48,9 +48,7 @@ define <2 x half> @vfmul_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, i32 ze ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> 
poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -106,9 +104,7 @@ define <2 x half> @vfmul_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.fmul.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -175,9 +171,7 @@ define <4 x half> @vfmul_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, i32 ze ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -233,9 +227,7 @@ define <4 x half> @vfmul_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.fmul.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -279,9 +271,7 @@ define <8 x half> @vfmul_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, i32 ze ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -337,9 +327,7 @@ define <8 x half> @vfmul_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.fmul.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -383,9 +371,7 @@ define <16 x half> @vfmul_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, i3 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement <16 x i1> poison, i1 
true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -441,9 +427,7 @@ define <16 x half> @vfmul_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe ; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.fmul.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -465,9 +449,7 @@ define <2 x float> @vfmul_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -491,9 +473,7 @@ define <2 x float> @vfmul_vf_v2f32_unmasked(<2 x float> %va, float %b, i32 zeroe ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.fmul.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -515,9 +495,7 @@ define <4 x float> @vfmul_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -541,9 +519,7 @@ define <4 x float> @vfmul_vf_v4f32_unmasked(<4 x float> %va, float %b, i32 zeroe ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -565,9 +541,7 @@ define <8 x float> @vfmul_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, 
ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -591,9 +565,7 @@ define <8 x float> @vfmul_vf_v8f32_unmasked(<8 x float> %va, float %b, i32 zeroe ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.fmul.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -615,9 +587,7 @@ define <16 x float> @vfmul_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b, ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -641,9 +611,7 @@ define <16 x float> @vfmul_vf_v16f32_unmasked(<16 x float> %va, float %b, i32 ze ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.fmul.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -665,9 +633,7 @@ define <2 x double> @vfmul_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b, ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -691,9 +657,7 @@ define <2 x double> @vfmul_vf_v2f64_unmasked(<2 x double> %va, double %b, i32 ze ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.fmul.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ 
-715,9 +679,7 @@ define <4 x double> @vfmul_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b, ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -741,9 +703,7 @@ define <4 x double> @vfmul_vf_v4f64_unmasked(<4 x double> %va, double %b, i32 ze ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.fmul.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -765,9 +725,7 @@ define <8 x double> @vfmul_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b, ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -791,9 +749,7 @@ define <8 x double> @vfmul_vf_v8f64_unmasked(<8 x double> %va, double %b, i32 ze ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.fmul.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -815,9 +771,7 @@ define <16 x double> @vfmul_vv_v16f64_unmasked(<16 x double> %va, <16 x double> ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.fmul.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.fmul.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -841,8 +795,6 @@ define <16 x double> @vfmul_vf_v16f64_unmasked(<16 x double> %va, double %b, i32 ; CHECK-NEXT: ret %elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.fmul.v16f64(<16 x double> %va, <16 x double> %vb, 
<16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.fmul.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll index 4af566cb5f55e..288efb0f1fc27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll @@ -23,9 +23,7 @@ define <2 x half> @vfma_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, <2 x ha ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -49,9 +47,7 @@ define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %v ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -74,9 +70,7 @@ define <4 x half> @vfma_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, <4 x ha ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -100,9 +94,7 @@ define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %v ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -125,9 +117,7 @@ define <8 x half> @vfma_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, <8 x ha ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x 
i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -151,9 +141,7 @@ define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %v ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -176,9 +164,7 @@ define <16 x half> @vfma_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, <16 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -202,9 +188,7 @@ define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half ; CHECK-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -227,9 +211,7 @@ define <2 x float> @vfma_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, <2 x ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -253,9 +235,7 @@ define <2 x float> @vfma_vf_v2f32_unmasked(<2 x float> %va, float %b, <2 x float ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -278,9 +258,7 @@ define <4 x float> @vfma_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, <4 x ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement <4 
x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -304,9 +282,7 @@ define <4 x float> @vfma_vf_v4f32_unmasked(<4 x float> %va, float %b, <4 x float ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -329,9 +305,7 @@ define <8 x float> @vfma_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, <8 x ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -355,9 +329,7 @@ define <8 x float> @vfma_vf_v8f32_unmasked(<8 x float> %va, float %b, <8 x float ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -380,9 +352,7 @@ define <16 x float> @vfma_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b, ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -406,9 +376,7 @@ define <16 x float> @vfma_vf_v16f32_unmasked(<16 x float> %va, float %b, <16 x f ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl) + %v = call <16 x float> 
@llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -431,9 +399,7 @@ define <2 x double> @vfma_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b, < ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -457,9 +423,7 @@ define <2 x double> @vfma_vf_v2f64_unmasked(<2 x double> %va, double %b, <2 x do ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -482,9 +446,7 @@ define <4 x double> @vfma_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b, < ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -508,9 +470,7 @@ define <4 x double> @vfma_vf_v4f64_unmasked(<4 x double> %va, double %b, <4 x do ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -533,9 +493,7 @@ define <8 x double> @vfma_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b, < ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -559,9 +517,7 @@ define <8 x double> @vfma_vf_v8f64_unmasked(<8 x double> %va, double %b, <8 x do ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, 
double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -588,9 +544,7 @@ define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> % ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement <15 x i1> poison, i1 true, i32 0 - %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer - %v = call <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl) + %v = call <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -617,9 +571,7 @@ define <16 x double> @vfma_vv_v16f64_unmasked(<16 x double> %va, <16 x double> % ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -643,9 +595,7 @@ define <16 x double> @vfma_vf_v16f64_unmasked(<16 x double> %va, double %b, <16 ; CHECK-NEXT: ret %elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -796,8 +746,6 @@ define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> % ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl) + %v = call <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll index 3d037a5589a1d..c36ec25c04f93 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll @@ -46,9 +46,7 @@ define <2 x half> @vfneg_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, 
mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -90,9 +88,7 @@ define <4 x half> @vfneg_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -134,9 +130,7 @@ define <8 x half> @vfneg_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -178,9 +172,7 @@ define <16 x half> @vfneg_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -202,9 +194,7 @@ define <2 x float> @vfneg_vv_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -226,9 +216,7 @@ define <4 x float> @vfneg_vv_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -250,9 +238,7 @@ define <8 x float> @vfneg_vv_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x float> 
@llvm.vp.fneg.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -274,9 +260,7 @@ define <16 x float> @vfneg_vv_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -298,9 +282,7 @@ define <2 x double> @vfneg_vv_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -322,9 +304,7 @@ define <4 x double> @vfneg_vv_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -346,9 +326,7 @@ define <8 x double> @vfneg_vv_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -370,9 +348,7 @@ define <15 x double> @vfneg_vv_v15f64_unmasked(<15 x double> %va, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <15 x i1> poison, i1 true, i32 0 - %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer - %v = call <15 x double> @llvm.vp.fneg.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) + %v = call <15 x double> @llvm.vp.fneg.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -394,9 +370,7 @@ define <16 x double> @vfneg_vv_v16f64_unmasked(<16 x double> %va, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.fneg.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -445,8 +419,6 @@ define <32 x double> @vfneg_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v16, v16 ; 
CHECK-NEXT: ret - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x double> @llvm.vp.fneg.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) + %v = call <32 x double> @llvm.vp.fneg.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmacc-vp.ll index 7dc9210666549..6d65ab4083f7c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmacc-vp.ll @@ -16,11 +16,9 @@ define <2 x half> @vfnmacc_vv_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -32,12 +30,10 @@ define <2 x half> @vfnmacc_vv_v2f16_unmasked(<2 x half> %a, <2 x half> %b, <2 x ; CHECK-NEXT: vfnmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl) - %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> splat (i1 -1), <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -50,11 +46,9 @@ define <2 x half> @vfnmacc_vf_v2f16(<2 x half> %a, half %b, <2 x half> %c, <2 x ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, 
<2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -68,11 +62,9 @@ define <2 x half> @vfnmacc_vf_v2f16_commute(<2 x half> %a, half %b, <2 x half> % ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %nega, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %nega, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -86,12 +78,10 @@ define <2 x half> @vfnmacc_vf_v2f16_unmasked(<2 x half> %a, half %b, <2 x half> ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl) - %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> splat (i1 -1), <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -102,11 +92,9 @@ define <2 x half> @vfnmacc_vv_v2f16_ta(<2 x half> %a, <2 x half> %b, <2 x half> ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl) + %nega = call <2 x 
half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl)
%u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
ret <2 x half> %u
}
@@ -120,11 +108,9 @@ define <2 x half> @vfnmacc_vf_v2f16_ta(<2 x half> %a, half %b, <2 x half> %c, <2
; CHECK-NEXT: ret
%elt.head = insertelement <2 x half> poison, half %b, i32 0
%vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl)
- %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
+ %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl)
%u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
ret <2 x half> %u
}
@@ -138,11 +124,9 @@ define <2 x half> @vfnmacc_vf_v2f16_commute_ta(<2 x half> %a, half %b, <2 x half
; CHECK-NEXT: ret
%elt.head = insertelement <2 x half> poison, half %b, i32 0
%vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl)
- %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %nega, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
+ %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %nega, <2 x half> %negc, <2 x i1> splat (i1 -1), i32 %evl)
%u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
ret <2 x half> %u
}
@@ -159,11 +143,9 @@ define <4 x half> @vfnmacc_vv_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c,
; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
+ %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
%u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
ret <4 x half> %u
}
@@ -175,12 +157,10 @@ define <4 x half> @vfnmacc_vv_v4f16_unmasked(<4 x half> %a, <4 x half> %b, <4 x
; CHECK-NEXT: vfnmacc.vv v10, v8, v9
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
- %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl)
+ %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
+ %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> splat (i1 -1), <4 x half> %v, <4 x half> %c, i32 %evl)
ret <4 x half> %u
}

@@ -193,11 +173,9 @@ define <4 x half> @vfnmacc_vf_v4f16(<4 x half> %a, half %b, <4 x half> %c, <4 x
; CHECK-NEXT: ret
%elt.head = insertelement <4 x half> poison, half %b, i32 0
%vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
+ %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
%u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
ret <4 x half> %u
}
@@ -211,11 +189,9 @@ define <4 x half> @vfnmacc_vf_v4f16_commute(<4 x half> %a, half %b, <4 x half> %
; CHECK-NEXT: ret
%elt.head = insertelement <4 x half> poison, half %b, i32 0
%vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %nega, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
+ %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %nega, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
%u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
ret <4 x half> %u
}
@@ -229,12 +205,10 @@ define <4 x half> @vfnmacc_vf_v4f16_unmasked(<4 x half> %a, half %b, <4 x half>
; CHECK-NEXT: ret
%elt.head = insertelement <4 x half> poison, half %b, i32 0
%vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
- %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl)
+ %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
+ %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> splat (i1 -1), <4 x half> %v, <4 x half> %c, i32 %evl)
ret <4 x half> %u
}

@@ -245,11 +219,9 @@ define <4 x half> @vfnmacc_vv_v4f16_ta(<4 x half> %a, <4 x half> %b, <4 x half>
; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
+ %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
%u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
ret <4 x half> %u
}
@@ -263,11 +235,9 @@ define <4 x half> @vfnmacc_vf_v4f16_ta(<4 x half> %a, half %b, <4 x half> %c, <4
; CHECK-NEXT: ret
%elt.head = insertelement <4 x half> poison, half %b, i32 0
%vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
+ %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
%u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
ret <4 x half> %u
}
@@ -281,11 +251,9 @@ define <4 x half> @vfnmacc_vf_v4f16_commute_ta(<4 x half> %a, half %b, <4 x half
; CHECK-NEXT: ret
%elt.head = insertelement <4 x half> poison, half %b, i32 0
%vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %nega, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
+ %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %nega, <4 x half> %negc, <4 x i1> splat (i1 -1), i32 %evl)
%u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
ret <4 x half> %u
}
@@ -302,11 +270,9 @@ define <8 x half> @vfnmacc_vv_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c,
; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
+ %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
%u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
ret <8 x half> %u
}
@@ -318,12 +284,10 @@ define <8 x half> @vfnmacc_vv_v8f16_unmasked(<8 x half> %a, <8 x half> %b, <8 x
; CHECK-NEXT: vfnmacc.vv v10, v8, v9
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
- %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl)
+ %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
+ %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> splat (i1 -1), <8 x half> %v, <8 x half> %c, i32 %evl)
ret <8 x half> %u
}

@@ -336,11 +300,9 @@ define <8 x half> @vfnmacc_vf_v8f16(<8 x half> %a, half %b, <8 x half> %c, <8 x
; CHECK-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
+ %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
%u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
ret <8 x half> %u
}
@@ -354,11 +316,9 @@ define <8 x half> @vfnmacc_vf_v8f16_commute(<8 x half> %a, half %b, <8 x half> %
; CHECK-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %nega, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
+ %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %nega, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
%u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
ret <8 x half> %u
}
@@ -372,12 +332,10 @@ define <8 x half> @vfnmacc_vf_v8f16_unmasked(<8 x half> %a, half %b, <8 x half>
; CHECK-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
- %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl)
+ %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
+ %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> splat (i1 -1), <8 x half> %v, <8 x half> %c, i32 %evl)
ret <8 x half> %u
}

@@ -388,11 +346,9 @@ define <8 x half> @vfnmacc_vv_v8f16_ta(<8 x half> %a, <8 x half> %b, <8 x half>
; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
+ %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
%u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
ret <8 x half> %u
}
@@ -406,11 +362,9 @@ define <8 x half> @vfnmacc_vf_v8f16_ta(<8 x half> %a, half %b, <8 x half> %c, <8
; CHECK-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
+ %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
%u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
ret <8 x half> %u
}
@@ -424,11 +378,9 @@ define <8 x half> @vfnmacc_vf_v8f16_commute_ta(<8 x half> %a, half %b, <8 x half
; CHECK-NEXT: ret
%elt.head = insertelement <8 x half> poison, half %b, i32 0
%vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %nega, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
+ %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %nega, <8 x half> %negc, <8 x i1> splat (i1 -1), i32 %evl)
%u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
ret <8 x half> %u
}
@@ -445,11 +397,9 @@ define <16 x half> @vfnmacc_vv_v16f16(<16 x half> %a, <16 x half> %b, <16 x half
; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
+ %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
%u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
ret <16 x half> %u
}
@@ -461,12 +411,10 @@ define <16 x half> @vfnmacc_vv_v16f16_unmasked(<16 x half> %a, <16 x half> %b, <
; CHECK-NEXT: vfnmacc.vv v12, v8, v10
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
- %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl)
+ %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
+ %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> splat (i1 -1), <16 x half> %v, <16 x half> %c, i32 %evl)
ret <16 x half> %u
}

@@ -479,11 +427,9 @@ define <16 x half> @vfnmacc_vf_v16f16(<16 x half> %a, half %b, <16 x half> %c, <
; CHECK-NEXT: ret
%elt.head = insertelement <16 x half> poison, half %b, i32 0
%vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
+ %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
%u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
ret <16 x half> %u
}
@@ -497,11 +443,9 @@ define <16 x half> @vfnmacc_vf_v16f16_commute(<16 x half> %a, half %b, <16 x hal
; CHECK-NEXT: ret
%elt.head = insertelement <16 x half> poison, half %b, i32 0
%vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %nega, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
+ %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %nega, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
%u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
ret <16 x half> %u
}
@@ -515,12 +459,10 @@ define <16 x half> @vfnmacc_vf_v16f16_unmasked(<16 x half> %a, half %b, <16 x ha
; CHECK-NEXT: ret
%elt.head = insertelement <16 x half> poison, half %b, i32 0
%vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
- %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl)
+ %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
+ %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> splat (i1 -1), <16 x half> %v, <16 x half> %c, i32 %evl)
ret <16 x half> %u
}

@@ -531,11 +473,9 @@ define <16 x half> @vfnmacc_vv_v16f16_ta(<16 x half> %a, <16 x half> %b, <16 x h
; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
+ %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
%u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
ret <16 x half> %u
}
@@ -549,11 +489,9 @@ define <16 x half> @vfnmacc_vf_v16f16_ta(<16 x half> %a, half %b, <16 x half> %c
; CHECK-NEXT: ret
%elt.head = insertelement <16 x half> poison, half %b, i32 0
%vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
+ %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
%u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
ret <16 x half> %u
}
@@ -567,11 +505,9 @@ define <16 x half> @vfnmacc_vf_v16f16_commute_ta(<16 x half> %a, half %b, <16 x
; CHECK-NEXT: ret
%elt.head = insertelement <16 x half> poison, half %b, i32 0
%vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
- %splat = insertelement <16 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
- %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
- %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
- %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %nega, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
+ %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+ %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %nega, <16 x half> %negc, <16 x i1> splat (i1 -1), i32 %evl)
%u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
ret <16 x half> %u
}
@@ -588,11 +524,9 @@ define <32 x half> @vfnmacc_vv_v32f16(<32 x half> %a, <32 x half> %b, <32 x half
; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT: vmv4r.v v8, v16
; CHECK-NEXT: ret
- %splat = insertelement <32 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
- %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
- %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
- %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
+ %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
+ %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
%u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
ret <32 x half> %u
}
@@ -604,12 +538,10 @@ define <32 x half> @vfnmacc_vv_v32f16_unmasked(<32 x half> %a, <32 x half> %b, <
; CHECK-NEXT: vfnmacc.vv v16, v8, v12
; CHECK-NEXT: vmv4r.v v8, v16
; CHECK-NEXT: ret
- %splat = insertelement <32 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
- %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
- %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
- %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
- %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl)
+ %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
+ %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
+ %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> splat (i1 -1), <32 x half> %v, <32 x half> %c, i32 %evl)
ret <32 x half> %u
}

@@ -622,11 +554,9 @@ define <32 x half> @vfnmacc_vf_v32f16(<32 x half> %a, half %b, <32 x half> %c, <
; CHECK-NEXT: ret
%elt.head = insertelement <32 x half> poison, half %b, i32 0
%vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
- %splat = insertelement <32 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
- %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
- %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
- %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
+ %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
+ %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
%u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
ret <32 x half> %u
}
@@ -640,11 +570,9 @@ define <32 x half> @vfnmacc_vf_v32f16_commute(<32 x half> %a, half %b, <32 x hal
; CHECK-NEXT: ret
%elt.head = insertelement <32 x half> poison, half %b, i32 0
%vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
- %splat = insertelement <32 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
- %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
- %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
- %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
+ %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
+ %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
%u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
ret <32 x half> %u
}
@@ -658,12 +586,10 @@ define <32 x half> @vfnmacc_vf_v32f16_unmasked(<32 x half> %a, half %b, <32 x ha
; CHECK-NEXT: ret
%elt.head = insertelement <32 x half> poison, half %b, i32 0
%vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
- %splat = insertelement <32 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
- %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
- %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
- %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
- %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl)
+ %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
+ %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
+ %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> splat (i1 -1), <32 x half> %v, <32 x half> %c, i32 %evl)
ret <32 x half> %u
}

@@ -674,11 +600,9 @@ define <32 x half> @vfnmacc_vv_v32f16_ta(<32 x half> %a, <32 x half> %b, <32 x h
; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
- %splat = insertelement <32 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
- %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
- %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
- %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
+ %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
+ %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
%u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
ret <32 x half> %u
}
@@ -692,11 +616,9 @@ define <32 x half> @vfnmacc_vf_v32f16_ta(<32 x half> %a, half %b, <32 x half> %c
; CHECK-NEXT: ret
%elt.head = insertelement <32 x half> poison, half %b, i32 0
%vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
- %splat = insertelement <32 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
- %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
- %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
- %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
+ %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
+ %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
%u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
ret <32 x half> %u
}
@@ -710,11 +632,9 @@ define <32 x half> @vfnmacc_vf_v32f16_commute_ta(<32 x half> %a, half %b, <32 x
; CHECK-NEXT: ret
%elt.head = insertelement <32 x half> poison, half %b, i32 0
%vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
- %splat = insertelement <32 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
- %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
- %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
- %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
+ %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
+ %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %negc, <32 x i1> splat (i1 -1), i32 %evl)
%u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
ret <32 x half> %u
}
@@ -731,11 +651,9 @@ define <2 x float> @vfnmacc_vv_v2f32(<2 x float> %a, <2 x float> %b, <2 x float>
; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
- %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
+ %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
%u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
ret <2 x float> %u
}
@@ -747,12 +665,10 @@ define <2 x float> @vfnmacc_vv_v2f32_unmasked(<2 x float> %a, <2 x float> %b, <2
; CHECK-NEXT: vfnmacc.vv v10, v8, v9
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
- %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
- %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl)
+ %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
+ %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> splat (i1 -1), <2 x float> %v, <2 x float> %c, i32 %evl)
ret <2 x float> %u
}

@@ -765,11 +681,9 @@ define <2 x float> @vfnmacc_vf_v2f32(<2 x float> %a, float %b, <2 x float> %c, <
; CHECK-NEXT: ret
%elt.head = insertelement <2 x float> poison, float %b, i32 0
%vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
- %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
+ %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
%u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
ret <2 x float> %u
}
@@ -783,11 +697,9 @@ define <2 x float> @vfnmacc_vf_v2f32_commute(<2 x float> %a, float %b, <2 x floa
; CHECK-NEXT: ret
%elt.head = insertelement <2 x float> poison, float %b, i32 0
%vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
- %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
+ %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
%u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
ret <2 x float> %u
}
@@ -801,12 +713,10 @@ define <2 x float> @vfnmacc_vf_v2f32_unmasked(<2 x float> %a, float %b, <2 x flo
; CHECK-NEXT: ret
%elt.head = insertelement <2 x float> poison, float %b, i32 0
%vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
- %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
- %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl)
+ %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
+ %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> splat (i1 -1), <2 x float> %v, <2 x float> %c, i32 %evl)
ret <2 x float> %u
}

@@ -817,11 +727,9 @@ define <2 x float> @vfnmacc_vv_v2f32_ta(<2 x float> %a, <2 x float> %b, <2 x flo
; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
- %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
+ %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
%u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
ret <2 x float> %u
}
@@ -835,11 +743,9 @@ define <2 x float> @vfnmacc_vf_v2f32_ta(<2 x float> %a, float %b, <2 x float> %c
; CHECK-NEXT: ret
%elt.head = insertelement <2 x float> poison, float %b, i32 0
%vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
- %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
+ %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
%u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
ret <2 x float> %u
}
@@ -853,11 +759,9 @@ define <2 x float> @vfnmacc_vf_v2f32_commute_ta(<2 x float> %a, float %b, <2 x f
; CHECK-NEXT: ret
%elt.head = insertelement <2 x float> poison, float %b, i32 0
%vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
- %splat = insertelement <2 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
- %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
- %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
- %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
+ %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
+ %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %negc, <2 x i1> splat (i1 -1), i32 %evl)
%u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
ret <2 x float> %u
}
@@ -874,11 +778,9 @@ define <4 x float> @vfnmacc_vv_v4f32(<4 x float> %a, <4 x float> %b, <4 x float>
; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
+ %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
%u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
ret <4 x float> %u
}
@@ -890,12 +792,10 @@ define <4 x float> @vfnmacc_vv_v4f32_unmasked(<4 x float> %a, <4 x float> %b, <4
; CHECK-NEXT: vfnmacc.vv v10, v8, v9
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
- %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl)
+ %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
+ %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> splat (i1 -1), <4 x float> %v, <4 x float> %c, i32 %evl)
ret <4 x float> %u
}

@@ -908,11 +808,9 @@ define <4 x float> @vfnmacc_vf_v4f32(<4 x float> %a, float %b, <4 x float> %c, <
; CHECK-NEXT: ret
%elt.head = insertelement <4 x float> poison, float %b, i32 0
%vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
+ %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
%u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
ret <4 x float> %u
}
@@ -926,11 +824,9 @@ define <4 x float> @vfnmacc_vf_v4f32_commute(<4 x float> %a, float %b, <4 x floa
; CHECK-NEXT: ret
%elt.head = insertelement <4 x float> poison, float %b, i32 0
%vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
+ %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
%u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
ret <4 x float> %u
}
@@ -944,12 +840,10 @@ define <4 x float> @vfnmacc_vf_v4f32_unmasked(<4 x float> %a, float %b, <4 x flo
; CHECK-NEXT: ret
%elt.head = insertelement <4 x float> poison, float %b, i32 0
%vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
- %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl)
+ %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
+ %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> splat (i1 -1), <4 x float> %v, <4 x float> %c, i32 %evl)
ret <4 x float> %u
}

@@ -960,11 +854,9 @@ define <4 x float> @vfnmacc_vv_v4f32_ta(<4 x float> %a, <4 x float> %b, <4 x flo
; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
+ %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
%u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
ret <4 x float> %u
}
@@ -978,11 +870,9 @@ define <4 x float> @vfnmacc_vf_v4f32_ta(<4 x float> %a, float %b, <4 x float> %c
; CHECK-NEXT: ret
%elt.head = insertelement <4 x float> poison, float %b, i32 0
%vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
+ %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
%u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
ret <4 x float> %u
}
@@ -996,11 +886,9 @@ define <4 x float> @vfnmacc_vf_v4f32_commute_ta(<4 x float> %a, float %b, <4 x f
; CHECK-NEXT: ret
%elt.head = insertelement <4 x float> poison, float %b, i32 0
%vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
- %splat = insertelement <4 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
- %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
- %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
- %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
+ %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
+ %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %negc, <4 x i1> splat (i1 -1), i32 %evl)
%u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
ret <4 x float> %u
}
@@ -1017,11 +905,9 @@ define <8 x float> @vfnmacc_vv_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>
; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
+ %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
%u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
ret <8 x float> %u
}
@@ -1033,12 +919,10 @@ define <8 x float> @vfnmacc_vv_v8f32_unmasked(<8 x float> %a, <8 x float> %b, <8
; CHECK-NEXT: vfnmacc.vv v12, v8, v10
; CHECK-NEXT: vmv2r.v v8, v12
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
- %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl)
+ %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
+ %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> splat (i1 -1), <8 x float> %v, <8 x float> %c, i32 %evl)
ret <8 x float> %u
}

@@ -1051,11 +935,9 @@ define <8 x float> @vfnmacc_vf_v8f32(<8 x float> %a, float %b, <8 x float> %c, <
; CHECK-NEXT: ret
%elt.head = insertelement <8 x float> poison, float %b, i32 0
%vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
+ %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
%u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
ret <8 x float> %u
}
@@ -1069,11 +951,9 @@ define <8 x float> @vfnmacc_vf_v8f32_commute(<8 x float> %a, float %b, <8 x floa
; CHECK-NEXT: ret
%elt.head = insertelement <8 x float> poison, float %b, i32 0
%vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
+ %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
%u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
ret <8 x float> %u
}
@@ -1087,12 +967,10 @@ define <8 x float> @vfnmacc_vf_v8f32_unmasked(<8 x float> %a, float %b, <8 x flo
; CHECK-NEXT: ret
%elt.head = insertelement <8 x float> poison, float %b, i32 0
%vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
- %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl)
+ %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
+ %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> splat (i1 -1), <8 x float> %v, <8 x float> %c, i32 %evl)
ret <8 x float> %u
}

@@ -1103,11 +981,9 @@ define <8 x float> @vfnmacc_vv_v8f32_ta(<8 x float> %a, <8 x float> %b, <8 x flo
; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
+ %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
%u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
ret <8 x float> %u
}
@@ -1121,11 +997,9 @@ define <8 x float> @vfnmacc_vf_v8f32_ta(<8 x float> %a, float %b, <8 x float> %c
; CHECK-NEXT: ret
%elt.head = insertelement <8 x float> poison, float %b, i32 0
%vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
- %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
+ %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+ %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
+ %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl)
%u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
ret <8 x float> %u
}
@@ -1139,11 +1013,9 @@ define <8 x float> @vfnmacc_vf_v8f32_commute_ta(<8 x float> %a, float %b, <8 x f
; CHECK-NEXT: ret
%elt.head = insertelement <8 x float> poison, float %b, i32 0
%vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
- %splat = insertelement <8 x i1> poison, i1 -1, i32 0
- %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
- %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
- %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x
i1> %allones, i32 %evl) - %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %negc, <8 x i1> %allones, i32 %evl) + %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl) + %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl) ret <8 x float> %u } @@ -1160,11 +1032,9 @@ define <16 x float> @vfnmacc_vv_v16f32(<16 x float> %a, <16 x float> %b, <16 x f ; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1176,12 +1046,10 @@ define <16 x float> @vfnmacc_vv_v16f32_unmasked(<16 x float> %a, <16 x float> %b ; CHECK-NEXT: vfnmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl) - %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl) + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> splat (i1 -1), <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1194,11 +1062,9 @@ define <16 x float> @vfnmacc_vf_v16f32(<16 x float> %a, float %b, <16 x float> % ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x 
float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1212,11 +1078,9 @@ define <16 x float> @vfnmacc_vf_v16f32_commute(<16 x float> %a, float %b, <16 x ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1230,12 +1094,10 @@ define <16 x float> @vfnmacc_vf_v16f32_unmasked(<16 x float> %a, float %b, <16 x ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl) - %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl) + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> splat (i1 -1), <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1246,11 +1108,9 @@ define <16 x float> @vfnmacc_vv_v16f32_ta(<16 x float> %a, <16 x float> %b, <16 ; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %nega = call <16 x 
float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1264,11 +1124,9 @@ define <16 x float> @vfnmacc_vf_v16f32_ta(<16 x float> %a, float %b, <16 x float ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1282,11 +1140,9 @@ define <16 x float> @vfnmacc_vf_v16f32_commute_ta(<16 x float> %a, float %b, <16 ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl) - %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl) - %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %negc, <16 x i1> %allones, i32 %evl) + %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl) + %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl) + %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %negc, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl) ret <16 x float> %u } @@ -1303,11 +1159,9 @@ define <2 x double> @vfnmacc_vv_v2f64(<2 x double> %a, <2 x double> %b, <2 x dou ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x 
double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1319,12 +1173,10 @@ define <2 x double> @vfnmacc_vv_v2f64_unmasked(<2 x double> %a, <2 x double> %b, ; CHECK-NEXT: vfnmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl) - %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl) + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> splat (i1 -1), <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1337,11 +1189,9 @@ define <2 x double> @vfnmacc_vf_v2f64(<2 x double> %a, double %b, <2 x double> % ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1355,11 +1205,9 @@ define <2 x double> @vfnmacc_vf_v2f64_commute(<2 x double> %a, double %b, <2 x d ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, 
<2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1373,12 +1221,10 @@ define <2 x double> @vfnmacc_vf_v2f64_unmasked(<2 x double> %a, double %b, <2 x ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl) - %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl) + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> splat (i1 -1), <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1389,11 +1235,9 @@ define <2 x double> @vfnmacc_vv_v2f64_ta(<2 x double> %a, <2 x double> %b, <2 x ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1407,11 +1251,9 @@ define <2 x double> @vfnmacc_vf_v2f64_ta(<2 x double> %a, double %b, <2 x double ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x 
i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1425,11 +1267,9 @@ define <2 x double> @vfnmacc_vf_v2f64_commute_ta(<2 x double> %a, double %b, <2 ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl) - %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl) - %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %negc, <2 x i1> %allones, i32 %evl) + %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl) + %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %negc, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl) ret <2 x double> %u } @@ -1446,11 +1286,9 @@ define <4 x double> @vfnmacc_vv_v4f64(<4 x double> %a, <4 x double> %b, <4 x dou ; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1462,12 +1300,10 @@ define <4 x double> @vfnmacc_vv_v4f64_unmasked(<4 x double> %a, <4 x double> %b, ; CHECK-NEXT: vfnmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x 
double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl) - %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> splat (i1 -1), <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1480,11 +1316,9 @@ define <4 x double> @vfnmacc_vf_v4f64(<4 x double> %a, double %b, <4 x double> % ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1498,11 +1332,9 @@ define <4 x double> @vfnmacc_vf_v4f64_commute(<4 x double> %a, double %b, <4 x d ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1516,12 +1348,10 @@ define <4 x double> @vfnmacc_vf_v4f64_unmasked(<4 x double> %a, double %b, <4 x ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = 
insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl) - %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> splat (i1 -1), <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1532,11 +1362,9 @@ define <4 x double> @vfnmacc_vv_v4f64_ta(<4 x double> %a, <4 x double> %b, <4 x ; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1550,11 +1378,9 @@ define <4 x double> @vfnmacc_vf_v4f64_ta(<4 x double> %a, double %b, <4 x double ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1568,11 +1394,9 @@ define <4 x double> @vfnmacc_vf_v4f64_commute_ta(<4 x double> %a, double %b, <4 ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> 
%elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) - %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %negc, <4 x i1> %allones, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %negc, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1589,11 +1413,9 @@ define <8 x double> @vfnmacc_vv_v8f64(<8 x double> %a, <8 x double> %b, <8 x dou ; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1605,12 +1427,10 @@ define <8 x double> @vfnmacc_vv_v8f64_unmasked(<8 x double> %a, <8 x double> %b, ; CHECK-NEXT: vfnmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl) - %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> splat (i1 -1), <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1623,11 +1443,9 @@ define <8 x double> @vfnmacc_vf_v8f64(<8 x double> %a, double %b, <8 x double> % ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x 
double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1641,11 +1459,9 @@ define <8 x double> @vfnmacc_vf_v8f64_commute(<8 x double> %a, double %b, <8 x d ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1659,12 +1475,10 @@ define <8 x double> @vfnmacc_vf_v8f64_unmasked(<8 x double> %a, double %b, <8 x ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl) - %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> splat (i1 -1), <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1675,11 +1489,9 @@ define <8 x double> @vfnmacc_vv_v8f64_ta(<8 x double> %a, 
<8 x double> %b, <8 x ; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1693,11 +1505,9 @@ define <8 x double> @vfnmacc_vf_v8f64_ta(<8 x double> %a, double %b, <8 x double ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1711,11 +1521,9 @@ define <8 x double> @vfnmacc_vf_v8f64_commute_ta(<8 x double> %a, double %b, <8 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %negc, <8 x i1> %allones, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %negc, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmsac-vp.ll index 
86605446815bf..df705270664bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfnmsac-vp.ll @@ -16,10 +16,8 @@ define <2 x half> @vfnmsac_vv_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, ; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -31,11 +29,9 @@ define <2 x half> @vfnmsac_vv_v2f16_unmasked(<2 x half> %a, <2 x half> %b, <2 x ; CHECK-NEXT: vfnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %c, <2 x i1> %allones, i32 %evl) - %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> splat (i1 -1), <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -48,10 +44,8 @@ define <2 x half> @vfnmsac_vf_v2f16(<2 x half> %a, half %b, <2 x half> %c, <2 x ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -65,10 +59,8 @@ define <2 x half> @vfnmsac_vf_v2f16_commute(<2 x half> %a, half %b, <2 x half> % ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %nega, <2 x 
half> %c, <2 x i1> %allones, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %nega, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -82,11 +74,9 @@ define <2 x half> @vfnmsac_vf_v2f16_unmasked(<2 x half> %a, half %b, <2 x half> ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %c, <2 x i1> %allones, i32 %evl) - %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> splat (i1 -1), <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -97,10 +87,8 @@ define <2 x half> @vfnmsac_vv_v2f16_ta(<2 x half> %a, <2 x half> %b, <2 x half> ; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -114,10 +102,8 @@ define <2 x half> @vfnmsac_vf_v2f16_ta(<2 x half> %a, half %b, <2 x half> %c, <2 ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -131,10 +117,8 @@ define <2 x half> @vfnmsac_vf_v2f16_commute_ta(<2 x half> %a, half %b, <2 x half ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %splat = 
insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl) - %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %nega, <2 x half> %c, <2 x i1> %allones, i32 %evl) + %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> splat (i1 -1), i32 %evl) + %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %nega, <2 x half> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl) ret <2 x half> %u } @@ -151,10 +135,8 @@ define <4 x half> @vfnmsac_vv_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, ; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -166,11 +148,9 @@ define <4 x half> @vfnmsac_vv_v4f16_unmasked(<4 x half> %a, <4 x half> %b, <4 x ; CHECK-NEXT: vfnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %c, <4 x i1> %allones, i32 %evl) - %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl) + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> splat (i1 -1), <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -183,10 +163,8 @@ define <4 x half> @vfnmsac_vf_v4f16(<4 x half> %a, half %b, <4 x half> %c, <4 x ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -200,10 +178,8 @@ define <4 x half> 
@vfnmsac_vf_v4f16_commute(<4 x half> %a, half %b, <4 x half> % ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %nega, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %nega, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -217,11 +193,9 @@ define <4 x half> @vfnmsac_vf_v4f16_unmasked(<4 x half> %a, half %b, <4 x half> ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %c, <4 x i1> %allones, i32 %evl) - %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl) + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> splat (i1 -1), <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -232,10 +206,8 @@ define <4 x half> @vfnmsac_vv_v4f16_ta(<4 x half> %a, <4 x half> %b, <4 x half> ; CHECK-NEXT: vfnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl) ret <4 x half> %u } @@ -249,10 +221,8 @@ define <4 x half> @vfnmsac_vf_v4f16_ta(<4 x half> %a, half %b, <4 x half> %c, <4 ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl) - %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %c, <4 x i1> %allones, i32 %evl) + %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> 
splat (i1 -1), i32 %evl)
+  %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
   ret <4 x half> %u
 }
@@ -266,10 +236,8 @@ define <4 x half> @vfnmsac_vf_v4f16_commute_ta(<4 x half> %a, half %b, <4 x half
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x half> poison, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
-  %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %nega, <4 x half> %c, <4 x i1> %allones, i32 %evl)
+  %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> splat (i1 -1), i32 %evl)
+  %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %nega, <4 x half> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
   ret <4 x half> %u
 }
@@ -286,10 +254,8 @@ define <8 x half> @vfnmsac_vv_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c,
 ; CHECK-NEXT:    vfnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %c, <8 x i1> %allones, i32 %evl)
+  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
   ret <8 x half> %u
 }
@@ -301,11 +267,9 @@ define <8 x half> @vfnmsac_vv_v8f16_unmasked(<8 x half> %a, <8 x half> %b, <8 x
 ; CHECK-NEXT:    vfnmsac.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %c, <8 x i1> %allones, i32 %evl)
-  %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl)
+  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+  %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> splat (i1 -1), <8 x half> %v, <8 x half> %c, i32 %evl)
   ret <8 x half> %u
 }
 
@@ -318,10 +282,8 @@ define <8 x half> @vfnmsac_vf_v8f16(<8 x half> %a, half %b, <8 x half> %c, <8 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %c, <8 x i1> %allones, i32 %evl)
+  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
   ret <8 x half> %u
 }
@@ -335,10 +297,8 @@ define <8 x half> @vfnmsac_vf_v8f16_commute(<8 x half> %a, half %b, <8 x half> %
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %nega, <8 x half> %c, <8 x i1> %allones, i32 %evl)
+  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %nega, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
   ret <8 x half> %u
 }
@@ -352,11 +312,9 @@ define <8 x half> @vfnmsac_vf_v8f16_unmasked(<8 x half> %a, half %b, <8 x half>
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %c, <8 x i1> %allones, i32 %evl)
-  %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl)
+  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
+  %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> splat (i1 -1), <8 x half> %v, <8 x half> %c, i32 %evl)
   ret <8 x half> %u
 }
 
@@ -367,10 +325,8 @@ define <8 x half> @vfnmsac_vv_v8f16_ta(<8 x half> %a, <8 x half> %b, <8 x half>
 ; CHECK-NEXT:    vfnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %c, <8 x i1> %allones, i32 %evl)
+  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
   ret <8 x half> %u
 }
@@ -384,10 +340,8 @@ define <8 x half> @vfnmsac_vf_v8f16_ta(<8 x half> %a, half %b, <8 x half> %c, <8
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %c, <8 x i1> %allones, i32 %evl)
+  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
   ret <8 x half> %u
 }
@@ -401,10 +355,8 @@ define <8 x half> @vfnmsac_vf_v8f16_commute_ta(<8 x half> %a, half %b, <8 x half
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x half> poison, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %nega, <8 x half> %c, <8 x i1> %allones, i32 %evl)
+  %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %nega, <8 x half> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
   ret <8 x half> %u
 }
@@ -421,10 +373,8 @@ define <16 x half> @vfnmsac_vv_v16f16(<16 x half> %a, <16 x half> %b, <16 x half
 ; CHECK-NEXT:    vfnmsac.vv v12, v8, v10, v0.t
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %c, <16 x i1> %allones, i32 %evl)
+  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
   ret <16 x half> %u
 }
@@ -436,11 +386,9 @@ define <16 x half> @vfnmsac_vv_v16f16_unmasked(<16 x half> %a, <16 x half> %b, <
 ; CHECK-NEXT:    vfnmsac.vv v12, v8, v10
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %c, <16 x i1> %allones, i32 %evl)
-  %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl)
+  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+  %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> splat (i1 -1), <16 x half> %v, <16 x half> %c, i32 %evl)
   ret <16 x half> %u
 }
 
@@ -453,10 +401,8 @@ define <16 x half> @vfnmsac_vf_v16f16(<16 x half> %a, half %b, <16 x half> %c, <
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %c, <16 x i1> %allones, i32 %evl)
+  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
   ret <16 x half> %u
 }
@@ -470,10 +416,8 @@ define <16 x half> @vfnmsac_vf_v16f16_commute(<16 x half> %a, half %b, <16 x hal
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %nega, <16 x half> %c, <16 x i1> %allones, i32 %evl)
+  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %nega, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
   ret <16 x half> %u
 }
@@ -487,11 +431,9 @@ define <16 x half> @vfnmsac_vf_v16f16_unmasked(<16 x half> %a, half %b, <16 x ha
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %c, <16 x i1> %allones, i32 %evl)
-  %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl)
+  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
+  %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> splat (i1 -1), <16 x half> %v, <16 x half> %c, i32 %evl)
   ret <16 x half> %u
 }
 
@@ -502,10 +444,8 @@ define <16 x half> @vfnmsac_vv_v16f16_ta(<16 x half> %a, <16 x half> %b, <16 x h
 ; CHECK-NEXT:    vfnmsac.vv v12, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %c, <16 x i1> %allones, i32 %evl)
+  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
   ret <16 x half> %u
 }
@@ -519,10 +459,8 @@ define <16 x half> @vfnmsac_vf_v16f16_ta(<16 x half> %a, half %b, <16 x half> %c
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %c, <16 x i1> %allones, i32 %evl)
+  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
   ret <16 x half> %u
 }
@@ -536,10 +474,8 @@ define <16 x half> @vfnmsac_vf_v16f16_commute_ta(<16 x half> %a, half %b, <16 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x half> poison, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %nega, <16 x half> %c, <16 x i1> %allones, i32 %evl)
+  %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %nega, <16 x half> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
   ret <16 x half> %u
 }
@@ -556,10 +492,8 @@ define <32 x half> @vfnmsac_vv_v26f16(<32 x half> %a, <32 x half> %b, <32 x half
 ; CHECK-NEXT:    vfnmsac.vv v16, v8, v12, v0.t
 ; CHECK-NEXT:    vmv4r.v v8, v16
 ; CHECK-NEXT:    ret
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
-  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %nega, <32 x half> %b, <32 x half> %c, <32 x i1> %allones, i32 %evl)
+  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %nega, <32 x half> %b, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x half> @llvm.vp.merge.v26f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
   ret <32 x half> %u
 }
@@ -571,11 +505,9 @@ define <32 x half> @vfnmsac_vv_v26f16_unmasked(<32 x half> %a, <32 x half> %b, <
 ; CHECK-NEXT:    vfnmsac.vv v16, v8, v12
 ; CHECK-NEXT:    vmv4r.v v8, v16
 ; CHECK-NEXT:    ret
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
-  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %nega, <32 x half> %b, <32 x half> %c, <32 x i1> %allones, i32 %evl)
-  %u = call <32 x half> @llvm.vp.merge.v26f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl)
+  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %nega, <32 x half> %b, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
+  %u = call <32 x half> @llvm.vp.merge.v26f16(<32 x i1> splat (i1 -1), <32 x half> %v, <32 x half> %c, i32 %evl)
   ret <32 x half> %u
 }
 
@@ -588,10 +520,8 @@ define <32 x half> @vfnmsac_vf_v26f16(<32 x half> %a, half %b, <32 x half> %c, <
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <32 x half> poison, half %b, i32 0
   %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
-  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %c, <32 x i1> %allones, i32 %evl)
+  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x half> @llvm.vp.merge.v26f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
   ret <32 x half> %u
 }
@@ -605,10 +535,8 @@ define <32 x half> @vfnmsac_vf_v26f16_commute(<32 x half> %a, half %b, <32 x hal
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <32 x half> poison, half %b, i32 0
   %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
-  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %c, <32 x i1> %allones, i32 %evl)
+  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x half> @llvm.vp.merge.v26f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
   ret <32 x half> %u
 }
@@ -622,11 +550,9 @@ define <32 x half> @vfnmsac_vf_v26f16_unmasked(<32 x half> %a, half %b, <32 x ha
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <32 x half> poison, half %b, i32 0
   %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
-  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %c, <32 x i1> %allones, i32 %evl)
-  %u = call <32 x half> @llvm.vp.merge.v26f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl)
+  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
+  %u = call <32 x half> @llvm.vp.merge.v26f16(<32 x i1> splat (i1 -1), <32 x half> %v, <32 x half> %c, i32 %evl)
   ret <32 x half> %u
 }
 
@@ -637,10 +563,8 @@ define <32 x half> @vfnmsac_vv_v26f16_ta(<32 x half> %a, <32 x half> %b, <32 x h
 ; CHECK-NEXT:    vfnmsac.vv v16, v8, v12, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
-  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %nega, <32 x half> %b, <32 x half> %c, <32 x i1> %allones, i32 %evl)
+  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %nega, <32 x half> %b, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x half> @llvm.vp.select.v26f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
   ret <32 x half> %u
 }
@@ -654,10 +578,8 @@ define <32 x half> @vfnmsac_vf_v26f16_ta(<32 x half> %a, half %b, <32 x half> %c
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <32 x half> poison, half %b, i32 0
   %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
-  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %c, <32 x i1> %allones, i32 %evl)
+  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x half> @llvm.vp.select.v26f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
   ret <32 x half> %u
 }
@@ -671,10 +593,8 @@ define <32 x half> @vfnmsac_vf_v26f16_commute_ta(<32 x half> %a, half %b, <32 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <32 x half> poison, half %b, i32 0
   %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
-  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %c, <32 x i1> %allones, i32 %evl)
+  %nega = call <32 x half> @llvm.vp.fneg.v26f16(<32 x half> %a, <32 x i1> splat (i1 -1), i32 %evl)
+  %v = call <32 x half> @llvm.vp.fma.v26f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x half> @llvm.vp.select.v26f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
   ret <32 x half> %u
 }
@@ -691,10 +611,8 @@ define <2 x float> @vfnmsac_vv_v2f32(<2 x float> %a, <2 x float> %b, <2 x float>
 ; CHECK-NEXT:    vfnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %c, <2 x i1> %allones, i32 %evl)
+  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
   ret <2 x float> %u
 }
@@ -706,11 +624,9 @@ define <2 x float> @vfnmsac_vv_v2f32_unmasked(<2 x float> %a, <2 x float> %b, <2
 ; CHECK-NEXT:    vfnmsac.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %c, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl)
+  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> splat (i1 -1), <2 x float> %v, <2 x float> %c, i32 %evl)
   ret <2 x float> %u
 }
 
@@ -723,10 +639,8 @@ define <2 x float> @vfnmsac_vf_v2f32(<2 x float> %a, float %b, <2 x float> %c, <
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x float> poison, float %b, i32 0
   %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %c, <2 x i1> %allones, i32 %evl)
+  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
   ret <2 x float> %u
 }
@@ -740,10 +654,8 @@ define <2 x float> @vfnmsac_vf_v2f32_commute(<2 x float> %a, float %b, <2 x floa
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x float> poison, float %b, i32 0
   %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %c, <2 x i1> %allones, i32 %evl)
+  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
   ret <2 x float> %u
 }
@@ -757,11 +669,9 @@ define <2 x float> @vfnmsac_vf_v2f32_unmasked(<2 x float> %a, float %b, <2 x flo
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x float> poison, float %b, i32 0
   %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %c, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl)
+  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> splat (i1 -1), <2 x float> %v, <2 x float> %c, i32 %evl)
   ret <2 x float> %u
 }
 
@@ -772,10 +682,8 @@ define <2 x float> @vfnmsac_vv_v2f32_ta(<2 x float> %a, <2 x float> %b, <2 x flo
 ; CHECK-NEXT:    vfnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %c, <2 x i1> %allones, i32 %evl)
+  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
   ret <2 x float> %u
 }
@@ -789,10 +697,8 @@ define <2 x float> @vfnmsac_vf_v2f32_ta(<2 x float> %a, float %b, <2 x float> %c
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x float> poison, float %b, i32 0
   %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %c, <2 x i1> %allones, i32 %evl)
+  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
   ret <2 x float> %u
 }
@@ -806,10 +712,8 @@ define <2 x float> @vfnmsac_vf_v2f32_commute_ta(<2 x float> %a, float %b, <2 x f
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x float> poison, float %b, i32 0
   %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %c, <2 x i1> %allones, i32 %evl)
+  %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
   ret <2 x float> %u
 }
@@ -826,10 +730,8 @@ define <4 x float> @vfnmsac_vv_v4f32(<4 x float> %a, <4 x float> %b, <4 x float>
 ; CHECK-NEXT:    vfnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
-  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %c, <4 x i1> %allones, i32 %evl)
+  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
   ret <4 x float> %u
 }
@@ -841,11 +743,9 @@ define <4 x float> @vfnmsac_vv_v4f32_unmasked(<4 x float> %a, <4 x float> %b, <4
 ; CHECK-NEXT:    vfnmsac.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
-  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %c, <4 x i1> %allones, i32 %evl)
-  %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl)
+  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
+  %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> splat (i1 -1), <4 x float> %v, <4 x float> %c, i32 %evl)
   ret <4 x float> %u
 }
 
@@ -858,10 +758,8 @@ define <4 x float> @vfnmsac_vf_v4f32(<4 x float> %a, float %b, <4 x float> %c, <
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x float> poison, float %b, i32 0
   %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
-  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %c, <4 x i1> %allones, i32 %evl)
+  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
   ret <4 x float> %u
 }
@@ -875,10 +773,8 @@ define <4 x float> @vfnmsac_vf_v4f32_commute(<4 x float> %a, float %b, <4 x floa
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x float> poison, float %b, i32 0
   %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
-  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %c, <4 x i1> %allones, i32 %evl)
+  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
   ret <4 x float> %u
 }
@@ -892,11 +788,9 @@ define <4 x float> @vfnmsac_vf_v4f32_unmasked(<4 x float> %a, float %b, <4 x flo
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x float> poison, float %b, i32 0
   %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
-  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %c, <4 x i1> %allones, i32 %evl)
-  %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl)
+  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
+  %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> splat (i1 -1), <4 x float> %v, <4 x float> %c, i32 %evl)
   ret <4 x float> %u
 }
 
@@ -907,10 +801,8 @@ define <4 x float> @vfnmsac_vv_v4f32_ta(<4 x float> %a, <4 x float> %b, <4 x flo
 ; CHECK-NEXT:    vfnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
-  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %c, <4 x i1> %allones, i32 %evl)
+  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
   ret <4 x float> %u
 }
@@ -924,10 +816,8 @@ define <4 x float> @vfnmsac_vf_v4f32_ta(<4 x float> %a, float %b, <4 x float> %c
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x float> poison, float %b, i32 0
   %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
-  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %c, <4 x i1> %allones, i32 %evl)
+  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
   ret <4 x float> %u
 }
@@ -941,10 +831,8 @@ define <4 x float> @vfnmsac_vf_v4f32_commute_ta(<4 x float> %a, float %b, <4 x f
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x float> poison, float %b, i32 0
   %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
-  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %c, <4 x i1> %allones, i32 %evl)
+  %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> splat (i1 -1), i32 %evl)
+  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
   ret <4 x float> %u
 }
@@ -961,10 +849,8 @@ define <8 x float> @vfnmsac_vv_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>
 ; CHECK-NEXT:    vfnmsac.vv v12, v8, v10, v0.t
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %c, <8 x i1> %allones, i32 %evl)
+  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
   ret <8 x float> %u
 }
@@ -976,11 +862,9 @@ define <8 x float> @vfnmsac_vv_v8f32_unmasked(<8 x float> %a, <8 x float> %b, <8
 ; CHECK-NEXT:    vfnmsac.vv v12, v8, v10
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %c, <8 x i1> %allones, i32 %evl)
-  %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl)
+  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
+  %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> splat (i1 -1), <8 x float> %v, <8 x float> %c, i32 %evl)
   ret <8 x float> %u
 }
 
@@ -993,10 +877,8 @@ define <8 x float> @vfnmsac_vf_v8f32(<8 x float> %a, float %b, <8 x float> %c, <
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x float> poison, float %b, i32 0
   %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %c, <8 x i1> %allones, i32 %evl)
+  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
   ret <8 x float> %u
 }
@@ -1010,10 +892,8 @@ define <8 x float> @vfnmsac_vf_v8f32_commute(<8 x float> %a, float %b, <8 x floa
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x float> poison, float %b, i32 0
   %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %c, <8 x i1> %allones, i32 %evl)
+  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
   ret <8 x float> %u
 }
@@ -1027,11 +907,9 @@ define <8 x float> @vfnmsac_vf_v8f32_unmasked(<8 x float> %a, float %b, <8 x flo
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x float> poison, float %b, i32 0
   %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %c, <8 x i1> %allones, i32 %evl)
-  %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl)
+  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
+  %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> splat (i1 -1), <8 x float> %v, <8 x float> %c, i32 %evl)
   ret <8 x float> %u
 }
 
@@ -1042,10 +920,8 @@ define <8 x float> @vfnmsac_vv_v8f32_ta(<8 x float> %a, <8 x float> %b, <8 x flo
 ; CHECK-NEXT:    vfnmsac.vv v12, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %c, <8 x i1> %allones, i32 %evl)
+  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
   ret <8 x float> %u
 }
@@ -1059,10 +935,8 @@ define <8 x float> @vfnmsac_vf_v8f32_ta(<8 x float> %a, float %b, <8 x float> %c
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x float> poison, float %b, i32 0
   %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %c, <8 x i1> %allones, i32 %evl)
+  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
   ret <8 x float> %u
 }
@@ -1076,10 +950,8 @@ define <8 x float> @vfnmsac_vf_v8f32_commute_ta(<8 x float> %a, float %b, <8 x f
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x float> poison, float %b, i32 0
   %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
-  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %c, <8 x i1> %allones, i32 %evl)
+  %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> splat (i1 -1), i32 %evl)
+  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
   ret <8 x float> %u
 }
@@ -1096,10 +968,8 @@ define <16 x float> @vfnmsac_vv_v16f32(<16 x float> %a, <16 x float> %b, <16 x f
 ; CHECK-NEXT:    vfnmsac.vv v16, v8, v12, v0.t
 ; CHECK-NEXT:    vmv4r.v v8, v16
 ; CHECK-NEXT:    ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %c, <16 x i1> %allones, i32 %evl)
+  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
   ret <16 x float> %u
 }
@@ -1111,11 +981,9 @@ define <16 x float> @vfnmsac_vv_v16f32_unmasked(<16 x float> %a, <16 x float> %b
 ; CHECK-NEXT:    vfnmsac.vv v16, v8, v12
 ; CHECK-NEXT:    vmv4r.v v8, v16
 ; CHECK-NEXT:    ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %c, <16 x i1> %allones, i32 %evl)
-  %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl)
+  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
+  %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> splat (i1 -1), <16 x float> %v, <16 x float> %c, i32 %evl)
   ret <16 x float> %u
 }
 
@@ -1128,10 +996,8 @@ define <16 x float> @vfnmsac_vf_v16f32(<16 x float> %a, float %b, <16 x float> %
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x float> poison, float %b, i32 0
   %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %c, <16 x i1> %allones, i32 %evl)
+  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
   ret <16 x float> %u
 }
@@ -1145,10 +1011,8 @@ define <16 x float> @vfnmsac_vf_v16f32_commute(<16 x float> %a, float %b, <16 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x float> poison, float %b, i32 0
   %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %c, <16 x i1> %allones, i32 %evl)
+  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
   ret <16 x float> %u
 }
@@ -1162,11 +1026,9 @@ define <16 x float> @vfnmsac_vf_v16f32_unmasked(<16 x float> %a, float %b, <16 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x float> poison, float %b, i32 0
   %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %c, <16 x i1> %allones, i32 %evl)
-  %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl)
+  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
+  %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> splat (i1 -1), <16 x float> %v, <16 x float> %c, i32 %evl)
   ret <16 x float> %u
 }
 
@@ -1177,10 +1039,8 @@ define <16 x float> @vfnmsac_vv_v16f32_ta(<16 x float> %a, <16 x float> %b, <16
 ; CHECK-NEXT:    vfnmsac.vv v16, v8, v12, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %c, <16 x i1> %allones, i32 %evl)
+  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
   ret <16 x float> %u
 }
@@ -1194,10 +1054,8 @@ define <16 x float> @vfnmsac_vf_v16f32_ta(<16 x float> %a, float %b, <16 x float
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x float> poison, float %b, i32 0
   %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %c, <16 x i1> %allones, i32 %evl)
+  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
   ret <16 x float> %u
 }
@@ -1211,10 +1069,8 @@ define <16 x float> @vfnmsac_vf_v16f32_commute_ta(<16 x float> %a, float %b, <16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x float> poison, float %b, i32 0
   %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
-  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %c, <16 x i1> %allones, i32 %evl)
+  %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> splat (i1 -1), i32 %evl)
+  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
   ret <16 x float> %u
 }
@@ -1231,10 +1087,8 @@ define <2 x double> @vfnmsac_vv_v2f64(<2 x double> %a, <2 x double> %b, <2 x dou
 ; CHECK-NEXT:    vfnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %c, <2 x i1> %allones, i32 %evl)
+  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
   ret <2 x double> %u
 }
@@ -1246,11 +1100,9 @@ define <2 x double> @vfnmsac_vv_v2f64_unmasked(<2 x double> %a, <2 x double> %b,
 ; CHECK-NEXT:    vfnmsac.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %c, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl)
+  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> splat (i1 -1), <2 x double> %v, <2 x double> %c, i32 %evl)
   ret <2 x double> %u
 }
 
@@ -1263,10 +1115,8 @@ define <2 x double> @vfnmsac_vf_v2f64(<2 x double> %a, double %b, <2 x double> %
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x double> poison, double %b, i32 0
   %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %c, <2 x i1> %allones, i32 %evl)
+  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
   ret <2 x double> %u
 }
@@ -1280,10 +1130,8 @@ define <2 x double> @vfnmsac_vf_v2f64_commute(<2 x double> %a, double %b, <2 x d
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x double> poison, double %b, i32 0
   %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %c, <2 x i1> %allones, i32 %evl)
+  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
   ret <2 x double> %u
 }
@@ -1297,11 +1145,9 @@ define <2 x double> @vfnmsac_vf_v2f64_unmasked(<2 x double> %a, double %b, <2 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x double> poison, double %b, i32 0
   %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %c, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl)
+  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> splat (i1 -1), <2 x double> %v, <2 x double> %c, i32 %evl)
   ret <2 x double> %u
 }
 
@@ -1312,10 +1158,8 @@ define <2 x double> @vfnmsac_vv_v2f64_ta(<2 x double> %a, <2 x double> %b, <2 x
 ; CHECK-NEXT:    vfnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %c, <2 x i1> %allones, i32 %evl)
+  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
   ret <2 x double> %u
 }
@@ -1329,10 +1173,8 @@ define <2 x double> @vfnmsac_vf_v2f64_ta(<2 x double> %a, double %b, <2 x double
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x double> poison, double %b, i32 0
   %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %c, <2 x i1> %allones, i32 %evl)
+  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
   ret <2 x double> %u
 }
@@ -1346,10 +1188,8 @@ define <2 x double> @vfnmsac_vf_v2f64_commute_ta(<2 x double> %a, double %b, <2
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x double> poison, double %b, i32 0
   %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
-  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %c, <2 x i1> %allones, i32 %evl)
+  %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> splat (i1 -1), i32 %evl)
+  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
   ret <2 x double> %u
 }
@@ -1366,10 +1206,8 @@ define <4 x double> @vfnmsac_vv_v4f64(<4 x double> %a, <4 x double> %b, <4 x dou
 ; CHECK-NEXT:    vfnmsac.vv v12, v8, v10, v0.t
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl)
-  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %c, <4 x i1> %allones, i32 %evl)
+  %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl)
+  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
   ret <4 x double> %u
 }
@@ -1381,11 +1219,9 @@ define <4 x double> @vfnmsac_vv_v4f64_unmasked(<4 x double> %a, <4 x double> %b,
 ; CHECK-NEXT:    vfnmsac.vv v12, v8, v10
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl)
-  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %c, <4 x i1> %allones, i32 %evl)
-  %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl)
+  %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl)
+  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
+  %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> splat (i1 -1), <4 x double> %v, <4 x double> %c, i32 %evl)
   ret <4 x double> %u
 }
 
@@ -1398,10 +1234,8 @@ define <4 x double> @vfnmsac_vf_v4f64(<4 x double> %a, double %b, <4 x double> %
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x double> poison, double %b, i32 0
   %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl)
-  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %c, <4 x i1> %allones, i32 %evl)
+  %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl)
+  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
   ret <4 x double> %u
 }
@@ -1415,10 +1249,8 @@ define <4 x double> @vfnmsac_vf_v4f64_commute(<4 x double> %a, double %b, <4 x d
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x double> poison, double %b, i32 0
   %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1>
%allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %c, <4 x i1> %allones, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1432,11 +1264,9 @@ define <4 x double> @vfnmsac_vf_v4f64_unmasked(<4 x double> %a, double %b, <4 x ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %c, <4 x i1> %allones, i32 %evl) - %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> splat (i1 -1), <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1447,10 +1277,8 @@ define <4 x double> @vfnmsac_vv_v4f64_ta(<4 x double> %a, <4 x double> %b, <4 x ; CHECK-NEXT: vfnmsac.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %c, <4 x i1> %allones, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1464,10 +1292,8 @@ define <4 x double> @vfnmsac_vf_v4f64_ta(<4 x double> %a, double %b, <4 x double ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %c, <4 x i1> %allones, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1481,10 +1307,8 @@ 
define <4 x double> @vfnmsac_vf_v4f64_commute_ta(<4 x double> %a, double %b, <4 ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl) - %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %c, <4 x i1> %allones, i32 %evl) + %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> splat (i1 -1), i32 %evl) + %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl) ret <4 x double> %u } @@ -1501,10 +1325,8 @@ define <8 x double> @vfnmsac_vv_v8f64(<8 x double> %a, <8 x double> %b, <8 x dou ; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1516,11 +1338,9 @@ define <8 x double> @vfnmsac_vv_v8f64_unmasked(<8 x double> %a, <8 x double> %b, ; CHECK-NEXT: vfnmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %c, <8 x i1> %allones, i32 %evl) - %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> splat (i1 -1), <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1533,10 +1353,8 @@ define <8 x double> @vfnmsac_vf_v8f64(<8 x double> %a, double %b, <8 x double> % ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %c, <8 x i1> %allones, i32 
%evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1550,10 +1368,8 @@ define <8 x double> @vfnmsac_vf_v8f64_commute(<8 x double> %a, double %b, <8 x d ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1567,11 +1383,9 @@ define <8 x double> @vfnmsac_vf_v8f64_unmasked(<8 x double> %a, double %b, <8 x ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %c, <8 x i1> %allones, i32 %evl) - %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> splat (i1 -1), <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1582,10 +1396,8 @@ define <8 x double> @vfnmsac_vv_v8f64_ta(<8 x double> %a, <8 x double> %b, <8 x ; CHECK-NEXT: vfnmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1599,10 +1411,8 @@ define <8 x double> @vfnmsac_vf_v8f64_ta(<8 x double> %a, double %b, <8 x double ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> 
poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } @@ -1616,10 +1426,8 @@ define <8 x double> @vfnmsac_vf_v8f64_commute_ta(<8 x double> %a, double %b, <8 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl) - %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %c, <8 x i1> %allones, i32 %evl) + %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> splat (i1 -1), i32 %evl) + %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl) ret <8 x double> %u } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll index db7de5907769f..bd354b7dae803 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll @@ -26,9 +26,7 @@ define <2 x half> @vfrdiv_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %vb, <2 x half> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.fdiv.v2f16(<2 x half> %vb, <2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -54,9 +52,7 @@ define <4 x half> @vfrdiv_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %vb, <4 x half> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.fdiv.v4f16(<4 x half> %vb, <4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -82,9 +78,7 @@ define <8 x half> @vfrdiv_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 
0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %vb, <8 x half> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.fdiv.v8f16(<8 x half> %vb, <8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -110,9 +104,7 @@ define <16 x half> @vfrdiv_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zero ; CHECK-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> %vb, <16 x half> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.fdiv.v16f16(<16 x half> %vb, <16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -138,9 +130,7 @@ define <2 x float> @vfrdiv_vf_v2f32_unmasked(<2 x float> %va, float %b, i32 zero ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> %vb, <2 x float> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.fdiv.v2f32(<2 x float> %vb, <2 x float> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -166,9 +156,7 @@ define <4 x float> @vfrdiv_vf_v4f32_unmasked(<4 x float> %va, float %b, i32 zero ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %vb, <4 x float> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %vb, <4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -194,9 +182,7 @@ define <8 x float> @vfrdiv_vf_v8f32_unmasked(<8 x float> %va, float %b, i32 zero ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> %vb, <8 x float> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.fdiv.v8f32(<8 x float> %vb, <8 x float> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -222,9 +208,7 @@ define <16 x float> @vfrdiv_vf_v16f32_unmasked(<16 x float> %va, float %b, i32 z ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> %vb, <16 x float> %va, <16 x i1> %m, i32 %evl) + %v 
= call <16 x float> @llvm.vp.fdiv.v16f32(<16 x float> %vb, <16 x float> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -250,9 +234,7 @@ define <2 x double> @vfrdiv_vf_v2f64_unmasked(<2 x double> %va, double %b, i32 z ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> %vb, <2 x double> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.fdiv.v2f64(<2 x double> %vb, <2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -278,9 +260,7 @@ define <4 x double> @vfrdiv_vf_v4f64_unmasked(<4 x double> %va, double %b, i32 z ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> %vb, <4 x double> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.fdiv.v4f64(<4 x double> %vb, <4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -306,9 +286,7 @@ define <8 x double> @vfrdiv_vf_v8f64_unmasked(<8 x double> %va, double %b, i32 z ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> %vb, <8 x double> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.fdiv.v8f64(<8 x double> %vb, <8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -334,8 +312,6 @@ define <16 x double> @vfrdiv_vf_v16f64_unmasked(<16 x double> %va, double %b, i3 ; CHECK-NEXT: ret %elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> %vb, <16 x double> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.fdiv.v16f64(<16 x double> %vb, <16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll index 3391abf8740a7..0903ef8c8ec3f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll @@ -26,9 +26,7 @@ define <2 x half> @vfrsub_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> %vb, <2 x half> %va, <2 x i1> %m, i32 %evl) 
+ %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> %vb, <2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -54,9 +52,7 @@ define <4 x half> @vfrsub_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %vb, <4 x half> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %vb, <4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -82,9 +78,7 @@ define <8 x half> @vfrsub_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %vb, <8 x half> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %vb, <8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -110,9 +104,7 @@ define <16 x half> @vfrsub_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zero ; CHECK-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %vb, <16 x half> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %vb, <16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -138,9 +130,7 @@ define <2 x float> @vfrsub_vf_v2f32_unmasked(<2 x float> %va, float %b, i32 zero ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> %vb, <2 x float> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> %vb, <2 x float> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -166,9 +156,7 @@ define <4 x float> @vfrsub_vf_v4f32_unmasked(<4 x float> %va, float %b, i32 zero ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %vb, <4 x float> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %vb, <4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -194,9 +182,7 @@ define <8 x float> @vfrsub_vf_v8f32_unmasked(<8 x float> %va, float %b, i32 zero ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x 
float> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> %vb, <8 x float> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> %vb, <8 x float> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -222,9 +208,7 @@ define <16 x float> @vfrsub_vf_v16f32_unmasked(<16 x float> %va, float %b, i32 z ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> %vb, <16 x float> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> %vb, <16 x float> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -250,9 +234,7 @@ define <2 x double> @vfrsub_vf_v2f64_unmasked(<2 x double> %va, double %b, i32 z ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> %vb, <2 x double> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> %vb, <2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -278,9 +260,7 @@ define <4 x double> @vfrsub_vf_v4f64_unmasked(<4 x double> %va, double %b, i32 z ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> %vb, <4 x double> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> %vb, <4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -306,9 +286,7 @@ define <8 x double> @vfrsub_vf_v8f64_unmasked(<8 x double> %va, double %b, i32 z ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> %vb, <8 x double> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> %vb, <8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -334,8 +312,6 @@ define <16 x double> @vfrsub_vf_v16f64_unmasked(<16 x double> %va, double %b, i3 ; CHECK-NEXT: ret %elt.head = insertelement <16 x double> poison, double %b, i32 0 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> %vb, <16 x double> %va, <16 x i1> %m, i32 
%evl) + %v = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> %vb, <16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll index 60022644e5ab6..6004eb4fe217a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll @@ -46,9 +46,7 @@ define <2 x half> @vfsqrt_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.sqrt.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -90,9 +88,7 @@ define <4 x half> @vfsqrt_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.sqrt.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -134,9 +130,7 @@ define <8 x half> @vfsqrt_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.sqrt.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -178,9 +172,7 @@ define <16 x half> @vfsqrt_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.sqrt.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -202,9 +194,7 @@ define <2 x float> @vfsqrt_vv_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.sqrt.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.sqrt.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -226,9 +216,7 @@ define <4 x float> @vfsqrt_vv_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %va, <4 x i1> %m, 
i32 %evl) + %v = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -250,9 +238,7 @@ define <8 x float> @vfsqrt_vv_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.sqrt.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.sqrt.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -274,9 +260,7 @@ define <16 x float> @vfsqrt_vv_v16f32_unmasked(<16 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.sqrt.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.sqrt.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -298,9 +282,7 @@ define <2 x double> @vfsqrt_vv_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.sqrt.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.sqrt.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -322,9 +304,7 @@ define <4 x double> @vfsqrt_vv_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.sqrt.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.sqrt.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -346,9 +326,7 @@ define <8 x double> @vfsqrt_vv_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.sqrt.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.sqrt.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -370,9 +348,7 @@ define <15 x double> @vfsqrt_vv_v15f64_unmasked(<15 x double> %va, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement <15 x i1> poison, i1 true, i32 0 - %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer - %v = call <15 x double> @llvm.vp.sqrt.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) + %v = call <15 x double> @llvm.vp.sqrt.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v } @@ -394,9 +370,7 @@ define <16 x double> @vfsqrt_vv_v16f64_unmasked(<16 x double> %va, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v 
v8, v8 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x double> @llvm.vp.sqrt.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x double> @llvm.vp.sqrt.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v } @@ -445,8 +419,6 @@ define <32 x double> @vfsqrt_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v16, v16 ; CHECK-NEXT: ret - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) + %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll index 76ca7d971eb49..eb717a851ed46 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll @@ -48,9 +48,7 @@ define <2 x half> @vfsub_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, i32 ze ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> %va, <2 x half> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -106,9 +104,7 @@ define <2 x half> @vfsub_vf_v2f16_unmasked(<2 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: ret %elt.head = insertelement <2 x half> poison, half %b, i32 0 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x half> @llvm.vp.fsub.v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v } @@ -175,9 +171,7 @@ define <4 x half> @vfsub_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, i32 ze ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %va, <4 x half> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -233,9 +227,7 @@ define <4 x half> @vfsub_vf_v4f16_unmasked(<4 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: ret %elt.head = insertelement <4 x half> poison, half %b, i32 0 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x half> @llvm.vp.fsub.v4f16(<4 x half> %va, 
<4 x half> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v } @@ -279,9 +271,7 @@ define <8 x half> @vfsub_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, i32 ze ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %va, <8 x half> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -337,9 +327,7 @@ define <8 x half> @vfsub_vf_v8f16_unmasked(<8 x half> %va, half %b, i32 zeroext ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x half> @llvm.vp.fsub.v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v } @@ -383,9 +371,7 @@ define <16 x half> @vfsub_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, i3 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %va, <16 x half> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -441,9 +427,7 @@ define <16 x half> @vfsub_vf_v16f16_unmasked(<16 x half> %va, half %b, i32 zeroe ; ZVFHMIN-NEXT: ret %elt.head = insertelement <16 x half> poison, half %b, i32 0 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x half> @llvm.vp.fsub.v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v } @@ -465,9 +449,7 @@ define <2 x float> @vfsub_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> %va, <2 x float> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -491,9 +473,7 @@ define <2 x float> @vfsub_vf_v2f32_unmasked(<2 x float> %va, float %b, i32 zeroe ; CHECK-NEXT: ret %elt.head = insertelement <2 x float> poison, float %b, i32 0 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> %va, 
<2 x float> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x float> @llvm.vp.fsub.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v } @@ -515,9 +495,7 @@ define <4 x float> @vfsub_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -541,9 +519,7 @@ define <4 x float> @vfsub_vf_v4f32_unmasked(<4 x float> %va, float %b, i32 zeroe ; CHECK-NEXT: ret %elt.head = insertelement <4 x float> poison, float %b, i32 0 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v } @@ -565,9 +541,7 @@ define <8 x float> @vfsub_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> %va, <8 x float> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -591,9 +565,7 @@ define <8 x float> @vfsub_vf_v8f32_unmasked(<8 x float> %va, float %b, i32 zeroe ; CHECK-NEXT: ret %elt.head = insertelement <8 x float> poison, float %b, i32 0 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x float> @llvm.vp.fsub.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v } @@ -615,9 +587,7 @@ define <16 x float> @vfsub_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b, ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> %va, <16 x float> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -641,9 +611,7 @@ define <16 x float> @vfsub_vf_v16f32_unmasked(<16 x float> %va, float %b, i32 ze ; CHECK-NEXT: ret %elt.head = insertelement <16 x float> poison, float %b, i32 0 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> 
%head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x float> @llvm.vp.fsub.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v } @@ -665,9 +633,7 @@ define <2 x double> @vfsub_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b, ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> %va, <2 x double> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -691,9 +657,7 @@ define <2 x double> @vfsub_vf_v2f64_unmasked(<2 x double> %va, double %b, i32 ze ; CHECK-NEXT: ret %elt.head = insertelement <2 x double> poison, double %b, i32 0 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x double> @llvm.vp.fsub.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v } @@ -715,9 +679,7 @@ define <4 x double> @vfsub_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b, ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> %va, <4 x double> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -741,9 +703,7 @@ define <4 x double> @vfsub_vf_v4f64_unmasked(<4 x double> %va, double %b, i32 ze ; CHECK-NEXT: ret %elt.head = insertelement <4 x double> poison, double %b, i32 0 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x double> @llvm.vp.fsub.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v } @@ -765,9 +725,7 @@ define <8 x double> @vfsub_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b, ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> %va, <8 x double> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v } @@ -791,9 +749,7 @@ define <8 x double> @vfsub_vf_v8f64_unmasked(<8 x double> %va, double %b, i32 ze ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> 
%elt.head, <8 x double> poison, <8 x i32> zeroinitializer
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x double> @llvm.vp.fsub.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x double> %v
 }
 
@@ -815,9 +771,7 @@ define <16 x double> @vfsub_vv_v16f64_unmasked(<16 x double> %va, <16 x double>
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vfsub.vv v8, v8, v16
 ; CHECK-NEXT:    ret
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> %m, i32 %evl)
+  %v = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> %va, <16 x double> %b, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x double> %v
 }
 
@@ -841,8 +795,6 @@ define <16 x double> @vfsub_vf_v16f64_unmasked(<16 x double> %va, double %b, i32
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x double> poison, double %b, i32 0
   %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl)
+  %v = call <16 x double> @llvm.vp.fsub.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x double> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll
index b19c30df55114..3dec7daf66ac9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vitofp-constrained-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i1(<1 x i1>, metadata, metadata)
 define <1 x half> @vsitofp_v1i1_v1f16(<1 x i1> %va) strictfp {
@@ -410,6 +410,33 @@ define <1 x half> @vsitofp_v1i8_v1f16(<1 x i8> %va) strictfp {
 
 declare <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i7(<1 x i7>, metadata, metadata)
 define <1 x half> @vsitofp_v1i7_v1f16(<1 x i7> %va) strictfp {
+; RV32-LABEL: vsitofp_v1i7_v1f16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    slli a0, a0, 25
+; RV32-NEXT:    srai a0, a0, 25
+; RV32-NEXT:    fcvt.h.w fa5, a0
+; RV32-NEXT:    fsh fa5, 14(sp)
+; RV32-NEXT:    addi a0, sp, 14
+; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; RV32-NEXT:    vle16.v v8, (a0)
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vsitofp_v1i7_v1f16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    slli a0, a0, 57
+; RV64-NEXT:    srai a0, a0, 57
+; RV64-NEXT:    fcvt.h.w fa5, a0
+; RV64-NEXT:    fsh fa5, 14(sp)
+; RV64-NEXT:    addi a0, sp, 14
+; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; RV64-NEXT:    vle16.v v8, (a0)
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
   %evec = call <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i7(<1 x i7> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
   ret <1 x half> %evec
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll
index 792c6231aa4a5..2181fd8498f5a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmacc-vp.ll
@@ -16,10 +16,8 @@ define <2 x i8> @vmacc_vv_nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i1
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
   ret <2 x i8> %u
 }
@@ -31,11 +29,9 @@ define <2 x i8> @vmacc_vv_nxv2i8_unmasked(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c,
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %allones, <2 x i8> %y, <2 x i8> %c, i32 %evl)
+  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> splat (i1 -1), <2 x i8> %y, <2 x i8> %c, i32 %evl)
   ret <2 x i8> %u
 }
 
@@ -48,10 +44,8 @@ define <2 x i8> @vmacc_vx_nxv2i8(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1> %m,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
   ret <2 x i8> %u
 }
@@ -65,11 +59,9 @@ define <2 x i8> @vmacc_vx_nxv2i8_unmasked(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %allones, <2 x i8> %y, <2 x i8> %c, i32 %evl)
+  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> splat (i1 -1), <2 x i8> %y, <2 x i8> %c, i32 %evl)
   ret <2 x i8> %u
 }
 
@@ -80,10 +72,8 @@ define <2 x i8> @vmacc_vv_nxv2i8_ta(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
   ret <2 x i8> %u
 }
@@ -97,10 +87,8 @@ define <2 x i8> @vmacc_vx_nxv2i8_ta(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1> %
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i8> @llvm.vp.add.nxv2i8(<2 x i8> %x, <2 x i8> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
   ret <2 x i8> %u
 }
@@ -117,10 +105,8 @@ define <4 x i8> @vmacc_vv_nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i1
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
   ret <4 x i8> %u
 }
@@ -132,11 +118,9 @@ define <4 x i8> @vmacc_vv_nxv4i8_unmasked(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c,
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl)
-  %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %allones, <4 x i8> %y, <4 x i8> %c, i32 %evl)
+  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> splat (i1 -1), i32 %evl)
+  %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> splat (i1 -1), <4 x i8> %y, <4 x i8> %c, i32 %evl)
   ret <4 x i8> %u
 }
 
@@ -149,10 +133,8 @@ define <4 x i8> @vmacc_vx_nxv4i8(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1> %m,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
   ret <4 x i8> %u
 }
@@ -166,11 +148,9 @@ define <4 x i8> @vmacc_vx_nxv4i8_unmasked(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl)
-  %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %allones, <4 x i8> %y, <4 x i8> %c, i32 %evl)
+  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> splat (i1 -1), i32 %evl)
+  %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> splat (i1 -1), <4 x i8> %y, <4 x i8> %c, i32 %evl)
   ret <4 x i8> %u
 }
 
@@ -181,10 +161,8 @@ define <4 x i8> @vmacc_vv_nxv4i8_ta(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
   ret <4 x i8> %u
 }
@@ -198,10 +176,8 @@ define <4 x i8> @vmacc_vx_nxv4i8_ta(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1> %
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i8> @llvm.vp.add.nxv4i8(<4 x i8> %x, <4 x i8> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
   ret <4 x i8> %u
 }
@@ -218,10 +194,8 @@ define <8 x i8> @vmacc_vv_nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i1
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl)
+  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
   ret <8 x i8> %u
 }
@@ -233,11 +207,9 @@ define <8 x i8> @vmacc_vv_nxv8i8_unmasked(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c,
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl)
-  %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %allones, <8 x i8> %y, <8 x i8> %c, i32 %evl)
+  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> splat (i1 -1), i32 %evl)
+  %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> splat (i1 -1), <8 x i8> %y, <8 x i8> %c, i32 %evl)
   ret <8 x i8> %u
 }
 
@@ -250,10 +222,8 @@ define <8 x i8> @vmacc_vx_nxv8i8(<8 x i8> %a, i8 %b, <8 x i8> %c, <8 x i1> %m,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl)
+  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
   ret <8 x i8> %u
 }
@@ -267,11 +237,9 @@ define <8 x i8> @vmacc_vx_nxv8i8_unmasked(<8 x i8> %a, i8 %b, <8 x i8> %c, <8 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl)
-  %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %allones, <8 x i8> %y, <8 x i8> %c, i32 %evl)
+  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> splat (i1 -1), i32 %evl)
+  %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> splat (i1 -1), <8 x i8> %y, <8 x i8> %c, i32 %evl)
   ret <8 x i8> %u
 }
 
@@ -282,10 +250,8 @@ define <8 x i8> @vmacc_vv_nxv8i8_ta(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl)
+  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
   ret <8 x i8> %u
 }
@@ -299,10 +265,8 @@ define <8 x i8> @vmacc_vx_nxv8i8_ta(<8 x i8> %a, i8 %b, <8 x i8> %c, <8 x i1> %
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> %allones, i32 %evl)
+  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i8> @llvm.vp.add.nxv8i8(<8 x i8> %x, <8 x i8> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
   ret <8 x i8> %u
 }
@@ -319,10 +283,8 @@ define <16 x i8> @vmacc_vv_nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <1
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl)
+  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
   ret <16 x i8> %u
 }
@@ -334,11 +296,9 @@ define <16 x i8> @vmacc_vv_nxv16i8_unmasked(<16 x i8> %a, <16 x i8> %b, <16 x i8
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl)
-  %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %allones, <16 x i8> %y, <16 x i8> %c, i32 %evl)
+  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> splat (i1 -1), i32 %evl)
+  %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> splat (i1 -1), <16 x i8> %y, <16 x i8> %c, i32 %evl)
   ret <16 x i8> %u
 }
 
@@ -351,10 +311,8 @@ define <16 x i8> @vmacc_vx_nxv16i8(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x i1>
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl)
+  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
   ret <16 x i8> %u
 }
@@ -368,11 +326,9 @@ define <16 x i8> @vmacc_vx_nxv16i8_unmasked(<16 x i8> %a, i8 %b, <16 x i8> %c,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl)
-  %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %allones, <16 x i8> %y, <16 x i8> %c, i32 %evl)
+  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> splat (i1 -1), i32 %evl)
+  %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> splat (i1 -1), <16 x i8> %y, <16 x i8> %c, i32 %evl)
   ret <16 x i8> %u
 }
 
@@ -383,10 +339,8 @@ define <16 x i8> @vmacc_vv_nxv16i8_ta(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl)
+  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
   ret <16 x i8> %u
 }
@@ -400,10 +354,8 @@ define <16 x i8> @vmacc_vx_nxv16i8_ta(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> %allones, i32 %evl)
+  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i8> @llvm.vp.add.nxv16i8(<16 x i8> %x, <16 x i8> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
   ret <16 x i8> %u
 }
@@ -420,10 +372,8 @@ define <32 x i8> @vmacc_vv_nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <3
 ; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl)
+  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
   ret <32 x i8> %u
 }
@@ -435,11 +385,9 @@ define <32 x i8> @vmacc_vv_nxv32i8_unmasked(<32 x i8> %a, <32 x i8> %b, <32 x i8
 ; CHECK-NEXT:    vmacc.vv v12, v8, v10
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl)
-  %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %allones, <32 x i8> %y, <32 x i8> %c, i32 %evl)
+  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> splat (i1 -1), i32 %evl)
+  %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> splat (i1 -1), <32 x i8> %y, <32 x i8> %c, i32 %evl)
   ret <32 x i8> %u
 }
 
@@ -452,10 +400,8 @@ define <32 x i8> @vmacc_vx_nxv32i8(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x i1>
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl)
+  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
   ret <32 x i8> %u
 }
@@ -469,11 +415,9 @@ define <32 x i8> @vmacc_vx_nxv32i8_unmasked(<32 x i8> %a, i8 %b, <32 x i8> %c,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl)
-  %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %allones, <32 x i8> %y, <32 x i8> %c, i32 %evl)
+  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> splat (i1 -1), i32 %evl)
+  %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> splat (i1 -1), <32 x i8> %y, <32 x i8> %c, i32 %evl)
   ret <32 x i8> %u
 }
 
@@ -484,10 +428,8 @@ define <32 x i8> @vmacc_vv_nxv32i8_ta(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c,
 ; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl)
+  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
   ret <32 x i8> %u
 }
@@ -501,10 +443,8 @@ define <32 x i8> @vmacc_vx_nxv32i8_ta(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> %allones, i32 %evl)
+  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i8> @llvm.vp.add.nxv32i8(<32 x i8> %x, <32 x i8> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
   ret <32 x i8> %u
 }
@@ -521,10 +461,8 @@ define <64 x i8> @vmacc_vv_nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <6
 ; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
 ; CHECK-NEXT:    vmv4r.v v8, v16
 ; CHECK-NEXT:    ret
-  %splat = insertelement <64 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
-  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl)
-  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl)
+  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> splat (i1 -1), i32 %evl)
+  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> splat (i1 -1), i32 %evl)
   %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
   ret <64 x i8> %u
 }
@@ -536,11 +474,9 @@ define <64 x i8> @vmacc_vv_nxv64i8_unmasked(<64 x i8> %a, <64 x i8> %b, <64 x i8
 ; CHECK-NEXT:    vmacc.vv v16, v8, v12
 ; CHECK-NEXT:    vmv4r.v v8, v16
 ; CHECK-NEXT:    ret
-  %splat = insertelement <64 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
-  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl)
-  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl)
-  %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %allones, <64 x i8> %y, <64 x i8> %c, i32 %evl)
+  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> splat (i1 -1), i32 %evl)
+  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> splat (i1 -1), i32 %evl)
+  %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> splat (i1 -1), <64 x i8> %y, <64 x i8> %c, i32 %evl)
   ret <64 x i8> %u
 }
 
@@ -553,10 +489,8 @@ define <64 x i8> @vmacc_vx_nxv64i8(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x i1>
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer
-  %splat = insertelement <64 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
-  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl)
-  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl)
+  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> splat (i1 -1), i32 %evl)
+  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> splat (i1 -1), i32 %evl)
   %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
   ret <64 x i8> %u
 }
@@ -570,11 +504,9 @@ define <64 x i8> @vmacc_vx_nxv64i8_unmasked(<64 x i8> %a, i8 %b, <64 x i8> %c,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer
-  %splat = insertelement <64 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
-  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl)
-  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl)
-  %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %allones, <64 x i8> %y, <64 x i8> %c, i32 %evl)
+  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> splat (i1 -1), i32 %evl)
+  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> splat (i1 -1), i32 %evl)
+  %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> splat (i1 -1), <64 x i8> %y, <64 x i8> %c, i32 %evl)
   ret <64 x i8> %u
 }
 
@@ -585,10 +517,8 @@ define <64 x i8> @vmacc_vv_nxv64i8_ta(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c,
 ; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
-  %splat = insertelement <64 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
-  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl)
-  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl)
+  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> splat (i1 -1), i32 %evl)
+  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> splat (i1 -1), i32 %evl)
   %u = call <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
   ret <64 x i8> %u
 }
@@ -602,10 +532,8 @@ define <64 x i8> @vmacc_vx_nxv64i8_ta(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer
-  %splat = insertelement <64 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
-  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl)
-  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> %allones, i32 %evl)
+  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> splat (i1 -1), i32 %evl)
+  %y = call <64 x i8> @llvm.vp.add.nxv64i8(<64 x i8> %x, <64 x i8> %c, <64 x i1> splat (i1 -1), i32 %evl)
   %u = call <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
   ret <64 x i8> %u
 }
@@ -622,10 +550,8 @@ define <2 x i16> @vmacc_vv_nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
   ret <2 x i16> %u
 }
@@ -637,11 +563,9 @@ define <2 x i16> @vmacc_vv_nxv2i16_unmasked(<2 x i16> %a, <2 x i16> %b, <2 x i16
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %allones, <2 x i16> %y, <2 x i16> %c, i32 %evl)
+  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> splat (i1 -1), <2 x i16> %y, <2 x i16> %c, i32 %evl)
   ret <2 x i16> %u
 }
 
@@ -654,10 +578,8 @@ define <2 x i16> @vmacc_vx_nxv2i16(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x i1>
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
   ret <2 x i16> %u
 }
@@ -671,11 +593,9 @@ define <2 x i16> @vmacc_vx_nxv2i16_unmasked(<2 x i16> %a, i16 %b, <2 x i16> %c,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %allones, <2 x i16> %y, <2 x i16> %c, i32 %evl)
+  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> splat (i1 -1), <2 x i16> %y, <2 x i16> %c, i32 %evl)
   ret <2 x i16> %u
 }
 
@@ -686,10 +606,8 @@ define <2 x i16> @vmacc_vv_nxv2i16_ta(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c,
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
   ret <2 x i16> %u
 }
@@ -703,10 +621,8 @@ define <2 x i16> @vmacc_vx_nxv2i16_ta(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i16> @llvm.vp.add.nxv2i16(<2 x i16> %x, <2 x i16> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
   ret <2 x i16> %u
 }
@@ -723,10 +639,8 @@ define <4 x i16> @vmacc_vv_nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
   ret <4 x i16> %u
 }
@@ -738,11 +652,9 @@ define <4 x i16> @vmacc_vv_nxv4i16_unmasked(<4 x i16> %a, <4 x i16> %b, <4 x i16
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl)
-  %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %allones, <4 x i16> %y, <4 x i16> %c, i32 %evl)
+  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> splat (i1 -1), i32 %evl)
+  %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> splat (i1 -1), <4 x i16> %y, <4 x i16> %c, i32 %evl)
   ret <4 x i16> %u
 }
 
@@ -755,10 +667,8 @@ define <4 x i16> @vmacc_vx_nxv4i16(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x i1>
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
   ret <4 x i16> %u
 }
@@ -772,11 +682,9 @@ define <4 x i16> @vmacc_vx_nxv4i16_unmasked(<4 x i16> %a, i16 %b, <4 x i16> %c,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl)
-  %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %allones, <4 x i16> %y, <4 x i16> %c, i32 %evl)
+  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> splat (i1 -1), i32 %evl)
+  %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> splat (i1 -1), <4 x i16> %y, <4 x i16> %c, i32 %evl)
   ret <4 x i16> %u
 }
 
@@ -787,10 +695,8 @@ define <4 x i16> @vmacc_vv_nxv4i16_ta(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c,
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
   ret <4 x i16> %u
 }
@@ -804,10 +710,8 @@ define <4 x i16> @vmacc_vx_nxv4i16_ta(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i16> @llvm.vp.add.nxv4i16(<4 x i16> %x, <4 x i16> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
   ret <4 x i16> %u
 }
@@ -824,10 +728,8 @@ define <8 x i16> @vmacc_vv_nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl)
+  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl)
   ret <8 x i16> %u
 }
@@ -839,11 +741,9 @@ define <8 x i16> @vmacc_vv_nxv8i16_unmasked(<8 x i16> %a, <8 x i16> %b, <8 x i16
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl)
-  %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %allones, <8 x i16> %y, <8 x i16> %c, i32 %evl)
+  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> splat (i1 -1), i32 %evl)
+  %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> splat (i1 -1), <8 x i16> %y, <8 x i16> %c, i32 %evl)
   ret <8 x i16> %u
 }
 
@@ -856,10 +756,8 @@ define <8 x i16> @vmacc_vx_nxv8i16(<8 x i16> %a, i16 %b, <8 x i16> %c, <8 x i1>
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl)
+  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl)
   ret <8 x i16> %u
 }
@@ -873,11 +771,9 @@ define <8 x i16> @vmacc_vx_nxv8i16_unmasked(<8 x i16> %a, i16 %b, <8 x i16> %c,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl)
-  %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %allones, <8 x i16> %y, <8 x i16> %c, i32 %evl)
+  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> splat (i1 -1), i32 %evl)
+  %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> splat (i1 -1), <8 x i16> %y, <8 x i16> %c, i32 %evl)
   ret <8 x i16> %u
 }
 
@@ -888,10 +784,8 @@ define <8 x i16> @vmacc_vv_nxv8i16_ta(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c,
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl)
+  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl)
   ret <8 x i16> %u
 }
@@ -905,10 +799,8 @@ define <8 x i16> @vmacc_vx_nxv8i16_ta(<8 x i16> %a, i16 %b, <8 x i16> %c, <8 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> %allones, i32 %evl)
+  %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i16> @llvm.vp.add.nxv8i16(<8 x i16> %x, <8 x i16> %c, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl)
   ret <8 x i16> %u
 }
@@ -925,10 +817,8 @@ define <16 x i16> @vmacc_vv_nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %c
 ; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl)
+  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl)
   ret <16 x i16> %u
 }
@@ -940,11 +830,9 @@ define <16 x i16> @vmacc_vv_nxv16i16_unmasked(<16 x i16> %a, <16 x i16> %b, <16
 ; CHECK-NEXT:    vmacc.vv v12, v8, v10
 ; CHECK-NEXT:    vmv2r.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl)
-  %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %allones, <16 x i16> %y, <16 x i16> %c, i32 %evl)
+  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> splat (i1 -1), i32 %evl)
+  %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> splat (i1 -1), <16 x i16> %y, <16 x i16> %c, i32 %evl)
   ret <16 x i16> %u
 }
 
@@ -957,10 +845,8 @@ define <16 x i16> @vmacc_vx_nxv16i16(<16 x i16> %a, i16 %b, <16 x i16> %c, <16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl)
+  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl)
   ret <16 x i16> %u
 }
@@ -974,11 +860,9 @@ define <16 x i16> @vmacc_vx_nxv16i16_unmasked(<16 x i16> %a, i16 %b, <16 x i16>
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl)
-  %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %allones, <16 x i16> %y, <16 x i16> %c, i32 %evl)
+  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> splat (i1 -1), i32 %evl)
+  %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> splat (i1 -1), <16 x i16> %y, <16 x i16> %c, i32 %evl)
   ret <16 x i16> %u
 }
 
@@ -989,10 +873,8 @@ define <16 x i16> @vmacc_vv_nxv16i16_ta(<16 x i16> %a, <16 x i16> %b, <16 x i16>
 ; CHECK-NEXT:    vmacc.vv v12, v8, v10, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl)
+  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl)
   ret <16 x i16> %u
 }
@@ -1006,10 +888,8 @@ define <16 x i16> @vmacc_vx_nxv16i16_ta(<16 x i16> %a, i16 %b, <16 x i16> %c, <
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> %allones, i32 %evl)
+  %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i16> @llvm.vp.add.nxv16i16(<16 x i16> %x, <16 x i16> %c, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl)
   ret <16 x i16> %u
 }
@@ -1026,10 +906,8 @@ define <32 x i16> @vmacc_vv_nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> %c
 ; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
 ; CHECK-NEXT:    vmv4r.v v8, v16
 ; CHECK-NEXT:    ret
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl)
+  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl)
   ret <32 x i16> %u
 }
@@ -1041,11 +919,9 @@ define <32 x i16> @vmacc_vv_nxv32i16_unmasked(<32 x i16> %a, <32 x i16> %b, <32
 ; CHECK-NEXT:    vmacc.vv v16, v8, v12
 ; CHECK-NEXT:    vmv4r.v v8, v16
 ; CHECK-NEXT:    ret
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl)
-  %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %allones, <32 x i16> %y, <32 x i16> %c, i32 %evl)
+  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> splat (i1 -1), i32 %evl)
+  %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> splat (i1 -1), <32 x i16> %y, <32 x i16> %c, i32 %evl)
   ret <32 x i16> %u
 }
 
@@ -1058,10 +934,8 @@ define <32 x i16> @vmacc_vx_nxv32i16(<32 x i16> %a, i16 %b, <32
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl)
+  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl)
   ret <32 x i16> %u
 }
@@ -1075,11 +949,9 @@ define <32 x i16> @vmacc_vx_nxv32i16_unmasked(<32 x i16> %a, i16 %b, <32 x i16>
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl)
-  %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %allones, <32 x i16> %y, <32 x i16> %c, i32 %evl)
+  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> splat (i1 -1), i32 %evl)
+  %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> splat (i1 -1), <32 x i16> %y, <32 x i16> %c, i32 %evl)
   ret <32 x i16> %u
 }
 
@@ -1090,10 +962,8 @@ define <32 x i16> @vmacc_vv_nxv32i16_ta(<32 x i16> %a, <32 x i16> %b, <32 x i16>
 ; CHECK-NEXT:    vmacc.vv v16, v8, v12, v0.t
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl)
+  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl)
   ret <32 x i16> %u
 }
@@ -1107,10 +977,8 @@ define <32 x i16> @vmacc_vx_nxv32i16_ta(<32 x i16> %a, i16 %b, <32 x i16> %c, <
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> %allones, i32 %evl)
+  %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i16> @llvm.vp.add.nxv32i16(<32 x i16> %x, <32 x i16> %c, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl)
   ret <32 x i16> %u
 }
@@ -1127,10 +995,8 @@ define <2 x i32> @vmacc_vv_nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl)
   ret <2 x i32> %u
 }
@@ -1142,11 +1008,9 @@ define <2 x i32> @vmacc_vv_nxv2i32_unmasked(<2 x i32> %a, <2 x i32> %b, <2 x i32
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %allones, <2 x i32> %y, <2 x i32> %c, i32 %evl)
+  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> splat (i1 -1), <2 x i32> %y, <2 x i32> %c, i32 %evl)
   ret <2 x i32> %u
 }
 
@@ -1159,10 +1023,8 @@ define <2 x i32> @vmacc_vx_nxv2i32(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x i1>
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl)
   ret <2 x i32> %u
 }
@@ -1176,11 +1038,9 @@ define <2 x i32> @vmacc_vx_nxv2i32_unmasked(<2 x i32> %a, i32 %b, <2 x i32> %c,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %allones, <2 x i32> %y, <2 x i32> %c, i32 %evl)
+  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> splat (i1 -1), <2 x i32> %y, <2 x i32> %c, i32 %evl)
   ret <2 x i32> %u
 }
 
@@ -1191,10 +1051,8 @@ define <2 x i32> @vmacc_vv_nxv2i32_ta(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c,
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl)
   ret <2 x i32> %u
 }
@@ -1208,10 +1066,8 @@ define <2 x i32> @vmacc_vx_nxv2i32_ta(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i32> @llvm.vp.add.nxv2i32(<2 x i32> %x, <2 x i32> %c, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl)
   ret <2 x i32> %u
 }
@@ -1228,10 +1084,8 @@ define <4 x i32> @vmacc_vv_nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl)
   ret <4 x i32> %u
 }
@@ -1243,11 +1097,9 @@ define <4 x i32> @vmacc_vv_nxv4i32_unmasked(<4 x i32> %a, <4 x i32> %b, <4 x i32
 ; CHECK-NEXT:    vmacc.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl)
-  %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %allones, <4 x i32> %y, <4 x i32> %c, i32 %evl)
+  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> splat (i1 -1), i32 %evl)
+  %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> splat (i1 -1), <4 x i32> %y, <4 x i32> %c, i32 %evl)
   ret <4 x i32> %u
 }
 
@@ -1260,10 +1112,8 @@ define <4 x i32> @vmacc_vx_nxv4i32(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x i1>
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl)
   ret <4 x i32> %u
 }
@@ -1277,11 +1127,9 @@ define <4 x i32> @vmacc_vx_nxv4i32_unmasked(<4 x i32> %a, i32 %b, <4 x i32> %c,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4
x i1> %allones, i32 %evl) - %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl) - %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %allones, <4 x i32> %y, <4 x i32> %c, i32 %evl) + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> splat (i1 -1), <4 x i32> %y, <4 x i32> %c, i32 %evl) ret <4 x i32> %u } @@ -1292,10 +1140,8 @@ define <4 x i32> @vmacc_vv_nxv4i32_ta(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl) - %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl) + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) ret <4 x i32> %u } @@ -1309,10 +1155,8 @@ define <4 x i32> @vmacc_vx_nxv4i32_ta(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> %allones, i32 %evl) + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i32> @llvm.vp.add.nxv4i32(<4 x i32> %x, <4 x i32> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) ret <4 x i32> %u } @@ -1329,10 +1173,8 @@ define <8 x i32> @vmacc_vv_nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl) + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) ret <8 x i32> %u } @@ -1344,11 +1186,9 @@ define <8 x i32> @vmacc_vv_nxv8i32_unmasked(<8 x i32> %a, <8 x i32> %b, <8 x i32 ; CHECK-NEXT: vmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call 
<8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl) - %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %allones, <8 x i32> %y, <8 x i32> %c, i32 %evl) + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> splat (i1 -1), <8 x i32> %y, <8 x i32> %c, i32 %evl) ret <8 x i32> %u } @@ -1361,10 +1201,8 @@ define <8 x i32> @vmacc_vx_nxv8i32(<8 x i32> %a, i32 %b, <8 x i32> %c, <8 x i1> ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl) + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) ret <8 x i32> %u } @@ -1378,11 +1216,9 @@ define <8 x i32> @vmacc_vx_nxv8i32_unmasked(<8 x i32> %a, i32 %b, <8 x i32> %c, ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl) - %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %allones, <8 x i32> %y, <8 x i32> %c, i32 %evl) + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> splat (i1 -1), <8 x i32> %y, <8 x i32> %c, i32 %evl) ret <8 x i32> %u } @@ -1393,10 +1229,8 @@ define <8 x i32> @vmacc_vv_nxv8i32_ta(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl) + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) ret <8 x i32> %u } @@ -1410,10 +1244,8 @@ define <8 x i32> @vmacc_vx_nxv8i32_ta(<8 x i32> %a, i32 %b, <8 x i32> 
%c, <8 x ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> %allones, i32 %evl) + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i32> @llvm.vp.add.nxv8i32(<8 x i32> %x, <8 x i32> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) ret <8 x i32> %u } @@ -1430,10 +1262,8 @@ define <16 x i32> @vmacc_vv_nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl) - %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl) + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) ret <16 x i32> %u } @@ -1445,11 +1275,9 @@ define <16 x i32> @vmacc_vv_nxv16i32_unmasked(<16 x i32> %a, <16 x i32> %b, <16 ; CHECK-NEXT: vmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl) - %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl) - %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %allones, <16 x i32> %y, <16 x i32> %c, i32 %evl) + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> splat (i1 -1), <16 x i32> %y, <16 x i32> %c, i32 %evl) ret <16 x i32> %u } @@ -1462,10 +1290,8 @@ define <16 x i32> @vmacc_vx_nxv16i32(<16 x i32> %a, i32 %b, <16 x i32> %c, <16 ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl) - %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl) + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> splat 
(i1 -1), i32 %evl) %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) ret <16 x i32> %u } @@ -1479,11 +1305,9 @@ define <16 x i32> @vmacc_vx_nxv16i32_unmasked(<16 x i32> %a, i32 %b, <16 x i32> ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl) - %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl) - %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %allones, <16 x i32> %y, <16 x i32> %c, i32 %evl) + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> splat (i1 -1), <16 x i32> %y, <16 x i32> %c, i32 %evl) ret <16 x i32> %u } @@ -1494,10 +1318,8 @@ define <16 x i32> @vmacc_vv_nxv16i32_ta(<16 x i32> %a, <16 x i32> %b, <16 x i32> ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl) - %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl) + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) ret <16 x i32> %u } @@ -1511,10 +1333,8 @@ define <16 x i32> @vmacc_vx_nxv16i32_ta(<16 x i32> %a, i32 %b, <16 x i32> %c, < ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl) - %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> %allones, i32 %evl) + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i32> @llvm.vp.add.nxv16i32(<16 x i32> %x, <16 x i32> %c, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) ret <16 x i32> %u } @@ -1531,10 +1351,8 @@ define <2 x i64> @vmacc_vv_nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl) - %y = call <2 x i64> 
@llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl) + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) ret <2 x i64> %u } @@ -1546,11 +1364,9 @@ define <2 x i64> @vmacc_vv_nxv2i64_unmasked(<2 x i64> %a, <2 x i64> %b, <2 x i64 ; CHECK-NEXT: vmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl) - %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl) - %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %allones, <2 x i64> %y, <2 x i64> %c, i32 %evl) + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> splat (i1 -1), <2 x i64> %y, <2 x i64> %c, i32 %evl) ret <2 x i64> %u } @@ -1578,10 +1394,8 @@ define <2 x i64> @vmacc_vx_nxv2i64(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1> ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl) - %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl) + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) ret <2 x i64> %u } @@ -1610,11 +1424,9 @@ define <2 x i64> @vmacc_vx_nxv2i64_unmasked(<2 x i64> %a, i64 %b, <2 x i64> %c, ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl) - %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl) - %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %allones, <2 x i64> %y, <2 x i64> %c, i32 %evl) + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> splat (i1 -1), <2 x i64> %y, <2 x i64> %c, i32 %evl) ret <2 x i64> %u } @@ -1625,10 +1437,8 @@ define <2 x i64> @vmacc_vv_nxv2i64_ta(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, ; CHECK-NEXT: vmacc.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %splat = 
insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl) - %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl) + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) ret <2 x i64> %u } @@ -1657,10 +1467,8 @@ define <2 x i64> @vmacc_vx_nxv2i64_ta(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl) - %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> %allones, i32 %evl) + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i64> @llvm.vp.add.nxv2i64(<2 x i64> %x, <2 x i64> %c, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) ret <2 x i64> %u } @@ -1677,10 +1485,8 @@ define <4 x i64> @vmacc_vv_nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl) - %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl) + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) ret <4 x i64> %u } @@ -1692,11 +1498,9 @@ define <4 x i64> @vmacc_vv_nxv4i64_unmasked(<4 x i64> %a, <4 x i64> %b, <4 x i64 ; CHECK-NEXT: vmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl) - %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl) - %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %allones, <4 x i64> %y, <4 x i64> %c, i32 %evl) + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> splat (i1 -1), <4 x i64> %y, <4 x i64> %c, i32 %evl) ret <4 x i64> %u } @@ -1724,10 +1528,8 @@ define <4 x i64> @vmacc_vx_nxv4i64(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1> ; RV64-NEXT: ret %elt.head = 
insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl) + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) ret <4 x i64> %u } @@ -1756,11 +1558,9 @@ define <4 x i64> @vmacc_vx_nxv4i64_unmasked(<4 x i64> %a, i64 %b, <4 x i64> %c, ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl) - %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %allones, <4 x i64> %y, <4 x i64> %c, i32 %evl) + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> splat (i1 -1), <4 x i64> %y, <4 x i64> %c, i32 %evl) ret <4 x i64> %u } @@ -1771,10 +1571,8 @@ define <4 x i64> @vmacc_vv_nxv4i64_ta(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, ; CHECK-NEXT: vmacc.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl) - %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl) + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) ret <4 x i64> %u } @@ -1803,10 +1601,8 @@ define <4 x i64> @vmacc_vx_nxv4i64_ta(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> %allones, i32 %evl) + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i64> @llvm.vp.add.nxv4i64(<4 x i64> %x, <4 x i64> %c, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i64> 
@llvm.vp.select.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) ret <4 x i64> %u } @@ -1823,10 +1619,8 @@ define <8 x i64> @vmacc_vv_nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl) + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) ret <8 x i64> %u } @@ -1838,11 +1632,9 @@ define <8 x i64> @vmacc_vv_nxv8i64_unmasked(<8 x i64> %a, <8 x i64> %b, <8 x i64 ; CHECK-NEXT: vmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl) - %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %allones, <8 x i64> %y, <8 x i64> %c, i32 %evl) + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> splat (i1 -1), <8 x i64> %y, <8 x i64> %c, i32 %evl) ret <8 x i64> %u } @@ -1870,10 +1662,8 @@ define <8 x i64> @vmacc_vx_nxv8i64(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1> ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl) + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) ret <8 x i64> %u } @@ -1902,11 +1692,9 @@ define <8 x i64> @vmacc_vx_nxv8i64_unmasked(<8 x i64> %a, i64 %b, <8 x i64> %c, ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl) - %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %allones, <8 x i64> %y, <8 x i64> %c, i32 
%evl) + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> splat (i1 -1), <8 x i64> %y, <8 x i64> %c, i32 %evl) ret <8 x i64> %u } @@ -1917,10 +1705,8 @@ define <8 x i64> @vmacc_vv_nxv8i64_ta(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, ; CHECK-NEXT: vmacc.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl) + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) ret <8 x i64> %u } @@ -1949,10 +1735,8 @@ define <8 x i64> @vmacc_vx_nxv8i64_ta(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> %allones, i32 %evl) + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i64> @llvm.vp.add.nxv8i64(<8 x i64> %x, <8 x i64> %c, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) ret <8 x i64> %u } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll index 6af5ba185b8b8..3db44e87109bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -39,9 +39,7 @@ define <2 x i8> @vmax_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -65,9 +63,7 @@ define <2 x i8> @vmax_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.smax.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x 
i8> %v } @@ -89,9 +85,7 @@ define <4 x i8> @vmax_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -127,9 +121,7 @@ define <4 x i8> @vmax_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.smax.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -151,9 +143,7 @@ define <5 x i8> @vmax_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.smax.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.smax.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -177,9 +167,7 @@ define <5 x i8> @vmax_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.smax.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.smax.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -201,9 +189,7 @@ define <8 x i8> @vmax_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -227,9 +213,7 @@ define <8 x i8> @vmax_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.smax.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -251,9 +235,7 @@ define <16 x i8> @vmax_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> 
%b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -277,9 +259,7 @@ define <16 x i8> @vmax_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.smax.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -332,9 +312,7 @@ define <256 x i8> @vmax_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %e ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %head = insertelement <256 x i1> poison, i1 true, i32 0 - %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.smax.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + %v = call <256 x i8> @llvm.vp.smax.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> splat (i1 true), i32 %evl) ret <256 x i8> %v } @@ -390,9 +368,7 @@ define <2 x i16> @vmax_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -416,9 +392,7 @@ define <2 x i16> @vmax_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.smax.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -440,9 +414,7 @@ define <4 x i16> @vmax_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -466,9 +438,7 @@ define <4 x i16> @vmax_vx_v4i16_unmasked(<4 x i16> 
%va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.smax.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -490,9 +460,7 @@ define <8 x i16> @vmax_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -516,9 +484,7 @@ define <8 x i16> @vmax_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.smax.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -540,9 +506,7 @@ define <16 x i16> @vmax_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -566,9 +530,7 @@ define <16 x i16> @vmax_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.smax.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -590,9 +552,7 @@ define <2 x i32> @vmax_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -616,9 +576,7 @@ define <2 x i32> 
@vmax_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.smax.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -640,9 +598,7 @@ define <4 x i32> @vmax_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -666,9 +622,7 @@ define <4 x i32> @vmax_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -690,9 +644,7 @@ define <8 x i32> @vmax_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -716,9 +668,7 @@ define <8 x i32> @vmax_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.smax.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -740,9 +690,7 @@ define <16 x i32> @vmax_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -766,9 +714,7 @@ define <16 x i32> 
@vmax_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.smax.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -790,9 +736,7 @@ define <2 x i64> @vmax_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -844,9 +788,7 @@ define <2 x i64> @vmax_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.smax.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -868,9 +810,7 @@ define <4 x i64> @vmax_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -922,9 +862,7 @@ define <4 x i64> @vmax_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.smax.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -946,9 +884,7 @@ define <8 x i64> @vmax_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1000,9 +936,7 @@ define <8 x i64> 
@vmax_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.smax.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1024,9 +958,7 @@ define <16 x i64> @vmax_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1078,9 +1010,7 @@ define <16 x i64> @vmax_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.smax.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1092,16 +1022,16 @@ define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-LABEL: vmax_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmax.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1134,8 +1064,6 @@ define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vmax.vx v16, v16, a2, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.smax.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.smax.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll index 12c6410068c69..c97c2232715f5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -38,9 +38,7 @@ define <2 x i8> @vmaxu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; 
CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -64,9 +62,7 @@ define <2 x i8> @vmaxu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.umax.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -88,9 +84,7 @@ define <4 x i8> @vmaxu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -126,9 +120,7 @@ define <4 x i8> @vmaxu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.umax.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -150,9 +142,7 @@ define <5 x i8> @vmaxu_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.umax.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.umax.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -176,9 +166,7 @@ define <5 x i8> @vmaxu_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.umax.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.umax.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -200,9 +188,7 @@ define <8 x i8> @vmaxu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 
true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -226,9 +212,7 @@ define <8 x i8> @vmaxu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.umax.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -250,9 +234,7 @@ define <16 x i8> @vmaxu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -276,9 +258,7 @@ define <16 x i8> @vmaxu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.umax.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -331,9 +311,7 @@ define <256 x i8> @vmaxu_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %head = insertelement <256 x i1> poison, i1 true, i32 0 - %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.umax.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + %v = call <256 x i8> @llvm.vp.umax.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> splat (i1 true), i32 %evl) ret <256 x i8> %v } @@ -389,9 +367,7 @@ define <2 x i16> @vmaxu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -415,9 +391,7 @@ define <2 x i16> @vmaxu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> 
zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.umax.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -439,9 +413,7 @@ define <4 x i16> @vmaxu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -465,9 +437,7 @@ define <4 x i16> @vmaxu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.umax.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -489,9 +459,7 @@ define <8 x i16> @vmaxu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -515,9 +483,7 @@ define <8 x i16> @vmaxu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.umax.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -539,9 +505,7 @@ define <16 x i16> @vmaxu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 z ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -565,9 +529,7 @@ define <16 x i16> @vmaxu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> 
zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.umax.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -589,9 +551,7 @@ define <2 x i32> @vmaxu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -615,9 +575,7 @@ define <2 x i32> @vmaxu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.umax.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -639,9 +597,7 @@ define <4 x i32> @vmaxu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -665,9 +621,7 @@ define <4 x i32> @vmaxu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -689,9 +643,7 @@ define <8 x i32> @vmaxu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -715,9 +667,7 @@ define <8 x i32> @vmaxu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> 
zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.umax.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -739,9 +689,7 @@ define <16 x i32> @vmaxu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 z ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -765,9 +713,7 @@ define <16 x i32> @vmaxu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.umax.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -789,9 +735,7 @@ define <2 x i64> @vmaxu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -843,9 +787,7 @@ define <2 x i64> @vmaxu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.umax.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -867,9 +809,7 @@ define <4 x i64> @vmaxu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -921,9 +861,7 @@ define <4 x i64> @vmaxu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, 
<4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.umax.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -945,9 +883,7 @@ define <8 x i64> @vmaxu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -999,9 +935,7 @@ define <8 x i64> @vmaxu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.umax.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1023,9 +957,7 @@ define <16 x i64> @vmaxu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1077,9 +1009,7 @@ define <16 x i64> @vmaxu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.umax.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1091,16 +1021,16 @@ define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-LABEL: vmaxu_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmaxu.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1133,8 +1063,6 @@ define <32 x 
i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vmaxu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.umax.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.umax.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll index f5b9421d28c34..eaa19110a2a28 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -39,9 +39,7 @@ define <2 x i8> @vmin_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -65,9 +63,7 @@ define <2 x i8> @vmin_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.smin.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -89,9 +85,7 @@ define <4 x i8> @vmin_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -127,9 +121,7 @@ define <4 x i8> @vmin_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.smin.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -151,9 +143,7 @@ define <5 x i8> @vmin_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.smin.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + %v = 
call <5 x i8> @llvm.vp.smin.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -177,9 +167,7 @@ define <5 x i8> @vmin_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.smin.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.smin.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -201,9 +189,7 @@ define <8 x i8> @vmin_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -227,9 +213,7 @@ define <8 x i8> @vmin_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.smin.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -251,9 +235,7 @@ define <16 x i8> @vmin_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -277,9 +259,7 @@ define <16 x i8> @vmin_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.smin.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -332,9 +312,7 @@ define <256 x i8> @vmin_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext %e ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %head = insertelement <256 x i1> poison, i1 true, i32 0 - %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.smin.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x 
i1> %m, i32 %evl) + %v = call <256 x i8> @llvm.vp.smin.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> splat (i1 true), i32 %evl) ret <256 x i8> %v } @@ -390,9 +368,7 @@ define <2 x i16> @vmin_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -416,9 +392,7 @@ define <2 x i16> @vmin_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.smin.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -440,9 +414,7 @@ define <4 x i16> @vmin_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -466,9 +438,7 @@ define <4 x i16> @vmin_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.smin.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -490,9 +460,7 @@ define <8 x i16> @vmin_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -516,9 +484,7 @@ define <8 x i16> @vmin_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) 
+ %v = call <8 x i16> @llvm.vp.smin.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -540,9 +506,7 @@ define <16 x i16> @vmin_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -566,9 +530,7 @@ define <16 x i16> @vmin_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.smin.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -590,9 +552,7 @@ define <2 x i32> @vmin_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -616,9 +576,7 @@ define <2 x i32> @vmin_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.smin.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -640,9 +598,7 @@ define <4 x i32> @vmin_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -666,9 +622,7 @@ define <4 x i32> @vmin_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, 
i32 %evl) + %v = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -690,9 +644,7 @@ define <8 x i32> @vmin_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -716,9 +668,7 @@ define <8 x i32> @vmin_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.smin.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -740,9 +690,7 @@ define <16 x i32> @vmin_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -766,9 +714,7 @@ define <16 x i32> @vmin_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.smin.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -790,9 +736,7 @@ define <2 x i64> @vmin_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -844,9 +788,7 @@ define <2 x i64> @vmin_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x 
i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.smin.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -868,9 +810,7 @@ define <4 x i64> @vmin_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -922,9 +862,7 @@ define <4 x i64> @vmin_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.smin.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -946,9 +884,7 @@ define <8 x i64> @vmin_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1000,9 +936,7 @@ define <8 x i64> @vmin_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.smin.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1024,9 +958,7 @@ define <16 x i64> @vmin_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1078,9 +1010,7 @@ define <16 x i64> @vmin_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> %va, <16 x i64> %vb, 
<16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.smin.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1092,16 +1022,16 @@ define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-LABEL: vmin_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB74_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmin.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1134,8 +1064,6 @@ define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vmin.vx v16, v16, a2, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.smin.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.smin.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll index d07580efceb50..48175e5b905ba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -38,9 +38,7 @@ define <2 x i8> @vminu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -64,9 +62,7 @@ define <2 x i8> @vminu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.umin.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -88,9 +84,7 @@ define <4 x i8> @vminu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -126,9 +120,7 @@ define <4 x i8> @vminu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { 
; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.umin.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -150,9 +142,7 @@ define <5 x i8> @vminu_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.umin.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.umin.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -176,9 +166,7 @@ define <5 x i8> @vminu_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.umin.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.umin.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -200,9 +188,7 @@ define <8 x i8> @vminu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -226,9 +212,7 @@ define <8 x i8> @vminu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.umin.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -250,9 +234,7 @@ define <16 x i8> @vminu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -276,9 +258,7 @@ define <16 x i8> @vminu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 
%vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.umin.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -331,9 +311,7 @@ define <256 x i8> @vminu_vx_v258i8_unmasked(<256 x i8> %va, i8 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %head = insertelement <256 x i1> poison, i1 true, i32 0 - %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.umin.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + %v = call <256 x i8> @llvm.vp.umin.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> splat (i1 true), i32 %evl) ret <256 x i8> %v } @@ -389,9 +367,7 @@ define <2 x i16> @vminu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -415,9 +391,7 @@ define <2 x i16> @vminu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.umin.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -439,9 +413,7 @@ define <4 x i16> @vminu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -465,9 +437,7 @@ define <4 x i16> @vminu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.umin.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -489,9 +459,7 @@ define <8 x i16> @vminu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroe ; 
CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -515,9 +483,7 @@ define <8 x i16> @vminu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.umin.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -539,9 +505,7 @@ define <16 x i16> @vminu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 z ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -565,9 +529,7 @@ define <16 x i16> @vminu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.umin.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -589,9 +551,7 @@ define <2 x i32> @vminu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -615,9 +575,7 @@ define <2 x i32> @vminu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.umin.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -639,9 +597,7 @@ define <4 x i32> @vminu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, 
i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vminu.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+  %v = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i32> %v
 }

@@ -665,9 +621,7 @@ define <4 x i32> @vminu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %ev
 ; CHECK-NEXT: ret
   %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  %v = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i32> %v
 }

@@ -689,9 +643,7 @@ define <8 x i32> @vminu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vminu.vv v8, v8, v10
 ; CHECK-NEXT: ret
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x i32> %v
 }

@@ -715,9 +667,7 @@ define <8 x i32> @vminu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %ev
 ; CHECK-NEXT: ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i32> @llvm.vp.umin.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x i32> %v
 }

@@ -739,9 +689,7 @@ define <16 x i32> @vminu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 z
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vminu.vv v8, v8, v12
 ; CHECK-NEXT: ret
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+  %v = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x i32> %v
 }

@@ -765,9 +713,7 @@ define <16 x i32> @vminu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext
 ; CHECK-NEXT: ret
   %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  %v = call <16 x i32> @llvm.vp.umin.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x i32> %v
 }

@@ -789,9 +735,7 @@ define <2 x i64> @vminu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vminu.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement <2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+  %v = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x i64> %v
 }

@@ -843,9 +787,7 @@ define <2 x i64> @vminu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev
 ; RV64-NEXT: ret
   %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
-  %head = insertelement <2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  %v = call <2 x i64> @llvm.vp.umin.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x i64> %v
 }

@@ -867,9 +809,7 @@ define <4 x i64> @vminu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vminu.vv v8, v8, v10
 ; CHECK-NEXT: ret
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+  %v = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i64> %v
 }

@@ -921,9 +861,7 @@ define <4 x i64> @vminu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev
 ; RV64-NEXT: ret
   %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  %v = call <4 x i64> @llvm.vp.umin.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i64> %v
 }

@@ -945,9 +883,7 @@ define <8 x i64> @vminu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vminu.vv v8, v8, v12
 ; CHECK-NEXT: ret
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x i64> %v
 }

@@ -999,9 +935,7 @@ define <8 x i64> @vminu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev
 ; RV64-NEXT: ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i64> @llvm.vp.umin.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x i64> %v
 }

@@ -1023,9 +957,7 @@ define <16 x i64> @vminu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 z
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vminu.vv v8, v8, v16
 ; CHECK-NEXT: ret
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+  %v = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x i64> %v
 }

@@ -1077,9 +1009,7 @@ define <16 x i64> @vminu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
 ; RV64-NEXT: ret
   %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  %v = call <16 x i64> @llvm.vp.umin.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x i64> %v
 }

@@ -1091,16 +1021,16 @@ define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
 ; RV32-LABEL: vminu_vx_v32i64:
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v7, v0, 2
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; RV32-NEXT: li a2, 16
-; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vslidedown.vi v7, v0, 2
 ; RV32-NEXT: mv a1, a0
 ; RV32-NEXT: bltu a0, a2, .LBB74_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: li a1, 16
 ; RV32-NEXT: .LBB74_2:
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT: vminu.vv v8, v8, v24, v0.t
 ; RV32-NEXT: addi a1, a0, -16
@@ -1133,8 +1063,6 @@ define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vminu.vx v16, v16, a2, v0.t
 ; RV64-NEXT: ret
-  %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
-  %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
-  %v = call <32 x i64> @llvm.vp.umin.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+  %v = call <32 x i64> @llvm.vp.umin.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl)
   ret <32 x i64> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
index fbb97d6bf3229..2525ac03ea9a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
@@ -34,9 +34,7 @@ define <2 x i8> @vmul_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement <2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x i8> @llvm.vp.mul.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+  %v = call <2 x i8> @llvm.vp.mul.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x i8> %v
 }

@@ -60,9 +58,7 @@ define <2 x i8> @vmul_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
   %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
-  %head = insertelement <2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x i8> @llvm.vp.mul.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+  %v = call <2 x i8> @llvm.vp.mul.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x i8> %v
 }

@@ -84,9 +80,7 @@ define <4 x i8> @vmul_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i8> @llvm.vp.mul.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+  %v = call <4 x i8> @llvm.vp.mul.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i8> %v
 }

@@ -110,9 +104,7 @@ define <4 x i8> @vmul_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
   %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i8> @llvm.vp.mul.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+  %v = call <4 x i8> @llvm.vp.mul.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i8> %v
 }

@@ -134,9 +126,7 @@ define <8 x i8> @vmul_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i8> @llvm.vp.mul.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i8> @llvm.vp.mul.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x i8> %v
 }

@@ -160,9 +150,7 @@ define <8 x i8> @vmul_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
   %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i8> @llvm.vp.mul.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i8> @llvm.vp.mul.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x i8> %v
 }

@@ -184,9 +172,7 @@ define <16 x i8> @vmul_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i8> @llvm.vp.mul.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+  %v = call <16 x i8> @llvm.vp.mul.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x i8> %v
 }

@@ -210,9 +196,7 @@ define <16 x i8> @vmul_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl)
 ; CHECK-NEXT: ret
   %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i8> @llvm.vp.mul.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+  %v = call <16 x i8> @llvm.vp.mul.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x i8> %v
 }

@@ -234,9 +218,7 @@ define <2 x i16> @vmul_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement <2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x i16> @llvm.vp.mul.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+  %v = call <2 x i16> @llvm.vp.mul.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x i16> %v
 }

@@ -260,9 +242,7 @@ define <2 x i16> @vmul_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
   %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
-  %head = insertelement <2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x i16> @llvm.vp.mul.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+  %v = call <2 x i16> @llvm.vp.mul.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x i16> %v
 }

@@ -284,9 +264,7 @@ define <4 x i16> @vmul_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i16> @llvm.vp.mul.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+  %v = call <4 x i16> @llvm.vp.mul.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i16> %v
 }

@@ -310,9 +288,7 @@ define <4 x i16> @vmul_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
   %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i16> @llvm.vp.mul.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+  %v = call <4 x i16> @llvm.vp.mul.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i16> %v
 }

@@ -334,9 +310,7 @@ define <8 x i16> @vmul_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i16> @llvm.vp.mul.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i16> @llvm.vp.mul.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x i16> %v
 }

@@ -372,9 +346,7 @@ define <8 x i16> @vmul_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
   %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i16> @llvm.vp.mul.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i16> @llvm.vp.mul.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x i16> %v
 }

@@ -396,9 +368,7 @@ define <12 x i16> @vmul_vv_v12i16_unmasked(<12 x i16> %va, <12 x i16> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v10
 ; CHECK-NEXT: ret
-  %head = insertelement <12 x i1> poison, i1 true, i32 0
-  %m = shufflevector <12 x i1> %head, <12 x i1> poison, <12 x i32> zeroinitializer
-  %v = call <12 x i16> @llvm.vp.mul.v12i16(<12 x i16> %va, <12 x i16> %b, <12 x i1> %m, i32 %evl)
+  %v = call <12 x i16> @llvm.vp.mul.v12i16(<12 x i16> %va, <12 x i16> %b, <12 x i1> splat (i1 true), i32 %evl)
   ret <12 x i16> %v
 }

@@ -422,9 +392,7 @@ define <12 x i16> @vmul_vx_v12i16_unmasked(<12 x i16> %va, i16 %b, i32 zeroext %
 ; CHECK-NEXT: ret
   %elt.head = insertelement <12 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <12 x i16> %elt.head, <12 x i16> poison, <12 x i32> zeroinitializer
-  %head = insertelement <12 x i1> poison, i1 true, i32 0
-  %m = shufflevector <12 x i1> %head, <12 x i1> poison, <12 x i32> zeroinitializer
-  %v = call <12 x i16> @llvm.vp.mul.v12i16(<12 x i16> %va, <12 x i16> %vb, <12 x i1> %m, i32 %evl)
+  %v = call <12 x i16> @llvm.vp.mul.v12i16(<12 x i16> %va, <12 x i16> %vb, <12 x i1> splat (i1 true), i32 %evl)
   ret <12 x i16> %v
 }

@@ -446,9 +414,7 @@ define <16 x i16> @vmul_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v10
 ; CHECK-NEXT: ret
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i16> @llvm.vp.mul.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+  %v = call <16 x i16> @llvm.vp.mul.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x i16> %v
 }

@@ -472,9 +438,7 @@ define <16 x i16> @vmul_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %
 ; CHECK-NEXT: ret
   %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i16> @llvm.vp.mul.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+  %v = call <16 x i16> @llvm.vp.mul.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x i16> %v
 }

@@ -496,9 +460,7 @@ define <2 x i32> @vmul_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement <2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x i32> @llvm.vp.mul.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+  %v = call <2 x i32> @llvm.vp.mul.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x i32> %v
 }

@@ -522,9 +484,7 @@ define <2 x i32> @vmul_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
   %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
-  %head = insertelement <2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x i32> @llvm.vp.mul.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+  %v = call <2 x i32> @llvm.vp.mul.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x i32> %v
 }

@@ -546,9 +506,7 @@ define <4 x i32> @vmul_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i32> @llvm.vp.mul.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+  %v = call <4 x i32> @llvm.vp.mul.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i32> %v
 }

@@ -572,9 +530,7 @@ define <4 x i32> @vmul_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
   %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i32> @llvm.vp.mul.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+  %v = call <4 x i32> @llvm.vp.mul.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i32> %v
 }

@@ -596,9 +552,7 @@ define <8 x i32> @vmul_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v10
 ; CHECK-NEXT: ret
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x i32> %v
 }

@@ -622,9 +576,7 @@ define <8 x i32> @vmul_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
   %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i32> @llvm.vp.mul.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x i32> %v
 }

@@ -646,9 +598,7 @@ define <16 x i32> @vmul_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v12
 ; CHECK-NEXT: ret
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i32> @llvm.vp.mul.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+  %v = call <16 x i32> @llvm.vp.mul.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x i32> %v
 }

@@ -672,9 +622,7 @@ define <16 x i32> @vmul_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %
 ; CHECK-NEXT: ret
   %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
   %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i32> @llvm.vp.mul.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+  %v = call <16 x i32> @llvm.vp.mul.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x i32> %v
 }

@@ -696,9 +644,7 @@ define <2 x i64> @vmul_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement <2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x i64> @llvm.vp.mul.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+  %v = call <2 x i64> @llvm.vp.mul.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x i64> %v
 }

@@ -750,9 +696,7 @@ define <2 x i64> @vmul_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
 ; RV64-NEXT: ret
   %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
-  %head = insertelement <2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
-  %v = call <2 x i64> @llvm.vp.mul.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+  %v = call <2 x i64> @llvm.vp.mul.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl)
   ret <2 x i64> %v
 }

@@ -774,9 +718,7 @@ define <4 x i64> @vmul_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v10
 ; CHECK-NEXT: ret
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i64> @llvm.vp.mul.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+  %v = call <4 x i64> @llvm.vp.mul.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i64> %v
 }

@@ -828,9 +770,7 @@ define <4 x i64> @vmul_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
 ; RV64-NEXT: ret
   %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
-  %head = insertelement <4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-  %v = call <4 x i64> @llvm.vp.mul.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+  %v = call <4 x i64> @llvm.vp.mul.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl)
   ret <4 x i64> %v
 }

@@ -852,9 +792,7 @@ define <8 x i64> @vmul_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v12
 ; CHECK-NEXT: ret
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x i64> %v
 }

@@ -906,9 +844,7 @@ define <8 x i64> @vmul_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
 ; RV64-NEXT: ret
   %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
-  %head = insertelement <8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
-  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+  %v = call <8 x i64> @llvm.vp.mul.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl)
   ret <8 x i64> %v
 }

@@ -930,9 +866,7 @@ define <16 x i64> @vmul_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmul.vv v8, v8, v16
 ; CHECK-NEXT: ret
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+  %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x i64> %v
 }

@@ -984,8 +918,6 @@ define <16 x i64> @vmul_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
 ; RV64-NEXT: ret
   %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
   %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
-  %head = insertelement <16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
-  %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+  %v = call <16 x i64> @llvm.vp.mul.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl)
   ret <16 x i64> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll
index 744ed991bccc2..695fba6d54e03 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnmsac-vp.ll
@@ -16,10 +16,8 @@ define <2 x i8> @vnmsac_vv_nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
   ret <2 x i8> %u
 }
@@ -31,11 +29,9 @@ define <2 x i8> @vnmsac_vv_nxv2i8_unmasked(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %allones, <2 x i8> %y, <2 x i8> %c, i32 %evl)
+  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> splat (i1 -1), <2 x i8> %y, <2 x i8> %c, i32 %evl)
   ret <2 x i8> %u
 }

@@ -48,10 +44,8 @@ define <2 x i8> @vnmsac_vx_nxv2i8(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1> %m,
 ; CHECK-NEXT: ret
   %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
   ret <2 x i8> %u
 }
@@ -65,11 +59,9 @@ define <2 x i8> @vnmsac_vx_nxv2i8_unmasked(<2 x i8> %a, i8 %b, <2 x i8> %c, <2
 ; CHECK-NEXT: ret
   %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> %allones, <2 x i8> %y, <2 x i8> %c, i32 %evl)
+  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x i8> @llvm.vp.merge.nxv2i8(<2 x i1> splat (i1 -1), <2 x i8> %y, <2 x i8> %c, i32 %evl)
   ret <2 x i8> %u
 }

@@ -80,10 +72,8 @@ define <2 x i8> @vnmsac_vv_nxv2i8_ta(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
   ret <2 x i8> %u
 }
@@ -97,10 +87,8 @@ define <2 x i8> @vnmsac_vx_nxv2i8_ta(<2 x i8> %a, i8 %b, <2 x i8> %c, <2 x i1>
 ; CHECK-NEXT: ret
   %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i8> @llvm.vp.mul.nxv2i8(<2 x i8> %a, <2 x i8> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i8> @llvm.vp.sub.nxv2i8(<2 x i8> %c, <2 x i8> %x, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i8> @llvm.vp.select.nxv2i8(<2 x i1> %m, <2 x i8> %y, <2 x i8> %c, i32 %evl)
   ret <2 x i8> %u
 }
@@ -117,10 +105,8 @@ define <4 x i8> @vnmsac_vv_nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
   ret <4 x i8> %u
 }
@@ -132,11 +118,9 @@ define <4 x i8> @vnmsac_vv_nxv4i8_unmasked(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> %allones, i32 %evl)
-  %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %allones, <4 x i8> %y, <4 x i8> %c, i32 %evl)
+  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> splat (i1 -1), i32 %evl)
+  %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> splat (i1 -1), <4 x i8> %y, <4 x i8> %c, i32 %evl)
   ret <4 x i8> %u
 }

@@ -149,10 +133,8 @@ define <4 x i8> @vnmsac_vx_nxv4i8(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1> %m,
 ; CHECK-NEXT: ret
   %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
   ret <4 x i8> %u
 }
@@ -166,11 +148,9 @@ define <4 x i8> @vnmsac_vx_nxv4i8_unmasked(<4 x i8> %a, i8 %b, <4 x i8> %c, <4
 ; CHECK-NEXT: ret
   %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> %allones, i32 %evl)
-  %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> %allones, <4 x i8> %y, <4 x i8> %c, i32 %evl)
+  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> splat (i1 -1), i32 %evl)
+  %u = call <4 x i8> @llvm.vp.merge.nxv4i8(<4 x i1> splat (i1 -1), <4 x i8> %y, <4 x i8> %c, i32 %evl)
   ret <4 x i8> %u
 }

@@ -181,10 +161,8 @@ define <4 x i8> @vnmsac_vv_nxv4i8_ta(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
   ret <4 x i8> %u
 }
@@ -198,10 +176,8 @@ define <4 x i8> @vnmsac_vx_nxv4i8_ta(<4 x i8> %a, i8 %b, <4 x i8> %c, <4 x i1>
 ; CHECK-NEXT: ret
   %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i8> @llvm.vp.mul.nxv4i8(<4 x i8> %a, <4 x i8> %vb, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i8> @llvm.vp.sub.nxv4i8(<4 x i8> %c, <4 x i8> %x, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i8> @llvm.vp.select.nxv4i8(<4 x i1> %m, <4 x i8> %y, <4 x i8> %c, i32 %evl)
   ret <4 x i8> %u
 }
@@ -218,10 +194,8 @@ define <8 x i8> @vnmsac_vv_nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> %allones, i32 %evl)
+  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
   ret <8 x i8> %u
 }
@@ -233,11 +207,9 @@ define <8 x i8> @vnmsac_vv_nxv8i8_unmasked(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> %allones, i32 %evl)
-  %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %allones, <8 x i8> %y, <8 x i8> %c, i32 %evl)
+  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> splat (i1 -1), i32 %evl)
+  %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> splat (i1 -1), <8 x i8> %y, <8 x i8> %c, i32 %evl)
   ret <8 x i8> %u
 }

@@ -250,10 +222,8 @@ define <8 x i8> @vnmsac_vx_nxv8i8(<8 x i8> %a, i8 %b, <8 x i8> %c, <8 x i1> %m,
 ; CHECK-NEXT: ret
   %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> %allones, i32 %evl)
+  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
   ret <8 x i8> %u
 }
@@ -267,11 +237,9 @@ define <8 x i8> @vnmsac_vx_nxv8i8_unmasked(<8 x i8> %a, i8 %b, <8 x i8> %c, <8
 ; CHECK-NEXT: ret
   %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> %allones, i32 %evl)
-  %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> %allones, <8 x i8> %y, <8 x i8> %c, i32 %evl)
+  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> splat (i1 -1), i32 %evl)
+  %u = call <8 x i8> @llvm.vp.merge.nxv8i8(<8 x i1> splat (i1 -1), <8 x i8> %y, <8 x i8> %c, i32 %evl)
   ret <8 x i8> %u
 }

@@ -282,10 +250,8 @@ define <8 x i8> @vnmsac_vv_nxv8i8_ta(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> %allones, i32 %evl)
+  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %b, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
   ret <8 x i8> %u
 }
@@ -299,10 +265,8 @@ define <8 x i8> @vnmsac_vx_nxv8i8_ta(<8 x i8> %a, i8 %b, <8 x i8> %c, <8 x i1>
 ; CHECK-NEXT: ret
   %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
-  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
-  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> %allones, i32 %evl)
-  %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> %allones, i32 %evl)
+  %x = call <8 x i8> @llvm.vp.mul.nxv8i8(<8 x i8> %a, <8 x i8> %vb, <8 x i1> splat (i1 -1), i32 %evl)
+  %y = call <8 x i8> @llvm.vp.sub.nxv8i8(<8 x i8> %c, <8 x i8> %x, <8 x i1> splat (i1 -1), i32 %evl)
   %u = call <8 x i8> @llvm.vp.select.nxv8i8(<8 x i1> %m, <8 x i8> %y, <8 x i8> %c, i32 %evl)
   ret <8 x i8> %u
 }
@@ -319,10 +283,8 @@ define <16 x i8> @vnmsac_vv_nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> %allones, i32 %evl)
+  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
   ret <16 x i8> %u
 }
@@ -334,11 +296,9 @@ define <16 x i8> @vnmsac_vv_nxv16i8_unmasked(<16 x i8> %a, <16 x i8> %b, <16 x i
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> %allones, i32 %evl)
-  %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %allones, <16 x i8> %y, <16 x i8> %c, i32 %evl)
+  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> splat (i1 -1), i32 %evl)
+  %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> splat (i1 -1), <16 x i8> %y, <16 x i8> %c, i32 %evl)
   ret <16 x i8> %u
 }

@@ -351,10 +311,8 @@ define <16 x i8> @vnmsac_vx_nxv16i8(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x i1
 ; CHECK-NEXT: ret
   %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> %allones, i32 %evl)
+  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
   ret <16 x i8> %u
 }
@@ -368,11 +326,9 @@ define <16 x i8> @vnmsac_vx_nxv16i8_unmasked(<16 x i8> %a, i8 %b, <16 x i8> %c,
 ; CHECK-NEXT: ret
   %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> %allones, i32 %evl)
-  %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> %allones, <16 x i8> %y, <16 x i8> %c, i32 %evl)
+  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> splat (i1 -1), i32 %evl)
+  %u = call <16 x i8> @llvm.vp.merge.nxv16i8(<16 x i1> splat (i1 -1), <16 x i8> %y, <16 x i8> %c, i32 %evl)
   ret <16 x i8> %u
 }

@@ -383,10 +339,8 @@ define <16 x i8> @vnmsac_vv_nxv16i8_ta(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> %allones, i32 %evl)
+  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %b, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
   ret <16 x i8> %u
 }
@@ -400,10 +354,8 @@ define <16 x i8> @vnmsac_vx_nxv16i8_ta(<16 x i8> %a, i8 %b, <16 x i8> %c, <16 x
 ; CHECK-NEXT: ret
   %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
-  %splat = insertelement <16 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
-  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> %allones, i32 %evl)
-  %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> %allones, i32 %evl)
+  %x = call <16 x i8> @llvm.vp.mul.nxv16i8(<16 x i8> %a, <16 x i8> %vb, <16 x i1> splat (i1 -1), i32 %evl)
+  %y = call <16 x i8> @llvm.vp.sub.nxv16i8(<16 x i8> %c, <16 x i8> %x, <16 x i1> splat (i1 -1), i32 %evl)
   %u = call <16 x i8> @llvm.vp.select.nxv16i8(<16 x i1> %m, <16 x i8> %y, <16 x i8> %c, i32 %evl)
   ret <16 x i8> %u
 }
@@ -420,10 +372,8 @@ define <32 x i8> @vnmsac_vv_nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <
 ; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv2r.v v8, v12
 ; CHECK-NEXT: ret
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> %allones, i32 %evl)
+  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
   ret <32 x i8> %u
 }
@@ -435,11 +385,9 @@ define <32 x i8> @vnmsac_vv_nxv32i8_unmasked(<32 x i8> %a, <32 x i8> %b, <32 x i
 ; CHECK-NEXT: vnmsac.vv v12, v8, v10
 ; CHECK-NEXT: vmv2r.v v8, v12
 ; CHECK-NEXT: ret
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> %allones, i32 %evl)
-  %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %allones, <32 x i8> %y, <32 x i8> %c, i32 %evl)
+  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> splat (i1 -1), i32 %evl)
+  %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> splat (i1 -1), <32 x i8> %y, <32 x i8> %c, i32 %evl)
   ret <32 x i8> %u
 }

@@ -452,10 +400,8 @@ define <32 x i8> @vnmsac_vx_nxv32i8(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x i1
 ; CHECK-NEXT: ret
   %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> %allones, i32 %evl)
+  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
   ret <32 x i8> %u
 }
@@ -469,11 +415,9 @@ define <32 x i8> @vnmsac_vx_nxv32i8_unmasked(<32 x i8> %a, i8 %b, <32 x i8> %c,
 ; CHECK-NEXT: ret
   %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> %allones, i32 %evl)
-  %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> %allones, <32 x i8> %y, <32 x i8> %c, i32 %evl)
+  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> splat (i1 -1), i32 %evl)
+  %u = call <32 x i8> @llvm.vp.merge.nxv32i8(<32 x i1> splat (i1 -1), <32 x i8> %y, <32 x i8> %c, i32 %evl)
   ret <32 x i8> %u
 }

@@ -484,10 +428,8 @@ define <32 x i8> @vnmsac_vv_nxv32i8_ta(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c,
 ; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v12
 ; CHECK-NEXT: ret
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> %allones, i32 %evl)
+  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %b, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
   ret <32 x i8> %u
 }
@@ -501,10 +443,8 @@ define <32 x i8> @vnmsac_vx_nxv32i8_ta(<32 x i8> %a, i8 %b, <32 x i8> %c, <32 x
 ; CHECK-NEXT: ret
   %elt.head = insertelement <32 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <32 x i8> %elt.head, <32 x i8> poison, <32 x i32> zeroinitializer
-  %splat = insertelement <32 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
-  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> %allones, i32 %evl)
-  %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> %allones, i32 %evl)
+  %x = call <32 x i8> @llvm.vp.mul.nxv32i8(<32 x i8> %a, <32 x i8> %vb, <32 x i1> splat (i1 -1), i32 %evl)
+  %y = call <32 x i8> @llvm.vp.sub.nxv32i8(<32 x i8> %c, <32 x i8> %x, <32 x i1> splat (i1 -1), i32 %evl)
   %u = call <32 x i8> @llvm.vp.select.nxv32i8(<32 x i1> %m, <32 x i8> %y, <32 x i8> %c, i32 %evl)
   ret <32 x i8> %u
 }
@@ -521,10 +461,8 @@ define <64 x i8> @vnmsac_vv_nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <
 ; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t
 ; CHECK-NEXT: vmv4r.v v8, v16
 ; CHECK-NEXT: ret
-  %splat = insertelement <64 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
-  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl)
-  %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> %allones, i32 %evl)
+  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> splat (i1 -1), i32 %evl)
+  %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> splat (i1 -1), i32 %evl)
   %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
   ret <64 x i8> %u
 }
@@ -536,11 +474,9 @@ define <64 x i8> @vnmsac_vv_nxv64i8_unmasked(<64 x i8> %a, <64 x i8> %b, <64 x i
 ; CHECK-NEXT: vnmsac.vv v16, v8, v12
 ; CHECK-NEXT: vmv4r.v v8, v16
 ; CHECK-NEXT: ret
-  %splat = insertelement <64 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
-  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl)
-  %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> %allones, i32 %evl)
-  %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %allones, <64 x i8> %y, <64 x i8> %c, i32 %evl)
+  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> splat (i1 -1), i32 %evl)
+  %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> splat (i1 -1), i32 %evl)
+  %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> splat (i1 -1), <64 x i8> %y, <64 x i8> %c, i32 %evl)
   ret <64 x i8> %u
 }

@@ -553,10 +489,8 @@ define <64 x i8> @vnmsac_vx_nxv64i8(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x i1
 ; CHECK-NEXT: ret
   %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer
-  %splat = insertelement <64 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
-  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl)
-  %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> %allones, i32 %evl)
+  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> splat (i1 -1), i32 %evl)
+  %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> splat (i1 -1), i32 %evl)
   %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
   ret <64 x i8> %u
 }
@@ -570,11 +504,9 @@ define <64 x i8> @vnmsac_vx_nxv64i8_unmasked(<64 x i8> %a, i8 %b, <64 x i8> %c,
 ; CHECK-NEXT: ret
   %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer
-  %splat = insertelement <64 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
-  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl)
-  %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> %allones, i32 %evl)
-  %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> %allones, <64 x i8> %y, <64 x i8> %c, i32 %evl)
+  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> splat (i1 -1), i32 %evl)
+  %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> splat (i1 -1), i32 %evl)
+  %u = call <64 x i8> @llvm.vp.merge.nxv64i8(<64 x i1> splat (i1 -1), <64 x i8> %y, <64 x i8> %c, i32 %evl)
   ret <64 x i8> %u
 }

@@ -585,10 +517,8 @@ define <64 x i8> @vnmsac_vv_nxv64i8_ta(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c,
 ; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v16
 ; CHECK-NEXT: ret
-  %splat = insertelement <64 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
-  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> %allones, i32 %evl)
-  %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> %allones, i32 %evl)
+  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %b, <64 x i1> splat (i1 -1), i32 %evl)
+  %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> splat (i1 -1), i32 %evl)
   %u = call <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
   ret <64 x i8> %u
 }
@@ -602,10 +532,8 @@ define <64 x i8> @vnmsac_vx_nxv64i8_ta(<64 x i8> %a, i8 %b, <64 x i8> %c, <64 x
 ; CHECK-NEXT: ret
   %elt.head = insertelement <64 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <64 x i8> %elt.head, <64 x i8> poison, <64 x i32> zeroinitializer
-  %splat = insertelement <64 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <64 x i1> %splat, <64 x i1> poison, <64 x i32> zeroinitializer
-  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> %allones, i32 %evl)
-  %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> %allones, i32 %evl)
+  %x = call <64 x i8> @llvm.vp.mul.nxv64i8(<64 x i8> %a, <64 x i8> %vb, <64 x i1> splat (i1 -1), i32 %evl)
+  %y = call <64 x i8> @llvm.vp.sub.nxv64i8(<64 x i8> %c, <64 x i8> %x, <64 x i1> splat (i1 -1), i32 %evl)
   %u = call <64 x i8> @llvm.vp.select.nxv64i8(<64 x i1> %m, <64 x i8> %y, <64 x i8> %c, i32 %evl)
   ret <64 x i8> %u
 }
@@ -622,10 +550,8 @@ define <2 x i16> @vnmsac_vv_nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
   ret <2 x i16> %u
 }
@@ -637,11 +563,9 @@ define <2 x i16> @vnmsac_vv_nxv2i16_unmasked(<2 x i16> %a, <2 x i16> %b, <2 x i1
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %allones, <2 x i16> %y, <2 x i16> %c, i32 %evl)
+  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> splat (i1 -1), <2 x i16> %y, <2 x i16> %c, i32 %evl)
   ret <2 x i16> %u
 }

@@ -654,10 +578,8 @@ define <2 x i16> @vnmsac_vx_nxv2i16(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x i1
 ; CHECK-NEXT: ret
   %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
   ret <2 x i16> %u
 }
@@ -671,11 +593,9 @@ define <2 x i16> @vnmsac_vx_nxv2i16_unmasked(<2 x i16> %a, i16 %b, <2 x i16> %c,
 ; CHECK-NEXT: ret
   %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> %allones, i32 %evl)
-  %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> %allones, <2 x i16> %y, <2 x i16> %c, i32 %evl)
+  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> splat (i1 -1), i32 %evl)
+  %u = call <2 x i16> @llvm.vp.merge.nxv2i16(<2 x i1> splat (i1 -1), <2 x i16> %y, <2 x i16> %c, i32 %evl)
   ret <2 x i16> %u
 }

@@ -686,10 +606,8 @@ define <2 x i16> @vnmsac_vv_nxv2i16_ta(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c,
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %b, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
   ret <2 x i16> %u
 }
@@ -703,10 +621,8 @@ define <2 x i16> @vnmsac_vx_nxv2i16_ta(<2 x i16> %a, i16 %b, <2 x i16> %c, <2 x
 ; CHECK-NEXT: ret
   %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
-  %splat = insertelement <2 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
-  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> %allones, i32 %evl)
-  %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> %allones, i32 %evl)
+  %x = call <2 x i16> @llvm.vp.mul.nxv2i16(<2 x i16> %a, <2 x i16> %vb, <2 x i1> splat (i1 -1), i32 %evl)
+  %y = call <2 x i16> @llvm.vp.sub.nxv2i16(<2 x i16> %c, <2 x i16> %x, <2 x i1> splat (i1 -1), i32 %evl)
   %u = call <2 x i16> @llvm.vp.select.nxv2i16(<2 x i1> %m, <2 x i16> %y, <2 x i16> %c, i32 %evl)
   ret <2 x i16> %u
 }
@@ -723,10 +639,8 @@ define <4 x i16> @vnmsac_vv_nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> %allones, i32 %evl)
+  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> splat (i1 -1), i32 %evl)
   %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl)
   ret <4 x i16> %u
 }
@@ -738,11 +652,9 @@ define <4 x i16> @vnmsac_vv_nxv4i16_unmasked(<4 x i16> %a, <4 x i16> %b, <4 x i1
 ; CHECK-NEXT: vnmsac.vv v10, v8, v9
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <4 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
-  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl)
-  %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> %allones, i32 %evl)
-  %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %allones, <4 x i16> %y, <4 x i16> %c, i32 %evl)
+  %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> splat (i1 -1), i32 %evl)
+  %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> splat (i1 -1), i32 %evl)
+  %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> splat (i1 -1), <4 x i16> %y, <4 x i16> %c, i32 %evl)
   ret <4 x i16> %u
 }

@@ -755,10 +667,8 @@ define <4 x i16> @vnmsac_vx_nxv4i16(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x i1
 ; CHECK-NEXT: ret
   %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
-  %splat = insertelement <4 x i1>
poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> %allones, i32 %evl) + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl) ret <4 x i16> %u } @@ -772,11 +682,9 @@ define <4 x i16> @vnmsac_vx_nxv4i16_unmasked(<4 x i16> %a, i16 %b, <4 x i16> %c, ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> %allones, i32 %evl) - %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> %allones, <4 x i16> %y, <4 x i16> %c, i32 %evl) + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x i16> @llvm.vp.merge.nxv4i16(<4 x i1> splat (i1 -1), <4 x i16> %y, <4 x i16> %c, i32 %evl) ret <4 x i16> %u } @@ -787,10 +695,8 @@ define <4 x i16> @vnmsac_vv_nxv4i16_ta(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> %allones, i32 %evl) - %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> %allones, i32 %evl) + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %b, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl) ret <4 x i16> %u } @@ -804,10 +710,8 @@ define <4 x i16> @vnmsac_vx_nxv4i16_ta(<4 x i16> %a, i16 %b, <4 x i16> %c, <4 x ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> %allones, i32 %evl) + %x = call <4 x i16> @llvm.vp.mul.nxv4i16(<4 x i16> %a, <4 x i16> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i16> @llvm.vp.sub.nxv4i16(<4 x i16> %c, <4 x i16> %x, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i16> @llvm.vp.select.nxv4i16(<4 x i1> %m, <4 x i16> %y, <4 x i16> %c, i32 %evl) ret <4 x i16> %u } @@ -824,10 +728,8 @@ define <8 x i16> @vnmsac_vv_nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 
x i16> %c, < ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> %allones, i32 %evl) + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl) ret <8 x i16> %u } @@ -839,11 +741,9 @@ define <8 x i16> @vnmsac_vv_nxv8i16_unmasked(<8 x i16> %a, <8 x i16> %b, <8 x i1 ; CHECK-NEXT: vnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> %allones, i32 %evl) - %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %allones, <8 x i16> %y, <8 x i16> %c, i32 %evl) + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> splat (i1 -1), <8 x i16> %y, <8 x i16> %c, i32 %evl) ret <8 x i16> %u } @@ -856,10 +756,8 @@ define <8 x i16> @vnmsac_vx_nxv8i16(<8 x i16> %a, i16 %b, <8 x i16> %c, <8 x i1 ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> %allones, i32 %evl) + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl) ret <8 x i16> %u } @@ -873,11 +771,9 @@ define <8 x i16> @vnmsac_vx_nxv8i16_unmasked(<8 x i16> %a, i16 %b, <8 x i16> %c, ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> %allones, i32 %evl) - %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> %allones, <8 x i16> %y, <8 x i16> %c, i32 %evl) + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x 
i1> splat (i1 -1), i32 %evl) + %u = call <8 x i16> @llvm.vp.merge.nxv8i16(<8 x i1> splat (i1 -1), <8 x i16> %y, <8 x i16> %c, i32 %evl) ret <8 x i16> %u } @@ -888,10 +784,8 @@ define <8 x i16> @vnmsac_vv_nxv8i16_ta(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> %allones, i32 %evl) + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl) ret <8 x i16> %u } @@ -905,10 +799,8 @@ define <8 x i16> @vnmsac_vx_nxv8i16_ta(<8 x i16> %a, i16 %b, <8 x i16> %c, <8 x ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> %allones, i32 %evl) + %x = call <8 x i16> @llvm.vp.mul.nxv8i16(<8 x i16> %a, <8 x i16> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i16> @llvm.vp.sub.nxv8i16(<8 x i16> %c, <8 x i16> %x, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i16> @llvm.vp.select.nxv8i16(<8 x i1> %m, <8 x i16> %y, <8 x i16> %c, i32 %evl) ret <8 x i16> %u } @@ -925,10 +817,8 @@ define <16 x i16> @vnmsac_vv_nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> % ; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl) - %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> %allones, i32 %evl) + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl) ret <16 x i16> %u } @@ -940,11 +830,9 @@ define <16 x i16> @vnmsac_vv_nxv16i16_unmasked(<16 x i16> %a, <16 x i16> %b, <16 ; CHECK-NEXT: vnmsac.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl) - %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> %allones, i32 %evl) - %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %allones, <16 x i16> %y, <16 x i16> %c, i32 %evl) + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> 
%b, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> splat (i1 -1), <16 x i16> %y, <16 x i16> %c, i32 %evl) ret <16 x i16> %u } @@ -957,10 +845,8 @@ define <16 x i16> @vnmsac_vx_nxv16i16(<16 x i16> %a, i16 %b, <16 x i16> %c, <16 ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl) - %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> %allones, i32 %evl) + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl) ret <16 x i16> %u } @@ -974,11 +860,9 @@ define <16 x i16> @vnmsac_vx_nxv16i16_unmasked(<16 x i16> %a, i16 %b, <16 x i16> ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl) - %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> %allones, i32 %evl) - %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> %allones, <16 x i16> %y, <16 x i16> %c, i32 %evl) + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x i16> @llvm.vp.merge.nxv16i16(<16 x i1> splat (i1 -1), <16 x i16> %y, <16 x i16> %c, i32 %evl) ret <16 x i16> %u } @@ -989,10 +873,8 @@ define <16 x i16> @vnmsac_vv_nxv16i16_ta(<16 x i16> %a, <16 x i16> %b, <16 x i16 ; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> %allones, i32 %evl) - %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> %allones, i32 %evl) + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %b, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl) ret <16 x i16> %u } @@ -1006,10 +888,8 @@ define <16 x i16> @vnmsac_vx_nxv16i16_ta(<16 x i16> %a, i16 %b, <16 x i16> %c, ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = 
shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> %allones, i32 %evl) - %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> %allones, i32 %evl) + %x = call <16 x i16> @llvm.vp.mul.nxv16i16(<16 x i16> %a, <16 x i16> %vb, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i16> @llvm.vp.sub.nxv16i16(<16 x i16> %c, <16 x i16> %x, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x i16> @llvm.vp.select.nxv16i16(<16 x i1> %m, <16 x i16> %y, <16 x i16> %c, i32 %evl) ret <16 x i16> %u } @@ -1026,10 +906,8 @@ define <32 x i16> @vnmsac_vv_nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i16> % ; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl) - %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> %allones, i32 %evl) + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> splat (i1 -1), i32 %evl) + %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl) ret <32 x i16> %u } @@ -1041,11 +919,9 @@ define <32 x i16> @vnmsac_vv_nxv32i16_unmasked(<32 x i16> %a, <32 x i16> %b, <32 ; CHECK-NEXT: vnmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl) - %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> %allones, i32 %evl) - %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %allones, <32 x i16> %y, <32 x i16> %c, i32 %evl) + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> splat (i1 -1), i32 %evl) + %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> splat (i1 -1), i32 %evl) + %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> splat (i1 -1), <32 x i16> %y, <32 x i16> %c, i32 %evl) ret <32 x i16> %u } @@ -1058,10 +934,8 @@ define <32 x i16> @vnmsac_vx_nxv32i16(<32 x i16> %a, i16 %b, <32 x i16> %c, <32 ; CHECK-NEXT: ret %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0 %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl) - %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> %allones, i32 %evl) + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> splat (i1 -1), i32 %evl) + %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl) ret <32 x i16> %u } @@ -1075,11 +949,9 @@ define <32 x i16> 
@vnmsac_vx_nxv32i16_unmasked(<32 x i16> %a, i16 %b, <32 x i16> ; CHECK-NEXT: ret %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0 %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl) - %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> %allones, i32 %evl) - %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> %allones, <32 x i16> %y, <32 x i16> %c, i32 %evl) + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> splat (i1 -1), i32 %evl) + %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> splat (i1 -1), i32 %evl) + %u = call <32 x i16> @llvm.vp.merge.nxv32i16(<32 x i1> splat (i1 -1), <32 x i16> %y, <32 x i16> %c, i32 %evl) ret <32 x i16> %u } @@ -1090,10 +962,8 @@ define <32 x i16> @vnmsac_vv_nxv32i16_ta(<32 x i16> %a, <32 x i16> %b, <32 x i16 ; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> %allones, i32 %evl) - %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> %allones, i32 %evl) + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %b, <32 x i1> splat (i1 -1), i32 %evl) + %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl) ret <32 x i16> %u } @@ -1107,10 +977,8 @@ define <32 x i16> @vnmsac_vx_nxv32i16_ta(<32 x i16> %a, i16 %b, <32 x i16> %c, ; CHECK-NEXT: ret %elt.head = insertelement <32 x i16> poison, i16 %b, i32 0 %vb = shufflevector <32 x i16> %elt.head, <32 x i16> poison, <32 x i32> zeroinitializer - %splat = insertelement <32 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer - %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> %allones, i32 %evl) - %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> %allones, i32 %evl) + %x = call <32 x i16> @llvm.vp.mul.nxv32i16(<32 x i16> %a, <32 x i16> %vb, <32 x i1> splat (i1 -1), i32 %evl) + %y = call <32 x i16> @llvm.vp.sub.nxv32i16(<32 x i16> %c, <32 x i16> %x, <32 x i1> splat (i1 -1), i32 %evl) %u = call <32 x i16> @llvm.vp.select.nxv32i16(<32 x i1> %m, <32 x i16> %y, <32 x i16> %c, i32 %evl) ret <32 x i16> %u } @@ -1127,10 +995,8 @@ define <2 x i32> @vnmsac_vv_nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, < ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl) - %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> %allones, i32 %evl) + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x 
i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl) ret <2 x i32> %u } @@ -1142,11 +1008,9 @@ define <2 x i32> @vnmsac_vv_nxv2i32_unmasked(<2 x i32> %a, <2 x i32> %b, <2 x i3 ; CHECK-NEXT: vnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> %allones, i32 %evl) - %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> %allones, i32 %evl) - %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %allones, <2 x i32> %y, <2 x i32> %c, i32 %evl) + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> splat (i1 -1), <2 x i32> %y, <2 x i32> %c, i32 %evl) ret <2 x i32> %u } @@ -1159,10 +1023,8 @@ define <2 x i32> @vnmsac_vx_nxv2i32(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x i1 ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl) - %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> %allones, i32 %evl) + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl) ret <2 x i32> %u } @@ -1176,11 +1038,9 @@ define <2 x i32> @vnmsac_vx_nxv2i32_unmasked(<2 x i32> %a, i32 %b, <2 x i32> %c, ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl) - %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> %allones, i32 %evl) - %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> %allones, <2 x i32> %y, <2 x i32> %c, i32 %evl) + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x i32> @llvm.vp.merge.nxv2i32(<2 x i1> splat (i1 -1), <2 x i32> %y, <2 x i32> %c, i32 %evl) ret <2 x i32> %u } @@ -1191,10 +1051,8 @@ define <2 x i32> @vnmsac_vv_nxv2i32_ta(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, 
<2 x i1> %allones, i32 %evl) - %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> %allones, i32 %evl) + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %b, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl) ret <2 x i32> %u } @@ -1208,10 +1066,8 @@ define <2 x i32> @vnmsac_vx_nxv2i32_ta(<2 x i32> %a, i32 %b, <2 x i32> %c, <2 x ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> %allones, i32 %evl) - %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> %allones, i32 %evl) + %x = call <2 x i32> @llvm.vp.mul.nxv2i32(<2 x i32> %a, <2 x i32> %vb, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i32> @llvm.vp.sub.nxv2i32(<2 x i32> %c, <2 x i32> %x, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x i32> @llvm.vp.select.nxv2i32(<2 x i1> %m, <2 x i32> %y, <2 x i32> %c, i32 %evl) ret <2 x i32> %u } @@ -1228,10 +1084,8 @@ define <4 x i32> @vnmsac_vv_nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, < ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl) - %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> %allones, i32 %evl) + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) ret <4 x i32> %u } @@ -1243,11 +1097,9 @@ define <4 x i32> @vnmsac_vv_nxv4i32_unmasked(<4 x i32> %a, <4 x i32> %b, <4 x i3 ; CHECK-NEXT: vnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl) - %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> %allones, i32 %evl) - %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %allones, <4 x i32> %y, <4 x i32> %c, i32 %evl) + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> splat (i1 -1), <4 x i32> %y, <4 x i32> %c, i32 %evl) ret <4 x i32> %u } @@ -1260,10 +1112,8 @@ define <4 x i32> @vnmsac_vx_nxv4i32(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x i1 ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = 
shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> %allones, i32 %evl) + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) ret <4 x i32> %u } @@ -1277,11 +1127,9 @@ define <4 x i32> @vnmsac_vx_nxv4i32_unmasked(<4 x i32> %a, i32 %b, <4 x i32> %c, ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> %allones, i32 %evl) - %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> %allones, <4 x i32> %y, <4 x i32> %c, i32 %evl) + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x i32> @llvm.vp.merge.nxv4i32(<4 x i1> splat (i1 -1), <4 x i32> %y, <4 x i32> %c, i32 %evl) ret <4 x i32> %u } @@ -1292,10 +1140,8 @@ define <4 x i32> @vnmsac_vv_nxv4i32_ta(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> %allones, i32 %evl) - %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> %allones, i32 %evl) + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) ret <4 x i32> %u } @@ -1309,10 +1155,8 @@ define <4 x i32> @vnmsac_vx_nxv4i32_ta(<4 x i32> %a, i32 %b, <4 x i32> %c, <4 x ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> %allones, i32 %evl) + %x = call <4 x i32> @llvm.vp.mul.nxv4i32(<4 x i32> %a, <4 x i32> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i32> @llvm.vp.sub.nxv4i32(<4 x i32> %c, <4 x i32> %x, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i32> @llvm.vp.select.nxv4i32(<4 x i1> %m, <4 x i32> %y, <4 x i32> %c, i32 %evl) ret <4 x i32> %u } @@ -1329,10 +1173,8 @@ define <8 x i32> @vnmsac_vv_nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, < ; CHECK-NEXT: 
vnmsac.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> %allones, i32 %evl) + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) ret <8 x i32> %u } @@ -1344,11 +1186,9 @@ define <8 x i32> @vnmsac_vv_nxv8i32_unmasked(<8 x i32> %a, <8 x i32> %b, <8 x i3 ; CHECK-NEXT: vnmsac.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> %allones, i32 %evl) - %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %allones, <8 x i32> %y, <8 x i32> %c, i32 %evl) + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> splat (i1 -1), <8 x i32> %y, <8 x i32> %c, i32 %evl) ret <8 x i32> %u } @@ -1361,10 +1201,8 @@ define <8 x i32> @vnmsac_vx_nxv8i32(<8 x i32> %a, i32 %b, <8 x i32> %c, <8 x i1 ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> %allones, i32 %evl) + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) ret <8 x i32> %u } @@ -1378,11 +1216,9 @@ define <8 x i32> @vnmsac_vx_nxv8i32_unmasked(<8 x i32> %a, i32 %b, <8 x i32> %c, ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> %allones, i32 %evl) - %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> %allones, <8 x i32> %y, <8 x i32> %c, i32 %evl) + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> splat (i1 -1), 
i32 %evl) + %u = call <8 x i32> @llvm.vp.merge.nxv8i32(<8 x i1> splat (i1 -1), <8 x i32> %y, <8 x i32> %c, i32 %evl) ret <8 x i32> %u } @@ -1393,10 +1229,8 @@ define <8 x i32> @vnmsac_vv_nxv8i32_ta(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, ; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> %allones, i32 %evl) + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) ret <8 x i32> %u } @@ -1410,10 +1244,8 @@ define <8 x i32> @vnmsac_vx_nxv8i32_ta(<8 x i32> %a, i32 %b, <8 x i32> %c, <8 x ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> %allones, i32 %evl) + %x = call <8 x i32> @llvm.vp.mul.nxv8i32(<8 x i32> %a, <8 x i32> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i32> @llvm.vp.sub.nxv8i32(<8 x i32> %c, <8 x i32> %x, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i32> @llvm.vp.select.nxv8i32(<8 x i1> %m, <8 x i32> %y, <8 x i32> %c, i32 %evl) ret <8 x i32> %u } @@ -1430,10 +1262,8 @@ define <16 x i32> @vnmsac_vv_nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i32> % ; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl) - %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> %allones, i32 %evl) + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) ret <16 x i32> %u } @@ -1445,11 +1275,9 @@ define <16 x i32> @vnmsac_vv_nxv16i32_unmasked(<16 x i32> %a, <16 x i32> %b, <16 ; CHECK-NEXT: vnmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl) - %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> %allones, i32 %evl) - %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %allones, <16 x i32> %y, <16 x i32> %c, i32 %evl) + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x 
i1> splat (i1 -1), i32 %evl) + %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> splat (i1 -1), <16 x i32> %y, <16 x i32> %c, i32 %evl) ret <16 x i32> %u } @@ -1462,10 +1290,8 @@ define <16 x i32> @vnmsac_vx_nxv16i32(<16 x i32> %a, i32 %b, <16 x i32> %c, <16 ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl) - %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> %allones, i32 %evl) + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) ret <16 x i32> %u } @@ -1479,11 +1305,9 @@ define <16 x i32> @vnmsac_vx_nxv16i32_unmasked(<16 x i32> %a, i32 %b, <16 x i32> ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl) - %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> %allones, i32 %evl) - %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> %allones, <16 x i32> %y, <16 x i32> %c, i32 %evl) + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> splat (i1 -1), i32 %evl) + %u = call <16 x i32> @llvm.vp.merge.nxv16i32(<16 x i1> splat (i1 -1), <16 x i32> %y, <16 x i32> %c, i32 %evl) ret <16 x i32> %u } @@ -1494,10 +1318,8 @@ define <16 x i32> @vnmsac_vv_nxv16i32_ta(<16 x i32> %a, <16 x i32> %b, <16 x i32 ; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> %allones, i32 %evl) - %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> %allones, i32 %evl) + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %b, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) ret <16 x i32> %u } @@ -1511,10 +1333,8 @@ define <16 x i32> @vnmsac_vx_nxv16i32_ta(<16 x i32> %a, i32 %b, <16 x i32> %c, ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %splat = insertelement <16 x i1> poison, i1 -1, i32 0 - %allones = 
shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer - %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> %allones, i32 %evl) - %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> %allones, i32 %evl) + %x = call <16 x i32> @llvm.vp.mul.nxv16i32(<16 x i32> %a, <16 x i32> %vb, <16 x i1> splat (i1 -1), i32 %evl) + %y = call <16 x i32> @llvm.vp.sub.nxv16i32(<16 x i32> %c, <16 x i32> %x, <16 x i1> splat (i1 -1), i32 %evl) %u = call <16 x i32> @llvm.vp.select.nxv16i32(<16 x i1> %m, <16 x i32> %y, <16 x i32> %c, i32 %evl) ret <16 x i32> %u } @@ -1531,10 +1351,8 @@ define <2 x i64> @vnmsac_vv_nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, < ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl) - %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> %allones, i32 %evl) + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) ret <2 x i64> %u } @@ -1546,11 +1364,9 @@ define <2 x i64> @vnmsac_vv_nxv2i64_unmasked(<2 x i64> %a, <2 x i64> %b, <2 x i6 ; CHECK-NEXT: vnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl) - %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> %allones, i32 %evl) - %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %allones, <2 x i64> %y, <2 x i64> %c, i32 %evl) + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> splat (i1 -1), <2 x i64> %y, <2 x i64> %c, i32 %evl) ret <2 x i64> %u } @@ -1578,10 +1394,8 @@ define <2 x i64> @vnmsac_vx_nxv2i64(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x i1 ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl) - %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> %allones, i32 %evl) + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) ret <2 x i64> %u } @@ -1610,11 +1424,9 @@ define <2 x i64> @vnmsac_vx_nxv2i64_unmasked(<2 x i64> %a, i64 %b, <2 x i64> %c, ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 
%b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl) - %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> %allones, i32 %evl) - %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> %allones, <2 x i64> %y, <2 x i64> %c, i32 %evl) + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> splat (i1 -1), i32 %evl) + %u = call <2 x i64> @llvm.vp.merge.nxv2i64(<2 x i1> splat (i1 -1), <2 x i64> %y, <2 x i64> %c, i32 %evl) ret <2 x i64> %u } @@ -1625,10 +1437,8 @@ define <2 x i64> @vnmsac_vv_nxv2i64_ta(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, ; CHECK-NEXT: vnmsac.vv v10, v8, v9, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> %allones, i32 %evl) - %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> %allones, i32 %evl) + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %b, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) ret <2 x i64> %u } @@ -1657,10 +1467,8 @@ define <2 x i64> @vnmsac_vx_nxv2i64_ta(<2 x i64> %a, i64 %b, <2 x i64> %c, <2 x ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %splat = insertelement <2 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer - %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> %allones, i32 %evl) - %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> %allones, i32 %evl) + %x = call <2 x i64> @llvm.vp.mul.nxv2i64(<2 x i64> %a, <2 x i64> %vb, <2 x i1> splat (i1 -1), i32 %evl) + %y = call <2 x i64> @llvm.vp.sub.nxv2i64(<2 x i64> %c, <2 x i64> %x, <2 x i1> splat (i1 -1), i32 %evl) %u = call <2 x i64> @llvm.vp.select.nxv2i64(<2 x i1> %m, <2 x i64> %y, <2 x i64> %c, i32 %evl) ret <2 x i64> %u } @@ -1677,10 +1485,8 @@ define <4 x i64> @vnmsac_vv_nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, < ; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl) - %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> %allones, i32 %evl) + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) ret <4 x i64> %u } @@ -1692,11 +1498,9 @@ 
define <4 x i64> @vnmsac_vv_nxv4i64_unmasked(<4 x i64> %a, <4 x i64> %b, <4 x i6 ; CHECK-NEXT: vnmsac.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl) - %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> %allones, i32 %evl) - %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %allones, <4 x i64> %y, <4 x i64> %c, i32 %evl) + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> splat (i1 -1), <4 x i64> %y, <4 x i64> %c, i32 %evl) ret <4 x i64> %u } @@ -1724,10 +1528,8 @@ define <4 x i64> @vnmsac_vx_nxv4i64(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x i1 ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> %allones, i32 %evl) + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) ret <4 x i64> %u } @@ -1756,11 +1558,9 @@ define <4 x i64> @vnmsac_vx_nxv4i64_unmasked(<4 x i64> %a, i64 %b, <4 x i64> %c, ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> %allones, i32 %evl) - %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> %allones, <4 x i64> %y, <4 x i64> %c, i32 %evl) + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> splat (i1 -1), i32 %evl) + %u = call <4 x i64> @llvm.vp.merge.nxv4i64(<4 x i1> splat (i1 -1), <4 x i64> %y, <4 x i64> %c, i32 %evl) ret <4 x i64> %u } @@ -1771,10 +1571,8 @@ define <4 x i64> @vnmsac_vv_nxv4i64_ta(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, ; CHECK-NEXT: vnmsac.vv v12, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> %allones, i32 %evl) - %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> %allones, i32 %evl) + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %b, <4 x i1> splat (i1 -1), 
i32 %evl) + %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) ret <4 x i64> %u } @@ -1803,10 +1601,8 @@ define <4 x i64> @vnmsac_vx_nxv4i64_ta(<4 x i64> %a, i64 %b, <4 x i64> %c, <4 x ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %splat = insertelement <4 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer - %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> %allones, i32 %evl) - %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> %allones, i32 %evl) + %x = call <4 x i64> @llvm.vp.mul.nxv4i64(<4 x i64> %a, <4 x i64> %vb, <4 x i1> splat (i1 -1), i32 %evl) + %y = call <4 x i64> @llvm.vp.sub.nxv4i64(<4 x i64> %c, <4 x i64> %x, <4 x i1> splat (i1 -1), i32 %evl) %u = call <4 x i64> @llvm.vp.select.nxv4i64(<4 x i1> %m, <4 x i64> %y, <4 x i64> %c, i32 %evl) ret <4 x i64> %u } @@ -1823,10 +1619,8 @@ define <8 x i64> @vnmsac_vv_nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, < ; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> %allones, i32 %evl) + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) ret <8 x i64> %u } @@ -1838,11 +1632,9 @@ define <8 x i64> @vnmsac_vv_nxv8i64_unmasked(<8 x i64> %a, <8 x i64> %b, <8 x i6 ; CHECK-NEXT: vnmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> %allones, i32 %evl) - %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %allones, <8 x i64> %y, <8 x i64> %c, i32 %evl) + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> splat (i1 -1), <8 x i64> %y, <8 x i64> %c, i32 %evl) ret <8 x i64> %u } @@ -1870,10 +1662,8 @@ define <8 x i64> @vnmsac_vx_nxv8i64(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x i1 ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 
x i64> %x, <8 x i1> %allones, i32 %evl) + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) ret <8 x i64> %u } @@ -1902,11 +1692,9 @@ define <8 x i64> @vnmsac_vx_nxv8i64_unmasked(<8 x i64> %a, i64 %b, <8 x i64> %c, ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> %allones, i32 %evl) - %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> %allones, <8 x i64> %y, <8 x i64> %c, i32 %evl) + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> splat (i1 -1), i32 %evl) + %u = call <8 x i64> @llvm.vp.merge.nxv8i64(<8 x i1> splat (i1 -1), <8 x i64> %y, <8 x i64> %c, i32 %evl) ret <8 x i64> %u } @@ -1917,10 +1705,8 @@ define <8 x i64> @vnmsac_vv_nxv8i64_ta(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, ; CHECK-NEXT: vnmsac.vv v16, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> %allones, i32 %evl) - %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> %allones, i32 %evl) + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %b, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) ret <8 x i64> %u } @@ -1949,10 +1735,8 @@ define <8 x i64> @vnmsac_vx_nxv8i64_ta(<8 x i64> %a, i64 %b, <8 x i64> %c, <8 x ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %splat = insertelement <8 x i1> poison, i1 -1, i32 0 - %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer - %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> %allones, i32 %evl) - %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> %allones, i32 %evl) + %x = call <8 x i64> @llvm.vp.mul.nxv8i64(<8 x i64> %a, <8 x i64> %vb, <8 x i1> splat (i1 -1), i32 %evl) + %y = call <8 x i64> @llvm.vp.sub.nxv8i64(<8 x i64> %c, <8 x i64> %x, <8 x i1> splat (i1 -1), i32 %evl) %u = call <8 x i64> @llvm.vp.select.nxv8i64(<8 x i1> %m, <8 x i64> %y, <8 x i64> %c, i32 %evl) ret <8 x i64> %u } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll index d132608cdf7f5..09c281b525a64 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll @@ -34,9 +34,7 @@ define <2 x i8> @vor_vv_v2i8_unmasked(<2 x i8> %va, <2 x 
i8> %b, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.or.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.or.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -60,9 +58,7 @@ define <2 x i8> @vor_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.or.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.or.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -72,9 +68,7 @@ define <2 x i8> @vor_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 5, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.or.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.or.v2i8(<2 x i8> %va, <2 x i8> splat (i8 5), <2 x i1> %m, i32 %evl) ret <2 x i8> %v } @@ -84,11 +78,7 @@ define <2 x i8> @vor_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 5, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.or.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.or.v2i8(<2 x i8> %va, <2 x i8> splat (i8 5), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -110,9 +100,7 @@ define <4 x i8> @vor_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.or.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.or.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -148,9 +136,7 @@ define <4 x i8> @vor_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.or.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.or.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -160,9 +146,7 @@ define <4 x i8> @vor_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli 
zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 5, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.or.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.or.v4i8(<4 x i8> %va, <4 x i8> splat (i8 5), <4 x i1> %m, i32 %evl) ret <4 x i8> %v } @@ -172,11 +156,7 @@ define <4 x i8> @vor_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 5, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.or.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.or.v4i8(<4 x i8> %va, <4 x i8> splat (i8 5), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -198,9 +178,7 @@ define <7 x i8> @vor_vv_v5i8_unmasked(<7 x i8> %va, <7 x i8> %b, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <7 x i1> poison, i1 true, i32 0 - %m = shufflevector <7 x i1> %head, <7 x i1> poison, <7 x i32> zeroinitializer - %v = call <7 x i8> @llvm.vp.or.v5i8(<7 x i8> %va, <7 x i8> %b, <7 x i1> %m, i32 %evl) + %v = call <7 x i8> @llvm.vp.or.v5i8(<7 x i8> %va, <7 x i8> %b, <7 x i1> splat (i1 true), i32 %evl) ret <7 x i8> %v } @@ -224,9 +202,7 @@ define <7 x i8> @vor_vx_v5i8_unmasked(<7 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <7 x i8> poison, i8 %b, i32 0 %vb = shufflevector <7 x i8> %elt.head, <7 x i8> poison, <7 x i32> zeroinitializer - %head = insertelement <7 x i1> poison, i1 true, i32 0 - %m = shufflevector <7 x i1> %head, <7 x i1> poison, <7 x i32> zeroinitializer - %v = call <7 x i8> @llvm.vp.or.v5i8(<7 x i8> %va, <7 x i8> %vb, <7 x i1> %m, i32 %evl) + %v = call <7 x i8> @llvm.vp.or.v5i8(<7 x i8> %va, <7 x i8> %vb, <7 x i1> splat (i1 true), i32 %evl) ret <7 x i8> %v } @@ -236,9 +212,7 @@ define <7 x i8> @vor_vi_v5i8(<7 x i8> %va, <7 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <7 x i8> poison, i8 5, i32 0 - %vb = shufflevector <7 x i8> %elt.head, <7 x i8> poison, <7 x i32> zeroinitializer - %v = call <7 x i8> @llvm.vp.or.v5i8(<7 x i8> %va, <7 x i8> %vb, <7 x i1> %m, i32 %evl) + %v = call <7 x i8> @llvm.vp.or.v5i8(<7 x i8> %va, <7 x i8> splat (i8 5), <7 x i1> %m, i32 %evl) ret <7 x i8> %v } @@ -248,11 +222,7 @@ define <7 x i8> @vor_vi_v5i8_unmasked(<7 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <7 x i8> poison, i8 5, i32 0 - %vb = shufflevector <7 x i8> %elt.head, <7 x i8> poison, <7 x i32> zeroinitializer - %head = insertelement <7 x i1> poison, i1 true, i32 0 - %m = shufflevector <7 x i1> %head, <7 x i1> poison, <7 x i32> zeroinitializer - %v = call <7 x i8> @llvm.vp.or.v5i8(<7 x i8> %va, <7 x i8> %vb, <7 x i1> %m, i32 %evl) + %v = call <7 x i8> @llvm.vp.or.v5i8(<7 x i8> %va, <7 x i8> splat (i8 5), <7 x i1> splat (i1 true), i32 %evl) ret <7 x i8> %v } @@ -274,9 +244,7 @@ define <8 x i8> @vor_vv_v8i8_unmasked(<8 x 
i8> %va, <8 x i8> %b, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.or.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.or.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -300,9 +268,7 @@ define <8 x i8> @vor_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.or.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.or.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -312,9 +278,7 @@ define <8 x i8> @vor_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 5, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.or.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.or.v8i8(<8 x i8> %va, <8 x i8> splat (i8 5), <8 x i1> %m, i32 %evl) ret <8 x i8> %v } @@ -324,11 +288,7 @@ define <8 x i8> @vor_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 5, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.or.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.or.v8i8(<8 x i8> %va, <8 x i8> splat (i8 5), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -350,9 +310,7 @@ define <16 x i8> @vor_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.or.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.or.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -376,9 +334,7 @@ define <16 x i8> @vor_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.or.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.or.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -388,9 +344,7 @@ define <16 x i8> @vor_vi_v16i8(<16 x i8> %va, 
<16 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 5, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.or.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.or.v16i8(<16 x i8> %va, <16 x i8> splat (i8 5), <16 x i1> %m, i32 %evl) ret <16 x i8> %v } @@ -400,11 +354,7 @@ define <16 x i8> @vor_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 5, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.or.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.or.v16i8(<16 x i8> %va, <16 x i8> splat (i8 5), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -426,9 +376,7 @@ define <2 x i16> @vor_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.or.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.or.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -452,9 +400,7 @@ define <2 x i16> @vor_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.or.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.or.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -464,9 +410,7 @@ define <2 x i16> @vor_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 5, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.or.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.or.v2i16(<2 x i16> %va, <2 x i16> splat (i16 5), <2 x i1> %m, i32 %evl) ret <2 x i16> %v } @@ -476,11 +420,7 @@ define <2 x i16> @vor_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 5, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.or.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.or.v2i16(<2 x 
i16> %va, <2 x i16> splat (i16 5), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -502,9 +442,7 @@ define <4 x i16> @vor_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.or.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.or.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -528,9 +466,7 @@ define <4 x i16> @vor_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.or.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.or.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -540,9 +476,7 @@ define <4 x i16> @vor_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 5, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.or.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.or.v4i16(<4 x i16> %va, <4 x i16> splat (i16 5), <4 x i1> %m, i32 %evl) ret <4 x i16> %v } @@ -552,11 +486,7 @@ define <4 x i16> @vor_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 5, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.or.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.or.v4i16(<4 x i16> %va, <4 x i16> splat (i16 5), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -578,9 +508,7 @@ define <8 x i16> @vor_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.or.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.or.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -604,9 +532,7 @@ define <8 x i16> @vor_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.or.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> 
%m, i32 %evl) + %v = call <8 x i16> @llvm.vp.or.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -616,9 +542,7 @@ define <8 x i16> @vor_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 5, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.or.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.or.v8i16(<8 x i16> %va, <8 x i16> splat (i16 5), <8 x i1> %m, i32 %evl) ret <8 x i16> %v } @@ -628,11 +552,7 @@ define <8 x i16> @vor_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 5, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.or.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.or.v8i16(<8 x i16> %va, <8 x i16> splat (i16 5), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -654,9 +574,7 @@ define <16 x i16> @vor_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.or.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.or.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -680,9 +598,7 @@ define <16 x i16> @vor_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %e ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.or.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.or.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -692,9 +608,7 @@ define <16 x i16> @vor_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 5, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.or.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.or.v16i16(<16 x i16> %va, <16 x i16> splat (i16 5), <16 x i1> %m, i32 %evl) ret <16 x i16> %v } @@ -704,11 +618,7 @@ define <16 x i16> @vor_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 5, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = 
insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.or.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.or.v16i16(<16 x i16> %va, <16 x i16> splat (i16 5), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -730,9 +640,7 @@ define <2 x i32> @vor_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.or.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.or.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -756,9 +664,7 @@ define <2 x i32> @vor_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.or.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.or.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -768,9 +674,7 @@ define <2 x i32> @vor_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 5, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.or.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.or.v2i32(<2 x i32> %va, <2 x i32> splat (i32 5), <2 x i1> %m, i32 %evl) ret <2 x i32> %v } @@ -780,11 +684,7 @@ define <2 x i32> @vor_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 5, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.or.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.or.v2i32(<2 x i32> %va, <2 x i32> splat (i32 5), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -806,9 +706,7 @@ define <4 x i32> @vor_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.or.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.or.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -832,9 +730,7 @@ define <4 x i32> @vor_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector 
<4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.or.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.or.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -844,9 +740,7 @@ define <4 x i32> @vor_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 5, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.or.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.or.v4i32(<4 x i32> %va, <4 x i32> splat (i32 5), <4 x i1> %m, i32 %evl) ret <4 x i32> %v } @@ -856,11 +750,7 @@ define <4 x i32> @vor_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 5, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.or.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.or.v4i32(<4 x i32> %va, <4 x i32> splat (i32 5), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -882,9 +772,7 @@ define <8 x i32> @vor_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -908,9 +796,7 @@ define <8 x i32> @vor_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -920,9 +806,7 @@ define <8 x i32> @vor_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 5, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %va, <8 x i32> splat (i32 5), <8 x i1> %m, i32 %evl) ret <8 x i32> %v } @@ -932,11 +816,7 @@ define <8 x i32> @vor_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, 
ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 5, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.or.v8i32(<8 x i32> %va, <8 x i32> splat (i32 5), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -958,9 +838,7 @@ define <16 x i32> @vor_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.or.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.or.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -984,9 +862,7 @@ define <16 x i32> @vor_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %e ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.or.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.or.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -996,9 +872,7 @@ define <16 x i32> @vor_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 5, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.or.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.or.v16i32(<16 x i32> %va, <16 x i32> splat (i32 5), <16 x i1> %m, i32 %evl) ret <16 x i32> %v } @@ -1008,11 +882,7 @@ define <16 x i32> @vor_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 5, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.or.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.or.v16i32(<16 x i32> %va, <16 x i32> splat (i32 5), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -1034,9 +904,7 @@ define <2 x i64> @vor_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.or.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.or.v2i64(<2 x i64> 
%va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1088,9 +956,7 @@ define <2 x i64> @vor_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl) ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.or.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.or.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1100,9 +966,7 @@ define <2 x i64> @vor_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 5, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.or.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.or.v2i64(<2 x i64> %va, <2 x i64> splat (i64 5), <2 x i1> %m, i32 %evl) ret <2 x i64> %v } @@ -1112,11 +976,7 @@ define <2 x i64> @vor_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 5, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.or.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.or.v2i64(<2 x i64> %va, <2 x i64> splat (i64 5), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1138,9 +998,7 @@ define <4 x i64> @vor_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.or.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.or.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1192,9 +1050,7 @@ define <4 x i64> @vor_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl) ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.or.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.or.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1204,9 +1060,7 @@ define <4 x i64> @vor_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 5, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.or.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x 
i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.or.v4i64(<4 x i64> %va, <4 x i64> splat (i64 5), <4 x i1> %m, i32 %evl) ret <4 x i64> %v } @@ -1216,11 +1070,7 @@ define <4 x i64> @vor_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 5, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.or.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.or.v4i64(<4 x i64> %va, <4 x i64> splat (i64 5), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1242,9 +1092,7 @@ define <8 x i64> @vor_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.or.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.or.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1296,9 +1144,7 @@ define <8 x i64> @vor_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl) ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.or.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.or.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1308,9 +1154,7 @@ define <8 x i64> @vor_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 5, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.or.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.or.v8i64(<8 x i64> %va, <8 x i64> splat (i64 5), <8 x i1> %m, i32 %evl) ret <8 x i64> %v } @@ -1320,11 +1164,7 @@ define <8 x i64> @vor_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 5, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.or.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.or.v8i64(<8 x i64> %va, <8 x i64> splat (i64 5), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1346,9 +1186,7 @@ define <16 x i64> @vor_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vor.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = 
shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.or.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.or.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1400,9 +1238,7 @@ define <16 x i64> @vor_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %e ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.or.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.or.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1412,9 +1248,7 @@ define <16 x i64> @vor_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 5, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.or.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.or.v16i64(<16 x i64> %va, <16 x i64> splat (i64 5), <16 x i1> %m, i32 %evl) ret <16 x i64> %v } @@ -1424,10 +1258,6 @@ define <16 x i64> @vor_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 5, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.or.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.or.v16i64(<16 x i64> %va, <16 x i64> splat (i64 5), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index 4d2f55b172e48..a13f1eed8efb1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -184,9 +184,7 @@ define <3 x i8> @vpgather_truemask_v3i8(<3 x ptr> %ptrs, i32 zeroext %evl) { ; RV64-NEXT: vluxei64.v v10, (zero), v8 ; RV64-NEXT: vmv1r.v v8, v10 ; RV64-NEXT: ret - %mhead = insertelement <3 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <3 x i1> %mhead, <3 x i1> poison, <3 x i32> zeroinitializer - %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr> %ptrs, <3 x i1> %mtrue, i32 %evl) + %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr> %ptrs, <3 x i1> splat (i1 1), i32 %evl) ret <3 x i8> %v } @@ -224,9 +222,7 @@ define <4 x i8> @vpgather_truemask_v4i8(<4 x ptr> %ptrs, i32 zeroext %evl) { ; RV64-NEXT: vluxei64.v v10, (zero), v8 ; RV64-NEXT: vmv1r.v v8, v10 ; RV64-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl) + %v = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> %ptrs, <4 x i1> splat 
(i1 1), i32 %evl) ret <4 x i8> %v } @@ -455,9 +451,7 @@ define <4 x i16> @vpgather_truemask_v4i16(<4 x ptr> %ptrs, i32 zeroext %evl) { ; RV64-NEXT: vluxei64.v v10, (zero), v8 ; RV64-NEXT: vmv1r.v v8, v10 ; RV64-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl) + %v = call <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl) ret <4 x i16> %v } @@ -665,9 +659,7 @@ define <4 x i32> @vpgather_truemask_v4i32(<4 x ptr> %ptrs, i32 zeroext %evl) { ; RV64-NEXT: vluxei64.v v10, (zero), v8 ; RV64-NEXT: vmv.v.v v8, v10 ; RV64-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl) + %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl) ret <4 x i32> %v } @@ -905,9 +897,7 @@ define <4 x i64> @vpgather_truemask_v4i64(<4 x ptr> %ptrs, i32 zeroext %evl) { ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV64-NEXT: vluxei64.v v8, (zero), v8 ; RV64-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl) + %v = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl) ret <4 x i64> %v } @@ -1216,9 +1206,7 @@ define <4 x half> @vpgather_truemask_v4f16(<4 x ptr> %ptrs, i32 zeroext %evl) { ; RV64-NEXT: vluxei64.v v10, (zero), v8 ; RV64-NEXT: vmv1r.v v8, v10 ; RV64-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl) + %v = call <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl) ret <4 x half> %v } @@ -1384,9 +1372,7 @@ define <4 x float> @vpgather_truemask_v4f32(<4 x ptr> %ptrs, i32 zeroext %evl) { ; RV64-NEXT: vluxei64.v v10, (zero), v8 ; RV64-NEXT: vmv.v.v v8, v10 ; RV64-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl) + %v = call <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl) ret <4 x float> %v } @@ -1624,9 +1610,7 @@ define <4 x double> @vpgather_truemask_v4f64(<4 x ptr> %ptrs, i32 zeroext %evl) ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV64-NEXT: vluxei64.v v8, (zero), v8 ; RV64-NEXT: ret - %mhead = insertelement <4 x i1> poison, i1 1, i32 0 - %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl) + %v = call <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl) ret <4 x double> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 618b875be5665..9ef89352e65e5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
@@ -46,9 +46,7 @@ define <4 x i8> @vpload_v4i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <4 x i1> poison, i1 true, i32 0
- %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
- %load = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ %load = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %load
 }
@@ -106,9 +104,7 @@ define <8 x i16> @vpload_v8i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <8 x i1> poison, i1 true, i32 0
- %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
- %load = call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ %load = call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %load
 }
@@ -154,9 +150,7 @@ define <6 x i32> @vpload_v6i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <6 x i1> poison, i1 true, i32 0
- %b = shufflevector <6 x i1> %a, <6 x i1> poison, <6 x i32> zeroinitializer
- %load = call <6 x i32> @llvm.vp.load.v6i32.p0(ptr %ptr, <6 x i1> %b, i32 %evl)
+ %load = call <6 x i32> @llvm.vp.load.v6i32.p0(ptr %ptr, <6 x i1> splat (i1 true), i32 %evl)
 ret <6 x i32> %load
 }
@@ -178,9 +172,7 @@ define <8 x i32> @vpload_v8i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <8 x i1> poison, i1 true, i32 0
- %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
- %load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ %load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %load
 }
@@ -214,9 +206,7 @@ define <4 x i64> @vpload_v4i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT: vle64.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <4 x i1> poison, i1 true, i32 0
- %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
- %load = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ %load = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %load
 }
@@ -250,9 +240,7 @@ define <2 x half> @vpload_v2f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
 ; CHECK-NEXT: vle16.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <2 x i1> poison, i1 true, i32 0
- %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
- %load = call <2 x half> @llvm.vp.load.v2f16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+ %load = call <2 x half> @llvm.vp.load.v2f16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x half> %load
 }
@@ -322,9 +310,7 @@ define <8 x float> @vpload_v8f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <8 x i1> poison, i1 true, i32 0
- %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
- %load = call <8 x float> @llvm.vp.load.v8f32.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+ %load = call <8 x float> @llvm.vp.load.v8f32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x float> %load
 }
@@ -358,9 +344,7 @@ define <4 x double> @vpload_v4f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT: vle64.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <4 x i1> poison, i1 true, i32 0
- %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
- %load = call <4 x double> @llvm.vp.load.v4f64.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+ %load = call <4 x double> @llvm.vp.load.v4f64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x double> %load
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index 6fe83fed6fd9c..466448a7a05a2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -89,9 +89,7 @@ define <2 x i8> @vpmerge_vi_v2i8(<2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i8> poison, i8 2, i32 0
- %va = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> splat (i8 2), <2 x i8> %vb, i32 %evl)
 ret <2 x i8> %v
 }
@@ -126,9 +124,7 @@ define <4 x i8> @vpmerge_vi_v4i8(<4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i8> poison, i8 2, i32 0
- %va = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> splat (i8 2), <4 x i8> %vb, i32 %evl)
 ret <4 x i8> %v
 }
@@ -163,9 +159,7 @@ define <6 x i8> @vpmerge_vi_v6i8(<6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <6 x i8> poison, i8 2, i32 0
- %va = shufflevector <6 x i8> %elt.head, <6 x i8> poison, <6 x i32> zeroinitializer
- %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+ %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> splat (i8 2), <6 x i8> %vb, i32 %evl)
 ret <6 x i8> %v
 }
@@ -200,9 +194,7 @@ define <8 x i7> @vpmerge_vi_v8i7(<8 x i7> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i7> poison, i7 2, i32 0
- %va = shufflevector <8 x i7> %elt.head, <8 x i7> poison, <8 x i32> zeroinitializer
- %v = call <8 x i7> @llvm.vp.merge.v8i7(<8 x i1> %m, <8 x i7> %va, <8 x i7> %vb, i32 %evl)
+ %v = call <8 x i7> @llvm.vp.merge.v8i7(<8 x i1> %m, <8 x i7> splat (i7 2), <8 x i7> %vb, i32 %evl)
 ret <8 x i7> %v
 }
@@ -237,9 +229,7 @@ define <8 x i8> @vpmerge_vi_v8i8(<8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 2, i32 0
- %va = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> splat (i8 2), <8 x i8> %vb, i32 %evl)
 ret <8 x i8> %v
 }
@@ -274,9 +264,7 @@ define <16 x i8> @vpmerge_vi_v16i8(<16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 2, i32 0
- %va = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> splat (i8 2), <16 x i8> %vb, i32 %evl)
 ret <16 x i8> %v
 }
@@ -311,9 +299,7 @@ define <2 x i16> @vpmerge_vi_v2i16(<2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 2, i32 0
- %va = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> splat (i16 2), <2 x i16> %vb, i32 %evl)
 ret <2 x i16> %v
 }
@@ -348,9 +334,7 @@ define <4 x i16> @vpmerge_vi_v4i16(<4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 2, i32 0
- %va = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> splat (i16 2), <4 x i16> %vb, i32 %evl)
 ret <4 x i16> %v
 }
@@ -385,9 +369,7 @@ define <8 x i16> @vpmerge_vi_v8i16(<8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 2, i32 0
- %va = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> splat (i16 2), <8 x i16> %vb, i32 %evl)
 ret <8 x i16> %v
 }
@@ -422,9 +404,7 @@ define <16 x i16> @vpmerge_vi_v16i16(<16 x i16> %vb, <16 x i1> %m, i32 zeroext %
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 2, i32 0
- %va = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> splat (i16 2), <16 x i16> %vb, i32 %evl)
 ret <16 x i16> %v
 }
@@ -459,9 +439,7 @@ define <2 x i32> @vpmerge_vi_v2i32(<2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 2, i32 0
- %va = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> splat (i32 2), <2 x i32> %vb, i32 %evl)
 ret <2 x i32> %v
 }
@@ -496,9 +474,7 @@ define <4 x i32> @vpmerge_vi_v4i32(<4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 2, i32 0
- %va = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> splat (i32 2), <4 x i32> %vb, i32 %evl)
 ret <4 x i32> %v
 }
@@ -533,9 +509,7 @@ define <8 x i32> @vpmerge_vi_v8i32(<8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 2, i32 0
- %va = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> splat (i32 2), <8 x i32> %vb, i32 %evl)
 ret <8 x i32> %v
 }
@@ -570,9 +544,7 @@ define <16 x i32> @vpmerge_vi_v16i32(<16 x i32> %vb, <16 x i1> %m, i32 zeroext %
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 2, i32 0
- %va = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> splat (i32 2), <16 x i32> %vb, i32 %evl)
 ret <16 x i32> %v
 }
@@ -641,9 +613,7 @@ define <2 x i64> @vpmerge_vi_v2i64(<2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 2, i32 0
- %va = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> splat (i64 2), <2 x i64> %vb, i32 %evl)
 ret <2 x i64> %v
 }
@@ -712,9 +682,7 @@ define <4 x i64> @vpmerge_vi_v4i64(<4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 2, i32 0
- %va = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> splat (i64 2), <4 x i64> %vb, i32 %evl)
 ret <4 x i64> %v
 }
@@ -783,9 +751,7 @@ define <8 x i64> @vpmerge_vi_v8i64(<8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 2, i32 0
- %va = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> splat (i64 2), <8 x i64> %vb, i32 %evl)
 ret <8 x i64> %v
 }
@@ -854,9 +820,7 @@ define <16 x i64> @vpmerge_vi_v16i64(<16 x i64> %vb, <16 x i1> %m, i32 zeroext %
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i64> poison, i64 2, i32 0
- %va = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> splat (i64 2), <16 x i64> %vb, i32 %evl)
 ret <16 x i64> %v
 }
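The test churn above and below is one mechanical rewrite: the old IR spelled a constant vector splat as an insertelement into poison followed by a zero-mask shufflevector, while the newer splat constant expresses the same vector in a single token. A minimal standalone sketch of the two spellings, assuming the current textual IR parser; the function names @mask_old and @mask_new are illustrative only, not part of this patch:

; Old idiom: materialize an all-true mask with two instructions.
define <4 x i8> @mask_old(ptr %p, i32 zeroext %evl) {
  %head = insertelement <4 x i1> poison, i1 true, i32 0
  %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
  %r = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %p, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %r
}

; New form: the mask is the single constant splat (i1 true).
define <4 x i8> @mask_new(ptr %p, i32 zeroext %evl) {
  %r = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %p, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %r
}

declare <4 x i8> @llvm.vp.load.v4i8.p0(ptr, <4 x i1>, i32)

Both forms denote the same all-true mask, so the CHECK lines are untouched; only the input IR gets shorter. The same rewrite applies to non-boolean splats such as <2 x i8> splat (i8 2) in the vp.merge tests above.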
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
index 83e3422c44b95..cd9a38d5167d5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
@@ -127,9 +127,7 @@ define void @vpscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, i32 zeroext
 ; RV64-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
 ; RV64-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
+ call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
 ret void
 }
@@ -266,9 +264,7 @@ define void @vpscatter_truemask_v3i16(<3 x i16> %val, <3 x ptr> %ptrs, i32 zeroe
 ; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
 ; RV64-NEXT: ret
- %mhead = insertelement <3 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <3 x i1> %mhead, <3 x i1> poison, <3 x i32> zeroinitializer
- call void @llvm.vp.scatter.v3i16.v3p0(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> %mtrue, i32 %evl)
+ call void @llvm.vp.scatter.v3i16.v3p0(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> splat (i1 1), i32 %evl)
 ret void
 }
@@ -302,9 +298,7 @@ define void @vpscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, i32 zeroe
 ; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
 ; RV64-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- call void @llvm.vp.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
+ call void @llvm.vp.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
 ret void
 }
@@ -486,9 +480,7 @@ define void @vpscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, i32 zeroe
 ; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
 ; RV64-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
+ call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
 ret void
 }
@@ -722,9 +714,7 @@ define void @vpscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, i32 zeroe
 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
 ; RV64-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
+ call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
 ret void
 }
@@ -1026,9 +1016,7 @@ define void @vpscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs, i32 zero
 ; RV64-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
 ; RV64-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- call void @llvm.vp.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
+ call void @llvm.vp.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
 ret void
 }
@@ -1189,9 +1177,7 @@ define void @vpscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs, i32 zer
 ; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
 ; RV64-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- call void @llvm.vp.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
+ call void @llvm.vp.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
 ret void
 }
@@ -1425,9 +1411,7 @@ define void @vpscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs, i32 ze
 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (zero), v10
 ; RV64-NEXT: ret
- %mhead = insertelement <4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
- call void @llvm.vp.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %mtrue, i32 %evl)
+ call void @llvm.vp.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
 ret void
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll
index d7643bc304183..c0aa735614b21 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll
@@ -274,9 +274,7 @@ define void @vpstore_v2i8_allones_mask(<2 x i8> %val, ptr %ptr, i32 zeroext %evl
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vse8.v v8, (a0)
 ; CHECK-NEXT: ret
- %a = insertelement <2 x i1> poison, i1 true, i32 0
- %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
- call void @llvm.vp.store.v2i8.p0(<2 x i8> %val, ptr %ptr, <2 x i1> %b, i32 %evl)
+ call void @llvm.vp.store.v2i8.p0(<2 x i8> %val, ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
 ret void
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll
index 0663551989837..4bbbad5ed0e0e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll
@@ -39,9 +39,7 @@ define <2 x i8> @vrem_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.srem.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.srem.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }
@@ -65,9 +63,7 @@ define <2 x i8> @vrem_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.srem.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.srem.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }
@@ -89,9 +85,7 @@ define <4 x i8> @vrem_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.srem.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.srem.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }
@@ -115,9 +109,7 @@ define <4 x i8> @vrem_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.srem.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.srem.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }
@@ -151,9 +143,7 @@ define <8 x i8> @vrem_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.srem.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.srem.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }
@@ -177,9 +167,7 @@ define <8 x i8> @vrem_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.srem.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.srem.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }
@@ -201,9 +189,7 @@ define <16 x i8> @vrem_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.srem.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.srem.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }
@@ -227,9 +213,7 @@ define <16 x i8> @vrem_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl)
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.srem.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.srem.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }
@@ -251,9 +235,7 @@ define <2 x i16> @vrem_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.srem.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.srem.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }
@@ -277,9 +259,7 @@ define <2 x i16> @vrem_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.srem.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.srem.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }
@@ -301,9 +281,7 @@ define <4 x i16> @vrem_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.srem.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.srem.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }
@@ -327,9 +305,7 @@ define <4 x i16> @vrem_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.srem.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.srem.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }
@@ -351,9 +327,7 @@ define <8 x i16> @vrem_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.srem.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.srem.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }
@@ -377,9 +351,7 @@ define <8 x i16> @vrem_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.srem.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.srem.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }
@@ -401,9 +373,7 @@ define <16 x i16> @vrem_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.srem.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.srem.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }
@@ -427,9 +397,7 @@ define <16 x i16> @vrem_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.srem.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.srem.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }
@@ -451,9 +419,7 @@ define <2 x i32> @vrem_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.srem.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.srem.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }
@@ -477,9 +443,7 @@ define <2 x i32> @vrem_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.srem.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.srem.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }
@@ -501,9 +465,7 @@ define <4 x i32> @vrem_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }
@@ -527,9 +489,7 @@ define <4 x i32> @vrem_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }
@@ -551,9 +511,7 @@ define <8 x i32> @vrem_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }
@@ -577,9 +535,7 @@ define <8 x i32> @vrem_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.srem.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }
@@ -601,9 +557,7 @@ define <16 x i32> @vrem_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.srem.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.srem.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }
@@ -627,9 +581,7 @@ define <16 x i32> @vrem_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.srem.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.srem.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }
@@ -651,9 +603,7 @@ define <2 x i64> @vrem_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.srem.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.srem.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }
@@ -705,9 +655,7 @@ define <2 x i64> @vrem_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
 ; RV64-NEXT: ret
 %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.srem.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.srem.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }
@@ -729,9 +677,7 @@ define <4 x i64> @vrem_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.srem.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.srem.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }
@@ -783,9 +729,7 @@ define <4 x i64> @vrem_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
 ; RV64-NEXT: ret
 %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.srem.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.srem.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }
@@ -807,9 +751,7 @@ define <8 x i64> @vrem_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.srem.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.srem.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }
@@ -861,9 +803,7 @@ define <8 x i64> @vrem_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
 ; RV64-NEXT: ret
 %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.srem.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.srem.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }
@@ -885,9 +825,7 @@ define <16 x i64> @vrem_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vrem.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.srem.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.srem.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }
@@ -939,8 +877,6 @@ define <16 x i64> @vrem_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
 ; RV64-NEXT: ret
 %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.srem.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.srem.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll
index a329e73e5e86e..ee11307bddc88 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll
@@ -38,9 +38,7 @@ define <2 x i8> @vremu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.urem.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.urem.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }
@@ -64,9 +62,7 @@ define <2 x i8> @vremu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.urem.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.urem.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }
@@ -88,9 +84,7 @@ define <4 x i8> @vremu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.urem.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.urem.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }
@@ -114,9 +108,7 @@ define <4 x i8> @vremu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.urem.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.urem.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }
@@ -150,9 +142,7 @@ define <8 x i8> @vremu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.urem.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.urem.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }
@@ -176,9 +166,7 @@ define <8 x i8> @vremu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.urem.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.urem.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }
@@ -200,9 +188,7 @@ define <16 x i8> @vremu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.urem.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.urem.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }
@@ -226,9 +212,7 @@ define <16 x i8> @vremu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.urem.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.urem.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }
@@ -250,9 +234,7 @@ define <2 x i16> @vremu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.urem.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.urem.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }
@@ -276,9 +258,7 @@ define <2 x i16> @vremu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %ev
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.urem.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.urem.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }
@@ -300,9 +280,7 @@ define <4 x i16> @vremu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.urem.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.urem.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }
@@ -326,9 +304,7 @@ define <4 x i16> @vremu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %ev
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.urem.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.urem.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }
@@ -350,9 +326,7 @@ define <8 x i16> @vremu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.urem.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.urem.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }
@@ -376,9 +350,7 @@ define <8 x i16> @vremu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %ev
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.urem.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.urem.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }
@@ -400,9 +372,7 @@ define <16 x i16> @vremu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 z
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.urem.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.urem.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }
@@ -426,9 +396,7 @@ define <16 x i16> @vremu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.urem.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.urem.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }
@@ -450,9 +418,7 @@ define <2 x i32> @vremu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.urem.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.urem.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }
@@ -476,9 +442,7 @@ define <2 x i32> @vremu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %ev
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.urem.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.urem.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }
@@ -500,9 +464,7 @@ define <4 x i32> @vremu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.urem.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.urem.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }
@@ -526,9 +488,7 @@ define <4 x i32> @vremu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %ev
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.urem.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.urem.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }
@@ -550,9 +510,7 @@ define <8 x i32> @vremu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }
@@ -576,9 +534,7 @@ define <8 x i32> @vremu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %ev
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.urem.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }
@@ -600,9 +556,7 @@ define <16 x i32> @vremu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 z
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.urem.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.urem.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }
@@ -626,9 +580,7 @@ define <16 x i32> @vremu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.urem.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.urem.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }
@@ -650,9 +602,7 @@ define <2 x i64> @vremu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.urem.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.urem.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }
@@ -704,9 +654,7 @@ define <2 x i64> @vremu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev
 ; RV64-NEXT: ret
 %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.urem.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.urem.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }
@@ -728,9 +676,7 @@ define <4 x i64> @vremu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.urem.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.urem.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }
@@ -782,9 +728,7 @@ define <4 x i64> @vremu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev
 ; RV64-NEXT: ret
 %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.urem.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.urem.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }
@@ -806,9 +750,7 @@ define <8 x i64> @vremu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroe
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.urem.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.urem.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }
@@ -860,9 +802,7 @@ define <8 x i64> @vremu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev
 ; RV64-NEXT: ret
 %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.urem.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.urem.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }
@@ -884,9 +824,7 @@ define <16 x i64> @vremu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 z
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vremu.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.urem.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.urem.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }
@@ -938,8 +876,6 @@ define <16 x i64> @vremu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
 ; RV64-NEXT: ret
 %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.urem.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.urem.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }
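In the vror.ll tests below, the splat being replaced is not an instruction pair but an inline constant expression, shufflevector(insertelement(...)), which folds to the same splat literal. A minimal sketch of the new spelling, grounded in the CHECK-ZVKB lines below; the function name @rot1 is illustrative only, not from this patch:

declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>)

; A funnel shift right of a value with itself is a rotate right; a shift
; amount of splat (i8 1) rotates every lane by one bit.
define <4 x i8> @rot1(<4 x i8> %a) {
  %x = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %a, <4 x i8> %a, <4 x i8> splat (i8 1))
  ret <4 x i8> %x
}

Per the CHECK-ZVKB lines, this is expected to select to a single vror.vi. The fshl variants use a rotate amount of 7 for i8 (15 for i16) because rotating left by 1 equals rotating right by the bit width minus 1.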
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll
index c7c757efc1bad..367c56caf813d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll
@@ -66,7 +66,7 @@ define <1 x i8> @vror_vi_v1i8(<1 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <1 x i8> @llvm.fshr.v1i8(<1 x i8> %a, <1 x i8> %a, <1 x i8> shufflevector(<1 x i8> insertelement(<1 x i8> poison, i8 1, i32 0), <1 x i8> poison, <1 x i32> zeroinitializer))
+ %x = call <1 x i8> @llvm.fshr.v1i8(<1 x i8> %a, <1 x i8> %a, <1 x i8> splat (i8 1))
 ret <1 x i8> %x
 }
@@ -84,7 +84,7 @@ define <1 x i8> @vror_vi_rotl_v1i8(<1 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 7
 ; CHECK-ZVKB-NEXT: ret
- %x = call <1 x i8> @llvm.fshl.v1i8(<1 x i8> %a, <1 x i8> %a, <1 x i8> shufflevector(<1 x i8> insertelement(<1 x i8> poison, i8 1, i32 0), <1 x i8> poison, <1 x i32> zeroinitializer))
+ %x = call <1 x i8> @llvm.fshl.v1i8(<1 x i8> %a, <1 x i8> %a, <1 x i8> splat (i8 1))
 ret <1 x i8> %x
 }
@@ -150,7 +150,7 @@ define <2 x i8> @vror_vi_v2i8(<2 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %a, <2 x i8> %a, <2 x i8> shufflevector(<2 x i8> insertelement(<2 x i8> poison, i8 1, i32 0), <2 x i8> poison, <2 x i32> zeroinitializer))
+ %x = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %a, <2 x i8> %a, <2 x i8> splat (i8 1))
 ret <2 x i8> %x
 }
@@ -168,7 +168,7 @@ define <2 x i8> @vror_vi_rotl_v2i8(<2 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 7
 ; CHECK-ZVKB-NEXT: ret
- %x = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %a, <2 x i8> %a, <2 x i8> shufflevector(<2 x i8> insertelement(<2 x i8> poison, i8 1, i32 0), <2 x i8> poison, <2 x i32> zeroinitializer))
+ %x = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %a, <2 x i8> %a, <2 x i8> splat (i8 1))
 ret <2 x i8> %x
 }
@@ -234,7 +234,7 @@ define <4 x i8> @vror_vi_v4i8(<4 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %a, <4 x i8> %a, <4 x i8> shufflevector(<4 x i8> insertelement(<4 x i8> poison, i8 1, i32 0), <4 x i8> poison, <4 x i32> zeroinitializer))
+ %x = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %a, <4 x i8> %a, <4 x i8> splat (i8 1))
 ret <4 x i8> %x
 }
@@ -252,7 +252,7 @@ define <4 x i8> @vror_vi_rotl_v4i8(<4 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 7
 ; CHECK-ZVKB-NEXT: ret
- %x = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %a, <4 x i8> %a, <4 x i8> shufflevector(<4 x i8> insertelement(<4 x i8> poison, i8 1, i32 0), <4 x i8> poison, <4 x i32> zeroinitializer))
+ %x = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %a, <4 x i8> %a, <4 x i8> splat (i8 1))
 ret <4 x i8> %x
 }
@@ -318,7 +318,7 @@ define <8 x i8> @vror_vi_v8i8(<8 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <8 x i8> @llvm.fshr.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> shufflevector(<8 x i8> insertelement(<8 x i8> poison, i8 1, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer))
+ %x = call <8 x i8> @llvm.fshr.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> splat (i8 1))
 ret <8 x i8> %x
 }
@@ -336,7 +336,7 @@ define <8 x i8> @vror_vi_rotl_v8i8(<8 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 7
 ; CHECK-ZVKB-NEXT: ret
- %x = call <8 x i8> @llvm.fshl.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> shufflevector(<8 x i8> insertelement(<8 x i8> poison, i8 1, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer))
+ %x = call <8 x i8> @llvm.fshl.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> splat (i8 1))
 ret <8 x i8> %x
 }
@@ -402,7 +402,7 @@ define <16 x i8> @vror_vi_v16i8(<16 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> shufflevector(<16 x i8> insertelement(<16 x i8> poison, i8 1, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer))
+ %x = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> splat (i8 1))
 ret <16 x i8> %x
 }
@@ -420,7 +420,7 @@ define <16 x i8> @vror_vi_rotl_v16i8(<16 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 7
 ; CHECK-ZVKB-NEXT: ret
- %x = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> shufflevector(<16 x i8> insertelement(<16 x i8> poison, i8 1, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer))
+ %x = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> splat (i8 1))
 ret <16 x i8> %x
 }
@@ -492,7 +492,7 @@ define <32 x i8> @vror_vi_v32i8(<32 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> shufflevector(<32 x i8> insertelement(<32 x i8> poison, i8 1, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer))
+ %x = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> splat (i8 1))
 ret <32 x i8> %x
 }
@@ -512,7 +512,7 @@ define <32 x i8> @vror_vi_rotl_v32i8(<32 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 7
 ; CHECK-ZVKB-NEXT: ret
- %x = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> shufflevector(<32 x i8> insertelement(<32 x i8> poison, i8 1, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer))
+ %x = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> splat (i8 1))
 ret <32 x i8> %x
 }
@@ -584,7 +584,7 @@ define <64 x i8> @vror_vi_v64i8(<64 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e8, m4, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> shufflevector(<64 x i8> insertelement(<64 x i8> poison, i8 1, i32 0), <64 x i8> poison, <64 x i32> zeroinitializer))
+ %x = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> splat (i8 1))
 ret <64 x i8> %x
 }
@@ -604,7 +604,7 @@ define <64 x i8> @vror_vi_rotl_v64i8(<64 x i8> %a) {
 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e8, m4, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 7
 ; CHECK-ZVKB-NEXT: ret
- %x = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> shufflevector(<64 x i8> insertelement(<64 x i8> poison, i8 1, i32 0), <64 x i8> poison, <64 x i32> zeroinitializer))
+ %x = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> splat (i8 1))
 ret <64 x i8> %x
 }
@@ -670,7 +670,7 @@ define <1 x i16> @vror_vi_v1i16(<1 x i16> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <1 x i16> @llvm.fshr.v1i16(<1 x i16> %a, <1 x i16> %a, <1 x i16> shufflevector(<1 x i16> insertelement(<1 x i16> poison, i16 1, i32 0), <1 x i16> poison, <1 x i32> zeroinitializer))
+ %x = call <1 x i16> @llvm.fshr.v1i16(<1 x i16> %a, <1 x i16> %a, <1 x i16> splat (i16 1))
 ret <1 x i16> %x
 }
@@ -688,7 +688,7 @@ define <1 x i16> @vror_vi_rotl_v1i16(<1 x i16> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 15
 ; CHECK-ZVKB-NEXT: ret
- %x = call <1 x i16> @llvm.fshl.v1i16(<1 x i16> %a, <1 x i16> %a, <1 x i16> shufflevector(<1 x i16> insertelement(<1 x i16> poison, i16 1, i32 0), <1 x i16> poison, <1 x i32> zeroinitializer))
+ %x = call <1 x i16> @llvm.fshl.v1i16(<1 x i16> %a, <1 x i16> %a, <1 x i16> splat (i16 1))
 ret <1 x i16> %x
 }
@@ -754,7 +754,7 @@ define <2 x i16> @vror_vi_v2i16(<2 x i16> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %a, <2 x i16> %a, <2 x i16> shufflevector(<2 x i16> insertelement(<2 x i16> poison, i16 1, i32 0), <2 x i16> poison, <2 x i32> zeroinitializer))
+ %x = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %a, <2 x i16> %a, <2 x i16> splat (i16 1))
 ret <2 x i16> %x
 }
@@ -772,7 +772,7 @@ define <2 x i16> @vror_vi_rotl_v2i16(<2 x i16> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 15
 ; CHECK-ZVKB-NEXT: ret
- %x = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %a, <2 x i16> %a, <2 x i16> shufflevector(<2 x i16> insertelement(<2 x i16> poison, i16 1, i32 0), <2 x i16> poison, <2 x i32> zeroinitializer))
+ %x = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %a, <2 x i16> %a, <2 x i16> splat (i16 1))
 ret <2 x i16> %x
 }
@@ -838,7 +838,7 @@ define <4 x i16> @vror_vi_v4i16(<4 x i16> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %a, <4 x i16> %a, <4 x i16> shufflevector(<4 x i16> insertelement(<4 x i16> poison, i16 1, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer))
+ %x = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %a, <4 x i16> %a, <4 x i16> splat (i16 1))
 ret <4 x i16> %x
 }
@@ -856,7 +856,7 @@ define <4 x i16> @vror_vi_rotl_v4i16(<4 x i16> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 15
 ; CHECK-ZVKB-NEXT: ret
- %x = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %a, <4 x i16> %a, <4 x i16> shufflevector(<4 x i16> insertelement(<4 x i16> poison, i16 1, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer))
+ %x = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %a, <4 x i16> %a, <4 x i16> splat (i16 1))
 ret <4 x i16> %x
 }
@@ -922,7 +922,7 @@ define <8 x i16> @vror_vi_v8i16(<8 x i16> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> shufflevector(<8 x i16> insertelement(<8 x i16> poison, i16 1, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer))
+ %x = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> splat (i16 1))
 ret <8 x i16> %x
 }
@@ -940,7 +940,7 @@ define <8 x i16> @vror_vi_rotl_v8i16(<8 x i16> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 15
 ; CHECK-ZVKB-NEXT: ret
- %x = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> shufflevector(<8 x i16> insertelement(<8 x i16> poison, i16 1, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer))
+ %x = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %a, <8 x i16> splat (i16 1))
 ret <8 x i16> %x
 }
@@ -1006,7 +1006,7 @@ define <16 x i16> @vror_vi_v16i16(<16 x i16> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a, <16 x i16> %a, <16 x i16> shufflevector(<16 x i16> insertelement(<16 x i16> poison, i16 1, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer))
+ %x = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a, <16 x i16> %a, <16 x i16> splat (i16 1))
 ret <16 x i16> %x
 }
@@ -1024,7 +1024,7 @@ define <16 x i16> @vror_vi_rotl_v16i16(<16 x i16> %a) {
 ; CHECK-ZVKB-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 15
 ; CHECK-ZVKB-NEXT: ret
- %x = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a, <16 x i16> %a, <16 x i16> shufflevector(<16 x i16> insertelement(<16 x i16> poison, i16 1, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer))
+ %x = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a, <16 x i16> %a, <16 x i16> splat (i16 1))
 ret <16 x i16> %x
 }
@@ -1096,7 +1096,7 @@ define <32 x i16> @vror_vi_v32i16(<32 x i16> %a) {
 ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1
 ; CHECK-ZVKB-NEXT: ret
- %x = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a, <32 x i16> %a, <32 x i16> shufflevector(<32 x i16> insertelement(<32 x i16> poison, i16 1, i32 0), <32 x i16> poison, <32 x i32> zeroinitializer))
+ %x = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a, <32 x i16> %a, <32 x i16> splat (i16
1)) ret <32 x i16> %x } @@ -1116,7 +1116,7 @@ define <32 x i16> @vror_vi_rotl_v32i16(<32 x i16> %a) { ; CHECK-ZVKB-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 15 ; CHECK-ZVKB-NEXT: ret - %x = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a, <32 x i16> %a, <32 x i16> shufflevector(<32 x i16> insertelement(<32 x i16> poison, i16 1, i32 0), <32 x i16> poison, <32 x i32> zeroinitializer)) + %x = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a, <32 x i16> %a, <32 x i16> splat (i16 1)) ret <32 x i16> %x } @@ -1184,7 +1184,7 @@ define <1 x i32> @vror_vi_v1i32(<1 x i32> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1 ; CHECK-ZVKB-NEXT: ret - %x = call <1 x i32> @llvm.fshr.v1i32(<1 x i32> %a, <1 x i32> %a, <1 x i32> shufflevector(<1 x i32> insertelement(<1 x i32> poison, i32 1, i32 0), <1 x i32> poison, <1 x i32> zeroinitializer)) + %x = call <1 x i32> @llvm.fshr.v1i32(<1 x i32> %a, <1 x i32> %a, <1 x i32> splat (i32 1)) ret <1 x i32> %x } @@ -1202,7 +1202,7 @@ define <1 x i32> @vror_vi_rotl_v1i32(<1 x i32> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 31 ; CHECK-ZVKB-NEXT: ret - %x = call <1 x i32> @llvm.fshl.v1i32(<1 x i32> %a, <1 x i32> %a, <1 x i32> shufflevector(<1 x i32> insertelement(<1 x i32> poison, i32 1, i32 0), <1 x i32> poison, <1 x i32> zeroinitializer)) + %x = call <1 x i32> @llvm.fshl.v1i32(<1 x i32> %a, <1 x i32> %a, <1 x i32> splat (i32 1)) ret <1 x i32> %x } @@ -1270,7 +1270,7 @@ define <2 x i32> @vror_vi_v2i32(<2 x i32> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1 ; CHECK-ZVKB-NEXT: ret - %x = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %a, <2 x i32> %a, <2 x i32> shufflevector(<2 x i32> insertelement(<2 x i32> poison, i32 1, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer)) + %x = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %a, <2 x i32> %a, <2 x i32> splat (i32 1)) ret <2 x i32> %x } @@ -1288,7 +1288,7 @@ define <2 x i32> @vror_vi_rotl_v2i32(<2 x i32> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 31 ; CHECK-ZVKB-NEXT: ret - %x = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %a, <2 x i32> %a, <2 x i32> shufflevector(<2 x i32> insertelement(<2 x i32> poison, i32 1, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer)) + %x = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %a, <2 x i32> %a, <2 x i32> splat (i32 1)) ret <2 x i32> %x } @@ -1356,7 +1356,7 @@ define <4 x i32> @vror_vi_v4i32(<4 x i32> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1 ; CHECK-ZVKB-NEXT: ret - %x = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> shufflevector(<4 x i32> insertelement(<4 x i32> poison, i32 1, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer)) + %x = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> splat (i32 1)) ret <4 x i32> %x } @@ -1374,7 +1374,7 @@ define <4 x i32> @vror_vi_rotl_v4i32(<4 x i32> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 31 ; CHECK-ZVKB-NEXT: ret - %x = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> shufflevector(<4 x i32> insertelement(<4 x i32> poison, i32 1, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer)) + %x = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> splat (i32 1)) ret <4 x i32> %x } @@ -1442,7 +1442,7 @@ define <8 x i32> @vror_vi_v8i32(<8 x i32> %a) 
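; [Editorial aside -- not part of the upstream diff; a hedged sketch.]
; The change in these rotate tests is purely syntactic: the splat idiom spelled as
; shufflevector(insertelement(poison, C, 0), poison, zeroinitializer) is rewritten
; with the `splat` constant shorthand. Note that a left-rotate (fshl with equal
; operands) by a splat of 1 equals a right-rotate by a splat of BitWidth-1, which
; is why the *_rotl variants still check `vror.vi` with 15/31/63. Both spellings
; below denote the same shift-amount operand (hypothetical value %a):
;   %old = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> shufflevector(<4 x i32> insertelement(<4 x i32> poison, i32 1, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer))
;   %new = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %a, <4 x i32> splat (i32 1))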
{ ; CHECK-ZVKB-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1 ; CHECK-ZVKB-NEXT: ret - %x = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a, <8 x i32> %a, <8 x i32> shufflevector(<8 x i32> insertelement(<8 x i32> poison, i32 1, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer)) + %x = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a, <8 x i32> %a, <8 x i32> splat (i32 1)) ret <8 x i32> %x } @@ -1460,7 +1460,7 @@ define <8 x i32> @vror_vi_rotl_v8i32(<8 x i32> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 31 ; CHECK-ZVKB-NEXT: ret - %x = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a, <8 x i32> %a, <8 x i32> shufflevector(<8 x i32> insertelement(<8 x i32> poison, i32 1, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer)) + %x = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a, <8 x i32> %a, <8 x i32> splat (i32 1)) ret <8 x i32> %x } @@ -1528,7 +1528,7 @@ define <16 x i32> @vror_vi_v16i32(<16 x i32> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1 ; CHECK-ZVKB-NEXT: ret - %x = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a, <16 x i32> %a, <16 x i32> shufflevector(<16 x i32> insertelement(<16 x i32> poison, i32 1, i32 0), <16 x i32> poison, <16 x i32> zeroinitializer)) + %x = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a, <16 x i32> %a, <16 x i32> splat (i32 1)) ret <16 x i32> %x } @@ -1546,7 +1546,7 @@ define <16 x i32> @vror_vi_rotl_v16i32(<16 x i32> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 31 ; CHECK-ZVKB-NEXT: ret - %x = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a, <16 x i32> %a, <16 x i32> shufflevector(<16 x i32> insertelement(<16 x i32> poison, i32 1, i32 0), <16 x i32> poison, <16 x i32> zeroinitializer)) + %x = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a, <16 x i32> %a, <16 x i32> splat (i32 1)) ret <16 x i32> %x } @@ -1629,7 +1629,7 @@ define <1 x i64> @vror_vi_v1i64(<1 x i64> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1 ; CHECK-ZVKB-NEXT: ret - %x = call <1 x i64> @llvm.fshr.v1i64(<1 x i64> %a, <1 x i64> %a, <1 x i64> shufflevector(<1 x i64> insertelement(<1 x i64> poison, i64 1, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)) + %x = call <1 x i64> @llvm.fshr.v1i64(<1 x i64> %a, <1 x i64> %a, <1 x i64> splat (i64 1)) ret <1 x i64> %x } @@ -1662,7 +1662,7 @@ define <1 x i64> @vror_vi_rotl_v1i64(<1 x i64> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 63 ; CHECK-ZVKB-NEXT: ret - %x = call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %a, <1 x i64> %a, <1 x i64> shufflevector(<1 x i64> insertelement(<1 x i64> poison, i64 1, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)) + %x = call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %a, <1 x i64> %a, <1 x i64> splat (i64 1)) ret <1 x i64> %x } @@ -1745,7 +1745,7 @@ define <2 x i64> @vror_vi_v2i64(<2 x i64> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1 ; CHECK-ZVKB-NEXT: ret - %x = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> shufflevector(<2 x i64> insertelement(<2 x i64> poison, i64 1, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer)) + %x = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> splat (i64 1)) ret <2 x i64> %x } @@ -1778,7 +1778,7 @@ define <2 x i64> @vror_vi_rotl_v2i64(<2 x i64> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-ZVKB-NEXT: 
vror.vi v8, v8, 63 ; CHECK-ZVKB-NEXT: ret - %x = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> shufflevector(<2 x i64> insertelement(<2 x i64> poison, i64 1, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer)) + %x = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %a, <2 x i64> splat (i64 1)) ret <2 x i64> %x } @@ -1861,7 +1861,7 @@ define <4 x i64> @vror_vi_v4i64(<4 x i64> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1 ; CHECK-ZVKB-NEXT: ret - %x = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a, <4 x i64> %a, <4 x i64> shufflevector(<4 x i64> insertelement(<4 x i64> poison, i64 1, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer)) + %x = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a, <4 x i64> %a, <4 x i64> splat (i64 1)) ret <4 x i64> %x } @@ -1894,7 +1894,7 @@ define <4 x i64> @vror_vi_rotl_v4i64(<4 x i64> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 63 ; CHECK-ZVKB-NEXT: ret - %x = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a, <4 x i64> %a, <4 x i64> shufflevector(<4 x i64> insertelement(<4 x i64> poison, i64 1, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer)) + %x = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a, <4 x i64> %a, <4 x i64> splat (i64 1)) ret <4 x i64> %x } @@ -1977,7 +1977,7 @@ define <8 x i64> @vror_vi_v8i64(<8 x i64> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 1 ; CHECK-ZVKB-NEXT: ret - %x = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a, <8 x i64> %a, <8 x i64> shufflevector(<8 x i64> insertelement(<8 x i64> poison, i64 1, i32 0), <8 x i64> poison, <8 x i32> zeroinitializer)) + %x = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a, <8 x i64> %a, <8 x i64> splat (i64 1)) ret <8 x i64> %x } @@ -2010,6 +2010,6 @@ define <8 x i64> @vror_vi_rotl_v8i64(<8 x i64> %a) { ; CHECK-ZVKB-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-ZVKB-NEXT: vror.vi v8, v8, 63 ; CHECK-ZVKB-NEXT: ret - %x = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a, <8 x i64> %a, <8 x i64> shufflevector(<8 x i64> insertelement(<8 x i64> poison, i64 1, i32 0), <8 x i64> poison, <8 x i32> zeroinitializer)) + %x = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a, <8 x i64> %a, <8 x i64> splat (i64 1)) ret <8 x i64> %x } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll index fe433b80a87fb..563482b88e8bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll @@ -26,9 +26,7 @@ define <2 x i8> @vrsub_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -38,9 +36,7 @@ define <2 x i8> @vrsub_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 2, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %v = call 
<2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> splat (i8 2), <2 x i8> %va, <2 x i1> %m, i32 %evl) ret <2 x i8> %v } @@ -50,11 +46,7 @@ define <2 x i8> @vrsub_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 2, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> splat (i8 2), <2 x i8> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -80,9 +72,7 @@ define <4 x i8> @vrsub_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -92,9 +82,7 @@ define <4 x i8> @vrsub_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 2, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> splat (i8 2), <4 x i8> %va, <4 x i1> %m, i32 %evl) ret <4 x i8> %v } @@ -104,11 +92,7 @@ define <4 x i8> @vrsub_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 2, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> splat (i8 2), <4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -134,9 +118,7 @@ define <8 x i8> @vrsub_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %vb, <8 x i8> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %vb, <8 x i8> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -146,9 +128,7 @@ define <8 x i8> @vrsub_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = 
insertelement <8 x i8> poison, i8 2, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %vb, <8 x i8> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> splat (i8 2), <8 x i8> %va, <8 x i1> %m, i32 %evl) ret <8 x i8> %v } @@ -158,11 +138,7 @@ define <8 x i8> @vrsub_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 2, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %vb, <8 x i8> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> splat (i8 2), <8 x i8> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -188,9 +164,7 @@ define <16 x i8> @vrsub_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %vb, <16 x i8> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %vb, <16 x i8> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -200,9 +174,7 @@ define <16 x i8> @vrsub_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 2, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %vb, <16 x i8> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> splat (i8 2), <16 x i8> %va, <16 x i1> %m, i32 %evl) ret <16 x i8> %v } @@ -212,11 +184,7 @@ define <16 x i8> @vrsub_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 2, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %vb, <16 x i8> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> splat (i8 2), <16 x i8> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -242,9 +210,7 @@ define <2 x i16> @vrsub_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %vb, <2 x i16> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %vb, <2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ 
-254,9 +220,7 @@ define <2 x i16> @vrsub_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 2, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %vb, <2 x i16> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> splat (i16 2), <2 x i16> %va, <2 x i1> %m, i32 %evl) ret <2 x i16> %v } @@ -266,11 +230,7 @@ define <2 x i16> @vrsub_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 2, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %vb, <2 x i16> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> splat (i16 2), <2 x i16> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -296,9 +256,7 @@ define <4 x i16> @vrsub_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %vb, <4 x i16> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %vb, <4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -308,9 +266,7 @@ define <4 x i16> @vrsub_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 2, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %vb, <4 x i16> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> splat (i16 2), <4 x i16> %va, <4 x i1> %m, i32 %evl) ret <4 x i16> %v } @@ -320,11 +276,7 @@ define <4 x i16> @vrsub_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 2, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %vb, <4 x i16> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> splat (i16 2), <4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -350,9 +302,7 @@ define <8 x i16> @vrsub_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x 
i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -362,9 +312,7 @@ define <8 x i16> @vrsub_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 2, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> splat (i16 2), <8 x i16> %va, <8 x i1> %m, i32 %evl) ret <8 x i16> %v } @@ -374,11 +322,7 @@ define <8 x i16> @vrsub_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 2, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> splat (i16 2), <8 x i16> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -404,9 +348,7 @@ define <16 x i16> @vrsub_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %vb, <16 x i16> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %vb, <16 x i16> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -416,9 +358,7 @@ define <16 x i16> @vrsub_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 2, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %vb, <16 x i16> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> splat (i16 2), <16 x i16> %va, <16 x i1> %m, i32 %evl) ret <16 x i16> %v } @@ -428,11 +368,7 @@ define <16 x i16> @vrsub_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 2, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %vb, <16 x i16> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> splat (i16 2), <16 x i16> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -458,9 +394,7 @@ define <2 x i32> @vrsub_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, 
i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %vb, <2 x i32> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %vb, <2 x i32> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -470,9 +404,7 @@ define <2 x i32> @vrsub_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 2, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %vb, <2 x i32> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> splat (i32 2), <2 x i32> %va, <2 x i1> %m, i32 %evl) ret <2 x i32> %v } @@ -482,11 +414,7 @@ define <2 x i32> @vrsub_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 2, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %vb, <2 x i32> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> splat (i32 2), <2 x i32> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -512,9 +440,7 @@ define <4 x i32> @vrsub_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vb, <4 x i32> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vb, <4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -524,9 +450,7 @@ define <4 x i32> @vrsub_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 2, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vb, <4 x i32> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> splat (i32 2), <4 x i32> %va, <4 x i1> %m, i32 %evl) ret <4 x i32> %v } @@ -536,11 +460,7 @@ define <4 x i32> @vrsub_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 2, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vb, <4 x i32> %va, <4 x 
i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> splat (i32 2), <4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -566,9 +486,7 @@ define <8 x i32> @vrsub_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %vb, <8 x i32> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %vb, <8 x i32> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -578,9 +496,7 @@ define <8 x i32> @vrsub_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 2, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %vb, <8 x i32> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> splat (i32 2), <8 x i32> %va, <8 x i1> %m, i32 %evl) ret <8 x i32> %v } @@ -590,11 +506,7 @@ define <8 x i32> @vrsub_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 2, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %vb, <8 x i32> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> splat (i32 2), <8 x i32> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -620,9 +532,7 @@ define <16 x i32> @vrsub_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %vb, <16 x i32> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %vb, <16 x i32> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -632,9 +542,7 @@ define <16 x i32> @vrsub_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 2, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %vb, <16 x i32> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> splat (i32 2), <16 x i32> %va, <16 x i1> %m, i32 %evl) ret <16 x i32> %v } @@ -644,11 +552,7 @@ define <16 x i32> @vrsub_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 2, i32 0 - %vb = shufflevector 
<16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %vb, <16 x i32> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> splat (i32 2), <16 x i32> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -702,9 +606,7 @@ define <2 x i64> @vrsub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %vb, <2 x i64> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %vb, <2 x i64> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -714,9 +616,7 @@ define <2 x i64> @vrsub_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 2, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %vb, <2 x i64> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> splat (i64 2), <2 x i64> %va, <2 x i1> %m, i32 %evl) ret <2 x i64> %v } @@ -726,11 +626,7 @@ define <2 x i64> @vrsub_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 2, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %vb, <2 x i64> %va, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> splat (i64 2), <2 x i64> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -784,9 +680,7 @@ define <4 x i64> @vrsub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %vb, <4 x i64> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %vb, <4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -796,9 +690,7 @@ define <4 x i64> @vrsub_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 2, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %vb, <4 x i64> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> splat (i64 2), <4 x i64> %va, <4 x i1> %m, i32 %evl) ret <4 x i64> %v } @@ -808,11 +700,7 @@ 
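; [Editorial aside -- not part of the upstream diff; a hedged sketch.]
; Two splat idioms collapse in these vp.sub tests: the all-true mask of the
; "unmasked" variants and the constant minuend of the vrsub.vi variants.
; vrsub is a reversed subtraction, so the immediate appears as the *first*
; vp.sub operand (hypothetical %va, %evl):
;   %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> splat (i32 2), <4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl)
; which, as the CHECK lines above show, still selects to
; `vsetvli zero, a0, e32, m1, ta, ma` followed by `vrsub.vi v8, v8, 2`.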
define <4 x i64> @vrsub_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 2, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %vb, <4 x i64> %va, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> splat (i64 2), <4 x i64> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -866,9 +754,7 @@ define <8 x i64> @vrsub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %vb, <8 x i64> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %vb, <8 x i64> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -878,9 +764,7 @@ define <8 x i64> @vrsub_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 2, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %vb, <8 x i64> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> splat (i64 2), <8 x i64> %va, <8 x i1> %m, i32 %evl) ret <8 x i64> %v } @@ -890,11 +774,7 @@ define <8 x i64> @vrsub_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 2, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %vb, <8 x i64> %va, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> splat (i64 2), <8 x i64> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -948,9 +828,7 @@ define <16 x i64> @vrsub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %vb, <16 x i64> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %vb, <16 x i64> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -960,9 +838,7 @@ define <16 x i64> @vrsub_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 2, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x 
i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %vb, <16 x i64> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> splat (i64 2), <16 x i64> %va, <16 x i1> %m, i32 %evl) ret <16 x i64> %v } @@ -972,10 +848,6 @@ define <16 x i64> @vrsub_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 2, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %vb, <16 x i64> %va, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> splat (i64 2), <16 x i64> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll index d7ed20f4e0986..291629de6dcfa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll @@ -43,9 +43,7 @@ define <2 x i8> @vsadd_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -69,9 +67,7 @@ define <2 x i8> @vsadd_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -81,9 +77,7 @@ define <2 x i8> @vsadd_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 -1), <2 x i1> %m, i32 %evl) ret <2 x i8> %v } @@ -93,11 +87,7 @@ define <2 x i8> @vsadd_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, 
i32 %evl) + %v = call <2 x i8> @llvm.vp.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 -1), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -119,9 +109,7 @@ define <4 x i8> @vsadd_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -157,9 +145,7 @@ define <4 x i8> @vsadd_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -169,9 +155,7 @@ define <4 x i8> @vsadd_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> splat (i8 -1), <4 x i1> %m, i32 %evl) ret <4 x i8> %v } @@ -181,11 +165,7 @@ define <4 x i8> @vsadd_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> splat (i8 -1), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -207,9 +187,7 @@ define <5 x i8> @vsadd_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -233,9 +211,7 @@ define <5 x i8> @vsadd_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x 
i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -245,9 +221,7 @@ define <5 x i8> @vsadd_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> splat (i8 -1), <5 x i1> %m, i32 %evl) ret <5 x i8> %v } @@ -257,11 +231,7 @@ define <5 x i8> @vsadd_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.sadd.sat.v5i8(<5 x i8> %va, <5 x i8> splat (i8 -1), <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -283,9 +253,7 @@ define <8 x i8> @vsadd_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -309,9 +277,7 @@ define <8 x i8> @vsadd_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -321,9 +287,7 @@ define <8 x i8> @vsadd_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> %m, i32 %evl) ret <8 x i8> %v } @@ -333,11 +297,7 @@ define <8 x i8> @vsadd_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - 
%head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -359,9 +319,7 @@ define <16 x i8> @vsadd_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -385,9 +343,7 @@ define <16 x i8> @vsadd_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -397,9 +353,7 @@ define <16 x i8> @vsadd_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> splat (i8 -1), <16 x i1> %m, i32 %evl) ret <16 x i8> %v } @@ -409,11 +363,7 @@ define <16 x i8> @vsadd_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> splat (i8 -1), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -440,9 +390,7 @@ define <256 x i8> @vsadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) ret <256 x i8> %v } @@ -464,11 +412,7 @@ define <256 x i8> @vsadd_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) 
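; [Editorial aside -- not part of the upstream diff; a hedged sketch.]
; The saturating-add tests follow the same pattern: the -1 addend and the
; all-true mask become splat constants (hypothetical %va, %evl):
;   %v = call <8 x i8> @llvm.vp.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> splat (i1 true), i32 %evl)
; which still selects to `vsadd.vi v8, v8, -1`.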
{ ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vsadd.vi v16, v16, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %head = insertelement <256 x i1> poison, i1 true, i32 0 - %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> splat (i1 true), i32 %evl) ret <256 x i8> %v } @@ -485,9 +429,7 @@ define <256 x i8> @vsadd_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129) + %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) ret <256 x i8> %v } @@ -504,9 +446,7 @@ define <256 x i8> @vsadd_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsadd.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128) + %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) ret <256 x i8> %v } @@ -528,9 +468,7 @@ define <2 x i16> @vsadd_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -554,9 +492,7 @@ define <2 x i16> @vsadd_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -566,9 +502,7 @@ define <2 x i16> @vsadd_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> splat (i16 -1), <2 x i1> %m, i32 
%evl) ret <2 x i16> %v } @@ -578,11 +512,7 @@ define <2 x i16> @vsadd_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> splat (i16 -1), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -604,9 +534,7 @@ define <4 x i16> @vsadd_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -630,9 +558,7 @@ define <4 x i16> @vsadd_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -642,9 +568,7 @@ define <4 x i16> @vsadd_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> splat (i16 -1), <4 x i1> %m, i32 %evl) ret <4 x i16> %v } @@ -654,11 +578,7 @@ define <4 x i16> @vsadd_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> splat (i16 -1), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -680,9 +600,7 @@ define <8 x i16> @vsadd_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> 
poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -706,9 +624,7 @@ define <8 x i16> @vsadd_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -718,9 +634,7 @@ define <8 x i16> @vsadd_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> splat (i16 -1), <8 x i1> %m, i32 %evl) ret <8 x i16> %v } @@ -730,11 +644,7 @@ define <8 x i16> @vsadd_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> splat (i16 -1), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -756,9 +666,7 @@ define <16 x i16> @vsadd_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 z ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -782,9 +690,7 @@ define <16 x i16> @vsadd_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -794,9 +700,7 @@ define <16 x i16> @vsadd_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, 
e16, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> splat (i16 -1), <16 x i1> %m, i32 %evl) ret <16 x i16> %v } @@ -806,11 +710,7 @@ define <16 x i16> @vsadd_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> splat (i16 -1), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -832,9 +732,7 @@ define <2 x i32> @vsadd_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -858,9 +756,7 @@ define <2 x i32> @vsadd_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -870,9 +766,7 @@ define <2 x i32> @vsadd_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> splat (i32 -1), <2 x i1> %m, i32 %evl) ret <2 x i32> %v } @@ -882,11 +776,7 @@ define <2 x i32> @vsadd_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> 
%m, i32 %evl) + %v = call <2 x i32> @llvm.vp.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> splat (i32 -1), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -908,9 +798,7 @@ define <4 x i32> @vsadd_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -934,9 +822,7 @@ define <4 x i32> @vsadd_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -946,9 +832,7 @@ define <4 x i32> @vsadd_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> splat (i32 -1), <4 x i1> %m, i32 %evl) ret <4 x i32> %v } @@ -958,11 +842,7 @@ define <4 x i32> @vsadd_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> splat (i32 -1), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -984,9 +864,7 @@ define <8 x i32> @vsadd_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -1010,9 +888,7 @@ define <8 x i32> @vsadd_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = 
shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -1022,9 +898,7 @@ define <8 x i32> @vsadd_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> splat (i32 -1), <8 x i1> %m, i32 %evl) ret <8 x i32> %v } @@ -1034,11 +908,7 @@ define <8 x i32> @vsadd_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> splat (i32 -1), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -1060,9 +930,7 @@ define <16 x i32> @vsadd_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 z ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -1086,9 +954,7 @@ define <16 x i32> @vsadd_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -1098,9 +964,7 @@ define <16 x i32> @vsadd_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> splat (i32 -1), <16 x i1> %m, i32 %evl) ret <16 x i32> %v } @@ -1110,11 +974,7 @@ define <16 x i32> @vsadd_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: 
vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> splat (i32 -1), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -1136,9 +996,7 @@ define <2 x i64> @vsadd_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1190,9 +1048,7 @@ define <2 x i64> @vsadd_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1202,9 +1058,7 @@ define <2 x i64> @vsadd_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> splat (i64 -1), <2 x i1> %m, i32 %evl) ret <2 x i64> %v } @@ -1214,11 +1068,7 @@ define <2 x i64> @vsadd_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> splat (i64 -1), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1240,9 +1090,7 @@ define <4 x i64> @vsadd_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x 
i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1294,9 +1142,7 @@ define <4 x i64> @vsadd_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1306,9 +1152,7 @@ define <4 x i64> @vsadd_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> splat (i64 -1), <4 x i1> %m, i32 %evl) ret <4 x i64> %v } @@ -1318,11 +1162,7 @@ define <4 x i64> @vsadd_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> splat (i64 -1), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1344,9 +1184,7 @@ define <8 x i64> @vsadd_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1398,9 +1236,7 @@ define <8 x i64> @vsadd_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1410,9 +1246,7 @@ define <8 x i64> @vsadd_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 - 
%vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> splat (i64 -1), <8 x i1> %m, i32 %evl) ret <8 x i64> %v } @@ -1422,11 +1256,7 @@ define <8 x i64> @vsadd_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> splat (i64 -1), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1448,9 +1278,7 @@ define <16 x i64> @vsadd_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1502,9 +1330,7 @@ define <16 x i64> @vsadd_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1514,9 +1340,7 @@ define <16 x i64> @vsadd_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> splat (i64 -1), <16 x i1> %m, i32 %evl) ret <16 x i64> %v } @@ -1526,11 +1350,7 @@ define <16 x i64> @vsadd_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> splat (i64 -1), 
<16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1542,16 +1362,16 @@ define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-LABEL: vsadd_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB108_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1583,24 +1403,22 @@ define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vsadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vsadd_vi_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB109_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsadd.vv v8, v8, v24 ; RV32-NEXT: addi a1, a0, -16 @@ -1628,11 +1446,7 @@ define <32 x i64> @vsadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsadd.vi v16, v16, -1 ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } @@ -1663,9 +1477,7 @@ define <32 x i64> @vsadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12) + %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) ret <32 x i64> %v } @@ -1694,8 +1506,6 @@ define <32 x i64> @vsadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vsadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = 
shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27) + %v = call <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll index 8e655a7faf3e8..741699289e027 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll @@ -34,9 +34,7 @@ define <2 x i8> @sadd_v2i8_vi(<2 x i8> %va) { ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 5, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb) + %v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 5)) ret <2 x i8> %v } @@ -70,9 +68,7 @@ define <4 x i8> @sadd_v4i8_vi(<4 x i8> %va) { ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 5, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb) + %v = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %va, <4 x i8> splat (i8 5)) ret <4 x i8> %v } @@ -106,9 +102,7 @@ define <8 x i8> @sadd_v8i8_vi(<8 x i8> %va) { ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 5, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb) + %v = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 5)) ret <8 x i8> %v } @@ -142,9 +136,7 @@ define <16 x i8> @sadd_v16i8_vi(<16 x i8> %va) { ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 5, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb) + %v = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %va, <16 x i8> splat (i8 5)) ret <16 x i8> %v } @@ -178,9 +170,7 @@ define <2 x i16> @sadd_v2i16_vi(<2 x i16> %va) { ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 5, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb) + %v = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %va, <2 x i16> splat (i16 5)) ret <2 x i16> %v } @@ -214,9 +204,7 @@ define <4 x i16> @sadd_v4i16_vi(<4 x i16> %va) { ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 5, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb) + %v = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %va, <4 x i16> splat (i16 5)) ret <4 x i16> %v } @@ -250,9 +238,7 @@ define <8 x i16> @sadd_v8i16_vi(<8 x i16> %va) { ; CHECK-NEXT: 
vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 5, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb) + %v = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %va, <8 x i16> splat (i16 5)) ret <8 x i16> %v } @@ -286,9 +272,7 @@ define <16 x i16> @sadd_v16i16_vi(<16 x i16> %va) { ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 5, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb) + %v = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %va, <16 x i16> splat (i16 5)) ret <16 x i16> %v } @@ -334,9 +318,7 @@ define <2 x i32> @sadd_v2i32_vi(<2 x i32> %va) { ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 5, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb) + %v = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %va, <2 x i32> splat (i32 5)) ret <2 x i32> %v } @@ -370,9 +352,7 @@ define <4 x i32> @sadd_v4i32_vi(<4 x i32> %va) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 5, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb) + %v = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %va, <4 x i32> splat (i32 5)) ret <4 x i32> %v } @@ -406,9 +386,7 @@ define <8 x i32> @sadd_v8i32_vi(<8 x i32> %va) { ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 5, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb) + %v = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %va, <8 x i32> splat (i32 5)) ret <8 x i32> %v } @@ -442,9 +420,7 @@ define <16 x i32> @sadd_v16i32_vi(<16 x i32> %va) { ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 5, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb) + %v = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %va, <16 x i32> splat (i32 5)) ret <16 x i32> %v } @@ -491,9 +467,7 @@ define <2 x i64> @sadd_v2i64_vi(<2 x i64> %va) { ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 5, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb) + %v = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %va, <2 x i64> splat (i64 5)) ret <2 x i64> %v } @@ -540,9 +514,7 @@ define <4 x i64> @sadd_v4i64_vi(<4 x i64> %va) { ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head 
= insertelement <4 x i64> poison, i64 5, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb) + %v = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %va, <4 x i64> splat (i64 5)) ret <4 x i64> %v } @@ -589,9 +561,7 @@ define <8 x i64> @sadd_v8i64_vi(<8 x i64> %va) { ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 5, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb) + %v = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %va, <8 x i64> splat (i64 5)) ret <8 x i64> %v } @@ -638,8 +608,6 @@ define <16 x i64> @sadd_v16i64_vi(<16 x i64> %va) { ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 5, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb) + %v = call <16 x i64> @llvm.sadd.sat.v16i64(<16 x i64> %va, <16 x i64> splat (i64 5)) ret <16 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll index ea248010ef09a..d38ee1148e894 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll @@ -39,9 +39,7 @@ define <2 x i8> @vsaddu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsaddu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -65,9 +63,7 @@ define <2 x i8> @vsaddu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -77,9 +73,7 @@ define <2 x i8> @vsaddu_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 -1), <2 x i1> %m, i32 %evl) ret <2 x i8> %v } @@ -89,11 +83,7 @@ define <2 x i8> @vsaddu_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x 
i8> poison, i8 -1, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 -1), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -115,9 +105,7 @@ define <4 x i8> @vsaddu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsaddu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -153,9 +141,7 @@ define <4 x i8> @vsaddu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -165,9 +151,7 @@ define <4 x i8> @vsaddu_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> splat (i8 -1), <4 x i1> %m, i32 %evl) ret <4 x i8> %v } @@ -177,11 +161,7 @@ define <4 x i8> @vsaddu_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> splat (i8 -1), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -203,9 +183,7 @@ define <5 x i8> @vsaddu_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsaddu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -229,9 +207,7 @@ define <5 x i8> @vsaddu_vx_v5i8_unmasked(<5 x i8> %va, i8 
%b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -241,9 +217,7 @@ define <5 x i8> @vsaddu_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> splat (i8 -1), <5 x i1> %m, i32 %evl) ret <5 x i8> %v } @@ -253,11 +227,7 @@ define <5 x i8> @vsaddu_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.uadd.sat.v5i8(<5 x i8> %va, <5 x i8> splat (i8 -1), <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -279,9 +249,7 @@ define <8 x i8> @vsaddu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsaddu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -305,9 +273,7 @@ define <8 x i8> @vsaddu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -317,9 +283,7 @@ define <8 x i8> @vsaddu_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> %m, i32 %evl) ret <8 x i8> %v } @@ 
-329,11 +293,7 @@ define <8 x i8> @vsaddu_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -355,9 +315,7 @@ define <16 x i8> @vsaddu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zero ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsaddu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -381,9 +339,7 @@ define <16 x i8> @vsaddu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -393,9 +349,7 @@ define <16 x i8> @vsaddu_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> splat (i8 -1), <16 x i1> %m, i32 %evl) ret <16 x i8> %v } @@ -405,11 +359,7 @@ define <16 x i8> @vsaddu_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> splat (i8 -1), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -436,9 +386,7 @@ define <256 x i8> @vsaddu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext % ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> 
zeroinitializer - %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) ret <256 x i8> %v } @@ -460,11 +408,7 @@ define <256 x i8> @vsaddu_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v16, v16, -1 ; CHECK-NEXT: ret - %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %head = insertelement <256 x i1> poison, i1 true, i32 0 - %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> splat (i1 true), i32 %evl) ret <256 x i8> %v } @@ -481,9 +425,7 @@ define <256 x i8> @vsaddu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129) + %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) ret <256 x i8> %v } @@ -500,9 +442,7 @@ define <256 x i8> @vsaddu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128) + %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) ret <256 x i8> %v } @@ -524,9 +464,7 @@ define <2 x i16> @vsaddu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zero ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsaddu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -550,9 +488,7 @@ define <2 x i16> @vsaddu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %e ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -562,9 +498,7 @@ define <2 x i16> @vsaddu_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t ; 
CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> splat (i16 -1), <2 x i1> %m, i32 %evl)
 ret <2 x i16> %v
 }

@@ -574,11 +508,7 @@ define <2 x i16> @vsaddu_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> splat (i16 -1), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -600,9 +530,7 @@ define <4 x i16> @vsaddu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zero
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vsaddu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -626,9 +554,7 @@ define <4 x i16> @vsaddu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %e
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -638,9 +564,7 @@ define <4 x i16> @vsaddu_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> splat (i16 -1), <4 x i1> %m, i32 %evl)
 ret <4 x i16> %v
 }

@@ -650,11 +574,7 @@ define <4 x i16> @vsaddu_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> splat (i16 -1), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -676,9 +596,7 @@ define <8 x i16> @vsaddu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zero
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -702,9 +620,7 @@ define <8 x i16> @vsaddu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %e
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -714,9 +630,7 @@ define <8 x i16> @vsaddu_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> splat (i16 -1), <8 x i1> %m, i32 %evl)
 ret <8 x i16> %v
 }

@@ -726,11 +640,7 @@ define <8 x i16> @vsaddu_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> splat (i16 -1), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -752,9 +662,7 @@ define <16 x i16> @vsaddu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vsaddu.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -778,9 +686,7 @@ define <16 x i16> @vsaddu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -790,9 +696,7 @@ define <16 x i16> @vsaddu_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> splat (i16 -1), <16 x i1> %m, i32 %evl)
 ret <16 x i16> %v
 }

@@ -802,11 +706,7 @@ define <16 x i16> @vsaddu_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> splat (i16 -1), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -828,9 +728,7 @@ define <2 x i32> @vsaddu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zero
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vsaddu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -854,9 +752,7 @@ define <2 x i32> @vsaddu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %e
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -866,9 +762,7 @@ define <2 x i32> @vsaddu_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> splat (i32 -1), <2 x i1> %m, i32 %evl)
 ret <2 x i32> %v
 }

@@ -878,11 +772,7 @@ define <2 x i32> @vsaddu_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> splat (i32 -1), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -904,9 +794,7 @@ define <4 x i32> @vsaddu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zero
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -930,9 +818,7 @@ define <4 x i32> @vsaddu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %e
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -942,9 +828,7 @@ define <4 x i32> @vsaddu_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> splat (i32 -1), <4 x i1> %m, i32 %evl)
 ret <4 x i32> %v
 }

@@ -954,11 +838,7 @@ define <4 x i32> @vsaddu_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> splat (i32 -1), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -980,9 +860,7 @@ define <8 x i32> @vsaddu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zero
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vsaddu.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -1006,9 +884,7 @@ define <8 x i32> @vsaddu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %e
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -1018,9 +894,7 @@ define <8 x i32> @vsaddu_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> splat (i32 -1), <8 x i1> %m, i32 %evl)
 ret <8 x i32> %v
 }

@@ -1030,11 +904,7 @@ define <8 x i32> @vsaddu_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> splat (i32 -1), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -1056,9 +926,7 @@ define <16 x i32> @vsaddu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vsaddu.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -1082,9 +950,7 @@ define <16 x i32> @vsaddu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -1094,9 +960,7 @@ define <16 x i32> @vsaddu_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> splat (i32 -1), <16 x i1> %m, i32 %evl)
 ret <16 x i32> %v
 }

@@ -1106,11 +970,7 @@ define <16 x i32> @vsaddu_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> splat (i32 -1), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -1132,9 +992,7 @@ define <2 x i64> @vsaddu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zero
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -1186,9 +1044,7 @@ define <2 x i64> @vsaddu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %e
 ; RV64-NEXT: ret
 %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -1198,9 +1054,7 @@ define <2 x i64> @vsaddu_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> splat (i64 -1), <2 x i1> %m, i32 %evl)
 ret <2 x i64> %v
 }

@@ -1210,11 +1064,7 @@ define <2 x i64> @vsaddu_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> splat (i64 -1), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -1236,9 +1086,7 @@ define <4 x i64> @vsaddu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zero
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vsaddu.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -1290,9 +1138,7 @@ define <4 x i64> @vsaddu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %e
 ; RV64-NEXT: ret
 %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -1302,9 +1148,7 @@ define <4 x i64> @vsaddu_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> splat (i64 -1), <4 x i1> %m, i32 %evl)
 ret <4 x i64> %v
 }

@@ -1314,11 +1158,7 @@ define <4 x i64> @vsaddu_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> splat (i64 -1), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -1340,9 +1180,7 @@ define <8 x i64> @vsaddu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zero
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vsaddu.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -1394,9 +1232,7 @@ define <8 x i64> @vsaddu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %e
 ; RV64-NEXT: ret
 %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -1406,9 +1242,7 @@ define <8 x i64> @vsaddu_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> splat (i64 -1), <8 x i1> %m, i32 %evl)
 ret <8 x i64> %v
 }

@@ -1418,11 +1252,7 @@ define <8 x i64> @vsaddu_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> splat (i64 -1), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -1444,9 +1274,7 @@ define <16 x i64> @vsaddu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vsaddu.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }

@@ -1498,9 +1326,7 @@ define <16 x i64> @vsaddu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
 ; RV64-NEXT: ret
 %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }

@@ -1510,9 +1336,7 @@ define <16 x i64> @vsaddu_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> splat (i64 -1), <16 x i1> %m, i32 %evl)
 ret <16 x i64> %v
 }

@@ -1522,11 +1346,7 @@ define <16 x i64> @vsaddu_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, -1
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> splat (i64 -1), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }

@@ -1538,16 +1358,16 @@ define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e
 ; RV32-LABEL: vsaddu_vx_v32i64:
 ; RV32: # %bb.0:
 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v7, v0, 2
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; RV32-NEXT: li a2, 16
-; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vslidedown.vi v7, v0, 2
 ; RV32-NEXT: mv a1, a0
 ; RV32-NEXT: bltu a0, a2, .LBB108_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: li a1, 16
 ; RV32-NEXT: .LBB108_2:
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT: vsaddu.vv v8, v8, v24, v0.t
 ; RV32-NEXT: addi a1, a0, -16
@@ -1579,24 +1399,22 @@ define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t
 ; RV64-NEXT: ret
- %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl)
 ret <32 x i64> %v
 }

 define <32 x i64> @vsaddu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vsaddu_vi_v32i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; RV32-NEXT: li a2, 16
-; RV32-NEXT: vmv.v.i v24, -1
 ; RV32-NEXT: mv a1, a0
 ; RV32-NEXT: bltu a0, a2, .LBB109_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: li a1, 16
 ; RV32-NEXT: .LBB109_2:
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT: vsaddu.vv v8, v8, v24
 ; RV32-NEXT: addi a1, a0, -16
@@ -1624,11 +1442,7 @@ define <32 x i64> @vsaddu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vsaddu.vi v16, v16, -1
 ; RV64-NEXT: ret
- %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl)
 ret <32 x i64> %v
 }

@@ -1659,9 +1473,7 @@ define <32 x i64> @vsaddu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t
 ; RV64-NEXT: ret
- %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12)
+ %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12)
 ret <32 x i64> %v
 }

@@ -1690,8 +1502,6 @@ define <32 x i64> @vsaddu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vsaddu.vi v16, v16, -1, v0.t
 ; RV64-NEXT: ret
- %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27)
+ %v = call <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27)
 ret <32 x i64> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll
index 5764f51c0ff10..7b2cab294aa49 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll
@@ -34,9 +34,7 @@ define <2 x i8> @uadd_v2i8_vi(<2 x i8> %va) {
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i8> poison, i8 8, i32 0
- %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> %vb)
+ %v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 8))
 ret <2 x i8> %v
 }

@@ -70,9 +68,7 @@ define <4 x i8> @uadd_v4i8_vi(<4 x i8> %va) {
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i8> poison, i8 8, i32 0
- %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> %vb)
+ %v = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %va, <4 x i8> splat (i8 8))
 ret <4 x i8> %v
 }

@@ -106,9 +102,7 @@ define <8 x i8> @uadd_v8i8_vi(<8 x i8> %va) {
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 8, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> %vb)
+ %v = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 8))
 ret <8 x i8> %v
 }

@@ -142,9 +136,7 @@ define <16 x i8> @uadd_v16i8_vi(<16 x i8> %va) {
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 8, i32 0
- %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> %vb)
+ %v = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %va, <16 x i8> splat (i8 8))
 ret <16 x i8> %v
 }

@@ -178,9 +170,7 @@ define <2 x i16> @uadd_v2i16_vi(<2 x i16> %va) {
 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 8, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> %vb)
+ %v = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %va, <2 x i16> splat (i16 8))
 ret <2 x i16> %v
 }

@@ -214,9 +204,7 @@ define <4 x i16> @uadd_v4i16_vi(<4 x i16> %va) {
 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 8, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> %vb)
+ %v = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %va, <4 x i16> splat (i16 8))
 ret <4 x i16> %v
 }

@@ -250,9 +238,7 @@ define <8 x i16> @uadd_v8i16_vi(<8 x i16> %va) {
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 8, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> %vb)
+ %v = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %va, <8 x i16> splat (i16 8))
 ret <8 x i16> %v
 }

@@ -286,9 +272,7 @@ define <16 x i16> @uadd_v16i16_vi(<16 x i16> %va) {
 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 8, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> %vb)
+ %v = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %va, <16 x i16> splat (i16 8))
 ret <16 x i16> %v
 }

@@ -334,9 +318,7 @@ define <2 x i32> @uadd_v2i32_vi(<2 x i32> %va) {
 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 8, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> %vb)
+ %v = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %va, <2 x i32> splat (i32 8))
 ret <2 x i32> %v
 }

@@ -370,9 +352,7 @@ define <4 x i32> @uadd_v4i32_vi(<4 x i32> %va) {
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 8, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> %vb)
+ %v = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %va, <4 x i32> splat (i32 8))
 ret <4 x i32> %v
 }

@@ -406,9 +386,7 @@ define <8 x i32> @uadd_v8i32_vi(<8 x i32> %va) {
 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 8, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> %vb)
+ %v = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %va, <8 x i32> splat (i32 8))
 ret <8 x i32> %v
 }

@@ -442,9 +420,7 @@ define <16 x i32> @uadd_v16i32_vi(<16 x i32> %va) {
 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 8, i32 0
- %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> %vb)
+ %v = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %va, <16 x i32> splat (i32 8))
 ret <16 x i32> %v
 }

@@ -491,9 +467,7 @@ define <2 x i64> @uadd_v2i64_vi(<2 x i64> %va) {
 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 8, i32 0
- %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> %vb)
+ %v = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %va, <2 x i64> splat (i64 8))
 ret <2 x i64> %v
 }

@@ -540,9 +514,7 @@ define <4 x i64> @uadd_v4i64_vi(<4 x i64> %va) {
 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 8, i32 0
- %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> %vb)
+ %v = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %va, <4 x i64> splat (i64 8))
 ret <4 x i64> %v
 }

@@ -589,9 +561,7 @@ define <8 x i64> @uadd_v8i64_vi(<8 x i64> %va) {
 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 8, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> %vb)
+ %v = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %va, <8 x i64> splat (i64 8))
 ret <8 x i64> %v
 }

@@ -638,8 +608,6 @@ define <16 x i64> @uadd_v16i64_vi(<16 x i64> %va) {
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vsaddu.vi v8, v8, 8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i64> poison, i64 8, i32 0
- %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> %vb)
+ %v = call <16 x i64> @llvm.uadd.sat.v16i64(<16 x i64> %va, <16 x i64> splat (i64 8))
 ret <16 x i64> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
index ead41b0717ff1..7dcd4c4199827 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -204,10 +204,8 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT: vse32.v v8, (a2)
 ; RV64-NEXT: ret
 %vb = load <6 x i32>, ptr %b
- %a = insertelement <6 x i32> poison, i32 -1, i32 0
- %va = shufflevector <6 x i32> %a, <6 x i32> poison, <6 x i32> zeroinitializer
 %vcc = load <6 x i1>, ptr %cc
- %vsel = select <6 x i1> %vcc, <6 x i32> %va, <6 x i32> %vb
+ %vsel = select <6 x i1> %vcc, <6 x i32> splat (i32 -1), <6 x i32> %vb
 store <6 x i32> %vsel, ptr %z
 ret void
 }
@@ -415,10 +413,8 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT: vse32.v v8, (a2)
 ; RV64-NEXT: ret
 %vb = load <6 x float>, ptr %b
- %a = insertelement <6 x float> poison, float 0.0, i32 0
- %va = shufflevector <6 x float> %a, <6 x float> poison, <6 x i32> zeroinitializer
 %vcc = load <6 x i1>, ptr %cc
- %vsel = select <6 x i1> %vcc, <6 x float> %va, <6 x float> %vb
+ %vsel = select <6 x i1> %vcc, <6 x float> splat (float 0.0), <6 x float> %vb
 store <6 x float> %vsel, ptr %z
 ret void
 }
@@ -468,10 +464,8 @@ define void @vselect_vi_v8i32(ptr %b, ptr %cc, ptr %z) {
 ; CHECK-NEXT: vse32.v v8, (a2)
 ; CHECK-NEXT: ret
 %vb = load <8 x i32>, ptr %b
- %a = insertelement <8 x i32> poison, i32 -1, i32 0
- %va = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> zeroinitializer
 %vcc = load <8 x i1>, ptr %cc
- %vsel = select <8 x i1> %vcc, <8 x i32> %va, <8 x i32> %vb
+ %vsel = select <8 x i1> %vcc, <8 x i32> splat (i32 -1), <8 x i32> %vb
 store <8 x i32> %vsel, ptr %z
 ret void
 }
@@ -521,10 +515,8 @@ define void @vselect_vfpzero_v8f32(ptr %b, ptr %cc, ptr %z) {
 ; CHECK-NEXT: vse32.v v8, (a2)
 ; CHECK-NEXT: ret
 %vb = load <8 x float>, ptr %b
- %a = insertelement <8 x float> poison, float 0.0, i32 0
- %va = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> zeroinitializer
 %vcc = load <8 x i1>, ptr %cc
- %vsel = select <8 x i1> %vcc, <8 x float> %va, <8 x float> %vb
+ %vsel = select <8 x i1> %vcc, <8 x float> splat (float 0.0), <8 x float> %vb
 store <8 x float> %vsel, ptr %z
 ret void
 }
@@ -574,10 +566,8 @@ define void @vselect_vi_v16i16(ptr %b, ptr %cc, ptr %z) {
 ; CHECK-NEXT: vse16.v v8, (a2)
 ; CHECK-NEXT: ret
 %vb = load <16 x i16>, ptr %b
- %a = insertelement <16 x i16> poison, i16 4, i32 0
- %va = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> zeroinitializer
 %vcc = load <16 x i1>, ptr %cc
- %vsel = select <16 x i1> %vcc, <16 x i16> %va, <16 x i16> %vb
+ %vsel = select <16 x i1> %vcc, <16 x i16> splat (i16 4), <16 x i16> %vb
 store <16 x i16> %vsel, ptr %z
 ret void
 }
@@ -630,10 +620,8 @@ define void @vselect_vfpzero_v32f16(ptr %b, ptr %cc, ptr %z) {
 ; CHECK-NEXT: vse16.v v8, (a2)
 ; CHECK-NEXT: ret
 %vb = load <32 x half>, ptr %b
- %a = insertelement <32 x half> poison, half 0.0, i32 0
- %va = shufflevector <32 x half> %a, <32 x half> poison, <32 x i32> zeroinitializer
 %vcc = load <32 x i1>, ptr %cc
- %vsel = select <32 x i1> %vcc, <32 x half> %va, <32 x half> %vb
+ %vsel = select <32 x i1> %vcc, <32 x half> splat (half 0.0), <32 x half> %vb
 store <32 x half> %vsel, ptr %z
 ret void
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll
index bbbd6d69e37c3..c4b7c1f2f19f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll
@@ -37,9 +37,7 @@ define <2 x i8> @vsll_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
 ; CHECK-NEXT: vsll.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.shl.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.shl.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }

@@ -63,9 +61,7 @@ define <2 x i8> @vsll_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.shl.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.shl.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }

@@ -75,9 +71,7 @@ define <2 x i8> @vsll_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i8> poison, i8 3, i32 0
- %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.shl.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.shl.v2i8(<2 x i8> %va, <2 x i8> splat (i8 3), <2 x i1> %m, i32 %evl)
 ret <2 x i8> %v
 }

@@ -87,11 +81,7 @@ define <2 x i8> @vsll_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i8> poison, i8 3, i32 0
- %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.shl.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.shl.v2i8(<2 x i8> %va, <2 x i8> splat (i8 3), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i8> %v
 }

@@ -125,9 +115,7 @@ define <4 x i8> @vsll_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
 ; CHECK-NEXT: vsll.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.shl.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.shl.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }

@@ -151,9 +139,7 @@ define <4 x i8> @vsll_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.shl.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.shl.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }

@@ -163,9 +149,7 @@ define <4 x i8> @vsll_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i8> poison, i8 3, i32 0
- %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.shl.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.shl.v4i8(<4 x i8> %va, <4 x i8> splat (i8 3), <4 x i1> %m, i32 %evl)
 ret <4 x i8> %v
 }

@@ -175,11 +159,7 @@ define <4 x i8> @vsll_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i8> poison, i8 3, i32 0
- %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.shl.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.shl.v4i8(<4 x i8> %va, <4 x i8> splat (i8 3), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }

@@ -201,9 +181,7 @@ define <8 x i8> @vsll_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vsll.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.shl.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.shl.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }

@@ -227,9 +205,7 @@ define <8 x i8> @vsll_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.shl.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.shl.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }

@@ -239,9 +215,7 @@ define <8 x i8> @vsll_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 3, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.shl.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.shl.v8i8(<8 x i8> %va, <8 x i8> splat (i8 3), <8 x i1> %m, i32 %evl)
 ret <8 x i8> %v
 }

@@ -251,11 +225,7 @@ define <8 x i8> @vsll_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 3, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.shl.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.shl.v8i8(<8 x i8> %va, <8 x i8> splat (i8 3), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }

@@ -277,9 +247,7 @@ define <16 x i8> @vsll_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vsll.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.shl.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.shl.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }

@@ -303,9 +271,7 @@ define <16 x i8> @vsll_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl)
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.shl.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.shl.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }

@@ -315,9 +281,7 @@ define <16 x i8> @vsll_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 3, i32 0
- %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.shl.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.shl.v16i8(<16 x i8> %va, <16 x i8> splat (i8 3), <16 x i1> %m, i32 %evl)
 ret <16 x i8> %v
 }

@@ -327,11 +291,7 @@ define <16 x i8> @vsll_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 3, i32 0
- %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.shl.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.shl.v16i8(<16 x i8> %va, <16 x i8> splat (i8 3), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }

@@ -353,9 +313,7 @@ define <2 x i16> @vsll_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vsll.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.shl.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.shl.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -379,9 +337,7 @@ define <2 x i16> @vsll_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.shl.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.shl.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -391,9 +347,7 @@ define <2 x i16> @vsll_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 3, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.shl.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.shl.v2i16(<2 x i16> %va, <2 x i16> splat (i16 3), <2 x i1> %m, i32 %evl)
 ret <2 x i16> %v
 }

@@ -403,11 +357,7 @@ define <2 x i16> @vsll_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 3, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.shl.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.shl.v2i16(<2 x i16> %va, <2 x i16> splat (i16 3), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -429,9 +379,7 @@ define <4 x i16> @vsll_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vsll.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.shl.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.shl.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -455,9 +403,7 @@ define <4 x i16> @vsll_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.shl.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.shl.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -467,9 +413,7 @@ define <4 x i16> @vsll_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 3, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.shl.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.shl.v4i16(<4 x i16> %va, <4 x i16> splat (i16 3), <4 x i1> %m, i32 %evl)
 ret <4 x i16> %v
 }

@@ -479,11 +423,7 @@ define <4 x i16> @vsll_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 3, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.shl.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.shl.v4i16(<4 x i16> %va, <4 x i16> splat (i16 3), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -505,9 +445,7 @@ define <8 x i16> @vsll_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vsll.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.shl.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.shl.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -531,9 +469,7 @@ define <8 x i16> @vsll_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.shl.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.shl.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -543,9 +479,7 @@ define <8 x i16> @vsll_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 3, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.shl.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.shl.v8i16(<8 x i16> %va, <8 x i16> splat (i16 3), <8 x i1> %m, i32 %evl)
 ret <8 x i16> %v
 }

@@ -555,11 +489,7 @@ define <8 x i16> @vsll_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 3, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.shl.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.shl.v8i16(<8 x i16> %va, <8 x i16> splat (i16 3), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -581,9 +511,7 @@ define <16 x i16> @vsll_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vsll.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.shl.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.shl.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -607,9 +535,7 @@ define <16 x i16> @vsll_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.shl.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.shl.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -619,9 +545,7 @@ define <16 x i16> @vsll_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 3, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.shl.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.shl.v16i16(<16 x i16> %va, <16 x i16> splat (i16 3), <16 x i1> %m, i32 %evl)
 ret <16 x i16> %v
 }

@@ -631,11 +555,7 @@ define <16 x i16> @vsll_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 3, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.shl.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.shl.v16i16(<16 x i16> %va, <16 x i16> splat (i16 3), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -657,9 +577,7 @@ define <2 x i32> @vsll_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vsll.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.shl.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.shl.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -683,9 +601,7 @@ define <2 x i32> @vsll_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.shl.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.shl.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -695,9 +611,7 @@ define <2 x i32> @vsll_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 3, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.shl.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.shl.v2i32(<2 x i32> %va, <2 x i32> splat (i32 3), <2 x i1> %m, i32 %evl)
 ret <2 x i32> %v
 }

@@ -707,11 +621,7 @@ define <2 x i32> @vsll_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 3, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.shl.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.shl.v2i32(<2 x i32> %va, <2 x i32> splat (i32 3), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -733,9 +643,7 @@ define <4 x i32> @vsll_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vsll.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -759,9 +667,7 @@ define <4 x i32> @vsll_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -771,9 +677,7 @@ define <4 x i32> @vsll_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 3, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %va, <4 x i32> splat (i32 3), <4 x i1> %m, i32 %evl)
 ret <4 x i32> %v
 }

@@ -783,11 +687,7 @@ define <4 x i32> @vsll_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vsll.vi v8, v8, 3
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 3, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
-
%v = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.shl.v8i32(<8 x i32> %va, <8 x i32> splat (i32 3), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -885,9 +775,7 @@ define <16 x i32> @vsll_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsll.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.shl.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.shl.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -911,9 +799,7 @@ define <16 x i32> @vsll_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.shl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.shl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -923,9 +809,7 @@ define <16 x i32> @vsll_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 3, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.shl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.shl.v16i32(<16 x i32> %va, <16 x i32> splat (i32 3), <16 x i1> %m, i32 %evl) ret <16 x i32> %v } @@ -935,11 +819,7 @@ define <16 x i32> @vsll_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 3, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.shl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.shl.v16i32(<16 x i32> %va, <16 x i32> splat (i32 3), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -961,9 +841,7 @@ define <2 x i64> @vsll_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsll.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.shl.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.shl.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -999,9 +877,7 @@ define <2 x i64> @vsll_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> 
zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.shl.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.shl.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1011,9 +887,7 @@ define <2 x i64> @vsll_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 3, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.shl.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.shl.v2i64(<2 x i64> %va, <2 x i64> splat (i64 3), <2 x i1> %m, i32 %evl) ret <2 x i64> %v } @@ -1023,11 +897,7 @@ define <2 x i64> @vsll_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 3, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.shl.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.shl.v2i64(<2 x i64> %va, <2 x i64> splat (i64 3), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1049,9 +919,7 @@ define <4 x i64> @vsll_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsll.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.shl.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.shl.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1087,9 +955,7 @@ define <4 x i64> @vsll_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.shl.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.shl.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1099,9 +965,7 @@ define <4 x i64> @vsll_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 3, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.shl.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.shl.v4i64(<4 x i64> %va, <4 x i64> splat (i64 3), <4 x i1> %m, i32 %evl) ret <4 x i64> %v } @@ -1111,11 +975,7 @@ define <4 x i64> @vsll_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: 
vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 3, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.shl.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.shl.v4i64(<4 x i64> %va, <4 x i64> splat (i64 3), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1137,9 +997,7 @@ define <8 x i64> @vsll_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsll.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1175,9 +1033,7 @@ define <8 x i64> @vsll_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1187,9 +1043,7 @@ define <8 x i64> @vsll_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 3, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> splat (i64 3), <8 x i1> %m, i32 %evl) ret <8 x i64> %v } @@ -1199,11 +1053,7 @@ define <8 x i64> @vsll_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 3, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.shl.v8i64(<8 x i64> %va, <8 x i64> splat (i64 3), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1225,9 +1075,7 @@ define <16 x i64> @vsll_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsll.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.shl.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.shl.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) 
ret <16 x i64> %v } @@ -1263,9 +1111,7 @@ define <16 x i64> @vsll_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.shl.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.shl.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1275,9 +1121,7 @@ define <16 x i64> @vsll_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 3, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.shl.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.shl.v16i64(<16 x i64> %va, <16 x i64> splat (i64 3), <16 x i1> %m, i32 %evl) ret <16 x i64> %v } @@ -1287,10 +1131,6 @@ define <16 x i64> @vsll_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 3, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.shl.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.shl.v16i64(<16 x i64> %va, <16 x i64> splat (i64 3), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll index fc84e6ce3b6c7..7ea5b1f0b505a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll @@ -39,9 +39,7 @@ define <2 x i8> @vsra_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -65,9 +63,7 @@ define <2 x i8> @vsra_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -77,9 +73,7 @@ define <2 x i8> @vsra_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; 
CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 5, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> splat (i8 5), <2 x i1> %m, i32 %evl) ret <2 x i8> %v } @@ -89,11 +83,7 @@ define <2 x i8> @vsra_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 5, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.ashr.v2i8(<2 x i8> %va, <2 x i8> splat (i8 5), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -115,9 +105,7 @@ define <4 x i8> @vsra_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -141,9 +129,7 @@ define <4 x i8> @vsra_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -153,9 +139,7 @@ define <4 x i8> @vsra_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 5, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> splat (i8 5), <4 x i1> %m, i32 %evl) ret <4 x i8> %v } @@ -165,11 +149,7 @@ define <4 x i8> @vsra_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 5, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.ashr.v4i8(<4 x i8> %va, <4 x i8> splat (i8 5), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -203,9 +183,7 @@ define <8 x i8> @vsra_vv_v8i8_unmasked(<8 
x i8> %va, <8 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -229,9 +207,7 @@ define <8 x i8> @vsra_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -241,9 +217,7 @@ define <8 x i8> @vsra_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 5, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> splat (i8 5), <8 x i1> %m, i32 %evl) ret <8 x i8> %v } @@ -253,11 +227,7 @@ define <8 x i8> @vsra_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 5, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.ashr.v8i8(<8 x i8> %va, <8 x i8> splat (i8 5), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -279,9 +249,7 @@ define <16 x i8> @vsra_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -305,9 +273,7 @@ define <16 x i8> @vsra_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -317,9 +283,7 @@ define <16 x 
i8> @vsra_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 5, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> splat (i8 5), <16 x i1> %m, i32 %evl) ret <16 x i8> %v } @@ -329,11 +293,7 @@ define <16 x i8> @vsra_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 5, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.ashr.v16i8(<16 x i8> %va, <16 x i8> splat (i8 5), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -355,9 +315,7 @@ define <2 x i16> @vsra_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -381,9 +339,7 @@ define <2 x i16> @vsra_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -393,9 +349,7 @@ define <2 x i16> @vsra_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 5, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> splat (i16 5), <2 x i1> %m, i32 %evl) ret <2 x i16> %v } @@ -405,11 +359,7 @@ define <2 x i16> @vsra_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 5, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x 
i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.ashr.v2i16(<2 x i16> %va, <2 x i16> splat (i16 5), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -431,9 +381,7 @@ define <4 x i16> @vsra_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -457,9 +405,7 @@ define <4 x i16> @vsra_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -469,9 +415,7 @@ define <4 x i16> @vsra_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 5, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> splat (i16 5), <4 x i1> %m, i32 %evl) ret <4 x i16> %v } @@ -481,11 +425,7 @@ define <4 x i16> @vsra_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 5, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.ashr.v4i16(<4 x i16> %va, <4 x i16> splat (i16 5), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -507,9 +447,7 @@ define <8 x i16> @vsra_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -533,9 +471,7 @@ define <8 x i16> @vsra_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> 
zeroinitializer - %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -545,9 +481,7 @@ define <8 x i16> @vsra_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 5, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> splat (i16 5), <8 x i1> %m, i32 %evl) ret <8 x i16> %v } @@ -557,11 +491,7 @@ define <8 x i16> @vsra_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 5, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.ashr.v8i16(<8 x i16> %va, <8 x i16> splat (i16 5), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -583,9 +513,7 @@ define <16 x i16> @vsra_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -609,9 +537,7 @@ define <16 x i16> @vsra_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -621,9 +547,7 @@ define <16 x i16> @vsra_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 5, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> splat (i16 5), <16 x i1> %m, i32 %evl) ret <16 x i16> %v } @@ -633,11 +557,7 @@ define <16 x i16> @vsra_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> 
poison, i16 5, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.ashr.v16i16(<16 x i16> %va, <16 x i16> splat (i16 5), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -659,9 +579,7 @@ define <2 x i32> @vsra_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -685,9 +603,7 @@ define <2 x i32> @vsra_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -697,9 +613,7 @@ define <2 x i32> @vsra_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 5, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> splat (i32 5), <2 x i1> %m, i32 %evl) ret <2 x i32> %v } @@ -709,11 +623,7 @@ define <2 x i32> @vsra_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 5, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.ashr.v2i32(<2 x i32> %va, <2 x i32> splat (i32 5), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -735,9 +645,7 @@ define <4 x i32> @vsra_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -761,9 +669,7 @@ define <4 x i32> 
@vsra_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -773,9 +679,7 @@ define <4 x i32> @vsra_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 5, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> splat (i32 5), <4 x i1> %m, i32 %evl) ret <4 x i32> %v } @@ -785,11 +689,7 @@ define <4 x i32> @vsra_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 5, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %va, <4 x i32> splat (i32 5), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -811,9 +711,7 @@ define <8 x i32> @vsra_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -837,9 +735,7 @@ define <8 x i32> @vsra_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -849,9 +745,7 @@ define <8 x i32> @vsra_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 5, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> 
splat (i32 5), <8 x i1> %m, i32 %evl) ret <8 x i32> %v } @@ -861,11 +755,7 @@ define <8 x i32> @vsra_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 5, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.ashr.v8i32(<8 x i32> %va, <8 x i32> splat (i32 5), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -887,9 +777,7 @@ define <16 x i32> @vsra_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -913,9 +801,7 @@ define <16 x i32> @vsra_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -925,9 +811,7 @@ define <16 x i32> @vsra_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 5, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> splat (i32 5), <16 x i1> %m, i32 %evl) ret <16 x i32> %v } @@ -937,11 +821,7 @@ define <16 x i32> @vsra_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 5, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.ashr.v16i32(<16 x i32> %va, <16 x i32> splat (i32 5), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -963,9 +843,7 @@ define <2 x i64> @vsra_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m 
= shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1001,9 +879,7 @@ define <2 x i64> @vsra_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1013,9 +889,7 @@ define <2 x i64> @vsra_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 5, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> splat (i64 5), <2 x i1> %m, i32 %evl) ret <2 x i64> %v } @@ -1025,11 +899,7 @@ define <2 x i64> @vsra_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 5, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.ashr.v2i64(<2 x i64> %va, <2 x i64> splat (i64 5), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1051,9 +921,7 @@ define <4 x i64> @vsra_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1089,9 +957,7 @@ define <4 x i64> @vsra_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1101,9 +967,7 @@ define <4 x i64> @vsra_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, 
v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 5, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> splat (i64 5), <4 x i1> %m, i32 %evl) ret <4 x i64> %v } @@ -1113,11 +977,7 @@ define <4 x i64> @vsra_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 5, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.ashr.v4i64(<4 x i64> %va, <4 x i64> splat (i64 5), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1139,9 +999,7 @@ define <8 x i64> @vsra_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1177,9 +1035,7 @@ define <8 x i64> @vsra_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1189,9 +1045,7 @@ define <8 x i64> @vsra_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 5, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> splat (i64 5), <8 x i1> %m, i32 %evl) ret <8 x i64> %v } @@ -1201,11 +1055,7 @@ define <8 x i64> @vsra_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 5, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.ashr.v8i64(<8 x i64> %va, <8 x i64> splat (i64 5), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> 
%v } @@ -1227,9 +1077,7 @@ define <16 x i64> @vsra_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1265,9 +1113,7 @@ define <16 x i64> @vsra_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1277,9 +1123,7 @@ define <16 x i64> @vsra_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 5, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> splat (i64 5), <16 x i1> %m, i32 %evl) ret <16 x i64> %v } @@ -1289,10 +1133,6 @@ define <16 x i64> @vsra_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 5, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.ashr.v16i64(<16 x i64> %va, <16 x i64> splat (i64 5), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll index ba3287f744626..9f9d4af0cc2f3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll @@ -38,9 +38,7 @@ define <2 x i8> @vsrl_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.lshr.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.lshr.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -64,9 +62,7 @@ define <2 x i8> @vsrl_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = 
shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.lshr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.lshr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -76,9 +72,7 @@ define <2 x i8> @vsrl_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 4, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.lshr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.lshr.v2i8(<2 x i8> %va, <2 x i8> splat (i8 4), <2 x i1> %m, i32 %evl) ret <2 x i8> %v } @@ -88,11 +82,7 @@ define <2 x i8> @vsrl_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 4, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.lshr.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.lshr.v2i8(<2 x i8> %va, <2 x i8> splat (i8 4), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -114,9 +104,7 @@ define <4 x i8> @vsrl_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.lshr.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.lshr.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -140,9 +128,7 @@ define <4 x i8> @vsrl_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.lshr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.lshr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -152,9 +138,7 @@ define <4 x i8> @vsrl_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 4, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.lshr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.lshr.v4i8(<4 x i8> %va, <4 x i8> splat (i8 4), <4 x i1> %m, i32 %evl) ret <4 x i8> %v } @@ -164,11 +148,7 @@ define <4 x i8> @vsrl_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; 
CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 4, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.lshr.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.lshr.v4i8(<4 x i8> %va, <4 x i8> splat (i8 4), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -202,9 +182,7 @@ define <8 x i8> @vsrl_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.lshr.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.lshr.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -228,9 +206,7 @@ define <8 x i8> @vsrl_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.lshr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.lshr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -240,9 +216,7 @@ define <8 x i8> @vsrl_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 4, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.lshr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.lshr.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), <8 x i1> %m, i32 %evl) ret <8 x i8> %v } @@ -252,11 +226,7 @@ define <8 x i8> @vsrl_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 4, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.lshr.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.lshr.v8i8(<8 x i8> %va, <8 x i8> splat (i8 4), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -278,9 +248,7 @@ define <16 x i8> @vsrl_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.lshr.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.lshr.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -304,9 +272,7 @@ define <16 x i8> @vsrl_vx_v16i8_unmasked(<16 x i8> %va, i8 
%b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.lshr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.lshr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -316,9 +282,7 @@ define <16 x i8> @vsrl_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 4, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.lshr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.lshr.v16i8(<16 x i8> %va, <16 x i8> splat (i8 4), <16 x i1> %m, i32 %evl) ret <16 x i8> %v } @@ -328,11 +292,7 @@ define <16 x i8> @vsrl_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 4, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.lshr.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.lshr.v16i8(<16 x i8> %va, <16 x i8> splat (i8 4), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -354,9 +314,7 @@ define <2 x i16> @vsrl_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.lshr.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.lshr.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -380,9 +338,7 @@ define <2 x i16> @vsrl_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.lshr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.lshr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -392,9 +348,7 @@ define <2 x i16> @vsrl_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 4, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.lshr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.lshr.v2i16(<2 x i16> %va, <2 x i16> splat (i16 4), <2 x i1> %m, i32 
%evl) ret <2 x i16> %v } @@ -404,11 +358,7 @@ define <2 x i16> @vsrl_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 4, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.lshr.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.lshr.v2i16(<2 x i16> %va, <2 x i16> splat (i16 4), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -430,9 +380,7 @@ define <4 x i16> @vsrl_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.lshr.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.lshr.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -456,9 +404,7 @@ define <4 x i16> @vsrl_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.lshr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.lshr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -468,9 +414,7 @@ define <4 x i16> @vsrl_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 4, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.lshr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.lshr.v4i16(<4 x i16> %va, <4 x i16> splat (i16 4), <4 x i1> %m, i32 %evl) ret <4 x i16> %v } @@ -480,11 +424,7 @@ define <4 x i16> @vsrl_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 4, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.lshr.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.lshr.v4i16(<4 x i16> %va, <4 x i16> splat (i16 4), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -506,9 +446,7 @@ define <8 x i16> @vsrl_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> 
@llvm.vp.lshr.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.lshr.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -532,9 +470,7 @@ define <8 x i16> @vsrl_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.lshr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.lshr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -544,9 +480,7 @@ define <8 x i16> @vsrl_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 4, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.lshr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.lshr.v8i16(<8 x i16> %va, <8 x i16> splat (i16 4), <8 x i1> %m, i32 %evl) ret <8 x i16> %v } @@ -556,11 +490,7 @@ define <8 x i16> @vsrl_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 4, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.lshr.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.lshr.v8i16(<8 x i16> %va, <8 x i16> splat (i16 4), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -582,9 +512,7 @@ define <16 x i16> @vsrl_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.lshr.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.lshr.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -608,9 +536,7 @@ define <16 x i16> @vsrl_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.lshr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.lshr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -620,9 +546,7 @@ define <16 x i16> @vsrl_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> 
poison, i16 4, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.lshr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.lshr.v16i16(<16 x i16> %va, <16 x i16> splat (i16 4), <16 x i1> %m, i32 %evl) ret <16 x i16> %v } @@ -632,11 +556,7 @@ define <16 x i16> @vsrl_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 4, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.lshr.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.lshr.v16i16(<16 x i16> %va, <16 x i16> splat (i16 4), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -658,9 +578,7 @@ define <2 x i32> @vsrl_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.lshr.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.lshr.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -684,9 +602,7 @@ define <2 x i32> @vsrl_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.lshr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.lshr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -696,9 +612,7 @@ define <2 x i32> @vsrl_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 4, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.lshr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.lshr.v2i32(<2 x i32> %va, <2 x i32> splat (i32 4), <2 x i1> %m, i32 %evl) ret <2 x i32> %v } @@ -708,11 +622,7 @@ define <2 x i32> @vsrl_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 4, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.lshr.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.lshr.v2i32(<2 x i32> %va, <2 x i32> splat (i32 4), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -734,9 +644,7 @@ 
define <4 x i32> @vsrl_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -760,9 +668,7 @@ define <4 x i32> @vsrl_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -772,9 +678,7 @@ define <4 x i32> @vsrl_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 4, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %va, <4 x i32> splat (i32 4), <4 x i1> %m, i32 %evl) ret <4 x i32> %v } @@ -784,11 +688,7 @@ define <4 x i32> @vsrl_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 4, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %va, <4 x i32> splat (i32 4), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -810,9 +710,7 @@ define <8 x i32> @vsrl_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -836,9 +734,7 @@ define <8 x i32> @vsrl_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %va, <8 x i32> 
%vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -848,9 +744,7 @@ define <8 x i32> @vsrl_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 4, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), <8 x i1> %m, i32 %evl) ret <8 x i32> %v } @@ -860,11 +754,7 @@ define <8 x i32> @vsrl_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 4, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.lshr.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -886,9 +776,7 @@ define <16 x i32> @vsrl_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.lshr.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.lshr.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -912,9 +800,7 @@ define <16 x i32> @vsrl_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.lshr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.lshr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -924,9 +810,7 @@ define <16 x i32> @vsrl_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 4, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.lshr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.lshr.v16i32(<16 x i32> %va, <16 x i32> splat (i32 4), <16 x i1> %m, i32 %evl) ret <16 x i32> %v } @@ -936,11 +820,7 @@ define <16 x i32> @vsrl_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 4, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = 
shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.lshr.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.lshr.v16i32(<16 x i32> %va, <16 x i32> splat (i32 4), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -962,9 +842,7 @@ define <2 x i64> @vsrl_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.lshr.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.lshr.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1000,9 +878,7 @@ define <2 x i64> @vsrl_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.lshr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.lshr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1012,9 +888,7 @@ define <2 x i64> @vsrl_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 4, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.lshr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.lshr.v2i64(<2 x i64> %va, <2 x i64> splat (i64 4), <2 x i1> %m, i32 %evl) ret <2 x i64> %v } @@ -1024,11 +898,7 @@ define <2 x i64> @vsrl_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 4, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.lshr.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.lshr.v2i64(<2 x i64> %va, <2 x i64> splat (i64 4), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1050,9 +920,7 @@ define <4 x i64> @vsrl_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.lshr.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.lshr.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1088,9 +956,7 @@ define <4 x i64> @vsrl_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 
x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.lshr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.lshr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1100,9 +966,7 @@ define <4 x i64> @vsrl_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 4, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.lshr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.lshr.v4i64(<4 x i64> %va, <4 x i64> splat (i64 4), <4 x i1> %m, i32 %evl) ret <4 x i64> %v } @@ -1112,11 +976,7 @@ define <4 x i64> @vsrl_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 4, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.lshr.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.lshr.v4i64(<4 x i64> %va, <4 x i64> splat (i64 4), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1138,9 +998,7 @@ define <8 x i64> @vsrl_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.lshr.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.lshr.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1176,9 +1034,7 @@ define <8 x i64> @vsrl_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.lshr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.lshr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1188,9 +1044,7 @@ define <8 x i64> @vsrl_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.lshr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.lshr.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), <8 x i1> %m, i32 %evl) ret <8 x i64> %v } @@ -1200,11 +1054,7 @@ define <8 x i64> @vsrl_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli 
zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 4, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.lshr.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.lshr.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1226,9 +1076,7 @@ define <16 x i64> @vsrl_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.lshr.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.lshr.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1264,9 +1112,7 @@ define <16 x i64> @vsrl_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.lshr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.lshr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1276,9 +1122,7 @@ define <16 x i64> @vsrl_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 4, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.lshr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.lshr.v16i64(<16 x i64> %va, <16 x i64> splat (i64 4), <16 x i1> %m, i32 %evl) ret <16 x i64> %v } @@ -1288,10 +1132,6 @@ define <16 x i64> @vsrl_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 4, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.lshr.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.lshr.v16i64(<16 x i64> %va, <16 x i64> splat (i64 4), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll index 32b8d10d87173..2caa2ff41a7d9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll @@ -43,9 +43,7 @@ define <2 x i8> @vssub_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, 
ma ; CHECK-NEXT: vssub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -69,9 +67,7 @@ define <2 x i8> @vssub_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -82,9 +78,7 @@ define <2 x i8> @vssub_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 -1), <2 x i1> %m, i32 %evl) ret <2 x i8> %v } @@ -95,11 +89,7 @@ define <2 x i8> @vssub_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 -1), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -121,9 +111,7 @@ define <4 x i8> @vssub_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vssub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -161,9 +149,7 @@ define <4 x i8> @vssub_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -174,9 +160,7 @@ define <4 x i8> @vssub_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext 
%evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> splat (i8 -1), <4 x i1> %m, i32 %evl) ret <4 x i8> %v } @@ -187,11 +171,7 @@ define <4 x i8> @vssub_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> splat (i8 -1), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -213,9 +193,7 @@ define <5 x i8> @vssub_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vssub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -239,9 +217,7 @@ define <5 x i8> @vssub_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0 %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -252,9 +228,7 @@ define <5 x i8> @vssub_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> splat (i8 -1), <5 x i1> %m, i32 %evl) ret <5 x i8> %v } @@ -265,11 +239,7 @@ define <5 x i8> @vssub_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer - %head = insertelement <5 x i1> poison, i1 true, i32 0 - %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer - %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl) + %v = call <5 x i8> @llvm.vp.ssub.sat.v5i8(<5 x i8> %va, <5 x i8> 
splat (i8 -1), <5 x i1> splat (i1 true), i32 %evl) ret <5 x i8> %v } @@ -291,9 +261,7 @@ define <8 x i8> @vssub_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vssub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -317,9 +285,7 @@ define <8 x i8> @vssub_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -330,9 +296,7 @@ define <8 x i8> @vssub_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> %m, i32 %evl) ret <8 x i8> %v } @@ -343,11 +307,7 @@ define <8 x i8> @vssub_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -369,9 +329,7 @@ define <16 x i8> @vssub_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vssub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -395,9 +353,7 @@ define <16 x i8> @vssub_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> 
%vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -408,9 +364,7 @@ define <16 x i8> @vssub_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> splat (i8 -1), <16 x i1> %m, i32 %evl) ret <16 x i8> %v } @@ -421,11 +375,7 @@ define <16 x i8> @vssub_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> splat (i8 -1), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -453,9 +403,7 @@ define <256 x i8> @vssub_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) ret <256 x i8> %v } @@ -478,11 +426,7 @@ define <256 x i8> @vssub_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vssub.vx v16, v16, a2 ; CHECK-NEXT: ret - %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %head = insertelement <256 x i1> poison, i1 true, i32 0 - %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl) + %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> splat (i1 true), i32 %evl) ret <256 x i8> %v } @@ -500,9 +444,7 @@ define <256 x i8> @vssub_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129) + %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) ret <256 x i8> %v } @@ -520,9 +462,7 @@ define <256 x i8> @vssub_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t ; CHECK-NEXT: 
ret - %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer - %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128) + %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) ret <256 x i8> %v } @@ -544,9 +484,7 @@ define <2 x i16> @vssub_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vssub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -570,9 +508,7 @@ define <2 x i16> @vssub_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %ev ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -583,9 +519,7 @@ define <2 x i16> @vssub_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> splat (i16 -1), <2 x i1> %m, i32 %evl) ret <2 x i16> %v } @@ -596,11 +530,7 @@ define <2 x i16> @vssub_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> splat (i16 -1), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -622,9 +552,7 @@ define <4 x i16> @vssub_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vssub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -648,9 +576,7 @@ define <4 x i16> @vssub_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 
zeroext %ev
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

@@ -661,9 +587,7 @@ define <4 x i16> @vssub_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> splat (i16 -1), <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

@@ -674,11 +598,7 @@ define <4 x i16> @vssub_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> splat (i16 -1), <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

@@ -700,9 +620,7 @@ define <8 x i16> @vssub_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroe
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vssub.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

@@ -726,9 +644,7 @@ define <8 x i16> @vssub_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %ev
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

@@ -739,9 +655,7 @@ define <8 x i16> @vssub_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> splat (i16 -1), <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

@@ -752,11 +666,7 @@ define <8 x i16> @vssub_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> splat (i16 -1), <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

@@ -778,9 +688,7 @@ define <16 x i16> @vssub_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 z
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vssub.vv v8, v8, v10
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

@@ -804,9 +712,7 @@ define <16 x i16> @vssub_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

@@ -817,9 +723,7 @@ define <16 x i16> @vssub_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %ev
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> splat (i16 -1), <16 x i1> %m, i32 %evl)
  ret <16 x i16> %v
}

@@ -830,11 +734,7 @@ define <16 x i16> @vssub_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> splat (i16 -1), <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

@@ -856,9 +756,7 @@ define <2 x i32> @vssub_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroe
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vssub.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

@@ -882,9 +780,7 @@ define <2 x i32> @vssub_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %ev
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

@@ -895,9 +791,7 @@ define <2 x i32> @vssub_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> splat (i32 -1), <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}

@@ -908,11 +802,7 @@ define <2 x i32> @vssub_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> splat (i32 -1), <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

@@ -934,9 +824,7 @@ define <4 x i32> @vssub_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroe
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vssub.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

@@ -960,9 +848,7 @@ define <4 x i32> @vssub_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %ev
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

@@ -973,9 +859,7 @@ define <4 x i32> @vssub_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> splat (i32 -1), <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

@@ -986,11 +870,7 @@ define <4 x i32> @vssub_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> splat (i32 -1), <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

@@ -1012,9 +892,7 @@ define <8 x i32> @vssub_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroe
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vssub.vv v8, v8, v10
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

@@ -1038,9 +916,7 @@ define <8 x i32> @vssub_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %ev
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

@@ -1051,9 +927,7 @@ define <8 x i32> @vssub_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> splat (i32 -1), <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

@@ -1064,11 +938,7 @@ define <8 x i32> @vssub_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> splat (i32 -1), <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %v
}

@@ -1090,9 +960,7 @@ define <16 x i32> @vssub_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 z
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vssub.vv v8, v8, v12
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}

@@ -1116,9 +984,7 @@ define <16 x i32> @vssub_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}

@@ -1129,9 +995,7 @@ define <16 x i32> @vssub_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %ev
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> splat (i32 -1), <16 x i1> %m, i32 %evl)
  ret <16 x i32> %v
}

@@ -1142,11 +1006,7 @@ define <16 x i32> @vssub_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> splat (i32 -1), <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i32> %v
}

@@ -1168,9 +1028,7 @@ define <2 x i64> @vssub_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroe
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vssub.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i64> %v
}

@@ -1222,9 +1080,7 @@ define <2 x i64> @vssub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %ev
; RV64-NEXT: ret
  %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i64> %v
}

@@ -1235,9 +1091,7 @@ define <2 x i64> @vssub_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> splat (i64 -1), <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}

@@ -1248,11 +1102,7 @@ define <2 x i64> @vssub_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> splat (i64 -1), <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i64> %v
}

@@ -1274,9 +1124,7 @@ define <4 x i64> @vssub_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroe
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vssub.vv v8, v8, v10
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}

@@ -1328,9 +1176,7 @@ define <4 x i64> @vssub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %ev
; RV64-NEXT: ret
  %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}

@@ -1341,9 +1187,7 @@ define <4 x i64> @vssub_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> splat (i64 -1), <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}

@@ -1354,11 +1198,7 @@ define <4 x i64> @vssub_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> splat (i64 -1), <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %v
}

@@ -1380,9 +1220,7 @@ define <8 x i64> @vssub_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroe
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vssub.vv v8, v8, v12
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}

@@ -1434,9 +1272,7 @@ define <8 x i64> @vssub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %ev
; RV64-NEXT: ret
  %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}

@@ -1447,9 +1283,7 @@ define <8 x i64> @vssub_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> splat (i64 -1), <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

@@ -1460,11 +1294,7 @@ define <8 x i64> @vssub_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> splat (i64 -1), <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i64> %v
}

@@ -1486,9 +1316,7 @@ define <16 x i64> @vssub_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 z
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vssub.vv v8, v8, v16
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}

@@ -1540,9 +1368,7 @@ define <16 x i64> @vssub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext
; RV64-NEXT: ret
  %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
  %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}

@@ -1553,9 +1379,7 @@ define <16 x i64> @vssub_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> splat (i64 -1), <16 x i1> %m, i32 %evl)
  ret <16 x i64> %v
}

@@ -1566,11 +1390,7 @@ define <16 x i64> @vssub_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> splat (i64 -1), <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i64> %v
}

@@ -1582,16 +1402,16 @@ define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV32-LABEL: vssub_vx_v32i64:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v7, v0, 2
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: li a2, 16
-; RV32-NEXT: vmv.v.i v24, -1
+; RV32-NEXT: vslidedown.vi v7, v0, 2
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a2, .LBB108_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB108_2:
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vssub.vv v8, v8, v24, v0.t
; RV32-NEXT: addi a1, a0, -16
@@ -1624,24 +1444,22 @@ define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vssub.vx v16, v16, a2, v0.t
; RV64-NEXT: ret
- %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl)
  ret <32 x i64> %v
}

define <32 x i64> @vssub_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vssub_vi_v32i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: li a2, 16
-; RV32-NEXT: vmv.v.i v24, -1
; RV32-NEXT: mv a1, a0
; RV32-NEXT: bltu a0, a2, .LBB109_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 16
; RV32-NEXT: .LBB109_2:
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v24, -1
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vssub.vv v8, v8, v24
; RV32-NEXT: addi a1, a0, -16
@@ -1670,11 +1488,7 @@ define <32 x i64> @vssub_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vssub.vx v16, v16, a2
; RV64-NEXT: ret
- %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
- %head = insertelement <32 x i1> poison, i1 true, i32 0
- %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl)
+ %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x i64> %v
}

@@ -1706,9 +1520,7 @@ define <32 x i64> @vssub_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) {
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vssub.vx v16, v16, a0, v0.t
; RV64-NEXT: ret
- %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12)
+ %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12)
  ret <32 x i64> %v
}

@@ -1738,8 +1550,6 @@ define <32 x i64> @vssub_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) {
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vssub.vx v16, v16, a0, v0.t
; RV64-NEXT: ret
- %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
- %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27)
+ %v = call <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27)
  ret <32 x i64> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll
index 941be4aba1b9f..efe28eb9021ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll
@@ -35,9 +35,7 @@ define <2 x i8> @ssub_v2i8_vi(<2 x i8> %va) {
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i8> poison, i8 1, i32 0
- %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb)
+ %v = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 1))
  ret <2 x i8> %v
}

@@ -72,9 +70,7 @@ define <4 x i8> @ssub_v4i8_vi(<4 x i8> %va) {
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i8> poison, i8 1, i32 0
- %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb)
+ %v = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %va, <4 x i8> splat (i8 1))
  ret <4 x i8> %v
}

@@ -109,9 +105,7 @@ define <8 x i8> @ssub_v8i8_vi(<8 x i8> %va) {
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 1, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb)
+ %v = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 1))
  ret <8 x i8> %v
}

@@ -146,9 +140,7 @@ define <16 x i8> @ssub_v16i8_vi(<16 x i8> %va) {
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 1, i32 0
- %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb)
+ %v = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %va, <16 x i8> splat (i8 1))
  ret <16 x i8> %v
}

@@ -183,9 +175,7 @@ define <2 x i16> @ssub_v2i16_vi(<2 x i16> %va) {
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 1, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb)
+ %v = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %va, <2 x i16> splat (i16 1))
  ret <2 x i16> %v
}

@@ -220,9 +210,7 @@ define <4 x i16> @ssub_v4i16_vi(<4 x i16> %va) {
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 1, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb)
+ %v = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %va, <4 x i16> splat (i16 1))
  ret <4 x i16> %v
}

@@ -257,9 +245,7 @@ define <8 x i16> @ssub_v8i16_vi(<8 x i16> %va) {
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 1, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb)
+ %v = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %va, <8 x i16> splat (i16 1))
  ret <8 x i16> %v
}

@@ -294,9 +280,7 @@ define <16 x i16> @ssub_v16i16_vi(<16 x i16> %va) {
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 1, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb)
+ %v = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %va, <16 x i16> splat (i16 1))
  ret <16 x i16> %v
}

@@ -331,9 +315,7 @@ define <2 x i32> @ssub_v2i32_vi(<2 x i32> %va) {
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 1, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb)
+ %v = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %va, <2 x i32> splat (i32 1))
  ret <2 x i32> %v
}

@@ -368,9 +350,7 @@ define <4 x i32> @ssub_v4i32_vi(<4 x i32> %va) {
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 1, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb)
+ %v = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %va, <4 x i32> splat (i32 1))
  ret <4 x i32> %v
}

@@ -405,9 +385,7 @@ define <8 x i32> @ssub_v8i32_vi(<8 x i32> %va) {
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 1, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb)
+ %v = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %va, <8 x i32> splat (i32 1))
  ret <8 x i32> %v
}

@@ -442,9 +420,7 @@ define <16 x i32> @ssub_v16i32_vi(<16 x i32> %va) {
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 1, i32 0
- %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb)
+ %v = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %va, <16 x i32> splat (i32 1))
  ret <16 x i32> %v
}

@@ -492,9 +468,7 @@ define <2 x i64> @ssub_v2i64_vi(<2 x i64> %va) {
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 1, i32 0
- %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb)
+ %v = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %va, <2 x i64> splat (i64 1))
  ret <2 x i64> %v
}

@@ -542,9 +516,7 @@ define <4 x i64> @ssub_v4i64_vi(<4 x i64> %va) {
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 1, i32 0
- %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb)
+ %v = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %va, <4 x i64> splat (i64 1))
  ret <4 x i64> %v
}

@@ -592,9 +564,7 @@ define <8 x i64> @ssub_v8i64_vi(<8 x i64> %va) {
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 1, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb)
+ %v = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %va, <8 x i64> splat (i64 1))
  ret <8 x i64> %v
}

@@ -642,8 +612,6 @@ define <16 x i64> @ssub_v16i64_vi(<16 x i64> %va) {
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vssub.vx v8, v8, a0
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i64> poison, i64 1, i32 0
- %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb)
+ %v = call <16 x i64> @llvm.ssub.sat.v16i64(<16 x i64> %va, <16 x i64> splat (i64 1))
  ret <16 x i64> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll
index 60c16ef543a0f..6313f31bc1a61 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll
@@ -38,9 +38,7 @@ define <2 x i8> @vssubu_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vssubu.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

@@ -64,9 +62,7 @@ define <2 x i8> @vssubu_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl)
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

@@ -77,9 +73,7 @@ define <2 x i8> @vssubu_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 -1), <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

@@ -90,11 +84,7 @@ define <2 x i8> @vssubu_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i8> @llvm.vp.usub.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 -1), <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i8> %v
}

@@ -116,9 +106,7 @@ define <4 x i8> @vssubu_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vssubu.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

@@ -156,9 +144,7 @@ define <4 x i8> @vssubu_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl)
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

@@ -169,9 +155,7 @@ define <4 x i8> @vssubu_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> splat (i8 -1), <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

@@ -182,11 +166,7 @@ define <4 x i8> @vssubu_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.usub.sat.v4i8(<4 x i8> %va, <4 x i8> splat (i8 -1), <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %v
}

@@ -208,9 +188,7 @@ define <5 x i8> @vssubu_vv_v5i8_unmasked(<5 x i8> %va, <5 x i8> %b, i32 zeroext
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vssubu.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <5 x i1> poison, i1 true, i32 0
- %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
- %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> %m, i32 %evl)
+ %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %b, <5 x i1> splat (i1 true), i32 %evl)
  ret <5 x i8> %v
}

@@ -234,9 +212,7 @@ define <5 x i8> @vssubu_vx_v5i8_unmasked(<5 x i8> %va, i8 %b, i32 zeroext %evl)
; CHECK-NEXT: ret
  %elt.head = insertelement <5 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
- %head = insertelement <5 x i1> poison, i1 true, i32 0
- %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
- %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> splat (i1 true), i32 %evl)
  ret <5 x i8> %v
}

@@ -247,9 +223,7 @@ define <5 x i8> @vssubu_vi_v5i8(<5 x i8> %va, <5 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
- %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> splat (i8 -1), <5 x i1> %m, i32 %evl)
  ret <5 x i8> %v
}

@@ -260,11 +234,7 @@ define <5 x i8> @vssubu_vi_v5i8_unmasked(<5 x i8> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <5 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <5 x i8> %elt.head, <5 x i8> poison, <5 x i32> zeroinitializer
- %head = insertelement <5 x i1> poison, i1 true, i32 0
- %m = shufflevector <5 x i1> %head, <5 x i1> poison, <5 x i32> zeroinitializer
- %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> %vb, <5 x i1> %m, i32 %evl)
+ %v = call <5 x i8> @llvm.vp.usub.sat.v5i8(<5 x i8> %va, <5 x i8> splat (i8 -1), <5 x i1> splat (i1 true), i32 %evl)
  ret <5 x i8> %v
}

@@ -286,9 +256,7 @@ define <8 x i8> @vssubu_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vssubu.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

@@ -312,9 +280,7 @@ define <8 x i8> @vssubu_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl)
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

@@ -325,9 +291,7 @@ define <8 x i8> @vssubu_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

@@ -338,11 +302,7 @@ define <8 x i8> @vssubu_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.usub.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i8> %v
}

@@ -364,9 +324,7 @@ define <16 x i8> @vssubu_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zero
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vssubu.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i8> %v
}

@@ -390,9 +348,7 @@ define <16 x i8> @vssubu_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %ev
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i8> %v
}

@@ -403,9 +359,7 @@ define <16 x i8> @vssubu_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> splat (i8 -1), <16 x i1> %m, i32 %evl)
  ret <16 x i8> %v
}

@@ -416,11 +370,7 @@ define <16 x i8> @vssubu_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.usub.sat.v16i8(<16 x i8> %va, <16 x i8> splat (i8 -1), <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i8> %v
}

@@ -448,9 +398,7 @@ define <256 x i8> @vssubu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
- %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl)
  ret <256 x i8> %v
}

@@ -473,11 +421,7 @@ define <256 x i8> @vssubu_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vssubu.vx v16, v16, a2
; CHECK-NEXT: ret
- %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
- %head = insertelement <256 x i1> poison, i1 true, i32 0
- %m = shufflevector <256 x i1> %head, <256 x i1> poison, <256 x i32> zeroinitializer
- %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 %evl)
+ %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> splat (i1 true), i32 %evl)
  ret <256 x i8> %v
}

@@ -495,9 +439,7 @@ define <256 x i8> @vssubu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) {
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
- %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 129)
+ %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129)
  ret <256 x i8> %v
}

@@ -515,9 +457,7 @@ define <256 x i8> @vssubu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) {
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
- %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 128)
+ %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128)
  ret <256 x i8> %v
}

@@ -539,9 +479,7 @@ define <2 x i16> @vssubu_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zero
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vssubu.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i16> %v
}

@@ -565,9 +503,7 @@ define <2 x i16> @vssubu_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %e
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i16> %v
}

@@ -578,9 +514,7 @@ define <2 x i16> @vssubu_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> splat (i16 -1), <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}

@@ -591,11 +525,7 @@ define <2 x i16> @vssubu_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.usub.sat.v2i16(<2 x i16> %va, <2 x i16> splat (i16 -1), <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i16> %v
}

@@ -617,9 +547,7 @@ define <4 x i16> @vssubu_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zero
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vssubu.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

@@ -643,9 +571,7 @@ define <4 x i16> @vssubu_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %e
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

@@ -656,9 +582,7 @@ define <4 x i16> @vssubu_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> splat (i16 -1), <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

@@ -669,11 +593,7 @@ define <4 x i16> @vssubu_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.usub.sat.v4i16(<4 x i16> %va, <4 x i16> splat (i16 -1), <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i16> %v
}

@@ -695,9 +615,7 @@ define <8 x i16> @vssubu_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zero
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vssubu.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

@@ -721,9 +639,7 @@ define <8 x i16> @vssubu_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %e
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

@@ -734,9 +650,7 @@ define <8 x i16> @vssubu_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> splat (i16 -1), <8 x i1> %m, i32 %evl)
  ret <8 x i16> %v
}

@@ -747,11 +661,7 @@ define <8 x i16> @vssubu_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.usub.sat.v8i16(<8 x i16> %va, <8 x i16> splat (i16 -1), <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %v
}

@@ -773,9 +683,7 @@ define <16 x i16> @vssubu_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vssubu.vv v8, v8, v10
; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

@@ -799,9 +707,7 @@ define <16 x i16> @vssubu_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
  %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

@@ -812,9 +718,7 @@ define <16 x i16> @vssubu_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %e
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> splat (i16 -1), <16 x i1> %m, i32 %evl)
  ret <16 x i16> %v
}

@@ -825,11 +729,7 @@ define <16 x i16> @vssubu_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.usub.sat.v16i16(<16 x i16> %va, <16 x i16> splat (i16 -1), <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x i16> %v
}

@@ -851,9 +751,7 @@ define <2 x i32> @vssubu_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zero
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vssubu.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

@@ -877,9 +775,7 @@ define <2 x i32> @vssubu_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %e
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

@@ -890,9 +786,7 @@ define <2 x i32> @vssubu_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl)
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> splat (i32 -1), <2 x i1> %m, i32 %evl)
  ret <2 x i32> %v
}

@@ -903,11 +797,7 @@ define <2 x i32> @vssubu_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vssubu.vx v8, v8, a1
; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.usub.sat.v2i32(<2 x i32> %va, <2 x i32> splat (i32 -1), <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x i32> %v
}

@@ -929,9 +819,7 @@ define <4 x i32> @vssubu_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zero
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vssubu.vv v8, v8, v9
; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i32> %v
}

@@ -955,9 +843,7 @@ define <4 x i32> @vssubu_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %e
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
  %vb = shufflevector <4 x i32> %elt.head,
<4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -968,9 +854,7 @@ define <4 x i32> @vssubu_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> splat (i32 -1), <4 x i1> %m, i32 %evl) ret <4 x i32> %v } @@ -981,11 +865,7 @@ define <4 x i32> @vssubu_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %va, <4 x i32> splat (i32 -1), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -1007,9 +887,7 @@ define <8 x i32> @vssubu_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zero ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vssubu.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -1033,9 +911,7 @@ define <8 x i32> @vssubu_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %e ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -1046,9 +922,7 @@ define <8 x i32> @vssubu_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> splat (i32 -1), <8 x i1> %m, i32 %evl) ret <8 x i32> %v } @@ -1059,11 +933,7 @@ define <8 x i32> 
@vssubu_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.usub.sat.v8i32(<8 x i32> %va, <8 x i32> splat (i32 -1), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -1085,9 +955,7 @@ define <16 x i32> @vssubu_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vssubu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -1111,9 +979,7 @@ define <16 x i32> @vssubu_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -1124,9 +990,7 @@ define <16 x i32> @vssubu_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> splat (i32 -1), <16 x i1> %m, i32 %evl) ret <16 x i32> %v } @@ -1137,11 +1001,7 @@ define <16 x i32> @vssubu_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.usub.sat.v16i32(<16 x i32> %va, <16 x i32> splat (i32 -1), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -1163,9 +1023,7 @@ define <2 x i64> @vssubu_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zero ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vssubu.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> 
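; For reference, the VP intrinsics exercised here carry an explicit mask and an
; explicit vector length (EVL) as trailing operands; a sketch of the
; declaration, following the usual VP intrinsic shape:
;   declare <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64>, <2 x i64>, <2 x i1>, i32)
; The *_unmasked tests pass an all-true mask precisely so that codegen can drop
; the mask and select the unmasked instruction encoding.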
%head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1217,9 +1075,7 @@ define <2 x i64> @vssubu_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %e ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1230,9 +1086,7 @@ define <2 x i64> @vssubu_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> splat (i64 -1), <2 x i1> %m, i32 %evl) ret <2 x i64> %v } @@ -1243,11 +1097,7 @@ define <2 x i64> @vssubu_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.usub.sat.v2i64(<2 x i64> %va, <2 x i64> splat (i64 -1), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -1269,9 +1119,7 @@ define <4 x i64> @vssubu_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zero ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vssubu.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1323,9 +1171,7 @@ define <4 x i64> @vssubu_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %e ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1336,9 +1182,7 @@ define <4 x i64> @vssubu_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e64, 
m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> splat (i64 -1), <4 x i1> %m, i32 %evl) ret <4 x i64> %v } @@ -1349,11 +1193,7 @@ define <4 x i64> @vssubu_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.usub.sat.v4i64(<4 x i64> %va, <4 x i64> splat (i64 -1), <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -1375,9 +1215,7 @@ define <8 x i64> @vssubu_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zero ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vssubu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1429,9 +1267,7 @@ define <8 x i64> @vssubu_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %e ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1442,9 +1278,7 @@ define <8 x i64> @vssubu_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> splat (i64 -1), <8 x i1> %m, i32 %evl) ret <8 x i64> %v } @@ -1455,11 +1289,7 @@ define <8 x i64> @vssubu_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x 
i64> @llvm.vp.usub.sat.v8i64(<8 x i64> %va, <8 x i64> splat (i64 -1), <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -1481,9 +1311,7 @@ define <16 x i64> @vssubu_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vssubu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1535,9 +1363,7 @@ define <16 x i64> @vssubu_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1548,9 +1374,7 @@ define <16 x i64> @vssubu_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> splat (i64 -1), <16 x i1> %m, i32 %evl) ret <16 x i64> %v } @@ -1561,11 +1385,7 @@ define <16 x i64> @vssubu_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.usub.sat.v16i64(<16 x i64> %va, <16 x i64> splat (i64 -1), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -1577,16 +1397,16 @@ define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %e ; RV32-LABEL: vssubu_vx_v32i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB108_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB108_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, a0, -16 @@ -1619,24 +1439,22 @@ define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 
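; In the v32i64 hunks above, only the RV32 instruction schedule changes: the
; materialization of the -1 splat, i.e.
;   li a2, 32
;   vsetvli zero, a2, e32, m8, ta, ma
;   vmv.v.i v24, -1
; is sunk below the branch that clamps the EVL to 16. The same operations
; execute in a different order; nothing is added or removed.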
zeroext %e ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vssubu.vx v16, v16, a2, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 %evl) ret <32 x i64> %v } define <32 x i64> @vssubu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vssubu_vi_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB109_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB109_2: +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vssubu.vv v8, v8, v24 ; RV32-NEXT: addi a1, a0, -16 @@ -1665,11 +1483,7 @@ define <32 x i64> @vssubu_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vssubu.vx v16, v16, a2 ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %head = insertelement <32 x i1> poison, i1 true, i32 0 - %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) + %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } @@ -1701,9 +1515,7 @@ define <32 x i64> @vssubu_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vssubu.vx v16, v16, a0, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12) + %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 12) ret <32 x i64> %v } @@ -1733,8 +1545,6 @@ define <32 x i64> @vssubu_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vssubu.vx v16, v16, a0, v0.t ; RV64-NEXT: ret - %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 - %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer - %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27) + %v = call <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64> %va, <32 x i64> splat (i64 -1), <32 x i1> %m, i32 27) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll index f31ee23fe7dcd..dc9279f6e7fa0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll @@ -35,9 +35,7 @@ define <2 x i8> @usub_v2i8_vi(<2 x i8> %va) { ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 2, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> 
zeroinitializer - %v = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %va, <2 x i8> %vb) + %v = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 2)) ret <2 x i8> %v } @@ -72,9 +70,7 @@ define <4 x i8> @usub_v4i8_vi(<4 x i8> %va) { ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 2, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %va, <4 x i8> %vb) + %v = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %va, <4 x i8> splat (i8 2)) ret <4 x i8> %v } @@ -109,9 +105,7 @@ define <8 x i8> @usub_v8i8_vi(<8 x i8> %va) { ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i8> poison, i8 2, i32 0 - %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %va, <8 x i8> %vb) + %v = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %va, <8 x i8> splat (i8 2)) ret <8 x i8> %v } @@ -146,9 +140,7 @@ define <16 x i8> @usub_v16i8_vi(<16 x i8> %va) { ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i8> poison, i8 2, i32 0 - %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %va, <16 x i8> %vb) + %v = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %va, <16 x i8> splat (i8 2)) ret <16 x i8> %v } @@ -183,9 +175,7 @@ define <2 x i16> @usub_v2i16_vi(<2 x i16> %va) { ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i16> poison, i16 2, i32 0 - %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %va, <2 x i16> %vb) + %v = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %va, <2 x i16> splat (i16 2)) ret <2 x i16> %v } @@ -220,9 +210,7 @@ define <4 x i16> @usub_v4i16_vi(<4 x i16> %va) { ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i16> poison, i16 2, i32 0 - %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %va, <4 x i16> %vb) + %v = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %va, <4 x i16> splat (i16 2)) ret <4 x i16> %v } @@ -257,9 +245,7 @@ define <8 x i16> @usub_v8i16_vi(<8 x i16> %va) { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i16> poison, i16 2, i32 0 - %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %va, <8 x i16> %vb) + %v = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %va, <8 x i16> splat (i16 2)) ret <8 x i16> %v } @@ -294,9 +280,7 @@ define <16 x i16> @usub_v16i16_vi(<16 x i16> %va) { ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i16> poison, i16 2, i32 0 - %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %va, <16 x i16> %vb) + %v = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %va, 
<16 x i16> splat (i16 2)) ret <16 x i16> %v } @@ -331,9 +315,7 @@ define <2 x i32> @usub_v2i32_vi(<2 x i32> %va) { ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i32> poison, i32 2, i32 0 - %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %va, <2 x i32> %vb) + %v = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %va, <2 x i32> splat (i32 2)) ret <2 x i32> %v } @@ -368,9 +350,7 @@ define <4 x i32> @usub_v4i32_vi(<4 x i32> %va) { ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i32> poison, i32 2, i32 0 - %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %va, <4 x i32> %vb) + %v = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %va, <4 x i32> splat (i32 2)) ret <4 x i32> %v } @@ -405,9 +385,7 @@ define <8 x i32> @usub_v8i32_vi(<8 x i32> %va) { ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i32> poison, i32 2, i32 0 - %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %va, <8 x i32> %vb) + %v = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %va, <8 x i32> splat (i32 2)) ret <8 x i32> %v } @@ -442,9 +420,7 @@ define <16 x i32> @usub_v16i32_vi(<16 x i32> %va) { ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i32> poison, i32 2, i32 0 - %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %va, <16 x i32> %vb) + %v = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %va, <16 x i32> splat (i32 2)) ret <16 x i32> %v } @@ -492,9 +468,7 @@ define <2 x i64> @usub_v2i64_vi(<2 x i64> %va) { ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i64> poison, i64 2, i32 0 - %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %va, <2 x i64> %vb) + %v = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %va, <2 x i64> splat (i64 2)) ret <2 x i64> %v } @@ -542,9 +516,7 @@ define <4 x i64> @usub_v4i64_vi(<4 x i64> %va) { ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i64> poison, i64 2, i32 0 - %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %va, <4 x i64> %vb) + %v = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %va, <4 x i64> splat (i64 2)) ret <4 x i64> %v } @@ -592,9 +564,7 @@ define <8 x i64> @usub_v8i64_vi(<8 x i64> %va) { ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <8 x i64> poison, i64 2, i32 0 - %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %va, <8 x i64> %vb) + %v = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %va, <8 x i64> splat (i64 2)) ret <8 x i64> %v } @@ -642,8 +612,6 @@ define <16 x i64> 
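; The plain (non-VP) @llvm.usub.sat tests follow the same splat rewrite with a
; constant operand, e.g.
;   %v = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %va, <2 x i8> splat (i8 2))
; Note that vssubu provides only .vv and .vx forms (the RVV saturating
; subtracts have no immediate variant), which is why the constant 2 is first
; moved into a scalar register and vssubu.vx is selected.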
@usub_v16i64_vi(<16 x i64> %va) { ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement <16 x i64> poison, i64 2, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.usub.sat.v16i64(<16 x i64> %va, <16 x i64> %vb) + %v = call <16 x i64> @llvm.usub.sat.v16i64(<16 x i64> %va, <16 x i64> splat (i64 2)) ret <16 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll index 1c12614940082..6052c9ee20fe1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll @@ -34,9 +34,7 @@ define <2 x i8> @vsub_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -60,9 +58,7 @@ define <2 x i8> @vsub_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -84,9 +80,7 @@ define <3 x i8> @vsub_vv_v3i8_unmasked(<3 x i8> %va, <3 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <3 x i1> poison, i1 true, i32 0 - %m = shufflevector <3 x i1> %head, <3 x i1> poison, <3 x i32> zeroinitializer - %v = call <3 x i8> @llvm.vp.sub.v3i8(<3 x i8> %va, <3 x i8> %b, <3 x i1> %m, i32 %evl) + %v = call <3 x i8> @llvm.vp.sub.v3i8(<3 x i8> %va, <3 x i8> %b, <3 x i1> splat (i1 true), i32 %evl) ret <3 x i8> %v } @@ -110,9 +104,7 @@ define <3 x i8> @vsub_vx_v3i8_unmasked(<3 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <3 x i8> poison, i8 %b, i32 0 %vb = shufflevector <3 x i8> %elt.head, <3 x i8> poison, <3 x i32> zeroinitializer - %head = insertelement <3 x i1> poison, i1 true, i32 0 - %m = shufflevector <3 x i1> %head, <3 x i1> poison, <3 x i32> zeroinitializer - %v = call <3 x i8> @llvm.vp.sub.v3i8(<3 x i8> %va, <3 x i8> %vb, <3 x i1> %m, i32 %evl) + %v = call <3 x i8> @llvm.vp.sub.v3i8(<3 x i8> %va, <3 x i8> %vb, <3 x i1> splat (i1 true), i32 %evl) ret <3 x i8> %v } @@ -134,9 +126,7 @@ define <4 x i8> @vsub_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 
true), i32 %evl) ret <4 x i8> %v } @@ -160,9 +150,7 @@ define <4 x i8> @vsub_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.sub.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -184,9 +172,7 @@ define <8 x i8> @vsub_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -210,9 +196,7 @@ define <8 x i8> @vsub_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i8> @llvm.vp.sub.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i8> %v } @@ -234,9 +218,7 @@ define <16 x i8> @vsub_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -260,9 +242,7 @@ define <16 x i8> @vsub_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl) ; CHECK-NEXT: ret %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i8> @llvm.vp.sub.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i8> %v } @@ -284,9 +264,7 @@ define <2 x i16> @vsub_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -310,9 +288,7 @@ define 
<2 x i16> @vsub_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i16> @llvm.vp.sub.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i16> %v } @@ -334,9 +310,7 @@ define <4 x i16> @vsub_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -360,9 +334,7 @@ define <4 x i16> @vsub_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i16> @llvm.vp.sub.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -384,9 +356,7 @@ define <8 x i16> @vsub_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -410,9 +380,7 @@ define <8 x i16> @vsub_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i16> @llvm.vp.sub.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i16> %v } @@ -434,9 +402,7 @@ define <16 x i16> @vsub_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -460,9 +426,7 @@ define <16 x i16> 
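; The vp.sub tests change the same way. These unmasked variants exist to check
; that an all-true mask selects the bare instruction: the masked form carries a
; trailing mask operand, as in (illustrative, mask assumed in v0)
;   vsub.vv v8, v8, v9, v0.t
; while the unmasked form checked above is simply
;   vsub.vv v8, v8, v9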
@vsub_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i16> @llvm.vp.sub.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i16> %v } @@ -484,9 +448,7 @@ define <2 x i32> @vsub_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -510,9 +472,7 @@ define <2 x i32> @vsub_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i32> %v } @@ -534,9 +494,7 @@ define <4 x i32> @vsub_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -560,9 +518,7 @@ define <4 x i32> @vsub_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -584,9 +540,7 @@ define <8 x i32> @vsub_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -610,9 +564,7 @@ define <8 x i32> 
@vsub_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i32> @llvm.vp.sub.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i32> %v } @@ -634,9 +586,7 @@ define <16 x i32> @vsub_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -660,9 +610,7 @@ define <16 x i32> @vsub_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext % ; CHECK-NEXT: ret %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i32> @llvm.vp.sub.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i32> %v } @@ -684,9 +632,7 @@ define <2 x i64> @vsub_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -738,9 +684,7 @@ define <2 x i64> @vsub_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i64> @llvm.vp.sub.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i64> %v } @@ -762,9 +706,7 @@ define <4 x i64> @vsub_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -816,9 +758,7 @@ define <4 x i64> 
@vsub_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i64> @llvm.vp.sub.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -840,9 +780,7 @@ define <8 x i64> @vsub_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -894,9 +832,7 @@ define <8 x i64> @vsub_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer - %head = insertelement <8 x i1> poison, i1 true, i32 0 - %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer - %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl) + %v = call <8 x i64> @llvm.vp.sub.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x i64> %v } @@ -918,9 +854,7 @@ define <16 x i64> @vsub_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } @@ -972,8 +906,6 @@ define <16 x i64> @vsub_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext % ; RV64-NEXT: ret %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.sub.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll index 83d1d1b3f94c7..af67b9920ed1e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb 
-verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB -; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB-RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB-RV64 ; ============================================================================== ; i32 -> i64 @@ -111,8 +111,7 @@ define <4 x i64> @vwsll_vx_i32_v4i64_zext(<4 x i32> %a, i32 %b) { ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_zext: ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 ; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i32> poison, i32 %b, i32 0 @@ -371,8 +370,7 @@ define <8 x i32> @vwsll_vx_i16_v8i32_zext(<8 x i16> %a, i16 %b) { ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v8i32_zext: ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 ; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i16> poison, i16 %b, i32 0 @@ -499,6 +497,55 @@ define <16 x i16> @vwsll_vv_v16i16_zext(<16 x i8> %a, <16 x i8> %b) { } define <16 x i16> @vwsll_vx_i64_v16i16(<16 x i8> %a, i64 %b) { +; RV32-LABEL: vwsll_vx_i64_v16i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vmv.v.x v16, a0 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vrgather.vi v24, v16, 0 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vzext.vf2 v10, v8 +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV32-NEXT: vnsrl.wi v12, v24, 0 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vnsrl.wi v8, v12, 0 +; RV32-NEXT: vsll.vv v8, v10, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: vwsll_vx_i64_v16i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vzext.vf2 v10, v8 +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; RV64-NEXT: vnsrl.wi v12, v16, 0 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vnsrl.wi v8, v12, 0 +; RV64-NEXT: vsll.vv v8, v10, v8 +; RV64-NEXT: ret +; +; CHECK-ZVBB-RV32-LABEL: vwsll_vx_i64_v16i16: +; CHECK-ZVBB-RV32: # %bb.0: +; CHECK-ZVBB-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-ZVBB-RV32-NEXT: vmv.v.x v16, a0 +; CHECK-ZVBB-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-ZVBB-RV32-NEXT: vrgather.vi v24, v16, 0 +; CHECK-ZVBB-RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-ZVBB-RV32-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-ZVBB-RV32-NEXT: vnsrl.wi v12, v24, 0 +; CHECK-ZVBB-RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-ZVBB-RV32-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-ZVBB-RV32-NEXT: vsll.vv v8, v10, v8 +; CHECK-ZVBB-RV32-NEXT: ret +; +; CHECK-ZVBB-RV64-LABEL: vwsll_vx_i64_v16i16: +; CHECK-ZVBB-RV64: # %bb.0: +; CHECK-ZVBB-RV64-NEXT: vsetivli zero, 16, e8, 
m1, ta, ma +; CHECK-ZVBB-RV64-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-RV64-NEXT: vmv2r.v v8, v10 +; CHECK-ZVBB-RV64-NEXT: ret %head = insertelement <8 x i64> poison, i64 %b, i32 0 %splat = shufflevector <8 x i64> %head, <8 x i64> poison, <16 x i32> zeroinitializer %x = zext <16 x i8> %a to <16 x i16> @@ -593,8 +640,7 @@ define <16 x i16> @vwsll_vx_i8_v16i16_zext(<16 x i8> %a, i8 %b) { ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v16i16_zext: ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 ; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <16 x i8> poison, i8 %b, i32 0 @@ -661,10 +707,10 @@ define <4 x i64> @vwsll_vv_v4i64_v4i8_zext(<4 x i8> %a, <4 x i8> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_v4i8_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf4 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf4 v11, v9 +; CHECK-ZVBB-NEXT: vwsll.vv v8, v10, v11 ; CHECK-ZVBB-NEXT: ret %x = zext <4 x i8> %a to <4 x i64> %y = zext <4 x i8> %b to <4 x i64> @@ -735,11 +781,8 @@ define <4 x i64> @vwsll_vx_i32_v4i64_v4i8_zext(<4 x i8> %a, i32 %b) { ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_v4i8_zext: ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vzext.vf4 v10, v8 +; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i32> poison, i32 %b, i32 0 %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer @@ -790,12 +833,9 @@ define <4 x i64> @vwsll_vx_i16_v4i64_v4i8_zext(<4 x i8> %a, i16 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_v4i8_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf4 v10, v8 +; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i16> poison, i16 %b, i32 0 %splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer @@ -846,12 +886,9 @@ define <4 x i64> @vwsll_vx_i8_v4i64_v4i8_zext(<4 x i8> %a, i8 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_v4i8_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf4 v10, v8 +; CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i8> poison, i8 %b, i32 0 %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer @@ -869,12 +906,19 @@ define <4 x i64> @vwsll_vi_v4i64_v4i8(<4 x i8> %a) { ; CHECK-NEXT: vsll.vi v8, v10, 2 ; 
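; Two Zvbb-specific improvements show up in the new vwsll CHECK lines: a
; splatted scalar shift amount now selects the scalar form directly
; (vmv.v.x + vwsll.vv becomes vwsll.vx), and i8->i64 shifts are built from a
; vf4 zero-extend plus a widening shift instead of a vf8 zero-extend plus a
; plain shift, letting vwsll perform the final widening step:
;   vsetivli zero, 4, e32, m1, ta, ma
;   vzext.vf4 v10, v8
;   vwsll.vx v8, v10, a0
; Because rv32 and rv64 now diverge on some of these functions, the RUN lines
; gain RV32/RV64 and CHECK-ZVBB-RV32/CHECK-ZVBB-RV64 check prefixes.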
CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vwsll_vi_v4i64_v4i8: -; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 -; CHECK-ZVBB-NEXT: ret +; CHECK-ZVBB-RV32-LABEL: vwsll_vi_v4i64_v4i8: +; CHECK-ZVBB-RV32: # %bb.0: +; CHECK-ZVBB-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-RV32-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-RV32-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-RV32-NEXT: ret +; +; CHECK-ZVBB-RV64-LABEL: vwsll_vi_v4i64_v4i8: +; CHECK-ZVBB-RV64: # %bb.0: +; CHECK-ZVBB-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-RV64-NEXT: vzext.vf4 v10, v8 +; CHECK-ZVBB-RV64-NEXT: vwsll.vi v8, v10, 2 +; CHECK-ZVBB-RV64-NEXT: ret %x = zext <4 x i8> %a to <4 x i64> %z = shl <4 x i64> %x, splat (i64 2) ret <4 x i64> %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll index 68cffa98cd3d4..16487a0784125 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll @@ -34,9 +34,7 @@ define <2 x i8> @vxor_vv_v2i8_unmasked(<2 x i8> %va, <2 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vxor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %va, <2 x i8> %b, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -72,9 +70,7 @@ define <2 x i8> @vxor_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <2 x i8> poison, i8 %b, i32 0 %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -84,9 +80,7 @@ define <2 x i8> @vxor_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 7, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %va, <2 x i8> splat (i8 7), <2 x i1> %m, i32 %evl) ret <2 x i8> %v } @@ -96,11 +90,7 @@ define <2 x i8> @vxor_vi_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 7, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %va, <2 x i8> splat (i8 7), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -110,9 +100,7 @@ define <2 x i8> @vxor_vi_v2i8_1(<2 x 
i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %va, <2 x i8> splat (i8 -1), <2 x i1> %m, i32 %evl) ret <2 x i8> %v } @@ -122,11 +110,7 @@ define <2 x i8> @vxor_vi_v2i8_unmasked_1(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement <2 x i8> poison, i8 -1, i32 0 - %vb = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer - %head = insertelement <2 x i1> poison, i1 true, i32 0 - %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer - %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 %evl) + %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %va, <2 x i8> splat (i8 -1), <2 x i1> splat (i1 true), i32 %evl) ret <2 x i8> %v } @@ -148,9 +132,7 @@ define <4 x i8> @vxor_vv_v4i8_unmasked(<4 x i8> %va, <4 x i8> %b, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vxor.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.xor.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.xor.v4i8(<4 x i8> %va, <4 x i8> %b, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -174,9 +156,7 @@ define <4 x i8> @vxor_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) { ; CHECK-NEXT: ret %elt.head = insertelement <4 x i8> poison, i8 %b, i32 0 %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.xor.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.xor.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i8> %v } @@ -186,9 +166,7 @@ define <4 x i8> @vxor_vi_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 7, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.xor.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.xor.v4i8(<4 x i8> %va, <4 x i8> splat (i8 7), <4 x i1> %m, i32 %evl) ret <4 x i8> %v } @@ -198,11 +176,7 @@ define <4 x i8> @vxor_vi_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement <4 x i8> poison, i8 7, i32 0 - %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer - %head = insertelement <4 x i1> poison, i1 true, i32 0 - %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer - %v = call <4 x i8> @llvm.vp.xor.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl) + %v = call <4 x i8> @llvm.vp.xor.v4i8(<4 x i8> %va, <4 x i8> splat (i8 7), <4 x i1> splat (i1 true), i32 %evl) 
 ret <4 x i8> %v
 }

@@ -212,9 +186,7 @@ define <4 x i8> @vxor_vi_v4i8_1(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.xor.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.xor.v4i8(<4 x i8> %va, <4 x i8> splat (i8 -1), <4 x i1> %m, i32 %evl)
 ret <4 x i8> %v
 }

@@ -224,11 +196,7 @@ define <4 x i8> @vxor_vi_v4i8_unmasked_1(<4 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i8> @llvm.vp.xor.v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i8> @llvm.vp.xor.v4i8(<4 x i8> %va, <4 x i8> splat (i8 -1), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i8> %v
 }

@@ -250,9 +218,7 @@ define <8 x i8> @vxor_vv_v8i8_unmasked(<8 x i8> %va, <8 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.xor.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.xor.v8i8(<8 x i8> %va, <8 x i8> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }

@@ -276,9 +242,7 @@ define <8 x i8> @vxor_vx_v8i8_unmasked(<8 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.xor.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.xor.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }

@@ -288,9 +252,7 @@ define <8 x i8> @vxor_vi_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 7, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.xor.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.xor.v8i8(<8 x i8> %va, <8 x i8> splat (i8 7), <8 x i1> %m, i32 %evl)
 ret <8 x i8> %v
 }

@@ -300,11 +262,7 @@ define <8 x i8> @vxor_vi_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 7, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.xor.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.xor.v8i8(<8 x i8> %va, <8 x i8> splat (i8 7), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }

@@ -314,9 +272,7 @@ define <8 x i8> @vxor_vi_v8i8_1(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.xor.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.xor.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> %m, i32 %evl)
 ret <8 x i8> %v
 }

@@ -326,11 +282,7 @@ define <8 x i8> @vxor_vi_v8i8_unmasked_1(<8 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i8> @llvm.vp.xor.v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i8> @llvm.vp.xor.v8i8(<8 x i8> %va, <8 x i8> splat (i8 -1), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i8> %v
 }

@@ -352,9 +304,7 @@ define <9 x i8> @vxor_vv_v9i8_unmasked(<9 x i8> %va, <9 x i8> %b, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <9 x i1> poison, i1 true, i32 0
- %m = shufflevector <9 x i1> %head, <9 x i1> poison, <9 x i32> zeroinitializer
- %v = call <9 x i8> @llvm.vp.xor.v9i8(<9 x i8> %va, <9 x i8> %b, <9 x i1> %m, i32 %evl)
+ %v = call <9 x i8> @llvm.vp.xor.v9i8(<9 x i8> %va, <9 x i8> %b, <9 x i1> splat (i1 true), i32 %evl)
 ret <9 x i8> %v
 }

@@ -378,9 +328,7 @@ define <9 x i8> @vxor_vx_v9i8_unmasked(<9 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-NEXT: ret
 %elt.head = insertelement <9 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <9 x i8> %elt.head, <9 x i8> poison, <9 x i32> zeroinitializer
- %head = insertelement <9 x i1> poison, i1 true, i32 0
- %m = shufflevector <9 x i1> %head, <9 x i1> poison, <9 x i32> zeroinitializer
- %v = call <9 x i8> @llvm.vp.xor.v9i8(<9 x i8> %va, <9 x i8> %vb, <9 x i1> %m, i32 %evl)
+ %v = call <9 x i8> @llvm.vp.xor.v9i8(<9 x i8> %va, <9 x i8> %vb, <9 x i1> splat (i1 true), i32 %evl)
 ret <9 x i8> %v
 }

@@ -390,9 +338,7 @@ define <9 x i8> @vxor_vi_v9i8(<9 x i8> %va, <9 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <9 x i8> poison, i8 7, i32 0
- %vb = shufflevector <9 x i8> %elt.head, <9 x i8> poison, <9 x i32> zeroinitializer
- %v = call <9 x i8> @llvm.vp.xor.v9i8(<9 x i8> %va, <9 x i8> %vb, <9 x i1> %m, i32 %evl)
+ %v = call <9 x i8> @llvm.vp.xor.v9i8(<9 x i8> %va, <9 x i8> splat (i8 7), <9 x i1> %m, i32 %evl)
 ret <9 x i8> %v
 }

@@ -402,11 +348,7 @@ define <9 x i8> @vxor_vi_v9i8_unmasked(<9 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <9 x i8> poison, i8 7, i32 0
- %vb = shufflevector <9 x i8> %elt.head, <9 x i8> poison, <9 x i32> zeroinitializer
- %head = insertelement <9 x i1> poison, i1 true, i32 0
- %m = shufflevector <9 x i1> %head, <9 x i1> poison, <9 x i32> zeroinitializer
- %v = call <9 x i8> @llvm.vp.xor.v9i8(<9 x i8> %va, <9 x i8> %vb, <9 x i1> %m, i32 %evl)
+ %v = call <9 x i8> @llvm.vp.xor.v9i8(<9 x i8> %va, <9 x i8> splat (i8 7), <9 x i1> splat (i1 true), i32 %evl)
 ret <9 x i8> %v
 }

@@ -416,9 +358,7 @@ define <9 x i8> @vxor_vi_v9i8_1(<9 x i8> %va, <9 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <9 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <9 x i8> %elt.head, <9 x i8> poison, <9 x i32> zeroinitializer
- %v = call <9 x i8> @llvm.vp.xor.v9i8(<9 x i8> %va, <9 x i8> %vb, <9 x i1> %m, i32 %evl)
+ %v = call <9 x i8> @llvm.vp.xor.v9i8(<9 x i8> %va, <9 x i8> splat (i8 -1), <9 x i1> %m, i32 %evl)
 ret <9 x i8> %v
 }

@@ -428,11 +368,7 @@ define <9 x i8> @vxor_vi_v9i8_unmasked_1(<9 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <9 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <9 x i8> %elt.head, <9 x i8> poison, <9 x i32> zeroinitializer
- %head = insertelement <9 x i1> poison, i1 true, i32 0
- %m = shufflevector <9 x i1> %head, <9 x i1> poison, <9 x i32> zeroinitializer
- %v = call <9 x i8> @llvm.vp.xor.v9i8(<9 x i8> %va, <9 x i8> %vb, <9 x i1> %m, i32 %evl)
+ %v = call <9 x i8> @llvm.vp.xor.v9i8(<9 x i8> %va, <9 x i8> splat (i8 -1), <9 x i1> splat (i1 true), i32 %evl)
 ret <9 x i8> %v
 }

@@ -454,9 +390,7 @@ define <16 x i8> @vxor_vv_v16i8_unmasked(<16 x i8> %va, <16 x i8> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.xor.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.xor.v16i8(<16 x i8> %va, <16 x i8> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }

@@ -480,9 +414,7 @@ define <16 x i8> @vxor_vx_v16i8_unmasked(<16 x i8> %va, i8 %b, i32 zeroext %evl)
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.xor.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.xor.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }

@@ -492,9 +424,7 @@ define <16 x i8> @vxor_vi_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 7, i32 0
- %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.xor.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.xor.v16i8(<16 x i8> %va, <16 x i8> splat (i8 7), <16 x i1> %m, i32 %evl)
 ret <16 x i8> %v
 }

@@ -504,11 +434,7 @@ define <16 x i8> @vxor_vi_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 7, i32 0
- %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.xor.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.xor.v16i8(<16 x i8> %va, <16 x i8> splat (i8 7), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }

@@ -518,9 +444,7 @@ define <16 x i8> @vxor_vi_v16i8_1(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.xor.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.xor.v16i8(<16 x i8> %va, <16 x i8> splat (i8 -1), <16 x i1> %m, i32 %evl)
 ret <16 x i8> %v
 }

@@ -530,11 +454,7 @@ define <16 x i8> @vxor_vi_v16i8_unmasked_1(<16 x i8> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i8> poison, i8 -1, i32 0
- %vb = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i8> @llvm.vp.xor.v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i8> @llvm.vp.xor.v16i8(<16 x i8> %va, <16 x i8> splat (i8 -1), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i8> %v
 }

@@ -556,9 +476,7 @@ define <2 x i16> @vxor_vv_v2i16_unmasked(<2 x i16> %va, <2 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.xor.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.xor.v2i16(<2 x i16> %va, <2 x i16> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -582,9 +500,7 @@ define <2 x i16> @vxor_vx_v2i16_unmasked(<2 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.xor.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.xor.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -594,9 +510,7 @@ define <2 x i16> @vxor_vi_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 7, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.xor.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.xor.v2i16(<2 x i16> %va, <2 x i16> splat (i16 7), <2 x i1> %m, i32 %evl)
 ret <2 x i16> %v
 }

@@ -606,11 +520,7 @@ define <2 x i16> @vxor_vi_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 7, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.xor.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.xor.v2i16(<2 x i16> %va, <2 x i16> splat (i16 7), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -620,9 +530,7 @@ define <2 x i16> @vxor_vi_v2i16_1(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.xor.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.xor.v2i16(<2 x i16> %va, <2 x i16> splat (i16 -1), <2 x i1> %m, i32 %evl)
 ret <2 x i16> %v
 }

@@ -632,11 +540,7 @@ define <2 x i16> @vxor_vi_v2i16_unmasked_1(<2 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i16> @llvm.vp.xor.v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i16> @llvm.vp.xor.v2i16(<2 x i16> %va, <2 x i16> splat (i16 -1), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i16> %v
 }

@@ -658,9 +562,7 @@ define <4 x i16> @vxor_vv_v4i16_unmasked(<4 x i16> %va, <4 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.xor.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.xor.v4i16(<4 x i16> %va, <4 x i16> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -684,9 +586,7 @@ define <4 x i16> @vxor_vx_v4i16_unmasked(<4 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.xor.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.xor.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -696,9 +596,7 @@ define <4 x i16> @vxor_vi_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 7, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.xor.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.xor.v4i16(<4 x i16> %va, <4 x i16> splat (i16 7), <4 x i1> %m, i32 %evl)
 ret <4 x i16> %v
 }

@@ -708,11 +606,7 @@ define <4 x i16> @vxor_vi_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 7, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.xor.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.xor.v4i16(<4 x i16> %va, <4 x i16> splat (i16 7), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -722,9 +616,7 @@ define <4 x i16> @vxor_vi_v4i16_1(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.xor.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.xor.v4i16(<4 x i16> %va, <4 x i16> splat (i16 -1), <4 x i1> %m, i32 %evl)
 ret <4 x i16> %v
 }

@@ -734,11 +626,7 @@ define <4 x i16> @vxor_vi_v4i16_unmasked_1(<4 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i16> @llvm.vp.xor.v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i16> @llvm.vp.xor.v4i16(<4 x i16> %va, <4 x i16> splat (i16 -1), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i16> %v
 }

@@ -760,9 +648,7 @@ define <8 x i16> @vxor_vv_v8i16_unmasked(<8 x i16> %va, <8 x i16> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.xor.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.xor.v8i16(<8 x i16> %va, <8 x i16> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -786,9 +672,7 @@ define <8 x i16> @vxor_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.xor.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.xor.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -798,9 +682,7 @@ define <8 x i16> @vxor_vi_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 7, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.xor.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.xor.v8i16(<8 x i16> %va, <8 x i16> splat (i16 7), <8 x i1> %m, i32 %evl)
 ret <8 x i16> %v
 }

@@ -810,11 +692,7 @@ define <8 x i16> @vxor_vi_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 7, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.xor.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.xor.v8i16(<8 x i16> %va, <8 x i16> splat (i16 7), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -824,9 +702,7 @@ define <8 x i16> @vxor_vi_v8i16_1(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.xor.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.xor.v8i16(<8 x i16> %va, <8 x i16> splat (i16 -1), <8 x i1> %m, i32 %evl)
 ret <8 x i16> %v
 }

@@ -836,11 +712,7 @@ define <8 x i16> @vxor_vi_v8i16_unmasked_1(<8 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i16> @llvm.vp.xor.v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i16> @llvm.vp.xor.v8i16(<8 x i16> %va, <8 x i16> splat (i16 -1), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i16> %v
 }

@@ -862,9 +734,7 @@ define <16 x i16> @vxor_vv_v16i16_unmasked(<16 x i16> %va, <16 x i16> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.xor.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.xor.v16i16(<16 x i16> %va, <16 x i16> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -888,9 +758,7 @@ define <16 x i16> @vxor_vx_v16i16_unmasked(<16 x i16> %va, i16 %b, i32 zeroext %
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i16> poison, i16 %b, i32 0
 %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.xor.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.xor.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -900,9 +768,7 @@ define <16 x i16> @vxor_vi_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 7, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.xor.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.xor.v16i16(<16 x i16> %va, <16 x i16> splat (i16 7), <16 x i1> %m, i32 %evl)
 ret <16 x i16> %v
 }

@@ -912,11 +778,7 @@ define <16 x i16> @vxor_vi_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 7, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.xor.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.xor.v16i16(<16 x i16> %va, <16 x i16> splat (i16 7), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -926,9 +788,7 @@ define <16 x i16> @vxor_vi_v16i16_1(<16 x i16> %va, <16 x i1> %m, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.xor.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.xor.v16i16(<16 x i16> %va, <16 x i16> splat (i16 -1), <16 x i1> %m, i32 %evl)
 ret <16 x i16> %v
 }

@@ -938,11 +798,7 @@ define <16 x i16> @vxor_vi_v16i16_unmasked_1(<16 x i16> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i16> poison, i16 -1, i32 0
- %vb = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i16> @llvm.vp.xor.v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i16> @llvm.vp.xor.v16i16(<16 x i16> %va, <16 x i16> splat (i16 -1), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i16> %v
 }

@@ -964,9 +820,7 @@ define <2 x i32> @vxor_vv_v2i32_unmasked(<2 x i32> %va, <2 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.xor.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.xor.v2i32(<2 x i32> %va, <2 x i32> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -990,9 +844,7 @@ define <2 x i32> @vxor_vx_v2i32_unmasked(<2 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <2 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.xor.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.xor.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -1002,9 +854,7 @@ define <2 x i32> @vxor_vi_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 7, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.xor.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.xor.v2i32(<2 x i32> %va, <2 x i32> splat (i32 7), <2 x i1> %m, i32 %evl)
 ret <2 x i32> %v
 }

@@ -1014,11 +864,7 @@ define <2 x i32> @vxor_vi_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 7, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.xor.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.xor.v2i32(<2 x i32> %va, <2 x i32> splat (i32 7), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -1028,9 +874,7 @@ define <2 x i32> @vxor_vi_v2i32_1(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.xor.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.xor.v2i32(<2 x i32> %va, <2 x i32> splat (i32 -1), <2 x i1> %m, i32 %evl)
 ret <2 x i32> %v
 }

@@ -1040,11 +884,7 @@ define <2 x i32> @vxor_vi_v2i32_unmasked_1(<2 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i32> @llvm.vp.xor.v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i32> @llvm.vp.xor.v2i32(<2 x i32> %va, <2 x i32> splat (i32 -1), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i32> %v
 }

@@ -1066,9 +906,7 @@ define <4 x i32> @vxor_vv_v4i32_unmasked(<4 x i32> %va, <4 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %va, <4 x i32> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -1092,9 +930,7 @@ define <4 x i32> @vxor_vx_v4i32_unmasked(<4 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <4 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -1104,9 +940,7 @@ define <4 x i32> @vxor_vi_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 7, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %va, <4 x i32> splat (i32 7), <4 x i1> %m, i32 %evl)
 ret <4 x i32> %v
 }

@@ -1116,11 +950,7 @@ define <4 x i32> @vxor_vi_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 7, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %va, <4 x i32> splat (i32 7), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -1130,9 +960,7 @@ define <4 x i32> @vxor_vi_v4i32_1(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %va, <4 x i32> splat (i32 -1), <4 x i1> %m, i32 %evl)
 ret <4 x i32> %v
 }

@@ -1142,11 +970,7 @@ define <4 x i32> @vxor_vi_v4i32_unmasked_1(<4 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i32> @llvm.vp.xor.v4i32(<4 x i32> %va, <4 x i32> splat (i32 -1), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i32> %v
 }

@@ -1168,9 +992,7 @@ define <8 x i32> @vxor_vv_v8i32_unmasked(<8 x i32> %va, <8 x i32> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %va, <8 x i32> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -1194,9 +1016,7 @@ define <8 x i32> @vxor_vx_v8i32_unmasked(<8 x i32> %va, i32 %b, i32 zeroext %evl
 ; CHECK-NEXT: ret
 %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -1206,9 +1026,7 @@ define <8 x i32> @vxor_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 7, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %va, <8 x i32> splat (i32 7), <8 x i1> %m, i32 %evl)
 ret <8 x i32> %v
 }

@@ -1218,11 +1036,7 @@ define <8 x i32> @vxor_vi_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 7, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %va, <8 x i32> splat (i32 7), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -1232,9 +1046,7 @@ define <8 x i32> @vxor_vi_v8i32_1(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %va, <8 x i32> splat (i32 -1), <8 x i1> %m, i32 %evl)
 ret <8 x i32> %v
 }

@@ -1244,11 +1056,7 @@ define <8 x i32> @vxor_vi_v8i32_unmasked_1(<8 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i32> @llvm.vp.xor.v8i32(<8 x i32> %va, <8 x i32> splat (i32 -1), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i32> %v
 }

@@ -1270,9 +1078,7 @@ define <16 x i32> @vxor_vv_v16i32_unmasked(<16 x i32> %va, <16 x i32> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.xor.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.xor.v16i32(<16 x i32> %va, <16 x i32> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -1296,9 +1102,7 @@ define <16 x i32> @vxor_vx_v16i32_unmasked(<16 x i32> %va, i32 %b, i32 zeroext %
 ; CHECK-NEXT: ret
 %elt.head = insertelement <16 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.xor.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.xor.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -1308,9 +1112,7 @@ define <16 x i32> @vxor_vi_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 7, i32 0
- %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.xor.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.xor.v16i32(<16 x i32> %va, <16 x i32> splat (i32 7), <16 x i1> %m, i32 %evl)
 ret <16 x i32> %v
 }

@@ -1320,11 +1122,7 @@ define <16 x i32> @vxor_vi_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 7, i32 0
- %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.xor.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.xor.v16i32(<16 x i32> %va, <16 x i32> splat (i32 7), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -1334,9 +1132,7 @@ define <16 x i32> @vxor_vi_v16i32_1(<16 x i32> %va, <16 x i1> %m, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.xor.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.xor.v16i32(<16 x i32> %va, <16 x i32> splat (i32 -1), <16 x i1> %m, i32 %evl)
 ret <16 x i32> %v
 }

@@ -1346,11 +1142,7 @@ define <16 x i32> @vxor_vi_v16i32_unmasked_1(<16 x i32> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i32> poison, i32 -1, i32 0
- %vb = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i32> @llvm.vp.xor.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i32> @llvm.vp.xor.v16i32(<16 x i32> %va, <16 x i32> splat (i32 -1), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i32> %v
 }

@@ -1372,9 +1164,7 @@ define <2 x i64> @vxor_vv_v2i64_unmasked(<2 x i64> %va, <2 x i64> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v9
 ; CHECK-NEXT: ret
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.xor.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.xor.v2i64(<2 x i64> %va, <2 x i64> %b, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -1426,9 +1216,7 @@ define <2 x i64> @vxor_vx_v2i64_unmasked(<2 x i64> %va, i64 %b, i32 zeroext %evl
 ; RV64-NEXT: ret
 %elt.head = insertelement <2 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.xor.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.xor.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -1438,9 +1226,7 @@ define <2 x i64> @vxor_vi_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 7, i32 0
- %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.xor.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.xor.v2i64(<2 x i64> %va, <2 x i64> splat (i64 7), <2 x i1> %m, i32 %evl)
 ret <2 x i64> %v
 }

@@ -1450,11 +1236,7 @@ define <2 x i64> @vxor_vi_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 7, i32 0
- %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.xor.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.xor.v2i64(<2 x i64> %va, <2 x i64> splat (i64 7), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -1464,9 +1246,7 @@ define <2 x i64> @vxor_vi_v2i64_1(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.xor.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.xor.v2i64(<2 x i64> %va, <2 x i64> splat (i64 -1), <2 x i1> %m, i32 %evl)
 ret <2 x i64> %v
 }

@@ -1476,11 +1256,7 @@ define <2 x i64> @vxor_vi_v2i64_unmasked_1(<2 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <2 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
- %head = insertelement <2 x i1> poison, i1 true, i32 0
- %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
- %v = call <2 x i64> @llvm.vp.xor.v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 %evl)
+ %v = call <2 x i64> @llvm.vp.xor.v2i64(<2 x i64> %va, <2 x i64> splat (i64 -1), <2 x i1> splat (i1 true), i32 %evl)
 ret <2 x i64> %v
 }

@@ -1502,9 +1278,7 @@ define <4 x i64> @vxor_vv_v4i64_unmasked(<4 x i64> %va, <4 x i64> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v10
 ; CHECK-NEXT: ret
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.xor.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.xor.v4i64(<4 x i64> %va, <4 x i64> %b, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -1556,9 +1330,7 @@ define <4 x i64> @vxor_vx_v4i64_unmasked(<4 x i64> %va, i64 %b, i32 zeroext %evl
 ; RV64-NEXT: ret
 %elt.head = insertelement <4 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.xor.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.xor.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -1568,9 +1340,7 @@ define <4 x i64> @vxor_vi_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 7, i32 0
- %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.xor.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.xor.v4i64(<4 x i64> %va, <4 x i64> splat (i64 7), <4 x i1> %m, i32 %evl)
 ret <4 x i64> %v
 }

@@ -1580,11 +1350,7 @@ define <4 x i64> @vxor_vi_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 7, i32 0
- %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.xor.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.xor.v4i64(<4 x i64> %va, <4 x i64> splat (i64 7), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -1594,9 +1360,7 @@ define <4 x i64> @vxor_vi_v4i64_1(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.xor.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.xor.v4i64(<4 x i64> %va, <4 x i64> splat (i64 -1), <4 x i1> %m, i32 %evl)
 ret <4 x i64> %v
 }

@@ -1606,11 +1370,7 @@ define <4 x i64> @vxor_vi_v4i64_unmasked_1(<4 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <4 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
- %head = insertelement <4 x i1> poison, i1 true, i32 0
- %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
- %v = call <4 x i64> @llvm.vp.xor.v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 %evl)
+ %v = call <4 x i64> @llvm.vp.xor.v4i64(<4 x i64> %va, <4 x i64> splat (i64 -1), <4 x i1> splat (i1 true), i32 %evl)
 ret <4 x i64> %v
 }

@@ -1632,9 +1392,7 @@ define <8 x i64> @vxor_vv_v8i64_unmasked(<8 x i64> %va, <8 x i64> %b, i32 zeroex
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v12
 ; CHECK-NEXT: ret
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.xor.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.xor.v8i64(<8 x i64> %va, <8 x i64> %b, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -1686,9 +1444,7 @@ define <8 x i64> @vxor_vx_v8i64_unmasked(<8 x i64> %va, i64 %b, i32 zeroext %evl
 ; RV64-NEXT: ret
 %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.xor.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.xor.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -1698,9 +1454,7 @@ define <8 x i64> @vxor_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 7, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.xor.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.xor.v8i64(<8 x i64> %va, <8 x i64> splat (i64 7), <8 x i1> %m, i32 %evl)
 ret <8 x i64> %v
 }

@@ -1710,11 +1464,7 @@ define <8 x i64> @vxor_vi_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 7, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.xor.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.xor.v8i64(<8 x i64> %va, <8 x i64> splat (i64 7), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -1724,9 +1474,7 @@ define <8 x i64> @vxor_vi_v8i64_1(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl)
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.xor.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.xor.v8i64(<8 x i64> %va, <8 x i64> splat (i64 -1), <8 x i1> %m, i32 %evl)
 ret <8 x i64> %v
 }

@@ -1736,11 +1484,7 @@ define <8 x i64> @vxor_vi_v8i64_unmasked_1(<8 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <8 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
- %head = insertelement <8 x i1> poison, i1 true, i32 0
- %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
- %v = call <8 x i64> @llvm.vp.xor.v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 %evl)
+ %v = call <8 x i64> @llvm.vp.xor.v8i64(<8 x i64> %va, <8 x i64> splat (i64 -1), <8 x i1> splat (i1 true), i32 %evl)
 ret <8 x i64> %v
 }

@@ -1762,9 +1506,7 @@ define <16 x i64> @vxor_vv_v16i64_unmasked(<16 x i64> %va, <16 x i64> %b, i32 ze
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vxor.vv v8, v8, v16
 ; CHECK-NEXT: ret
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.xor.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.xor.v16i64(<16 x i64> %va, <16 x i64> %b, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }

@@ -1816,9 +1558,7 @@ define <16 x i64> @vxor_vx_v16i64_unmasked(<16 x i64> %va, i64 %b, i32 zeroext %
 ; RV64-NEXT: ret
 %elt.head = insertelement <16 x i64> poison, i64 %b, i32 0
 %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.xor.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.xor.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }

@@ -1828,9 +1568,7 @@ define <16 x i64> @vxor_vi_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i64> poison, i64 7, i32 0
- %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.xor.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.xor.v16i64(<16 x i64> %va, <16 x i64> splat (i64 7), <16 x i1> %m, i32 %evl)
 ret <16 x i64> %v
 }

@@ -1840,11 +1578,7 @@ define <16 x i64> @vxor_vi_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vxor.vi v8, v8, 7
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i64> poison, i64 7, i32 0
- %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %head = insertelement <16 x i1> poison, i1 true, i32 0
- %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.xor.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.xor.v16i64(<16 x i64> %va, <16 x i64> splat (i64 7), <16 x i1> splat (i1 true), i32 %evl)
 ret <16 x i64> %v
 }

@@ -1854,9 +1588,7 @@ define <16 x i64> @vxor_vi_v16i64_1(<16 x i64> %va, <16 x i1> %m, i32 zeroext %e
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8, v0.t
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x i64> poison, i64 -1, i32 0
- %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
- %v = call <16 x i64> @llvm.vp.xor.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl)
+ %v = call <16 x i64> @llvm.vp.xor.v16i64(<16 x i64> %va, <16 x i64> splat (i64 -1), <16 x i1> %m, i32 %evl)
 ret <16 x i64> %v
 }

@@ -1866,10 +1598,6 @@ define <16 x i64> @vxor_vi_v16i64_unmasked_1(<16 x i64> %va, i32 zeroext %evl) {
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vnot.v v8, v8
 ; CHECK-NEXT: ret
- %elt.head = insertelement <16 x
i64> poison, i64 -1, i32 0 - %vb = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer - %head = insertelement <16 x i1> poison, i1 true, i32 0 - %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer - %v = call <16 x i64> @llvm.vp.xor.v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 %evl) + %v = call <16 x i64> @llvm.vp.xor.v16i64(<16 x i64> %va, <16 x i64> splat (i64 -1), <16 x i1> splat (i1 true), i32 %evl) ret <16 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp-mask.ll index d734966830480..d292978c1d5eb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp-mask.ll @@ -22,7 +22,7 @@ define <4 x i16> @vzext_v4i16_v4i1_unmasked(<4 x i1> %va, i32 zeroext %evl) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: ret - %v = call <4 x i16> @llvm.vp.zext.v4i16.v4i1(<4 x i1> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i16> @llvm.vp.zext.v4i16.v4i1(<4 x i1> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -46,7 +46,7 @@ define <4 x i32> @vzext_v4i32_v4i1_unmasked(<4 x i1> %va, i32 zeroext %evl) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: ret - %v = call <4 x i32> @llvm.vp.zext.v4i32.v4i1(<4 x i1> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i32> @llvm.vp.zext.v4i32.v4i1(<4 x i1> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -70,6 +70,6 @@ define <4 x i64> @vzext_v4i64_v4i1_unmasked(<4 x i1> %va, i32 zeroext %evl) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: ret - %v = call <4 x i64> @llvm.vp.zext.v4i64.v4i1(<4 x i1> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.zext.v4i64.v4i1(<4 x i1> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll index e99a9b800d76d..f4d679cd57cac 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll @@ -22,7 +22,7 @@ define <4 x i16> @vzext_v4i16_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vzext.vf2 v9, v8 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret - %v = call <4 x i16> @llvm.vp.zext.v4i16.v4i8(<4 x i8> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i16> @llvm.vp.zext.v4i16.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i16> %v } @@ -46,7 +46,7 @@ define <4 x i32> @vzext_v4i32_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vzext.vf4 v9, v8 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %v = call <4 x i32> @llvm.vp.zext.v4i32.v4i8(<4 x i8> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i32> @llvm.vp.zext.v4i32.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -70,7 +70,7 @@ 
define <4 x i64> @vzext_v4i64_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-NEXT: vzext.vf8 v10, v8 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x i64> @llvm.vp.zext.v4i64.v4i8(<4 x i8> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.zext.v4i64.v4i8(<4 x i8> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -94,7 +94,7 @@ define <4 x i32> @vzext_v4i32_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vzext.vf2 v9, v8 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %v = call <4 x i32> @llvm.vp.zext.v4i32.v4i16(<4 x i16> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i32> @llvm.vp.zext.v4i32.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i32> %v } @@ -118,7 +118,7 @@ define <4 x i64> @vzext_v4i64_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-NEXT: vzext.vf4 v10, v8 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x i64> @llvm.vp.zext.v4i64.v4i16(<4 x i16> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.zext.v4i64.v4i16(<4 x i16> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -142,7 +142,7 @@ define <4 x i64> @vzext_v4i64_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; CHECK-NEXT: vzext.vf2 v10, v8 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %v = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl) + %v = call <4 x i64> @llvm.vp.zext.v4i64.v4i32(<4 x i32> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x i64> %v } @@ -197,7 +197,7 @@ define <32 x i64> @vzext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl ; CHECK-NEXT: vzext.vf2 v16, v8 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %v = call <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) + %v = call <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index d1cd82d9f7c18..9c4706b2bda76 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -42,9 +42,7 @@ define @vp_floor_nxv1f16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv1f16( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -86,9 +84,7 @@ define @vp_floor_nxv2f16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv2f16( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -130,9 +126,7 @@ define @vp_floor_nxv4f16_unmasked( 
%va, i ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv4f16( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -176,9 +170,7 @@ define @vp_floor_nxv8f16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv8f16( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -222,9 +214,7 @@ define @vp_floor_nxv16f16_unmasked( %va ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv16f16( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -268,9 +258,7 @@ define @vp_floor_nxv32f16_unmasked( %va ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -312,9 +300,7 @@ define @vp_floor_nxv1f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv1f32( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv1f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -356,9 +342,7 @@ define @vp_floor_nxv2f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv2f32( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv2f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -402,9 +386,7 @@ define @vp_floor_nxv4f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv4f32( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv4f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -448,9 +430,7 @@ define @vp_floor_nxv8f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv8f32( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv8f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -494,9 +474,7 @@ define @vp_floor_nxv16f32_unmasked( % ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv16f32( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv16f32( %va, splat (i1 true), i32 %evl) ret 
%v } @@ -538,9 +516,7 @@ define @vp_floor_nxv1f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv1f64( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -584,9 +560,7 @@ define @vp_floor_nxv2f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv2f64( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -630,9 +604,7 @@ define @vp_floor_nxv4f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv4f64( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -676,9 +648,7 @@ define @vp_floor_nxv7f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv7f64( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -722,9 +692,7 @@ define @vp_floor_nxv8f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv8f64( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -837,8 +805,6 @@ define @vp_floor_nxv16f64_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.floor.nxv16f64( %va, %m, i32 %evl) + %v = call @llvm.vp.floor.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll index 7774e3e4775af..b78b8663eac90 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll @@ -76,9 +76,7 @@ define @vfmax_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv1f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -150,9 +148,7 @@ define @vfmax_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv2f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -226,9 +222,7 @@ define @vfmax_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, 
zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv4f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -304,9 +298,7 @@ define @vfmax_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv8f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -408,9 +400,7 @@ define @vfmax_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv16f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -626,8 +616,6 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -635,6 +623,8 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -715,9 +705,7 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -751,9 +739,7 @@ define @vfmax_vv_nxv1f32_unmasked( %va, ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v11 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv1f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -787,9 +773,7 @@ define @vfmax_vv_nxv2f32_unmasked( %va, ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v11 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv2f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv2f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -825,9 +809,7 @@ define @vfmax_vv_nxv4f32_unmasked( %va, ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v14 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv4f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv4f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -863,9 +845,7 @@ define 
@vfmax_vv_nxv8f32_unmasked( %va, ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v20 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv8f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv8f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -899,9 +879,7 @@ define @vfmax_vv_nxv1f64_unmasked( %v ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v11 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv1f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv1f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -937,9 +915,7 @@ define @vfmax_vv_nxv2f64_unmasked( %v ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v14 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv2f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv2f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -975,9 +951,7 @@ define @vfmax_vv_nxv4f64_unmasked( %v ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v20 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv4f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv4f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1026,9 +1000,7 @@ define @vfmax_vv_nxv8f64_unmasked( %v ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv8f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv8f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1238,8 +1210,6 @@ define @vfmax_vv_nxv16f64_unmasked( ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maximum.nxv16f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maximum.nxv16f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll index 4e98d0581f896..69c76152910e8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll @@ -76,9 +76,7 @@ define @vfmin_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv1f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -150,9 +148,7 @@ define @vfmin_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv2f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -226,9 +222,7 @@ define @vfmin_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, 
v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv4f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -304,9 +298,7 @@ define @vfmin_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv8f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -408,9 +400,7 @@ define @vfmin_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv16f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -626,8 +616,6 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 5 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -635,6 +623,8 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -715,9 +705,7 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -751,9 +739,7 @@ define @vfmin_vv_nxv1f32_unmasked( %va, ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v11 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv1f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -787,9 +773,7 @@ define @vfmin_vv_nxv2f32_unmasked( %va, ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v11 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv2f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv2f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -825,9 +809,7 @@ define @vfmin_vv_nxv4f32_unmasked( %va, ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v14 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv4f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv4f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -863,9 +845,7 @@ define @vfmin_vv_nxv8f32_unmasked( %va, ; CHECK-NEXT: vmerge.vvm 
v8, v12, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v20 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv8f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv8f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -899,9 +879,7 @@ define @vfmin_vv_nxv1f64_unmasked( %v ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v11 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv1f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv1f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -937,9 +915,7 @@ define @vfmin_vv_nxv2f64_unmasked( %v ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v14 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv2f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv2f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -975,9 +951,7 @@ define @vfmin_vv_nxv4f64_unmasked( %v ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v20 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv4f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv4f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1026,9 +1000,7 @@ define @vfmin_vv_nxv8f64_unmasked( %v ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv8f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv8f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1238,8 +1210,6 @@ define @vfmin_vv_nxv16f64_unmasked( ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minimum.nxv16f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minimum.nxv16f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll b/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll index f773de3b518c6..1d4d554d3a47d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-vp-fadd-and-vp-fmul.ll @@ -38,9 +38,7 @@ define @fma_true( %x, poison, i1 true, i32 0 - %true = shufflevector %head, poison, zeroinitializer - %1 = call fast @llvm.vp.fmul.nxv1f64( %x, %y, %true, i32 %vl) + %1 = call fast @llvm.vp.fmul.nxv1f64( %x, %y, splat (i1 true), i32 %vl) %2 = call fast @llvm.vp.fadd.nxv1f64( %1, %z, %m, i32 %vl) ret %2 } diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll index e75ea700df4f1..a9668dff6055a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/lrint-sdnode.ll @@ -102,7 +102,26 @@ define @lrint_nxv8f32( %x) { } declare @llvm.lrint.nxv8iXLen.nxv8f32() -define @lrint_nxv16iXLen_nxv16f32( %x) { +define @lrint_nxv16f32( %x) { +; RV32-LABEL: lrint_nxv16f32: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; RV32-NEXT: vfcvt.x.f.v v8, v8 +; RV32-NEXT: ret +; +; RV64-i32-LABEL: lrint_nxv16f32: +; RV64-i32: # %bb.0: +; RV64-i32-NEXT: vsetvli a0, 
zero, e32, m8, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_nxv16f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v24, v8
+; RV64-i64-NEXT: vfwcvt.x.f.v v16, v12
+; RV64-i64-NEXT: vmv8r.v v8, v24
+; RV64-i64-NEXT: ret
%a = call <vscale x 16 x iXLen> @llvm.lrint.nxv16iXLen.nxv16f32(<vscale x 16 x float> %x)
ret <vscale x 16 x iXLen> %a
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
index 8a826fb3ac1ea..9fa8807ed4add 100644
--- a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll
@@ -102,7 +102,41 @@ define <vscale x 8 x iXLen> @lrint_nxv8f32(<vscale x 8 x float> %x, <vscale x 8
}
declare <vscale x 8 x iXLen> @llvm.vp.lrint.nxv8iXLen.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)

-define <vscale x 16 x iXLen> @lrint_nxv16iXLen_nxv16f32(<vscale x 16 x float> %x, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+define <vscale x 16 x iXLen> @lrint_nxv16f32(<vscale x 16 x float> %x, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: lrint_nxv16f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV32-NEXT: ret
+;
+; RV64-i32-LABEL: lrint_nxv16f32:
+; RV64-i32: # %bb.0:
+; RV64-i32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV64-i32-NEXT: vfcvt.x.f.v v8, v8, v0.t
+; RV64-i32-NEXT: ret
+;
+; RV64-i64-LABEL: lrint_nxv16f32:
+; RV64-i64: # %bb.0:
+; RV64-i64-NEXT: vmv1r.v v24, v0
+; RV64-i64-NEXT: csrr a1, vlenb
+; RV64-i64-NEXT: srli a2, a1, 3
+; RV64-i64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; RV64-i64-NEXT: vslidedown.vx v0, v0, a2
+; RV64-i64-NEXT: sub a2, a0, a1
+; RV64-i64-NEXT: sltu a3, a0, a2
+; RV64-i64-NEXT: addi a3, a3, -1
+; RV64-i64-NEXT: and a2, a3, a2
+; RV64-i64-NEXT: vsetvli zero, a2, e32, m4, ta, ma
+; RV64-i64-NEXT: vfwcvt.x.f.v v16, v12, v0.t
+; RV64-i64-NEXT: bltu a0, a1, .LBB4_2
+; RV64-i64-NEXT: # %bb.1:
+; RV64-i64-NEXT: mv a0, a1
+; RV64-i64-NEXT: .LBB4_2:
+; RV64-i64-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; RV64-i64-NEXT: vmv1r.v v0, v24
+; RV64-i64-NEXT: vfwcvt.x.f.v v24, v8, v0.t
+; RV64-i64-NEXT: vmv8r.v v8, v24
+; RV64-i64-NEXT: ret
%a = call <vscale x 16 x iXLen> @llvm.vp.lrint.nxv16iXLen.nxv16f32(<vscale x 16 x float> %x, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x iXLen> %a
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
index 9e2a33f54f420..f8c1d5e45bc28 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
@@ -258,8 +258,6 @@ define <vscale x 2 x i8> @masked_load_allones_mask(ptr %a, <vscale x 2 x i8> %ma
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: ret
- %insert = insertelement <vscale x 2 x i1> poison, i1 1, i32 0
- %mask = shufflevector <vscale x 2 x i1> %insert, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- %load = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8(ptr %a, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> %maskedoff)
+ %load = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8(ptr %a, i32 1, <vscale x 2 x i1> splat (i1 1), <vscale x 2 x i8> %maskedoff)
ret <vscale x 2 x i8> %load
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll
index a2fec5ab0798a..32414feab722a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll
@@ -258,8 +258,6 @@ define void @masked_store_allones_mask(<vscale x 2 x i8> %val, ptr %a) nounwind
; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
- %insert = insertelement <vscale x 2 x i1> poison, i1 1, i32 0
- %mask = shufflevector <vscale x 2 x i1> %insert, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
- call void @llvm.masked.store.v2i8.p0(<vscale x 2 x i8> %val, ptr %a, i32 1, <vscale x 2 x i1> %mask)
+ call void @llvm.masked.store.v2i8.p0(<vscale x 2 x i8> %val, ptr %a, i32 1, <vscale x 2 x i1> splat (i1 1))
ret void
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
index 0b236f6d3ff38..e12f1cf7603b8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -206,9 +206,7 @@ define <vscale x 4 x i8> @mgather_truemask_nxv4i8(<vscale x 4 x ptr> %ptrs, <vsc
- %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> %mtrue, <vscale x 4 x i8> %passthru)
+ %v = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x i8> %passthru)
ret <vscale x 4 x i8> %v
}
@@ -429,9 +427,7 @@ define <vscale x 4 x i16> @mgather_truemask_nxv4i16(<vscale x 4 x ptr> %ptrs, <v
- %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %mtrue, <vscale x 4 x i16> %passthru)
+ %v = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x i16> %passthru)
ret <vscale x 4 x i16> %v
}
@@ -675,9 +671,7 @@ define <vscale x 4 x i32> @mgather_truemask_nxv4i32(<vscale x 4 x ptr> %ptrs, <v
- %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mtrue, <vscale x 4 x i32> %passthru)
+ %v = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x i32> %passthru)
ret <vscale x 4 x i32> %v
}
@@ -940,9 +934,7 @@ define <vscale x 4 x i64> @mgather_truemask_nxv4i64(<vscale x 4 x ptr> %ptrs, <v
- %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %mtrue, <vscale x 4 x i64> %passthru)
+ %v = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x i64> %passthru)
ret <vscale x 4 x i64> %v
}
@@ -1340,9 +1332,7 @@ define <vscale x 4 x half> @mgather_truemask_nxv4f16(<vscale x 4 x ptr> %ptrs, <
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
- %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %mtrue, <vscale x 4 x half> %passthru)
+ %v = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x half> %passthru)
ret <vscale x 4 x half> %v
}
@@ -1542,9 +1532,7 @@ define <vscale x 4 x float> @mgather_truemask_nxv4f32(<vscale x 4 x ptr> %ptrs,
; RV64-NEXT: vluxei64.v v12, (zero), v8
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
- %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mtrue, <vscale x 4 x float> %passthru)
+ %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x float> %passthru)
ret <vscale x 4 x float> %v
}
@@ -1807,9 +1795,7 @@ define <vscale x 4 x double> @mgather_truemask_nxv4f64(<vscale x 4 x ptr> %ptrs,
; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT: vluxei64.v v8, (zero), v8
; RV64-NEXT: ret
- %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %mtrue, <vscale x 4 x double> %passthru)
+ %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x double> %passthru)
ret <vscale x 4 x double> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
index 652e7a128a960..0e09f59b6a20f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
@@ -139,9 +139,7 @@ define void @mscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr>
; RV64-NEXT: vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT: vsoxei64.v v8, (zero), v12
; RV64-NEXT: ret
- %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> %mtrue)
+ call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> splat (i1 1))
ret void
}
@@ -300,9 +298,7 @@ define void @mscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x pt
- %mhead = insertelement <vscale x 4 x i1> poison, i1 1, i32 0
- %mtrue = shufflevector <vscale x 4 x i1> %mhead, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
- call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>
%val, %ptrs, i32 2, %mtrue) + call void @llvm.masked.scatter.nxv4i16.nxv4p0( %val, %ptrs, i32 2, splat (i1 1)) ret void } @@ -499,9 +495,7 @@ define void @mscatter_truemask_nxv4i32( %val, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - call void @llvm.masked.scatter.nxv4i32.nxv4p0( %val, %ptrs, i32 4, %mtrue) + call void @llvm.masked.scatter.nxv4i32.nxv4p0( %val, %ptrs, i32 4, splat (i1 1)) ret void } @@ -737,9 +731,7 @@ define void @mscatter_truemask_nxv4i64( %val, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - call void @llvm.masked.scatter.nxv4i64.nxv4p0( %val, %ptrs, i32 8, %mtrue) + call void @llvm.masked.scatter.nxv4i64.nxv4p0( %val, %ptrs, i32 8, splat (i1 1)) ret void } @@ -1041,9 +1033,7 @@ define void @mscatter_truemask_nxv4f16( %val, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - call void @llvm.masked.scatter.nxv4f16.nxv4p0( %val, %ptrs, i32 2, %mtrue) + call void @llvm.masked.scatter.nxv4f16.nxv4p0( %val, %ptrs, i32 2, splat (i1 1)) ret void } @@ -1221,9 +1211,7 @@ define void @mscatter_truemask_nxv4f32( %val, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - call void @llvm.masked.scatter.nxv4f32.nxv4p0( %val, %ptrs, i32 4, %mtrue) + call void @llvm.masked.scatter.nxv4f32.nxv4p0( %val, %ptrs, i32 4, splat (i1 1)) ret void } @@ -1459,9 +1447,7 @@ define void @mscatter_truemask_nxv4f64( %val, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - call void @llvm.masked.scatter.nxv4f64.nxv4p0( %val, %ptrs, i32 8, %mtrue) + call void @llvm.masked.scatter.nxv4f64.nxv4p0( %val, %ptrs, i32 8, splat (i1 1)) ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll index f608a63d6bb9b..e47517abacb4d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll +++ b/llvm/test/CodeGen/RISCV/rvv/narrow-shift-extend.ll @@ -18,9 +18,7 @@ define @test_vloxei(ptr %ptr, %offset, i64 ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to - %shamt = insertelement undef, i64 4, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 4) %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i64( undef, ptr %ptr, @@ -40,9 +38,7 @@ define @test_vloxei2(ptr %ptr, %offset, i64 ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to - %shamt = insertelement undef, i64 14, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 14) %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i64( undef, ptr %ptr, @@ -62,9 +58,7 @@ define @test_vloxei3(ptr %ptr, %offset, i64 ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to - %shamt = insertelement undef, i64 26, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 26) %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i64( undef, ptr %ptr, @@ -87,9 +81,7 @@ define @test_vloxei4(ptr %ptr, %offset, @llvm.vp.zext.nxvi64.nxv1i8( %offset, %m, i32 %vl) - %shamt = insertelement undef, i64 4, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 4) %vl.i64 = zext i32 %vl to i64 %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i64( undef, @@ -116,9 +108,7 @@ define @test_vloxei5(ptr %ptr, %offset, i64 ; CHECK-NEXT: ret entry: %offset.ext = zext 
%offset to - %shamt = insertelement undef, i16 12, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i16 12) %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i16( undef, ptr %ptr, @@ -141,9 +131,7 @@ define @test_vloxei6(ptr %ptr, %offset, i64 ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to - %shamt = insertelement undef, i64 4, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 4) %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i64( undef, ptr %ptr, @@ -164,9 +152,7 @@ define @test_vloxei7(ptr %ptr, %offset, i64 ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to - %shamt = insertelement undef, i64 2, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 2) %res = call @llvm.riscv.vloxei.nxv4i32.nxv4i64( undef, ptr %ptr, @@ -194,9 +180,7 @@ define @test_vloxei_mask(ptr %ptr, %offset, ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to - %shamt = insertelement undef, i64 4, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 4) %res = call @llvm.riscv.vloxei.mask.nxv4i32.nxv4i64( undef, ptr %ptr, @@ -223,9 +207,7 @@ define @test_vluxei(ptr %ptr, %offset, i64 ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to - %shamt = insertelement undef, i64 4, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 4) %res = call @llvm.riscv.vluxei.nxv4i32.nxv4i64( undef, ptr %ptr, @@ -253,9 +235,7 @@ define @test_vluxei_mask(ptr %ptr, %offset, ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to - %shamt = insertelement undef, i64 4, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 4) %res = call @llvm.riscv.vluxei.mask.nxv4i32.nxv4i64( undef, ptr %ptr, @@ -282,9 +262,7 @@ define void @test_vsoxei( %val, ptr %ptr, %o ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to - %shamt = insertelement undef, i64 4, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 4) call void @llvm.riscv.vsoxei.nxv4i32.nxv4i64( %val, ptr %ptr, @@ -311,9 +289,7 @@ define void @test_vsoxei_mask( %val, ptr %ptr, %offset to - %shamt = insertelement undef, i64 4, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 4) call void @llvm.riscv.vsoxei.mask.nxv4i32.nxv4i64( %val, ptr %ptr, @@ -340,9 +316,7 @@ define void @test_vsuxei( %val, ptr %ptr, %o ; CHECK-NEXT: ret entry: %offset.ext = zext %offset to - %shamt = insertelement undef, i64 4, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 4) call void @llvm.riscv.vsuxei.nxv4i32.nxv4i64( %val, ptr %ptr, @@ -369,9 +343,7 @@ define void @test_vsuxei_mask( %val, ptr %ptr, %offset to - %shamt = insertelement undef, i64 4, i32 0 - %shamt.vec = shufflevector %shamt, poison, zeroinitializer - %shl = shl %offset.ext, %shamt.vec + %shl = shl %offset.ext, splat (i64 4) call void @llvm.riscv.vsuxei.mask.nxv4i32.nxv4i64( %val, ptr %ptr, diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index 126836cd9390b..8bc2334282653 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -86,9 +86,7 @@ define @vp_nearbyint_nxv1f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv1f16( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -170,9 +168,7 @@ define @vp_nearbyint_nxv2f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv2f16( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -256,9 +252,7 @@ define @vp_nearbyint_nxv4f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv4f16( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -344,9 +338,7 @@ define @vp_nearbyint_nxv8f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv8f16( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -432,9 +424,7 @@ define @vp_nearbyint_nxv16f16_unmasked( ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv16f16( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -554,8 +544,6 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -563,6 +551,8 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -609,9 +599,7 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -653,9 +641,7 @@ define @vp_nearbyint_nxv1f32_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv 
v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv1f32( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv1f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -697,9 +683,7 @@ define @vp_nearbyint_nxv2f32_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv2f32( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv2f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -743,9 +727,7 @@ define @vp_nearbyint_nxv4f32_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv4f32( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv4f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -789,9 +771,7 @@ define @vp_nearbyint_nxv8f32_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv8f32( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv8f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -835,9 +815,7 @@ define @vp_nearbyint_nxv16f32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv16f32( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv16f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -879,9 +857,7 @@ define @vp_nearbyint_nxv1f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv1f64( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -925,9 +901,7 @@ define @vp_nearbyint_nxv2f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv2f64( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -971,9 +945,7 @@ define @vp_nearbyint_nxv4f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv4f64( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1017,9 +989,7 @@ define @vp_nearbyint_nxv7f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv7f64( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1063,9 +1033,7 @@ define @vp_nearbyint_nxv8f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv8f64( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1189,8 +1157,6 @@ define @vp_nearbyint_nxv16f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.nearbyint.nxv16f64( %va, %m, i32 %evl) + %v = call @llvm.vp.nearbyint.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll index 04761d4e7bfc4..f934127f978dc 
100644 --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -78,9 +78,7 @@ define @vp_rint_nxv1f16_unmasked( %va, i3 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv1f16( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -154,9 +152,7 @@ define @vp_rint_nxv2f16_unmasked( %va, i3 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv2f16( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -232,9 +228,7 @@ define @vp_rint_nxv4f16_unmasked( %va, i3 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv4f16( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -312,9 +306,7 @@ define @vp_rint_nxv8f16_unmasked( %va, i3 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv8f16( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -392,9 +384,7 @@ define @vp_rint_nxv16f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv16f16( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -507,8 +497,6 @@ define @vp_rint_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -516,6 +504,8 @@ define @vp_rint_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -558,9 +548,7 @@ define @vp_rint_nxv32f16_unmasked( %va, ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -598,9 +586,7 @@ define @vp_rint_nxv1f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.rint.nxv1f32( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv1f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -638,9 +624,7 @@ define @vp_rint_nxv2f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv2f32( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv2f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -680,9 +664,7 @@ define @vp_rint_nxv4f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv4f32( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv4f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -722,9 +704,7 @@ define @vp_rint_nxv8f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv8f32( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv8f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -764,9 +744,7 @@ define @vp_rint_nxv16f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv16f32( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv16f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -804,9 +782,7 @@ define @vp_rint_nxv1f64_unmasked( %va ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv1f64( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -846,9 +822,7 @@ define @vp_rint_nxv2f64_unmasked( %va ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv2f64( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -888,9 +862,7 @@ define @vp_rint_nxv4f64_unmasked( %va ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv4f64( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -930,9 +902,7 @@ define @vp_rint_nxv7f64_unmasked( %va ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv7f64( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -972,9 +942,7 @@ define @vp_rint_nxv8f64_unmasked( %va ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, 
zeroinitializer - %v = call @llvm.vp.rint.nxv8f64( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1079,8 +1047,6 @@ define @vp_rint_nxv16f64_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.rint.nxv16f64( %va, %m, i32 %evl) + %v = call @llvm.vp.rint.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll index 16bd665dd0de9..eb4994914fad9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -86,9 +86,7 @@ define @vp_round_nxv1f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv1f16( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -170,9 +168,7 @@ define @vp_round_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv2f16( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -256,9 +252,7 @@ define @vp_round_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv4f16( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -344,9 +338,7 @@ define @vp_round_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv8f16( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -432,9 +424,7 @@ define @vp_round_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv16f16( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -555,8 +545,6 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -564,6 +552,8 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: vsetvli 
a2, zero, e16, m4, ta, ma @@ -610,9 +600,7 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -654,9 +642,7 @@ define @vp_round_nxv1f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv1f32( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv1f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -698,9 +684,7 @@ define @vp_round_nxv2f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv2f32( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv2f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -744,9 +728,7 @@ define @vp_round_nxv4f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv4f32( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv4f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -790,9 +772,7 @@ define @vp_round_nxv8f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv8f32( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv8f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -836,9 +816,7 @@ define @vp_round_nxv16f32_unmasked( % ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv16f32( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv16f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -880,9 +858,7 @@ define @vp_round_nxv1f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv1f64( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -926,9 +902,7 @@ define @vp_round_nxv2f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv2f64( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -972,9 +946,7 @@ define @vp_round_nxv4f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv4f64( %va, %m, i32 %evl) + %v = call 
@llvm.vp.round.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1018,9 +990,7 @@ define @vp_round_nxv7f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv7f64( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1064,9 +1034,7 @@ define @vp_round_nxv8f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv8f64( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1179,8 +1147,6 @@ define @vp_round_nxv16f64_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.round.nxv16f64( %va, %m, i32 %evl) + %v = call @llvm.vp.round.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll index 429ddb6c71be3..f366a2922d079 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -86,9 +86,7 @@ define @vp_roundeven_nxv1f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv1f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -170,9 +168,7 @@ define @vp_roundeven_nxv2f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv2f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -256,9 +252,7 @@ define @vp_roundeven_nxv4f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv4f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -344,9 +338,7 @@ define @vp_roundeven_nxv8f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv8f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -432,9 +424,7 @@ define @vp_roundeven_nxv16f16_unmasked( ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv16f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ 
-555,8 +545,6 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -564,6 +552,8 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -610,9 +600,7 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -654,9 +642,7 @@ define @vp_roundeven_nxv1f32_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv1f32( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv1f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -698,9 +684,7 @@ define @vp_roundeven_nxv2f32_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv2f32( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv2f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -744,9 +728,7 @@ define @vp_roundeven_nxv4f32_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv4f32( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv4f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -790,9 +772,7 @@ define @vp_roundeven_nxv8f32_unmasked( ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv8f32( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv8f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -836,9 +816,7 @@ define @vp_roundeven_nxv16f32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv16f32( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv16f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -880,9 +858,7 @@ define @vp_roundeven_nxv1f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv1f64( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -926,9 +902,7 @@ define @vp_roundeven_nxv2f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv2f64( %va, %m, 
i32 %evl) + %v = call @llvm.vp.roundeven.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -972,9 +946,7 @@ define @vp_roundeven_nxv4f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv4f64( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1018,9 +990,7 @@ define @vp_roundeven_nxv7f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv7f64( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1064,9 +1034,7 @@ define @vp_roundeven_nxv8f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv8f64( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1179,8 +1147,6 @@ define @vp_roundeven_nxv16f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundeven.nxv16f64( %va, %m, i32 %evl) + %v = call @llvm.vp.roundeven.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index c854e0fb8a05d..79c940bdf089a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -86,9 +86,7 @@ define @vp_roundtozero_nxv1f16_unmasked( ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv1f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -170,9 +168,7 @@ define @vp_roundtozero_nxv2f16_unmasked( ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv2f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -256,9 +252,7 @@ define @vp_roundtozero_nxv4f16_unmasked( ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv4f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -344,9 +338,7 @@ define @vp_roundtozero_nxv8f16_unmasked( ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv8f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -432,9 +424,7 @@ define @vp_roundtozero_nxv16f16_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv16f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -555,8 +545,6 @@ define @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv32f16_unmasked( poison, i1 true, i32 0 - %m = shufflevector 
%head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -654,9 +642,7 @@ define @vp_roundtozero_nxv1f32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv1f32( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv1f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -698,9 +684,7 @@ define @vp_roundtozero_nxv2f32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv2f32( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv2f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -744,9 +728,7 @@ define @vp_roundtozero_nxv4f32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv4f32( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv4f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -790,9 +772,7 @@ define @vp_roundtozero_nxv8f32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv8f32( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv8f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -836,9 +816,7 @@ define @vp_roundtozero_nxv16f32_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv16f32( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv16f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -880,9 +858,7 @@ define @vp_roundtozero_nxv1f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv1f64( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -926,9 +902,7 @@ define @vp_roundtozero_nxv2f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv2f64( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -972,9 +946,7 @@ define @vp_roundtozero_nxv4f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv4f64( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1018,9 +990,7 @@ define @vp_roundtozero_nxv7f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv7f64( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1064,9 +1034,7 @@ define @vp_roundtozero_nxv8f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv8f64( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -1179,8 +1147,6 @@ define @vp_roundtozero_nxv16f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.roundtozero.nxv16f64( %va, %m, i32 %evl) + %v = call @llvm.vp.roundtozero.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll index 5f381a307099d..8cefbac59ce67 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-masked-vops.ll @@ -11,9 +11,7 @@ define @vpmerge_vadd( %passthru, @llvm.riscv.vadd.mask.nxv2i32.nxv2i32( %passthru, %x, %y, %m, i64 %vl, i64 1) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 %vl) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 %vl) ret %b } declare @llvm.riscv.vadd.mask.nxv2i32.nxv2i32(, , , , i64, i64) @@ -25,9 +23,7 @@ define @vpmerge_vsub( %passthru, @llvm.riscv.vsub.mask.nxv2i32.nxv2i32( %passthru, %x, %y, %m, i64 %vl, i64 1) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 %vl) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 %vl) ret %b } declare @llvm.riscv.vsub.mask.nxv2i32.nxv2i32(, , , , i64, i64) @@ -39,9 +35,7 @@ define @vpmerge_vfadd( %passthru, @llvm.riscv.vfadd.mask.nxv2f32.nxv2f32( %passthru, %x, %y, %m, i64 7, i64 %vl, i64 1) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2f32.nxv2f32( %passthru, %passthru, %a, %mask, i64 %vl) + %b = call @llvm.riscv.vmerge.nxv2f32.nxv2f32( %passthru, %passthru, %a, splat (i1 -1), i64 %vl) ret %b } @@ -54,9 +48,7 @@ define @vpmerge_vfsub( %passthru, @llvm.riscv.vfsub.mask.nxv2f32.nxv2f32( %passthru, %x, %y, %m, i64 7, i64 %vl, i64 1) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2f32.nxv2f32( %passthru, %passthru, %a, %mask, i64 %vl) + %b = call @llvm.riscv.vmerge.nxv2f32.nxv2f32( %passthru, %passthru, %a, splat (i1 -1), i64 %vl) ret %b } declare @llvm.riscv.vfsub.mask.nxv2f32.nxv2f32(, , , , i64, i64, i64) @@ -68,9 +60,7 @@ define @vpmerge_vwadd( %passthru, @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.nxv2i16( %passthru, %x, %y, %m, i64 %vl, i64 1) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 %vl) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 %vl) ret %b } declare @llvm.riscv.vwadd.mask.nxv2i32.nxv2i16.nxv2i16(, , , , i64, i64) @@ -86,9 +76,7 @@ define @vpmerge_vle( %passthru, ptr %p, %m, i64 %vl, i64 1) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 %vl) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 %vl) ret %b } declare @llvm.riscv.vle.mask.nxv2i32(, ptr, , i64, i64) @@ -101,9 +89,7 @@ define @vpmerge_vslideup( %passthru, @llvm.riscv.vslideup.mask.nxv2i32( %passthru, %v, i64 %x, %m, i64 %vl, i64 0) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 %vl) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 %vl) ret %b } @@ -115,9 +101,7 @@ define @vpmerge_vslidedown( %passthru, @llvm.riscv.vslidedown.mask.nxv2i32( %passthru, %v, i64 %x, %m, i64 %vl, i64 0) - %splat = insertelement poison, i1 -1, i32 0 
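; NOTE: every hunk in this file applies one mechanical rewrite. A sketch of the
; two forms, with the scalable-vector types spelled out from the nxv2i32 name
; (<vscale x 2 x i32> data, <vscale x 2 x i1> mask):
;
;   ; old: materialize an all-ones mask with two instructions
;   %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
;   %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
;   %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> %mask, i64 %vl)
;
;   ; new: the equivalent splat constant expression, inline in the call
;   %b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, <vscale x 2 x i1> splat (i1 -1), i64 %vl)
;
; Both forms denote the same all-ones mask (i1 -1 == i1 true), so the CHECK
; lines in these tests are unchanged.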
- %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 %vl) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 %vl) ret %b } @@ -129,9 +113,7 @@ define @vpmerge_vslide1up( %passthru, @llvm.riscv.vslide1up.mask.nxv2i32( %passthru, %v, i32 %x, %m, i64 %vl, i64 0) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 %vl) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 %vl) ret %b } @@ -143,9 +125,7 @@ define @vpmerge_vslide1down( %passthru, @llvm.riscv.vslide1down.mask.nxv2i32( %passthru, %v, i32 %x, %m, i64 %vl, i64 0) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 %vl) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 %vl) ret %b } @@ -159,9 +139,7 @@ define @vmerge_smaller_vl_same_passthru( %p ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %a = call @llvm.riscv.vadd.mask.nxv2i32.nxv2i32( %passthru, %x, %y, %m, i64 3, i64 0) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 2) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 2) ret %b } @@ -173,9 +151,7 @@ define @vmerge_larger_vl_same_passthru( %pa ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %a = call @llvm.riscv.vadd.mask.nxv2i32.nxv2i32( %passthru, %x, %y, %m, i64 2, i64 0) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 3) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 3) ret %b } @@ -190,9 +166,7 @@ define @vmerge_smaller_vl_different_passthru( @llvm.riscv.vadd.mask.nxv2i32.nxv2i32( %pt1, %x, %y, %m, i64 3, i64 0) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %pt2, %pt2, %a, %mask, i64 2) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %pt2, %pt2, %a, splat (i1 -1), i64 2) ret %b } @@ -207,9 +181,7 @@ define @vmerge_larger_vl_different_passthru( @llvm.riscv.vadd.mask.nxv2i32.nxv2i32( %pt1, %x, %y, %m, i64 2, i64 0) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %pt2, %pt2, %a, %mask, i64 3) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %pt2, %pt2, %a, splat (i1 -1), i64 3) ret %b } @@ -221,9 +193,7 @@ define @vmerge_smaller_vl_poison_passthru( ; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %a = call @llvm.riscv.vadd.mask.nxv2i32.nxv2i32( poison, %x, %y, %m, i64 3, i64 0) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 2) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 2) ret %b } @@ -235,9 +205,7 @@ define @vmerge_larger_vl_poison_passthru( % ; CHECK-NEXT: vadd.vv v8, v9, 
v10, v0.t ; CHECK-NEXT: ret %a = call @llvm.riscv.vadd.mask.nxv2i32.nxv2i32( poison, %x, %y, %m, i64 2, i64 0) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 3) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 3) ret %b } @@ -269,8 +237,6 @@ define @vpmerge_viota( %passthru, @llvm.riscv.viota.mask.nxv2i32( undef, %vm, %m, i64 %1, i64 0) - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 %1) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 %1) ret %b } diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll index 52bd15742ef4b..31fd5bdbd31fd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll @@ -19,9 +19,7 @@ define void @vpmerge_vpload_store( %passthru, ptr %p, ) into %ir.p) ; CHECK-NEXT: PseudoRET - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, %mask, i32 %vl) + %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) store %b, ptr %p ret void @@ -40,9 +38,7 @@ define void @vpselect_vpload_store( %passthru, ptr %p, ) into %ir.p) ; CHECK-NEXT: PseudoRET - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, %mask, i32 %vl) + %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) store %b, ptr %p ret void diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 7cc4a9da3d429..970581b4d80a9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -14,9 +14,7 @@ define @vpmerge_vpadd( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.add.nxv2i32( %x, %y, %mask, i32 %vl) + %a = call @llvm.vp.add.nxv2i32( %x, %y, splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } @@ -31,10 +29,8 @@ define @vpmerge_vpadd2( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.add.nxv2i32( %x, %y, %mask, i32 %vl) - %m = call @llvm.vp.icmp.nxv2i32( %x, %y, metadata !"eq", %mask, i32 %vl) + %a = call @llvm.vp.add.nxv2i32( %x, %y, splat (i1 -1), i32 %vl) + %m = call @llvm.vp.icmp.nxv2i32( %x, %y, metadata !"eq", splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } @@ -46,10 +42,8 @@ define @vpmerge_vpadd3( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.add.nxv2i32( %x, %y, %mask, i32 %vl) - %b = call @llvm.vp.merge.nxv2i32( %mask, %a, %passthru, i32 %vl) + %a = call @llvm.vp.add.nxv2i32( %x, %y, splat (i1 -1), i32 %vl) + %b = call @llvm.vp.merge.nxv2i32( splat (i1 -1), %a, %passthru, i32 %vl) ret %b } @@ -61,9 +55,7 @@ define @vpmerge_vpfadd( %passthru, 
poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.fadd.nxv2f32( %x, %y, %mask, i32 %vl) + %a = call @llvm.vp.fadd.nxv2f32( %x, %y, splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } @@ -90,9 +82,7 @@ define @vpmerge_vpfptosi( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.fptosi.nxv2i16.nxv2f32( %x, %mask, i32 %vl) + %a = call @llvm.vp.fptosi.nxv2i16.nxv2f32( %x, splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2i16( %m, %a, %passthru, i32 %vl) ret %b } @@ -105,9 +95,7 @@ define @vpmerge_vpsitofp( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.sitofp.nxv2f32.nxv2i64( %x, %mask, i32 %vl) + %a = call @llvm.vp.sitofp.nxv2f32.nxv2i64( %x, splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } @@ -120,9 +108,7 @@ define @vpmerge_vpzext( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.zext.nxv2i32.nxv2i8( %x, %mask, i32 %vl) + %a = call @llvm.vp.zext.nxv2i32.nxv2i8( %x, splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } @@ -135,9 +121,7 @@ define @vpmerge_vptrunc( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.trunc.nxv2i32.nxv2i64( %x, %mask, i32 %vl) + %a = call @llvm.vp.trunc.nxv2i32.nxv2i64( %x, splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } @@ -150,9 +134,7 @@ define @vpmerge_vpfpext( %passthru, < ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.fpext.nxv2f64.nxv2f32( %x, %mask, i32 %vl) + %a = call @llvm.vp.fpext.nxv2f64.nxv2f32( %x, splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2f64( %m, %a, %passthru, i32 %vl) ret %b } @@ -165,9 +147,7 @@ define @vpmerge_vpfptrunc( %passthru, < ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu ; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.fptrunc.nxv2f32.nxv2f64( %x, %mask, i32 %vl) + %a = call @llvm.vp.fptrunc.nxv2f32.nxv2f64( %x, splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } @@ -180,9 +160,7 @@ define @vpmerge_vpload( %passthru, ptr %p, ; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, %mask, i32 %vl) + %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } @@ -196,10 +174,8 @@ define @vpmerge_vpload2( %passthru, ptr %p, ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, %mask, i32 %vl) - %m = call @llvm.vp.icmp.nxv2i32( %x, %y, metadata !"eq", %mask, i32 %vl) + %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) + %m = call 
@llvm.vp.icmp.nxv2i32( %x, %y, metadata !"eq", splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } @@ -212,9 +188,7 @@ define void @vpmerge_vpload_store( %passthru, ptr %p, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, %mask, i32 %vl) + %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) %b = call @llvm.vp.merge.nxv2i32( %m, %a, %passthru, i32 %vl) store %b, ptr %p ret void @@ -304,9 +278,7 @@ define @vpmerge_viota2( %passthru, @llvm.riscv.viota.nxv2i32( undef, %vm, i64 %1) - %splat = insertelement poison, i1 -1, i32 0 - %true = shufflevector %splat, poison, zeroinitializer - %b = call @llvm.vp.merge.nxv2i32( %true, %a, %passthru, i32 %vl) + %b = call @llvm.vp.merge.nxv2i32( splat (i1 -1), %a, %passthru, i32 %vl) ret %b } @@ -494,9 +466,7 @@ define @vpselect_vpadd( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.add.nxv2i32( %x, %y, %mask, i32 %vl) + %a = call @llvm.vp.add.nxv2i32( %x, %y, splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } @@ -509,10 +479,8 @@ define @vpselect_vpadd2( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.add.nxv2i32( %x, %y, %mask, i32 %vl) - %m = call @llvm.vp.icmp.nxv2i32( %x, %y, metadata !"eq", %mask, i32 %vl) + %a = call @llvm.vp.add.nxv2i32( %x, %y, splat (i1 -1), i32 %vl) + %m = call @llvm.vp.icmp.nxv2i32( %x, %y, metadata !"eq", splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } @@ -524,10 +492,8 @@ define @vpselect_vpadd3( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.add.nxv2i32( %x, %y, %mask, i32 %vl) - %b = call @llvm.vp.select.nxv2i32( %mask, %a, %passthru, i32 %vl) + %a = call @llvm.vp.add.nxv2i32( %x, %y, splat (i1 -1), i32 %vl) + %b = call @llvm.vp.select.nxv2i32( splat (i1 -1), %a, %passthru, i32 %vl) ret %b } @@ -538,9 +504,7 @@ define @vpselect_vpfadd( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.fadd.nxv2f32( %x, %y, %mask, i32 %vl) + %a = call @llvm.vp.fadd.nxv2f32( %x, %y, splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } @@ -565,9 +529,7 @@ define @vpselect_vpfptosi( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.fptosi.nxv2i16.nxv2f32( %x, %mask, i32 %vl) + %a = call @llvm.vp.fptosi.nxv2i16.nxv2f32( %x, splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2i16( %m, %a, %passthru, i32 %vl) ret %b } @@ -579,9 +541,7 @@ define @vpselect_vpsitofp( %passthru, < ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfncvt.f.x.w v8, v10, v0.t ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.sitofp.nxv2f32.nxv2i64( %x, %mask, i32 %vl) + %a = call @llvm.vp.sitofp.nxv2f32.nxv2i64( %x, splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } @@ -593,9 +553,7 @@ define @vpselect_vpzext( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.zext.nxv2i32.nxv2i8( %x, %mask, i32 %vl) + %a = call @llvm.vp.zext.nxv2i32.nxv2i8( %x, splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2i32( %m, 
%a, %passthru, i32 %vl) ret %b } @@ -607,9 +565,7 @@ define @vpselect_vptrunc( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.trunc.nxv2i32.nxv2i64( %x, %mask, i32 %vl) + %a = call @llvm.vp.trunc.nxv2i32.nxv2i64( %x, splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } @@ -621,9 +577,7 @@ define @vpselect_vpfpext( %passthru, ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.fpext.nxv2f64.nxv2f32( %x, %mask, i32 %vl) + %a = call @llvm.vp.fpext.nxv2f64.nxv2f32( %x, splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2f64( %m, %a, %passthru, i32 %vl) ret %b } @@ -635,9 +589,7 @@ define @vpselect_vpfptrunc( %passthru, ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vfncvt.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.fptrunc.nxv2f32.nxv2f64( %x, %mask, i32 %vl) + %a = call @llvm.vp.fptrunc.nxv2f32.nxv2f64( %x, splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2f32( %m, %a, %passthru, i32 %vl) ret %b } @@ -649,9 +601,7 @@ define @vpselect_vpload( %passthru, ptr %p, ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, %mask, i32 %vl) + %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } @@ -664,10 +614,8 @@ define @vpselect_vpload2( %passthru, ptr %p ; CHECK-NEXT: vmseq.vv v0, v9, v10 ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, %mask, i32 %vl) - %m = call @llvm.vp.icmp.nxv2i32( %x, %y, metadata !"eq", %mask, i32 %vl) + %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) + %m = call @llvm.vp.icmp.nxv2i32( %x, %y, metadata !"eq", splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) ret %b } @@ -680,9 +628,7 @@ define void @vpselect_vpload_store( %passthru, ptr %p, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, %mask, i32 %vl) + %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) %b = call @llvm.vp.select.nxv2i32( %m, %a, %passthru, i32 %vl) store %b, ptr %p ret void @@ -1052,15 +998,12 @@ define @vredsum_allones_mask( %passthru, poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.riscv.vredsum.nxv2i32.nxv2i32( %passthru, %x, %y, i64 %vl) - %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, %mask, i64 %vl) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( %passthru, %passthru, %a, splat (i1 -1), i64 %vl) ret %b } @@ -1072,15 +1015,12 @@ define @vfredusum_allones_mask( %passth ; CHECK-NEXT: vfredusum.vs v8, v9, v10 ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %mask = shufflevector %splat, poison, zeroinitializer - %a = call @llvm.riscv.vfredusum.nxv2f32.nxv2f32( %passthru, %x, %y, i64 0, i64 %vl) - %b = call 
@llvm.riscv.vmerge.nxv2f32.nxv2f32( %passthru, %passthru, %a, %mask, i64 %vl) + %b = call @llvm.riscv.vmerge.nxv2f32.nxv2f32( %passthru, %passthru, %a, splat (i1 -1), i64 %vl) ret %b } diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll index bd431186f0562..3aeb4e864627c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vmerge-to-vmv.ll @@ -7,9 +7,7 @@ define @vpmerge_mf8( %x, %y ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, tu, ma ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i8 0 - %allones = shufflevector %splat, poison, zeroinitializer - %1 = call @llvm.vp.merge.nxv1i8( %allones, %y, %x, i32 %vl) + %1 = call @llvm.vp.merge.nxv1i8( splat (i1 -1), %y, %x, i32 %vl) ret %1 } @@ -19,9 +17,7 @@ define @vpmerge_mf4( %x, %y ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, tu, ma ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i8 0 - %allones = shufflevector %splat, poison, zeroinitializer - %1 = call @llvm.vp.merge.nxv2i8( %allones, %y, %x, i32 %vl) + %1 = call @llvm.vp.merge.nxv2i8( splat (i1 -1), %y, %x, i32 %vl) ret %1 } @@ -31,9 +27,7 @@ define @vpmerge_mf2( %x, %y ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i8 0 - %allones = shufflevector %splat, poison, zeroinitializer - %1 = call @llvm.vp.merge.nxv4i8( %allones, %y, %x, i32 %vl) + %1 = call @llvm.vp.merge.nxv4i8( splat (i1 -1), %y, %x, i32 %vl) ret %1 } @@ -43,9 +37,7 @@ define @vpmerge_m1( %x, %y, ; CHECK-NEXT: vsetvli zero, a0, e8, m1, tu, ma ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i8 0 - %allones = shufflevector %splat, poison, zeroinitializer - %1 = call @llvm.vp.merge.nxv8i8( %allones, %y, %x, i32 %vl) + %1 = call @llvm.vp.merge.nxv8i8( splat (i1 -1), %y, %x, i32 %vl) ret %1 } @@ -55,9 +47,7 @@ define @vpmerge_m2( %x, ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i16 0 - %allones = shufflevector %splat, poison, zeroinitializer - %1 = call @llvm.vp.merge.nxv8i16( %allones, %y, %x, i32 %vl) + %1 = call @llvm.vp.merge.nxv8i16( splat (i1 -1), %y, %x, i32 %vl) ret %1 } @@ -67,9 +57,7 @@ define @vpmerge_m4( %x, ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %1 = call @llvm.vp.merge.nxv8i32( %allones, %y, %x, i32 %vl) + %1 = call @llvm.vp.merge.nxv8i32( splat (i1 -1), %y, %x, i32 %vl) ret %1 } @@ -79,9 +67,7 @@ define @vpmerge_m8( %x, ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i64 0 - %allones = shufflevector %splat, poison, zeroinitializer - %1 = call @llvm.vp.merge.nxv8i64( %allones, %y, %x, i32 %vl) + %1 = call @llvm.vp.merge.nxv8i64( splat (i1 -1), %y, %x, i32 %vl) ret %1 } diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll index b1f1a4dceccfb..7fd77c050b295 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -49,9 +49,7 @@ define @icmp_eq_vi_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %va, %vb, metadata 
!"eq", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( %va, splat (i8 4), metadata !"eq", %m, i32 %evl) ret %v } @@ -61,9 +59,7 @@ define @icmp_eq_vi_swap_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %vb, %va, metadata !"eq", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( splat (i8 4), %va, metadata !"eq", %m, i32 %evl) ret %v } @@ -107,9 +103,7 @@ define @icmp_ne_vi_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %va, %vb, metadata !"ne", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( %va, splat (i8 4), metadata !"ne", %m, i32 %evl) ret %v } @@ -119,9 +113,7 @@ define @icmp_ne_vi_swap_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %vb, %va, metadata !"ne", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( splat (i8 4), %va, metadata !"ne", %m, i32 %evl) ret %v } @@ -165,9 +157,7 @@ define @icmp_ugt_vi_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %va, %vb, metadata !"ugt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( %va, splat (i8 4), metadata !"ugt", %m, i32 %evl) ret %v } @@ -177,9 +167,7 @@ define @icmp_ugt_vi_swap_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %vb, %va, metadata !"ugt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( splat (i8 4), %va, metadata !"ugt", %m, i32 %evl) ret %v } @@ -225,9 +213,7 @@ define @icmp_uge_vi_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %va, %vb, metadata !"uge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( %va, splat (i8 4), metadata !"uge", %m, i32 %evl) ret %v } @@ -237,9 +223,7 @@ define @icmp_uge_vi_swap_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %vb, %va, metadata !"uge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( splat (i8 4), %va, metadata !"uge", %m, i32 %evl) ret %v } @@ -283,9 +267,7 @@ define @icmp_ult_vi_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %va, %vb, metadata !"ult", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( %va, splat (i8 4), metadata !"ult", %m, i32 %evl) ret %v } @@ -295,9 +277,7 @@ define @icmp_ult_vi_swap_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %vb, %va, metadata !"ult", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( splat (i8 4), %va, metadata !"ult", %m, i32 %evl) ret %v } @@ -341,9 +321,7 @@ define @icmp_sgt_vi_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %va, %vb, metadata !"sgt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( %va, splat (i8 4), metadata !"sgt", %m, i32 %evl) ret %v } @@ -353,9 +331,7 @@ define @icmp_sgt_vi_swap_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %vb, %va, metadata !"sgt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( splat (i8 4), %va, metadata !"sgt", %m, i32 %evl) ret %v } @@ -401,9 +377,7 @@ define @icmp_sge_vi_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call 
@llvm.vp.icmp.nxv1i8( %va, %vb, metadata !"sge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( %va, splat (i8 4), metadata !"sge", %m, i32 %evl) ret %v } @@ -413,9 +387,7 @@ define @icmp_sge_vi_swap_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %vb, %va, metadata !"sge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( splat (i8 4), %va, metadata !"sge", %m, i32 %evl) ret %v } @@ -459,9 +431,7 @@ define @icmp_slt_vi_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %va, %vb, metadata !"slt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( %va, splat (i8 4), metadata !"slt", %m, i32 %evl) ret %v } @@ -471,9 +441,7 @@ define @icmp_slt_vi_swap_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %vb, %va, metadata !"slt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( splat (i8 4), %va, metadata !"slt", %m, i32 %evl) ret %v } @@ -519,9 +487,7 @@ define @icmp_sle_vi_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %va, %vb, metadata !"sle", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( %va, splat (i8 4), metadata !"sle", %m, i32 %evl) ret %v } @@ -531,9 +497,7 @@ define @icmp_sle_vi_swap_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i8( %vb, %va, metadata !"sle", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i8( splat (i8 4), %va, metadata !"sle", %m, i32 %evl) ret %v } @@ -665,9 +629,7 @@ define @icmp_eq_vi_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %va, %vb, metadata !"eq", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( %va, splat (i8 4), metadata !"eq", %m, i32 %evl) ret %v } @@ -677,9 +639,7 @@ define @icmp_eq_vi_swap_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %vb, %va, metadata !"eq", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( splat (i8 4), %va, metadata !"eq", %m, i32 %evl) ret %v } @@ -723,9 +683,7 @@ define @icmp_ne_vi_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %va, %vb, metadata !"ne", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( %va, splat (i8 4), metadata !"ne", %m, i32 %evl) ret %v } @@ -735,9 +693,7 @@ define @icmp_ne_vi_swap_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %vb, %va, metadata !"ne", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( splat (i8 4), %va, metadata !"ne", %m, i32 %evl) ret %v } @@ -781,9 +737,7 @@ define @icmp_ugt_vi_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %va, %vb, metadata !"ugt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( %va, splat (i8 4), metadata !"ugt", %m, i32 %evl) ret %v } @@ -793,9 +747,7 @@ define @icmp_ugt_vi_swap_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %vb, %va, metadata !"ugt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( splat (i8 4), %va, metadata !"ugt", %m, i32 %evl) ret %v } @@ -841,9 +793,7 @@ define @icmp_uge_vi_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector 
%elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %va, %vb, metadata !"uge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( %va, splat (i8 4), metadata !"uge", %m, i32 %evl) ret %v } @@ -853,9 +803,7 @@ define @icmp_uge_vi_swap_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %vb, %va, metadata !"uge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( splat (i8 4), %va, metadata !"uge", %m, i32 %evl) ret %v } @@ -899,9 +847,7 @@ define @icmp_ult_vi_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %va, %vb, metadata !"ult", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( %va, splat (i8 4), metadata !"ult", %m, i32 %evl) ret %v } @@ -911,9 +857,7 @@ define @icmp_ult_vi_swap_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %vb, %va, metadata !"ult", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( splat (i8 4), %va, metadata !"ult", %m, i32 %evl) ret %v } @@ -957,9 +901,7 @@ define @icmp_sgt_vi_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %va, %vb, metadata !"sgt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( %va, splat (i8 4), metadata !"sgt", %m, i32 %evl) ret %v } @@ -969,9 +911,7 @@ define @icmp_sgt_vi_swap_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %vb, %va, metadata !"sgt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( splat (i8 4), %va, metadata !"sgt", %m, i32 %evl) ret %v } @@ -1017,9 +957,7 @@ define @icmp_sge_vi_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %va, %vb, metadata !"sge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( %va, splat (i8 4), metadata !"sge", %m, i32 %evl) ret %v } @@ -1029,9 +967,7 @@ define @icmp_sge_vi_swap_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %vb, %va, metadata !"sge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( splat (i8 4), %va, metadata !"sge", %m, i32 %evl) ret %v } @@ -1075,9 +1011,7 @@ define @icmp_slt_vi_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %va, %vb, metadata !"slt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( %va, splat (i8 4), metadata !"slt", %m, i32 %evl) ret %v } @@ -1087,9 +1021,7 @@ define @icmp_slt_vi_swap_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %vb, %va, metadata !"slt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( splat (i8 4), %va, metadata !"slt", %m, i32 %evl) ret %v } @@ -1135,9 +1067,7 @@ define @icmp_sle_vi_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %va, %vb, metadata !"sle", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( %va, splat (i8 4), metadata !"sle", %m, i32 %evl) ret %v } @@ -1147,9 +1077,7 @@ define @icmp_sle_vi_swap_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i8( %vb, %va, metadata !"sle", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i8( splat (i8 4), %va, metadata !"sle", %m, i32 %evl) ret %v } @@ -1312,9 +1240,7 @@ define 
@icmp_eq_vi_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %va, %vb, metadata !"eq", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( %va, splat (i32 4), metadata !"eq", %m, i32 %evl) ret %v } @@ -1324,9 +1250,7 @@ define @icmp_eq_vi_swap_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %vb, %va, metadata !"eq", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( splat (i32 4), %va, metadata !"eq", %m, i32 %evl) ret %v } @@ -1370,9 +1294,7 @@ define @icmp_ne_vi_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %va, %vb, metadata !"ne", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( %va, splat (i32 4), metadata !"ne", %m, i32 %evl) ret %v } @@ -1382,9 +1304,7 @@ define @icmp_ne_vi_swap_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %vb, %va, metadata !"ne", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( splat (i32 4), %va, metadata !"ne", %m, i32 %evl) ret %v } @@ -1428,9 +1348,7 @@ define @icmp_ugt_vi_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %va, %vb, metadata !"ugt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( %va, splat (i32 4), metadata !"ugt", %m, i32 %evl) ret %v } @@ -1440,9 +1358,7 @@ define @icmp_ugt_vi_swap_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %vb, %va, metadata !"ugt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( splat (i32 4), %va, metadata !"ugt", %m, i32 %evl) ret %v } @@ -1488,9 +1404,7 @@ define @icmp_uge_vi_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %va, %vb, metadata !"uge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( %va, splat (i32 4), metadata !"uge", %m, i32 %evl) ret %v } @@ -1500,9 +1414,7 @@ define @icmp_uge_vi_swap_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %vb, %va, metadata !"uge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( splat (i32 4), %va, metadata !"uge", %m, i32 %evl) ret %v } @@ -1546,9 +1458,7 @@ define @icmp_ult_vi_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %va, %vb, metadata !"ult", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( %va, splat (i32 4), metadata !"ult", %m, i32 %evl) ret %v } @@ -1558,9 +1468,7 @@ define @icmp_ult_vi_swap_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %vb, %va, metadata !"ult", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( splat (i32 4), %va, metadata !"ult", %m, i32 %evl) ret %v } @@ -1604,9 +1512,7 @@ define @icmp_sgt_vi_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %va, %vb, metadata !"sgt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( %va, splat (i32 4), metadata !"sgt", %m, i32 %evl) ret %v } @@ -1616,9 +1522,7 @@ define @icmp_sgt_vi_swap_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %vb, %va, metadata !"sgt", %m, i32 %evl) 
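; NOTE: the same splat shorthand also covers the scalar comparison operand in
; these setcc tests. A sketch with the nxv1i32 types spelled out
; (<vscale x 1 x i32> vector, <vscale x 1 x i1> result/mask):
;
;   ; old: splat the immediate 4 via insertelement + shufflevector
;   %elt.head = insertelement <vscale x 1 x i32> poison, i32 4, i32 0
;   %vb = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
;   %v = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, metadata !"sgt", <vscale x 1 x i1> %m, i32 %evl)
;
;   ; new: fold the splat into the call as a constant expression
;   %v = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> splat (i32 4), metadata !"sgt", <vscale x 1 x i1> %m, i32 %evl)
;
; The vi/vi_swap test pairs differ only in which side of the compare carries
; the splat operand.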
+ %v = call @llvm.vp.icmp.nxv1i32( splat (i32 4), %va, metadata !"sgt", %m, i32 %evl) ret %v } @@ -1664,9 +1568,7 @@ define @icmp_sge_vi_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %va, %vb, metadata !"sge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( %va, splat (i32 4), metadata !"sge", %m, i32 %evl) ret %v } @@ -1676,9 +1578,7 @@ define @icmp_sge_vi_swap_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %vb, %va, metadata !"sge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( splat (i32 4), %va, metadata !"sge", %m, i32 %evl) ret %v } @@ -1722,9 +1622,7 @@ define @icmp_slt_vi_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %va, %vb, metadata !"slt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( %va, splat (i32 4), metadata !"slt", %m, i32 %evl) ret %v } @@ -1734,9 +1632,7 @@ define @icmp_slt_vi_swap_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %vb, %va, metadata !"slt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( splat (i32 4), %va, metadata !"slt", %m, i32 %evl) ret %v } @@ -1782,9 +1678,7 @@ define @icmp_sle_vi_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %va, %vb, metadata !"sle", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( %va, splat (i32 4), metadata !"sle", %m, i32 %evl) ret %v } @@ -1794,9 +1688,7 @@ define @icmp_sle_vi_swap_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i32( %vb, %va, metadata !"sle", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i32( splat (i32 4), %va, metadata !"sle", %m, i32 %evl) ret %v } @@ -1846,9 +1738,7 @@ define @icmp_eq_vi_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %va, %vb, metadata !"eq", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( %va, splat (i32 4), metadata !"eq", %m, i32 %evl) ret %v } @@ -1859,9 +1749,7 @@ define @icmp_eq_vi_swap_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %vb, %va, metadata !"eq", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( splat (i32 4), %va, metadata !"eq", %m, i32 %evl) ret %v } @@ -1909,9 +1797,7 @@ define @icmp_ne_vi_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %va, %vb, metadata !"ne", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( %va, splat (i32 4), metadata !"ne", %m, i32 %evl) ret %v } @@ -1922,9 +1808,7 @@ define @icmp_ne_vi_swap_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %vb, %va, metadata !"ne", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( splat (i32 4), %va, metadata !"ne", %m, i32 %evl) ret %v } @@ -1972,9 +1856,7 @@ define @icmp_ugt_vi_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %va, %vb, metadata !"ugt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( %va, splat (i32 4), metadata !"ugt", %m, i32 %evl) ret %v } @@ -1985,9 +1867,7 @@ define @icmp_ugt_vi_swap_nxv8i32( %va, poison, i32 4, i32 0 - %vb = 
shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %vb, %va, metadata !"ugt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( splat (i32 4), %va, metadata !"ugt", %m, i32 %evl) ret %v } @@ -2037,9 +1917,7 @@ define @icmp_uge_vi_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %va, %vb, metadata !"uge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( %va, splat (i32 4), metadata !"uge", %m, i32 %evl) ret %v } @@ -2050,9 +1928,7 @@ define @icmp_uge_vi_swap_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %vb, %va, metadata !"uge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( splat (i32 4), %va, metadata !"uge", %m, i32 %evl) ret %v } @@ -2100,9 +1976,7 @@ define @icmp_ult_vi_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %va, %vb, metadata !"ult", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( %va, splat (i32 4), metadata !"ult", %m, i32 %evl) ret %v } @@ -2113,9 +1987,7 @@ define @icmp_ult_vi_swap_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %vb, %va, metadata !"ult", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( splat (i32 4), %va, metadata !"ult", %m, i32 %evl) ret %v } @@ -2163,9 +2035,7 @@ define @icmp_sgt_vi_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %va, %vb, metadata !"sgt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( %va, splat (i32 4), metadata !"sgt", %m, i32 %evl) ret %v } @@ -2176,9 +2046,7 @@ define @icmp_sgt_vi_swap_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %vb, %va, metadata !"sgt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( splat (i32 4), %va, metadata !"sgt", %m, i32 %evl) ret %v } @@ -2228,9 +2096,7 @@ define @icmp_sge_vi_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %va, %vb, metadata !"sge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( %va, splat (i32 4), metadata !"sge", %m, i32 %evl) ret %v } @@ -2241,9 +2107,7 @@ define @icmp_sge_vi_swap_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %vb, %va, metadata !"sge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( splat (i32 4), %va, metadata !"sge", %m, i32 %evl) ret %v } @@ -2291,9 +2155,7 @@ define @icmp_slt_vi_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %va, %vb, metadata !"slt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( %va, splat (i32 4), metadata !"slt", %m, i32 %evl) ret %v } @@ -2304,9 +2166,7 @@ define @icmp_slt_vi_swap_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %vb, %va, metadata !"slt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( splat (i32 4), %va, metadata !"slt", %m, i32 %evl) ret %v } @@ -2356,9 +2216,7 @@ define @icmp_sle_vi_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %va, %vb, metadata !"sle", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( %va, splat 
(i32 4), metadata !"sle", %m, i32 %evl) ret %v } @@ -2369,9 +2227,7 @@ define @icmp_sle_vi_swap_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i32( %vb, %va, metadata !"sle", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i32( splat (i32 4), %va, metadata !"sle", %m, i32 %evl) ret %v } @@ -2572,9 +2428,7 @@ define @icmp_eq_vi_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %va, %vb, metadata !"eq", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( %va, splat (i64 4), metadata !"eq", %m, i32 %evl) ret %v } @@ -2584,9 +2438,7 @@ define @icmp_eq_vi_swap_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %vb, %va, metadata !"eq", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( splat (i64 4), %va, metadata !"eq", %m, i32 %evl) ret %v } @@ -2658,9 +2510,7 @@ define @icmp_ne_vi_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %va, %vb, metadata !"ne", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( %va, splat (i64 4), metadata !"ne", %m, i32 %evl) ret %v } @@ -2670,9 +2520,7 @@ define @icmp_ne_vi_swap_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %vb, %va, metadata !"ne", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( splat (i64 4), %va, metadata !"ne", %m, i32 %evl) ret %v } @@ -2744,9 +2592,7 @@ define @icmp_ugt_vi_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %va, %vb, metadata !"ugt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( %va, splat (i64 4), metadata !"ugt", %m, i32 %evl) ret %v } @@ -2756,9 +2602,7 @@ define @icmp_ugt_vi_swap_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %vb, %va, metadata !"ugt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( splat (i64 4), %va, metadata !"ugt", %m, i32 %evl) ret %v } @@ -2832,9 +2676,7 @@ define @icmp_uge_vi_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %va, %vb, metadata !"uge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( %va, splat (i64 4), metadata !"uge", %m, i32 %evl) ret %v } @@ -2844,9 +2686,7 @@ define @icmp_uge_vi_swap_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %vb, %va, metadata !"uge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( splat (i64 4), %va, metadata !"uge", %m, i32 %evl) ret %v } @@ -2918,9 +2758,7 @@ define @icmp_ult_vi_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %va, %vb, metadata !"ult", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( %va, splat (i64 4), metadata !"ult", %m, i32 %evl) ret %v } @@ -2930,9 +2768,7 @@ define @icmp_ult_vi_swap_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %vb, %va, metadata !"ult", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( splat (i64 4), %va, metadata !"ult", %m, i32 %evl) ret %v } @@ -3004,9 +2840,7 @@ define @icmp_sgt_vi_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, 
zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %va, %vb, metadata !"sgt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( %va, splat (i64 4), metadata !"sgt", %m, i32 %evl) ret %v } @@ -3016,9 +2850,7 @@ define @icmp_sgt_vi_swap_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %vb, %va, metadata !"sgt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( splat (i64 4), %va, metadata !"sgt", %m, i32 %evl) ret %v } @@ -3092,9 +2924,7 @@ define @icmp_sge_vi_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %va, %vb, metadata !"sge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( %va, splat (i64 4), metadata !"sge", %m, i32 %evl) ret %v } @@ -3104,9 +2934,7 @@ define @icmp_sge_vi_swap_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %vb, %va, metadata !"sge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( splat (i64 4), %va, metadata !"sge", %m, i32 %evl) ret %v } @@ -3178,9 +3006,7 @@ define @icmp_slt_vi_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %va, %vb, metadata !"slt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( %va, splat (i64 4), metadata !"slt", %m, i32 %evl) ret %v } @@ -3190,9 +3016,7 @@ define @icmp_slt_vi_swap_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %vb, %va, metadata !"slt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( splat (i64 4), %va, metadata !"slt", %m, i32 %evl) ret %v } @@ -3266,9 +3090,7 @@ define @icmp_sle_vi_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %va, %vb, metadata !"sle", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( %va, splat (i64 4), metadata !"sle", %m, i32 %evl) ret %v } @@ -3278,9 +3100,7 @@ define @icmp_sle_vi_swap_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv1i64( %vb, %va, metadata !"sle", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv1i64( splat (i64 4), %va, metadata !"sle", %m, i32 %evl) ret %v } @@ -3360,9 +3180,7 @@ define @icmp_eq_vi_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"eq", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( %va, splat (i64 4), metadata !"eq", %m, i32 %evl) ret %v } @@ -3373,9 +3191,7 @@ define @icmp_eq_vi_swap_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %vb, %va, metadata !"eq", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( splat (i64 4), %va, metadata !"eq", %m, i32 %evl) ret %v } @@ -3453,9 +3269,7 @@ define @icmp_ne_vi_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"ne", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( %va, splat (i64 4), metadata !"ne", %m, i32 %evl) ret %v } @@ -3466,9 +3280,7 @@ define @icmp_ne_vi_swap_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %vb, %va, metadata !"ne", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( splat (i64 4), %va, metadata !"ne", %m, i32 %evl) ret 
%v } @@ -3546,9 +3358,7 @@ define @icmp_ugt_vi_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"ugt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( %va, splat (i64 4), metadata !"ugt", %m, i32 %evl) ret %v } @@ -3559,9 +3369,7 @@ define @icmp_ugt_vi_swap_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %vb, %va, metadata !"ugt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( splat (i64 4), %va, metadata !"ugt", %m, i32 %evl) ret %v } @@ -3641,9 +3449,7 @@ define @icmp_uge_vi_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"uge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( %va, splat (i64 4), metadata !"uge", %m, i32 %evl) ret %v } @@ -3654,9 +3460,7 @@ define @icmp_uge_vi_swap_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %vb, %va, metadata !"uge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( splat (i64 4), %va, metadata !"uge", %m, i32 %evl) ret %v } @@ -3734,9 +3538,7 @@ define @icmp_ult_vi_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"ult", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( %va, splat (i64 4), metadata !"ult", %m, i32 %evl) ret %v } @@ -3747,9 +3549,7 @@ define @icmp_ult_vi_swap_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %vb, %va, metadata !"ult", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( splat (i64 4), %va, metadata !"ult", %m, i32 %evl) ret %v } @@ -3827,9 +3627,7 @@ define @icmp_sgt_vi_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"sgt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( %va, splat (i64 4), metadata !"sgt", %m, i32 %evl) ret %v } @@ -3840,9 +3638,7 @@ define @icmp_sgt_vi_swap_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %vb, %va, metadata !"sgt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( splat (i64 4), %va, metadata !"sgt", %m, i32 %evl) ret %v } @@ -3922,9 +3718,7 @@ define @icmp_sge_vi_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"sge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( %va, splat (i64 4), metadata !"sge", %m, i32 %evl) ret %v } @@ -3935,9 +3729,7 @@ define @icmp_sge_vi_swap_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %vb, %va, metadata !"sge", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( splat (i64 4), %va, metadata !"sge", %m, i32 %evl) ret %v } @@ -4015,9 +3807,7 @@ define @icmp_slt_vi_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"slt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( %va, splat (i64 4), metadata !"slt", %m, i32 %evl) ret %v } @@ -4028,9 +3818,7 @@ define @icmp_slt_vi_swap_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call 
@llvm.vp.icmp.nxv8i64( %vb, %va, metadata !"slt", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( splat (i64 4), %va, metadata !"slt", %m, i32 %evl) ret %v } @@ -4110,9 +3898,7 @@ define @icmp_sle_vi_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"sle", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( %va, splat (i64 4), metadata !"sle", %m, i32 %evl) ret %v } @@ -4123,8 +3909,6 @@ define @icmp_sle_vi_swap_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.icmp.nxv8i64( %vb, %va, metadata !"sle", %m, i32 %evl) + %v = call @llvm.vp.icmp.nxv8i64( splat (i64 4), %va, metadata !"sle", %m, i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll index 5f35a4e50a952..90ffeff9689e0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll @@ -78,9 +78,7 @@ define @icmp_eq_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp eq %va, %splat + %vc = icmp eq %va, splat (i8 0) ret %vc } @@ -90,9 +88,7 @@ define @icmp_eq_vi_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp eq %va, %splat + %vc = icmp eq %va, splat (i8 5) ret %vc } @@ -102,9 +98,7 @@ define @icmp_eq_iv_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp eq %splat, %va + %vc = icmp eq splat (i8 5), %va ret %vc } @@ -148,9 +142,7 @@ define @icmp_ne_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsne.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ne %va, %splat + %vc = icmp ne %va, splat (i8 5) ret %vc } @@ -194,9 +186,7 @@ define @icmp_ugt_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ugt %va, %splat + %vc = icmp ugt %va, splat (i8 5) ret %vc } @@ -242,9 +232,7 @@ define @icmp_uge_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vmv.v.i v9, -16 ; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i8 -16) ret %vc } @@ -254,9 +242,7 @@ define @icmp_uge_vi_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 14 ; CHECK-NEXT: ret - %head = insertelement poison, i8 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i8 15) ret %vc } @@ -266,9 +252,7 @@ define @icmp_uge_iv_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i8 15, i32 0 - %splat = shufflevector %head, poison, 
zeroinitializer - %vc = icmp uge %splat, %va + %vc = icmp uge splat (i8 15), %va ret %vc } @@ -278,9 +262,7 @@ define @icmp_uge_vi_nxv8i8_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i8 0) ret %vc } @@ -290,9 +272,7 @@ define @icmp_uge_vi_nxv8i8_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i8 1) ret %vc } @@ -302,9 +282,7 @@ define @icmp_uge_vi_nxv8i8_4( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i8 -15) ret %vc } @@ -314,9 +292,7 @@ define @icmp_uge_vi_nxv8i8_5( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i8 16) ret %vc } @@ -375,9 +351,7 @@ define @icmp_ult_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsltu.vx v0, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i8 -16) ret %vc } @@ -387,9 +361,7 @@ define @icmp_ult_vi_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i8 -15) ret %vc } @@ -399,9 +371,7 @@ define @icmp_ult_iv_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, -15 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %splat, %va + %vc = icmp ult splat (i8 -15), %va ret %vc } @@ -411,9 +381,7 @@ define @icmp_ult_vi_nxv8i8_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i8 0) ret %vc } @@ -423,9 +391,7 @@ define @icmp_ult_vi_nxv8i8_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i8 1) ret %vc } @@ -435,9 +401,7 @@ define @icmp_ult_vi_nxv8i8_4( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i8 16) ret %vc } @@ -496,9 +460,7 @@ define @icmp_ule_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, 5 ; CHECK-NEXT: 
ret - %head = insertelement poison, i8 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ule %va, %splat + %vc = icmp ule %va, splat (i8 5) ret %vc } @@ -542,9 +504,7 @@ define @icmp_sgt_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sgt %va, %splat + %vc = icmp sgt %va, splat (i8 5) ret %vc } @@ -590,9 +550,7 @@ define @icmp_sge_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vmv.v.i v9, -16 ; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i8 -16) ret %vc } @@ -602,9 +560,7 @@ define @icmp_sge_vi_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i8 -15) ret %vc } @@ -614,9 +570,7 @@ define @icmp_sge_iv_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -15 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %splat, %va + %vc = icmp sge splat (i8 -15), %va ret %vc } @@ -626,9 +580,7 @@ define @icmp_sge_vi_nxv8i8_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i8 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i8 0) ret %vc } @@ -638,9 +590,7 @@ define @icmp_sge_vi_nxv8i8_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i8 16) ret %vc } @@ -685,9 +635,7 @@ define @icmp_slt_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmslt.vx v0, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i8 -16) ret %vc } @@ -697,9 +645,7 @@ define @icmp_slt_vi_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i8 -15) ret %vc } @@ -709,9 +655,7 @@ define @icmp_slt_iv_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, -15 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %splat, %va + %vc = icmp slt splat (i8 -15), %va ret %vc } @@ -721,9 +665,7 @@ define @icmp_slt_vi_nxv8i8_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i8 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i8 0) ret %vc } @@ -733,9 +675,7 @@ define @icmp_slt_vi_nxv8i8_3( %va) { ; 
CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i8 16) ret %vc } @@ -780,9 +720,7 @@ define @icmp_sle_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sle %va, %splat + %vc = icmp sle %va, splat (i8 5) ret %vc } @@ -826,9 +764,7 @@ define @icmp_eq_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp eq %va, %splat + %vc = icmp eq %va, splat (i16 0) ret %vc } @@ -838,9 +774,7 @@ define @icmp_eq_vi_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i16 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp eq %va, %splat + %vc = icmp eq %va, splat (i16 5) ret %vc } @@ -850,9 +784,7 @@ define @icmp_eq_iv_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i16 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp eq %splat, %va + %vc = icmp eq splat (i16 5), %va ret %vc } @@ -896,9 +828,7 @@ define @icmp_ne_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsne.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i16 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ne %va, %splat + %vc = icmp ne %va, splat (i16 5) ret %vc } @@ -942,9 +872,7 @@ define @icmp_ugt_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i16 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ugt %va, %splat + %vc = icmp ugt %va, splat (i16 5) ret %vc } @@ -990,9 +918,7 @@ define @icmp_uge_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vmv.v.i v10, -16 ; CHECK-NEXT: vmsleu.vv v0, v10, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i16 -16) ret %vc } @@ -1002,9 +928,7 @@ define @icmp_uge_vi_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 14 ; CHECK-NEXT: ret - %head = insertelement poison, i16 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i16 15) ret %vc } @@ -1014,9 +938,7 @@ define @icmp_uge_iv_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i16 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %splat, %va + %vc = icmp uge splat (i16 15), %va ret %vc } @@ -1026,9 +948,7 @@ define @icmp_uge_vi_nxv8i16_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge 
%va, splat (i16 0) ret %vc } @@ -1038,9 +958,7 @@ define @icmp_uge_vi_nxv8i16_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i16 1) ret %vc } @@ -1050,9 +968,7 @@ define @icmp_uge_vi_nxv8i16_4( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i16 -15) ret %vc } @@ -1062,9 +978,7 @@ define @icmp_uge_vi_nxv8i16_5( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i16 16) ret %vc } @@ -1109,9 +1023,7 @@ define @icmp_ult_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsltu.vx v0, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i16 -16) ret %vc } @@ -1121,9 +1033,7 @@ define @icmp_ult_vi_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i16 -15) ret %vc } @@ -1133,9 +1043,7 @@ define @icmp_ult_iv_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, -15 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %splat, %va + %vc = icmp ult splat (i16 -15), %va ret %vc } @@ -1145,9 +1053,7 @@ define @icmp_ult_vi_nxv8i16_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i16 0) ret %vc } @@ -1157,9 +1063,7 @@ define @icmp_ult_vi_nxv8i16_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i16 1) ret %vc } @@ -1169,9 +1073,7 @@ define @icmp_ult_vi_nxv8i16_4( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i16 16) ret %vc } @@ -1216,9 +1118,7 @@ define @icmp_ule_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i16 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ule %va, %splat + %vc = icmp ule %va, splat (i16 5) ret %vc } @@ -1262,9 +1162,7 @@ define @icmp_sgt_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, 5 ; 
CHECK-NEXT: ret - %head = insertelement poison, i16 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sgt %va, %splat + %vc = icmp sgt %va, splat (i16 5) ret %vc } @@ -1310,9 +1208,7 @@ define @icmp_sge_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vmv.v.i v10, -16 ; CHECK-NEXT: vmsle.vv v0, v10, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i16 -16) ret %vc } @@ -1322,9 +1218,7 @@ define @icmp_sge_vi_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i16 -15) ret %vc } @@ -1334,9 +1228,7 @@ define @icmp_sge_iv_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -15 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %splat, %va + %vc = icmp sge splat (i16 -15), %va ret %vc } @@ -1346,9 +1238,7 @@ define @icmp_sge_vi_nxv8i16_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i16 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i16 0) ret %vc } @@ -1358,9 +1248,7 @@ define @icmp_sge_vi_nxv8i16_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i16 16) ret %vc } @@ -1405,9 +1293,7 @@ define @icmp_slt_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmslt.vx v0, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i16 -16) ret %vc } @@ -1417,9 +1303,7 @@ define @icmp_slt_vi_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i16 -15) ret %vc } @@ -1429,9 +1313,7 @@ define @icmp_slt_iv_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, -15 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %splat, %va + %vc = icmp slt splat (i16 -15), %va ret %vc } @@ -1441,9 +1323,7 @@ define @icmp_slt_vi_nxv8i16_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i16 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i16 0) ret %vc } @@ -1453,9 +1333,7 @@ define @icmp_slt_vi_nxv8i16_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i16 
16) ret %vc } @@ -1500,9 +1378,7 @@ define @icmp_sle_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i16 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sle %va, %splat + %vc = icmp sle %va, splat (i16 5) ret %vc } @@ -1546,9 +1422,7 @@ define @icmp_eq_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp eq %va, %splat + %vc = icmp eq %va, splat (i32 0) ret %vc } @@ -1558,9 +1432,7 @@ define @icmp_eq_vi_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i32 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp eq %va, %splat + %vc = icmp eq %va, splat (i32 5) ret %vc } @@ -1570,9 +1442,7 @@ define @icmp_eq_iv_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i32 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp eq %splat, %va + %vc = icmp eq splat (i32 5), %va ret %vc } @@ -1616,9 +1486,7 @@ define @icmp_ne_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsne.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i32 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ne %va, %splat + %vc = icmp ne %va, splat (i32 5) ret %vc } @@ -1662,9 +1530,7 @@ define @icmp_ugt_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i32 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ugt %va, %splat + %vc = icmp ugt %va, splat (i32 5) ret %vc } @@ -1710,9 +1576,7 @@ define @icmp_uge_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vmv.v.i v12, -16 ; CHECK-NEXT: vmsleu.vv v0, v12, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i32 -16) ret %vc } @@ -1722,9 +1586,7 @@ define @icmp_uge_vi_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 14 ; CHECK-NEXT: ret - %head = insertelement poison, i32 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i32 15) ret %vc } @@ -1734,9 +1596,7 @@ define @icmp_uge_iv_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i32 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %splat, %va + %vc = icmp uge splat (i32 15), %va ret %vc } @@ -1746,9 +1606,7 @@ define @icmp_uge_vi_nxv8i32_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i32 0) ret %vc } @@ -1758,9 +1616,7 @@ define @icmp_uge_vi_nxv8i32_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 1, i32 0 - 
%splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i32 1) ret %vc } @@ -1770,9 +1626,7 @@ define @icmp_uge_vi_nxv8i32_4( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i32 -15) ret %vc } @@ -1782,9 +1636,7 @@ define @icmp_uge_vi_nxv8i32_5( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i32 16) ret %vc } @@ -1829,9 +1681,7 @@ define @icmp_ult_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsltu.vx v0, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i32 -16) ret %vc } @@ -1841,9 +1691,7 @@ define @icmp_ult_vi_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i32 -15) ret %vc } @@ -1853,9 +1701,7 @@ define @icmp_ult_iv_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, -15 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %splat, %va + %vc = icmp ult splat (i32 -15), %va ret %vc } @@ -1865,9 +1711,7 @@ define @icmp_ult_vi_nxv8i32_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i32 0) ret %vc } @@ -1877,9 +1721,7 @@ define @icmp_ult_vi_nxv8i32_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i32 1) ret %vc } @@ -1889,9 +1731,7 @@ define @icmp_ult_vi_nxv8i32_4( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i32 16) ret %vc } @@ -1936,9 +1776,7 @@ define @icmp_ule_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i32 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ule %va, %splat + %vc = icmp ule %va, splat (i32 5) ret %vc } @@ -1982,9 +1820,7 @@ define @icmp_sgt_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i32 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sgt %va, %splat + %vc = icmp sgt %va, splat (i32 5) ret %vc } @@ -2030,9 +1866,7 @@ define 
@icmp_sge_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vmv.v.i v12, -16 ; CHECK-NEXT: vmsle.vv v0, v12, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i32 -16) ret %vc } @@ -2042,9 +1876,7 @@ define @icmp_sge_vi_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i32 -15) ret %vc } @@ -2054,9 +1886,7 @@ define @icmp_sge_iv_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -15 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %splat, %va + %vc = icmp sge splat (i32 -15), %va ret %vc } @@ -2066,9 +1896,7 @@ define @icmp_sge_vi_nxv8i32_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i32 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i32 0) ret %vc } @@ -2078,9 +1906,7 @@ define @icmp_sge_vi_nxv8i32_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i32 16) ret %vc } @@ -2125,9 +1951,7 @@ define @icmp_slt_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vmslt.vx v0, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i32 -16) ret %vc } @@ -2137,9 +1961,7 @@ define @icmp_slt_vi_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i32 -15) ret %vc } @@ -2149,9 +1971,7 @@ define @icmp_slt_iv_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, -15 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %splat, %va + %vc = icmp slt splat (i32 -15), %va ret %vc } @@ -2161,9 +1981,7 @@ define @icmp_slt_vi_nxv8i32_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i32 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i32 0) ret %vc } @@ -2173,9 +1991,7 @@ define @icmp_slt_vi_nxv8i32_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i32 16) ret %vc } @@ -2220,9 +2036,7 @@ define @icmp_sle_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i32 5, i32 0 - 
%splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sle %va, %splat + %vc = icmp sle %va, splat (i32 5) ret %vc } @@ -2292,9 +2106,7 @@ define @icmp_eq_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp eq %va, %splat + %vc = icmp eq %va, splat (i64 0) ret %vc } @@ -2304,9 +2116,7 @@ define @icmp_eq_vi_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i64 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp eq %va, %splat + %vc = icmp eq %va, splat (i64 5) ret %vc } @@ -2316,9 +2126,7 @@ define @icmp_eq_iv_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i64 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp eq %splat, %va + %vc = icmp eq splat (i64 5), %va ret %vc } @@ -2388,9 +2196,7 @@ define @icmp_ne_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsne.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i64 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ne %va, %splat + %vc = icmp ne %va, splat (i64 5) ret %vc } @@ -2460,9 +2266,7 @@ define @icmp_ugt_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i64 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ugt %va, %splat + %vc = icmp ugt %va, splat (i64 5) ret %vc } @@ -2534,9 +2338,7 @@ define @icmp_uge_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vmv.v.i v16, -16 ; CHECK-NEXT: vmsleu.vv v0, v16, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i64 -16) ret %vc } @@ -2546,9 +2348,7 @@ define @icmp_uge_vi_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 14 ; CHECK-NEXT: ret - %head = insertelement poison, i64 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i64 15) ret %vc } @@ -2558,9 +2358,7 @@ define @icmp_uge_iv_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %splat, %va + %vc = icmp uge splat (i64 15), %va ret %vc } @@ -2570,9 +2368,7 @@ define @icmp_uge_vi_nxv8i64_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmset.m v0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i64 0) ret %vc } @@ -2582,9 +2378,7 @@ define @icmp_uge_vi_nxv8i64_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i64 1) ret %vc } @@ -2594,9 +2388,7 @@ define @icmp_uge_vi_nxv8i64_4( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, 
m8, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i64 -15) ret %vc } @@ -2606,9 +2398,7 @@ define @icmp_uge_vi_nxv8i64_5( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp uge %va, %splat + %vc = icmp uge %va, splat (i64 16) ret %vc } @@ -2679,9 +2469,7 @@ define @icmp_ult_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsltu.vx v0, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i64 -16) ret %vc } @@ -2691,9 +2479,7 @@ define @icmp_ult_vi_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i64 -15) ret %vc } @@ -2703,9 +2489,7 @@ define @icmp_ult_iv_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsgtu.vi v0, v8, -15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %splat, %va + %vc = icmp ult splat (i64 -15), %va ret %vc } @@ -2715,9 +2499,7 @@ define @icmp_ult_vi_nxv8i64_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i64 0) ret %vc } @@ -2727,9 +2509,7 @@ define @icmp_ult_vi_nxv8i64_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i64 1) ret %vc } @@ -2739,9 +2519,7 @@ define @icmp_ult_vi_nxv8i64_4( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ult %va, %splat + %vc = icmp ult %va, splat (i64 16) ret %vc } @@ -2812,9 +2590,7 @@ define @icmp_ule_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsleu.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i64 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp ule %va, %splat + %vc = icmp ule %va, splat (i64 5) ret %vc } @@ -2884,9 +2660,7 @@ define @icmp_sgt_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i64 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sgt %va, %splat + %vc = icmp sgt %va, splat (i64 5) ret %vc } @@ -2958,9 +2732,7 @@ define @icmp_sge_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vmv.v.i v16, -16 ; CHECK-NEXT: vmsle.vv v0, v16, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, 
%splat + %vc = icmp sge %va, splat (i64 -16) ret %vc } @@ -2970,9 +2742,7 @@ define @icmp_sge_vi_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i64 -15) ret %vc } @@ -2982,9 +2752,7 @@ define @icmp_sge_iv_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %splat, %va + %vc = icmp sge splat (i64 -15), %va ret %vc } @@ -2994,9 +2762,7 @@ define @icmp_sge_vi_nxv8i64_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i64 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i64 0) ret %vc } @@ -3006,9 +2772,7 @@ define @icmp_sge_vi_nxv8i64_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sge %va, %splat + %vc = icmp sge %va, splat (i64 16) ret %vc } @@ -3079,9 +2843,7 @@ define @icmp_slt_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vmslt.vx v0, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i64 -16) ret %vc } @@ -3091,9 +2853,7 @@ define @icmp_slt_vi_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -16 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i64 -15) ret %vc } @@ -3103,9 +2863,7 @@ define @icmp_slt_iv_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsgt.vi v0, v8, -15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %splat, %va + %vc = icmp slt splat (i64 -15), %va ret %vc } @@ -3115,9 +2873,7 @@ define @icmp_slt_vi_nxv8i64_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i64 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i64 0) ret %vc } @@ -3127,9 +2883,7 @@ define @icmp_slt_vi_nxv8i64_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp slt %va, %splat + %vc = icmp slt %va, splat (i64 16) ret %vc } @@ -3200,9 +2954,7 @@ define @icmp_sle_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i64 5, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = icmp sle %va, %splat + %vc = icmp sle %va, splat (i64 5) ret %vc } @@ -3216,11 +2968,7 @@ define @icmp_eq_ii_nxv8i8() { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; 
CHECK-NEXT: vmclr.m v0 ; CHECK-NEXT: ret - %heada = insertelement poison, i8 5, i32 0 - %splata = shufflevector %heada, poison, zeroinitializer - %headb = insertelement poison, i8 2, i32 0 - %splatb = shufflevector %headb, poison, zeroinitializer - %vc = icmp eq %splata, %splatb + %vc = icmp eq splat (i8 5), splat (i8 2) ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 5d09c39dfd6e6..9046c861c3367 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -1214,8 +1214,6 @@ vector.ph: ; preds = %entry %3 = shl i64 %2, 2 %n.mod.vf = urem i64 1024, %3 %n.vec = sub nsw i64 1024, %n.mod.vf - %broadcast.splatinsert = insertelement poison, i32 2, i32 0 - %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer %4 = call i64 @llvm.vscale.i64() %5 = shl i64 %4, 2 br label %vector.body @@ -1224,7 +1222,7 @@ vector.body: ; preds = %vector.body, %vecto %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] %6 = getelementptr inbounds i32, ptr %a, i64 %index %wide.load = load , ptr %6, align 4 - %7 = ashr %wide.load, %broadcast.splat + %7 = ashr %wide.load, splat (i32 2) store %7, ptr %6, align 4 %index.next = add nuw i64 %index, %5 %8 = icmp eq i64 %index.next, %n.vec diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll index 8f02ca6535810..eff8c26d4d061 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -85,10 +85,8 @@ define @mul_stepvector_nxv8i8() { ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret entry: - %0 = insertelement poison, i8 3, i32 0 - %1 = shufflevector %0, poison, zeroinitializer %2 = call @llvm.experimental.stepvector.nxv8i8() - %3 = mul %2, %1 + %3 = mul %2, splat (i8 3) ret %3 } @@ -100,10 +98,8 @@ define @shl_stepvector_nxv8i8() { ; CHECK-NEXT: vsll.vi v8, v8, 2 ; CHECK-NEXT: ret entry: - %0 = insertelement poison, i8 2, i32 0 - %1 = shufflevector %0, poison, zeroinitializer %2 = call @llvm.experimental.stepvector.nxv8i8() - %3 = shl %2, %1 + %3 = shl %2, splat (i8 2) ret %3 } @@ -250,10 +246,8 @@ define @mul_stepvector_nxv16i16() { ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret entry: - %0 = insertelement poison, i16 3, i32 0 - %1 = shufflevector %0, poison, zeroinitializer %2 = call @llvm.experimental.stepvector.nxv16i16() - %3 = mul %2, %1 + %3 = mul %2, splat (i16 3) ret %3 } @@ -265,10 +259,8 @@ define @shl_stepvector_nxv16i16() { ; CHECK-NEXT: vsll.vi v8, v8, 2 ; CHECK-NEXT: ret entry: - %0 = insertelement poison, i16 2, i32 0 - %1 = shufflevector %0, poison, zeroinitializer %2 = call @llvm.experimental.stepvector.nxv16i16() - %3 = shl %2, %1 + %3 = shl %2, splat (i16 2) ret %3 } @@ -379,10 +371,8 @@ define @mul_stepvector_nxv16i32() { ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret entry: - %0 = insertelement poison, i32 3, i32 0 - %1 = shufflevector %0, poison, zeroinitializer %2 = call @llvm.experimental.stepvector.nxv16i32() - %3 = mul %2, %1 + %3 = mul %2, splat (i32 3) ret %3 } @@ -394,10 +384,8 @@ define @shl_stepvector_nxv16i32() { ; CHECK-NEXT: vsll.vi v8, v8, 2 ; CHECK-NEXT: ret entry: - %0 = insertelement poison, i32 2, i32 0 - %1 = shufflevector %0, poison, zeroinitializer %2 = call @llvm.experimental.stepvector.nxv16i32() - %3 = shl %2, %1 + %3 = shl %2, splat (i32 2) ret %3 } @@ -484,10 +472,8 @@ define @mul_stepvector_nxv8i64() { ; CHECK-NEXT: vmul.vx v8, v8, a0 ; 
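; The stepvector hunks above scale the lane-index vector by a splat constant.
; A small runnable sketch under the same nxv8i8 element type (function name
; illustrative); per the CHECK lines above, this lowers to vid.v plus a
; scalar-register multiply:
define <vscale x 8 x i8> @stepvector_scale_sketch() {
  ; <0, 1, 2, ...> multiplied lane-wise by 3 gives <0, 3, 6, ...>.
  %step = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
  %scaled = mul <vscale x 8 x i8> %step, splat (i8 3)
  ret <vscale x 8 x i8> %scaled
}
declare <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()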
CHECK-NEXT: ret entry: - %0 = insertelement poison, i64 3, i32 0 - %1 = shufflevector %0, poison, zeroinitializer %2 = call @llvm.experimental.stepvector.nxv8i64() - %3 = mul %2, %1 + %3 = mul %2, splat (i64 3) ret %3 } @@ -520,10 +506,8 @@ define @mul_bigimm_stepvector_nxv8i64() { ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: ret entry: - %0 = insertelement poison, i64 33333333333, i32 0 - %1 = shufflevector %0, poison, zeroinitializer %2 = call @llvm.experimental.stepvector.nxv8i64() - %3 = mul %2, %1 + %3 = mul %2, splat (i64 33333333333) ret %3 } @@ -535,10 +519,8 @@ define @shl_stepvector_nxv8i64() { ; CHECK-NEXT: vsll.vi v8, v8, 2 ; CHECK-NEXT: ret entry: - %0 = insertelement poison, i64 2, i32 0 - %1 = shufflevector %0, poison, zeroinitializer %2 = call @llvm.experimental.stepvector.nxv8i64() - %3 = shl %2, %1 + %3 = shl %2, splat (i64 2) ret %3 } @@ -637,10 +619,8 @@ define @mul_stepvector_nxv16i64() { ; RV64-NEXT: vadd.vx v16, v8, a0 ; RV64-NEXT: ret entry: - %0 = insertelement poison, i64 3, i32 0 - %1 = shufflevector %0, poison, zeroinitializer %2 = call @llvm.experimental.stepvector.nxv16i64() - %3 = mul %2, %1 + %3 = mul %2, splat (i64 3) ret %3 } @@ -692,10 +672,8 @@ define @mul_bigimm_stepvector_nxv16i64() { ; RV64-NEXT: vadd.vx v16, v8, a0 ; RV64-NEXT: ret entry: - %0 = insertelement poison, i64 33333333333, i32 0 - %1 = shufflevector %0, poison, zeroinitializer %2 = call @llvm.experimental.stepvector.nxv16i64() - %3 = mul %2, %1 + %3 = mul %2, splat (i64 33333333333) ret %3 } @@ -727,10 +705,8 @@ define @shl_stepvector_nxv16i64() { ; RV64-NEXT: vadd.vx v16, v8, a0 ; RV64-NEXT: ret entry: - %0 = insertelement poison, i64 2, i32 0 - %1 = shufflevector %0, poison, zeroinitializer %2 = call @llvm.experimental.stepvector.nxv16i64() - %3 = shl %2, %1 + %3 = shl %2, splat (i64 2) ret %3 } diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll index 6b584cfb22a52..7047c482c1895 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll @@ -206,9 +206,7 @@ define @straightline_offset_shl(ptr %p) { ; CHECK-NEXT: ret [[X]] ; %step = call @llvm.experimental.stepvector.nxv1i64() - %splat.insert = insertelement poison, i64 3, i64 0 - %splat = shufflevector %splat.insert, poison, zeroinitializer - %offset = shl %step, %splat + %offset = shl %step, splat (i64 3) %ptrs = getelementptr i32, ptr %p, %offset %x = call @llvm.masked.gather.nxv1i64.nxv1p0( %ptrs, diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index a834630e7ebea..0e2105d5cba86 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -60,9 +60,7 @@ define @strided_vpload_nxv1i8_i64_allones_mask(ptr %ptr, i64 s ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-RV64-NEXT: vlse8.v v8, (a0), a1 ; CHECK-RV64-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.experimental.vp.strided.load.nxv1i8.p0.i64(ptr %ptr, i64 %stride, %b, i32 %evl) + %load = call @llvm.experimental.vp.strided.load.nxv1i8.p0.i64(ptr %ptr, i64 %stride, splat (i1 true), i32 %evl) ret %load } @@ -84,9 +82,7 @@ define @strided_vpload_nxv1i8_allones_mask(ptr %ptr, i32 signe ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vlse8.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, 
zeroinitializer - %load = call @llvm.experimental.vp.strided.load.nxv1i8.p0.i32(ptr %ptr, i32 signext %stride, %b, i32 %evl) + %load = call @llvm.experimental.vp.strided.load.nxv1i8.p0.i32(ptr %ptr, i32 signext %stride, splat (i1 true), i32 %evl) ret %load } @@ -142,9 +138,7 @@ define @strided_vpload_nxv8i8_allones_mask(ptr %ptr, i32 signe ; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma ; CHECK-NEXT: vlse8.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.experimental.vp.strided.load.nxv8i8.p0.i32(ptr %ptr, i32 signext %stride, %b, i32 %evl) + %load = call @llvm.experimental.vp.strided.load.nxv8i8.p0.i32(ptr %ptr, i32 signext %stride, splat (i1 true), i32 %evl) ret %load } @@ -178,9 +172,7 @@ define @strided_vpload_nxv2i16_allones_mask(ptr %ptr, i32 sig ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlse16.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.experimental.vp.strided.load.nxv2i16.p0.i32(ptr %ptr, i32 signext %stride, %b, i32 %evl) + %load = call @llvm.experimental.vp.strided.load.nxv2i16.p0.i32(ptr %ptr, i32 signext %stride, splat (i1 true), i32 %evl) ret %load } @@ -270,9 +262,7 @@ define @strided_vpload_nxv4i32_allones_mask(ptr %ptr, i32 sig ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; CHECK-NEXT: vlse32.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.experimental.vp.strided.load.nxv4i32.p0.i32(ptr %ptr, i32 signext %stride, %b, i32 %evl) + %load = call @llvm.experimental.vp.strided.load.nxv4i32.p0.i32(ptr %ptr, i32 signext %stride, splat (i1 true), i32 %evl) ret %load } @@ -316,9 +306,7 @@ define @strided_vpload_nxv1i64_allones_mask(ptr %ptr, i32 sig ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlse64.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 signext %stride, %b, i32 %evl) + %load = call @llvm.experimental.vp.strided.load.nxv1i64.p0.i32(ptr %ptr, i32 signext %stride, splat (i1 true), i32 %evl) ret %load } @@ -388,9 +376,7 @@ define @strided_vpload_nxv2f16_allones_mask(ptr %ptr, i32 si ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlse16.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.experimental.vp.strided.load.nxv2f16.p0.i32(ptr %ptr, i32 signext %stride, %b, i32 %evl) + %load = call @llvm.experimental.vp.strided.load.nxv2f16.p0.i32(ptr %ptr, i32 signext %stride, splat (i1 true), i32 %evl) ret %load } @@ -492,9 +478,7 @@ define @strided_vpload_nxv8f32_allones_mask(ptr %ptr, i32 s ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma ; CHECK-NEXT: vlse32.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.experimental.vp.strided.load.nxv8f32.p0.i32(ptr %ptr, i32 signext %stride, %b, i32 %evl) + %load = call @llvm.experimental.vp.strided.load.nxv8f32.p0.i32(ptr %ptr, i32 signext %stride, splat (i1 true), i32 %evl) ret %load } @@ -550,9 +534,7 @@ define @strided_vpload_nxv4f64_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; CHECK-NEXT: vlse64.v v8, (a0), a1 ; 
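; The strided-load hunks replace a two-instruction all-true mask with
; splat (i1 true). A self-contained sketch of one such call, with the
; intrinsic signature spelled out; types mirror the nxv1i8 tests above:
define <vscale x 1 x i8> @strided_load_alltrue_sketch(ptr %p, i32 signext %stride, i32 zeroext %evl) {
  ; All lanes enabled: the mask operand is simply the true splat, and %evl
  ; bounds the number of active lanes.
  %v = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i32(ptr %p, i32 %stride, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i8> %v
}
declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i32(ptr, i32, <vscale x 1 x i1>, i32)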
CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.experimental.vp.strided.load.nxv4f64.p0.i32(ptr %ptr, i32 signext %stride, %b, i32 %evl) + %load = call @llvm.experimental.vp.strided.load.nxv4f64.p0.i32(ptr %ptr, i32 signext %stride, splat (i1 true), i32 %evl) ret %load } @@ -585,9 +567,7 @@ define @strided_vpload_nxv3f64_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; CHECK-NEXT: vlse64.v v8, (a0), a1 ; CHECK-NEXT: ret - %one = insertelement poison, i1 true, i32 0 - %allones = shufflevector %one, poison, zeroinitializer - %v = call @llvm.experimental.vp.strided.load.nxv3f64.p0.i32(ptr %ptr, i32 %stride, %allones, i32 %evl) + %v = call @llvm.experimental.vp.strided.load.nxv3f64.p0.i32(ptr %ptr, i32 %stride, splat (i1 true), i32 %evl) ret %v } @@ -686,9 +666,7 @@ define @strided_load_nxv16f64_allones_mask(ptr %ptr, i64 ; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1 ; CHECK-RV64-NEXT: ret - %one = insertelement poison, i1 true, i32 0 - %allones = shufflevector %one, poison, zeroinitializer - %v = call @llvm.experimental.vp.strided.load.nxv16f64.p0.i64(ptr %ptr, i64 %stride, %allones, i32 %evl) + %v = call @llvm.experimental.vp.strided.load.nxv16f64.p0.i64(ptr %ptr, i64 %stride, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll index cf6ce89b9b5a4..9378bb3d3ca61 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll @@ -460,9 +460,7 @@ define void @strided_vpstore_nxv1i8_allones_mask( %val, ptr %pt ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vsse8.v v8, (a0), a1 ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - call void @llvm.experimental.vp.strided.store.nxv1i8.p0.i32( %val, ptr %ptr, i32 %strided, %b, i32 %evl) + call void @llvm.experimental.vp.strided.store.nxv1i8.p0.i32( %val, ptr %ptr, i32 %strided, splat (i1 true), i32 %evl) ret void } @@ -483,9 +481,7 @@ define void @strided_vpstore_nxv3f32_allones_mask( %v, ptr % ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; CHECK-NEXT: vsse32.v v8, (a0), a1 ; CHECK-NEXT: ret - %one = insertelement poison, i1 true, i32 0 - %allones = shufflevector %one, poison, zeroinitializer - call void @llvm.experimental.vp.strided.store.nxv3f32.p0.i32( %v, ptr %ptr, i32 %stride, %allones, i32 %evl) + call void @llvm.experimental.vp.strided.store.nxv3f32.p0.i32( %v, ptr %ptr, i32 %stride, splat (i1 true), i32 %evl) ret void } @@ -539,9 +535,7 @@ define void @strided_store_nxv16f64_allones_mask( %v, ptr ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a0), a1 ; CHECK-NEXT: ret - %one = insertelement poison, i1 true, i32 0 - %allones = shufflevector %one, poison, zeroinitializer - call void @llvm.experimental.vp.strided.store.nxv16f64.p0.i32( %v, ptr %ptr, i32 %stride, %allones, i32 %evl) + call void @llvm.experimental.vp.strided.store.nxv16f64.p0.i32( %v, ptr %ptr, i32 %stride, splat (i1 true), i32 %evl) ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll index bfbbb4b4067f8..52c2cace185f7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll @@ -34,12 +34,8 @@ define @test_urem_vec_even_divisor_eq0( %x) ; RV64-NEXT: vmv.v.i v8, 
0 ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64-NEXT: ret - %ins1 = insertelement poison, i16 6, i32 0 - %splat1 = shufflevector %ins1, poison, zeroinitializer - %urem = urem %x, %splat1 - %ins2 = insertelement poison, i16 0, i32 0 - %splat2 = shufflevector %ins2, poison, zeroinitializer - %cmp = icmp ne %urem, %splat2 + %urem = urem %x, splat (i16 6) + %cmp = icmp ne %urem, splat (i16 0) %ext = sext %cmp to ret %ext } @@ -70,12 +66,8 @@ define @test_urem_vec_odd_divisor_eq0( %x) ; RV64-NEXT: vmv.v.i v8, 0 ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64-NEXT: ret - %ins1 = insertelement poison, i16 5, i32 0 - %splat1 = shufflevector %ins1, poison, zeroinitializer - %urem = urem %x, %splat1 - %ins2 = insertelement poison, i16 0, i32 0 - %splat2 = shufflevector %ins2, poison, zeroinitializer - %cmp = icmp ne %urem, %splat2 + %urem = urem %x, splat (i16 5) + %cmp = icmp ne %urem, splat (i16 0) %ext = sext %cmp to ret %ext } @@ -116,12 +108,8 @@ define @test_urem_vec_even_divisor_eq1( %x) ; RV64-NEXT: vmv.v.i v8, 0 ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64-NEXT: ret - %ins1 = insertelement poison, i16 6, i32 0 - %splat1 = shufflevector %ins1, poison, zeroinitializer - %urem = urem %x, %splat1 - %ins2 = insertelement poison, i16 1, i32 0 - %splat2 = shufflevector %ins2, poison, zeroinitializer - %cmp = icmp ne %urem, %splat2 + %urem = urem %x, splat (i16 6) + %cmp = icmp ne %urem, splat (i16 1) %ext = sext %cmp to ret %ext } @@ -156,12 +144,8 @@ define @test_urem_vec_odd_divisor_eq1( %x) ; RV64-NEXT: vmv.v.i v8, 0 ; RV64-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64-NEXT: ret - %ins1 = insertelement poison, i16 5, i32 0 - %splat1 = shufflevector %ins1, poison, zeroinitializer - %urem = urem %x, %splat1 - %ins2 = insertelement poison, i16 1, i32 0 - %splat2 = shufflevector %ins2, poison, zeroinitializer - %cmp = icmp ne %urem, %splat2 + %urem = urem %x, splat (i16 5) + %cmp = icmp ne %urem, splat (i16 1) %ext = sext %cmp to ret %ext } diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll index 1cf57371455cf..5b14014a252f1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-sdnode.ll @@ -12,9 +12,7 @@ define @vaaddu_vv_nxv8i8_floor( %x, %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i16 1, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %div = lshr %add, %splat + %div = lshr %add, splat (i16 1) %ret = trunc %div to ret %ret } @@ -31,9 +29,7 @@ define @vaaddu_vx_nxv8i8_floor( %x, i8 %y) { %ysplat = shufflevector %yhead, poison, zeroinitializer %yzv = zext %ysplat to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i16 1, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %div = lshr %add, %splat + %div = lshr %add, splat (i16 1) %ret = trunc %div to ret %ret } @@ -48,9 +44,7 @@ define @vaaddu_vv_nxv8i8_floor_sexti16( %x, < %xzv = sext %x to %yzv = sext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i16 1, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %div = lshr %add, %splat + %div = lshr %add, splat (i16 1) %ret = trunc %div to ret %ret } @@ -65,9 +59,7 @@ define @vaaddu_vv_nxv8i8_floor_zexti32( %x, < %xzv = zext %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i32 1, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %div = lshr %add, %splat + %div = lshr %add, splat (i32 1) %ret = trunc %div to ret %ret } @@ -82,9 +74,7 @@ define 
@vaaddu_vv_nxv8i8_floor_lshr2( %x, %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i16 2, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %div = lshr %add, %splat + %div = lshr %add, splat (i16 2) %ret = trunc %div to ret %ret } @@ -99,9 +89,7 @@ define @vaaddu_vv_nxv8i16_floor( %x, %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i32 1, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %div = lshr %add, %splat + %div = lshr %add, splat (i32 1) %ret = trunc %div to ret %ret } @@ -118,9 +106,7 @@ define @vaaddu_vx_nxv8i16_floor( %x, i16 %y %ysplat = shufflevector %yhead, poison, zeroinitializer %yzv = zext %ysplat to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i32 1, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %div = lshr %add, %splat + %div = lshr %add, splat (i32 1) %ret = trunc %div to ret %ret } @@ -135,9 +121,7 @@ define @vaaddu_vv_nxv8i32_floor( %x, %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i64 1, i64 0 - %splat = shufflevector %one, poison, zeroinitializer - %div = lshr %add, %splat + %div = lshr %add, splat (i64 1) %ret = trunc %div to ret %ret } @@ -154,9 +138,7 @@ define @vaaddu_vx_nxv8i32_floor( %x, i32 %y %ysplat = shufflevector %yhead, poison, zeroinitializer %yzv = zext %ysplat to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i64 1, i64 0 - %splat = shufflevector %one, poison, zeroinitializer - %div = lshr %add, %splat + %div = lshr %add, splat (i64 1) %ret = trunc %div to ret %ret } @@ -171,9 +153,7 @@ define @vaaddu_vv_nxv8i64_floor( %x, %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i128 1, i128 0 - %splat = shufflevector %one, poison, zeroinitializer - %div = lshr %add, %splat + %div = lshr %add, splat (i128 1) %ret = trunc %div to ret %ret } @@ -204,9 +184,7 @@ define @vaaddu_vx_nxv8i64_floor( %x, i64 %y %ysplat = shufflevector %yhead, poison, zeroinitializer %yzv = zext %ysplat to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i128 1, i128 0 - %splat = shufflevector %one, poison, zeroinitializer - %div = lshr %add, %splat + %div = lshr %add, splat (i128 1) %ret = trunc %div to ret %ret } @@ -221,10 +199,8 @@ define @vaaddu_vv_nxv8i8_ceil( %x, %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i16 1, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %add1 = add nuw nsw %add, %splat - %div = lshr %add1, %splat + %add1 = add nuw nsw %add, splat (i16 1) + %div = lshr %add1, splat (i16 1) %ret = trunc %div to ret %ret } @@ -241,10 +217,8 @@ define @vaaddu_vx_nxv8i8_ceil( %x, i8 %y) { %ysplat = shufflevector %yhead, poison, zeroinitializer %yzv = zext %ysplat to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i16 1, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %add1 = add nuw nsw %add, %splat - %div = lshr %add1, %splat + %add1 = add nuw nsw %add, splat (i16 1) + %div = lshr %add1, splat (i16 1) %ret = trunc %div to ret %ret } @@ -262,10 +236,8 @@ define @vaaddu_vv_nxv8i8_ceil_sexti16( %x, %x to %yzv = sext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i16 1, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %add1 = add nuw nsw %add, %splat - %div = lshr %add1, %splat + %add1 = add nuw nsw %add, splat (i16 1) + %div = lshr %add1, splat (i16 1) %ret = trunc %div to ret %ret } @@ -280,10 +252,8 @@ define 
@vaaddu_vv_nxv8i8_ceil_zexti32( %x, %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i32 1, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %add1 = add nuw nsw %add, %splat - %div = lshr %add1, %splat + %add1 = add nuw nsw %add, splat (i32 1) + %div = lshr %add1, splat (i32 1) %ret = trunc %div to ret %ret } @@ -301,10 +271,8 @@ define @vaaddu_vv_nxv8i8_ceil_lshr2( %x, %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i16 2, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %add1 = add nuw nsw %add, %splat - %div = lshr %add1, %splat + %add1 = add nuw nsw %add, splat (i16 2) + %div = lshr %add1, splat (i16 2) %ret = trunc %div to ret %ret } @@ -322,12 +290,8 @@ define @vaaddu_vv_nxv8i8_ceil_add2( %x, %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i16 2, i32 0 - %splat1 = shufflevector %one, poison, zeroinitializer - %two = insertelement poison, i16 2, i32 0 - %splat2 = shufflevector %two, poison, zeroinitializer - %add2 = add nuw nsw %add, %splat2 - %div = lshr %add2, %splat1 + %add2 = add nuw nsw %add, splat (i16 2) + %div = lshr %add2, splat (i16 2) %ret = trunc %div to ret %ret } @@ -342,10 +306,8 @@ define @vaaddu_vv_nxv8i16_ceil( %x, %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i32 1, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %add1 = add nuw nsw %add, %splat - %div = lshr %add1, %splat + %add1 = add nuw nsw %add, splat (i32 1) + %div = lshr %add1, splat (i32 1) %ret = trunc %div to ret %ret } @@ -362,10 +324,8 @@ define @vaaddu_vx_nxv8i16_ceil( %x, i16 %y) %ysplat = shufflevector %yhead, poison, zeroinitializer %yzv = zext %ysplat to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i32 1, i32 0 - %splat = shufflevector %one, poison, zeroinitializer - %add1 = add nuw nsw %add, %splat - %div = lshr %add1, %splat + %add1 = add nuw nsw %add, splat (i32 1) + %div = lshr %add1, splat (i32 1) %ret = trunc %div to ret %ret } @@ -380,10 +340,8 @@ define @vaaddu_vv_nxv8i32_ceil( %x, %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i64 1, i64 0 - %splat = shufflevector %one, poison, zeroinitializer - %add1 = add nuw nsw %add, %splat - %div = lshr %add1, %splat + %add1 = add nuw nsw %add, splat (i64 1) + %div = lshr %add1, splat (i64 1) %ret = trunc %div to ret %ret } @@ -400,10 +358,8 @@ define @vaaddu_vx_nxv8i32_ceil( %x, i32 %y) %ysplat = shufflevector %yhead, poison, zeroinitializer %yzv = zext %ysplat to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i64 1, i64 0 - %splat = shufflevector %one, poison, zeroinitializer - %add1 = add nuw nsw %add, %splat - %div = lshr %add1, %splat + %add1 = add nuw nsw %add, splat (i64 1) + %div = lshr %add1, splat (i64 1) %ret = trunc %div to ret %ret } @@ -418,10 +374,8 @@ define @vaaddu_vv_nxv8i64_ceil( %x, %x to %yzv = zext %y to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i128 1, i128 0 - %splat = shufflevector %one, poison, zeroinitializer - %add1 = add nuw nsw %add, %splat - %div = lshr %add1, %splat + %add1 = add nuw nsw %add, splat (i128 1) + %div = lshr %add1, splat (i128 1) %ret = trunc %div to ret %ret } @@ -452,10 +406,8 @@ define @vaaddu_vx_nxv8i64_ceil( %x, i64 %y) %ysplat = shufflevector %yhead, poison, zeroinitializer %yzv = zext %ysplat to %add = add nuw nsw %xzv, %yzv - %one = insertelement poison, i128 1, i128 0 - %splat = shufflevector %one, poison, zeroinitializer - 
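; One runnable sketch of the ceil-averaging pattern these vaaddu tests
; exercise (names illustrative): widen both inputs, add one, shift right,
; then narrow back. The widening makes the +1 and the shift overflow-free.
define <vscale x 8 x i8> @avg_ceil_sketch(<vscale x 8 x i8> %x, <vscale x 8 x i8> %y) {
  %xz = zext <vscale x 8 x i8> %x to <vscale x 8 x i16>
  %yz = zext <vscale x 8 x i8> %y to <vscale x 8 x i16>
  ; (x + y + 1) >> 1 computed in the wider type.
  %add = add nuw nsw <vscale x 8 x i16> %xz, %yz
  %add1 = add nuw nsw <vscale x 8 x i16> %add, splat (i16 1)
  %div = lshr <vscale x 8 x i16> %add1, splat (i16 1)
  %ret = trunc <vscale x 8 x i16> %div to <vscale x 8 x i8>
  ret <vscale x 8 x i8> %ret
}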
%add1 = add nuw nsw %add, %splat - %div = lshr %add1, %splat + %add1 = add nuw nsw %add, splat (i128 1) + %div = lshr %add1, splat (i128 1) %ret = trunc %div to ret %ret } diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll index 264cf2e8df096..27fceb0112ae3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll @@ -20,9 +20,7 @@ define @vadd_vx_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 -1) ret %vc } @@ -32,9 +30,7 @@ define @vadd_vx_nxv1i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i8 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 2) ret %vc } @@ -45,11 +41,7 @@ define @vadd_ii_nxv1i8_1() { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 5 ; CHECK-NEXT: ret - %heada = insertelement poison, i8 2, i32 0 - %splata = shufflevector %heada, poison, zeroinitializer - %headb = insertelement poison, i8 3, i32 0 - %splatb = shufflevector %headb, poison, zeroinitializer - %vc = add %splata, %splatb + %vc = add splat (i8 2), splat (i8 3) ret %vc } @@ -71,9 +63,7 @@ define @vadd_vx_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 -1) ret %vc } @@ -83,9 +73,7 @@ define @vadd_vx_nxv2i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i8 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 2) ret %vc } @@ -107,9 +95,7 @@ define @vadd_vx_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 -1) ret %vc } @@ -119,9 +105,7 @@ define @vadd_vx_nxv4i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i8 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 2) ret %vc } @@ -143,9 +127,7 @@ define @vadd_vx_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 -1) ret %vc } @@ -155,9 +137,7 @@ define @vadd_vx_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i8 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 2) ret %vc } @@ -179,9 +159,7 @@ define @vadd_vx_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 
0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 -1) ret %vc } @@ -191,9 +169,7 @@ define @vadd_vx_nxv16i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i8 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 2) ret %vc } @@ -215,9 +191,7 @@ define @vadd_vx_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 -1) ret %vc } @@ -227,9 +201,7 @@ define @vadd_vx_nxv32i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i8 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 2) ret %vc } @@ -251,9 +223,7 @@ define @vadd_vx_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 -1) ret %vc } @@ -263,9 +233,7 @@ define @vadd_vx_nxv64i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i8 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i8 2) ret %vc } @@ -287,9 +255,7 @@ define @vadd_vx_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i16 -1) ret %vc } @@ -299,9 +265,7 @@ define @vadd_vx_nxv1i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i16 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i16 2) ret %vc } @@ -323,9 +287,7 @@ define @vadd_vx_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i16 -1) ret %vc } @@ -335,9 +297,7 @@ define @vadd_vx_nxv2i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i16 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i16 2) ret %vc } @@ -359,9 +319,7 @@ define @vadd_vx_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i16 -1) ret %vc } @@ -371,9 +329,7 @@ define @vadd_vx_nxv4i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i16 2, i32 0 - %splat = shufflevector %head, poison, 
zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i16 2) ret %vc } @@ -395,9 +351,7 @@ define @vadd_vx_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i16 -1) ret %vc } @@ -407,9 +361,7 @@ define @vadd_vx_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i16 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i16 2) ret %vc } @@ -431,9 +383,7 @@ define @vadd_vx_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i16 -1) ret %vc } @@ -443,9 +393,7 @@ define @vadd_vx_nxv16i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i16 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i16 2) ret %vc } @@ -467,9 +415,7 @@ define @vadd_vx_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i16 -1) ret %vc } @@ -479,9 +425,7 @@ define @vadd_vx_nxv32i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i16 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i16 2) ret %vc } @@ -503,9 +447,7 @@ define @vadd_vx_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i32 -1) ret %vc } @@ -515,9 +457,7 @@ define @vadd_vx_nxv1i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i32 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i32 2) ret %vc } @@ -539,9 +479,7 @@ define @vadd_vx_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i32 -1) ret %vc } @@ -551,9 +489,7 @@ define @vadd_vx_nxv2i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i32 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i32 2) ret %vc } @@ -575,9 +511,7 @@ define @vadd_vx_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, 
%splat + %vc = add %va, splat (i32 -1) ret %vc } @@ -587,9 +521,7 @@ define @vadd_vx_nxv4i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i32 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i32 2) ret %vc } @@ -611,9 +543,7 @@ define @vadd_vx_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i32 -1) ret %vc } @@ -623,9 +553,7 @@ define @vadd_vx_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i32 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i32 2) ret %vc } @@ -647,9 +575,7 @@ define @vadd_vx_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i32 -1) ret %vc } @@ -659,9 +585,7 @@ define @vadd_vx_nxv16i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i32 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i32 2) ret %vc } @@ -696,9 +620,7 @@ define @vadd_vx_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i64 -1) ret %vc } @@ -708,9 +630,7 @@ define @vadd_vx_nxv1i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i64 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i64 2) ret %vc } @@ -745,9 +665,7 @@ define @vadd_vx_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i64 -1) ret %vc } @@ -757,9 +675,7 @@ define @vadd_vx_nxv2i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i64 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i64 2) ret %vc } @@ -794,9 +710,7 @@ define @vadd_vx_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i64 -1) ret %vc } @@ -806,9 +720,7 @@ define @vadd_vx_nxv4i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i64 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i64 2) 
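; For reference on the vadd.vi forms checked above: the RVV vector-immediate
; add carries a 5-bit signed immediate, so splat constants in [-16, 15] can
; fold into a single vadd.vi with no scalar register. Sketch (function name
; illustrative):
define <vscale x 4 x i64> @vadd_imm_sketch(<vscale x 4 x i64> %v) {
  ; splat (i64 2) is in the simm5 range and folds into the instruction.
  %r = add <vscale x 4 x i64> %v, splat (i64 2)
  ret <vscale x 4 x i64> %r
}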
ret %vc } @@ -843,9 +755,7 @@ define @vadd_vx_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i64 -1) ret %vc } @@ -855,9 +765,7 @@ define @vadd_vx_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, 2 ; CHECK-NEXT: ret - %head = insertelement poison, i64 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = add %va, %splat + %vc = add %va, splat (i64 2) ret %vc } @@ -922,9 +830,7 @@ define @vadd_vi_mask_nxv8i32( %va, poison, i32 7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %splat, zeroinitializer + %vs = select %mask, splat (i32 7), zeroinitializer %vc = add %va, %vs ret %vc } @@ -937,9 +843,7 @@ define @vadd_vv_mask_negative0_nxv8i32( %va ; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0 ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i32 1, i32 0 - %one = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %vb, %one + %vs = select %mask, %vb, splat (i32 1) %vc = add %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll index 8b65c1a70206e..4b5e737d22eb8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll @@ -36,9 +36,7 @@ define @vadd_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -74,9 +72,7 @@ define @vadd_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -86,9 +82,7 @@ define @vadd_vi_nxv1i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -98,11 +92,7 @@ define @vadd_vi_nxv1i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -124,9 +114,7 @@ define @vadd_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -150,9 +138,7 @@ define @vadd_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i8( %va, %vb, %m, i32 
%evl) + %v = call @llvm.vp.add.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -162,9 +148,7 @@ define @vadd_vi_nxv2i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -174,11 +158,7 @@ define @vadd_vi_nxv2i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -200,9 +180,7 @@ define @vadd_vv_nxv3i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv3i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv3i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -226,9 +204,7 @@ define @vadd_vx_nxv3i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv3i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -238,9 +214,7 @@ define @vadd_vi_nxv3i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv3i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -250,11 +224,7 @@ define @vadd_vi_nxv3i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv3i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -276,9 +246,7 @@ define @vadd_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -302,9 +270,7 @@ define @vadd_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -314,9 +280,7 @@ define @vadd_vi_nxv4i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -326,11 +290,7 @@ define @vadd_vi_nxv4i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = 
insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -352,9 +312,7 @@ define @vadd_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -378,9 +336,7 @@ define @vadd_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -390,9 +346,7 @@ define @vadd_vi_nxv8i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -402,11 +356,7 @@ define @vadd_vi_nxv8i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -428,9 +378,7 @@ define @vadd_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -454,9 +402,7 @@ define @vadd_vx_nxv16i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -466,9 +412,7 @@ define @vadd_vi_nxv16i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv16i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -478,11 +422,7 @@ define @vadd_vi_nxv16i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv16i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -504,9 +444,7 @@ define @vadd_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -530,9 +468,7 @@ define @vadd_vx_nxv32i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, 
i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -542,9 +478,7 @@ define @vadd_vi_nxv32i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv32i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -554,11 +488,7 @@ define @vadd_vi_nxv32i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv32i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -580,9 +510,7 @@ define @vadd_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -606,9 +534,7 @@ define @vadd_vx_nxv64i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -618,9 +544,7 @@ define @vadd_vi_nxv64i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv64i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -630,11 +554,7 @@ define @vadd_vi_nxv64i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv64i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -664,9 +584,7 @@ define @vadd_vi_nxv128i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -688,11 +606,7 @@ define @vadd_vi_nxv128i8_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv128i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -714,9 +628,7 @@ define @vadd_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i16( %va, %b, %m, i32 %evl) + %v = call 
@llvm.vp.add.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -740,9 +652,7 @@ define @vadd_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -752,9 +662,7 @@ define @vadd_vi_nxv1i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -764,11 +672,7 @@ define @vadd_vi_nxv1i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -790,9 +694,7 @@ define @vadd_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -816,9 +718,7 @@ define @vadd_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -828,9 +728,7 @@ define @vadd_vi_nxv2i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -840,11 +738,7 @@ define @vadd_vi_nxv2i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -866,9 +760,7 @@ define @vadd_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -892,9 +784,7 @@ define @vadd_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -904,9 +794,7 @@ define @vadd_vi_nxv4i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, 
zeroinitializer - %v = call @llvm.vp.add.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -916,11 +804,7 @@ define @vadd_vi_nxv4i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -942,9 +826,7 @@ define @vadd_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -968,9 +850,7 @@ define @vadd_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -980,9 +860,7 @@ define @vadd_vi_nxv8i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -992,11 +870,7 @@ define @vadd_vi_nxv8i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1018,9 +892,7 @@ define @vadd_vv_nxv16i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1044,9 +916,7 @@ define @vadd_vx_nxv16i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1056,9 +926,7 @@ define @vadd_vi_nxv16i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv16i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1068,11 +936,7 @@ define @vadd_vi_nxv16i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - 
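; The unmasked vp.add cases above pass splat (i1 true) as the mask operand.
; A self-contained sketch with the intrinsic declared; types mirror the
; nxv1i8 tests, and the function name is illustrative:
define <vscale x 1 x i8> @vp_add_unmasked_sketch(<vscale x 1 x i8> %v, i32 zeroext %evl) {
  ; All-true mask: every lane up to %evl participates in the add.
  %r = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %v, <vscale x 1 x i8> splat (i8 -1), <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i8> %r
}
declare <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)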
%m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv16i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1094,9 +958,7 @@ define @vadd_vv_nxv32i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1120,9 +982,7 @@ define @vadd_vx_nxv32i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1132,9 +992,7 @@ define @vadd_vi_nxv32i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv32i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1144,11 +1002,7 @@ define @vadd_vi_nxv32i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv32i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1170,9 +1024,7 @@ define @vadd_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1196,9 +1048,7 @@ define @vadd_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1208,9 +1058,7 @@ define @vadd_vi_nxv1i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1220,11 +1068,7 @@ define @vadd_vi_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1246,9 +1090,7 @@ define @vadd_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i32( %va, %b, splat (i1 true), i32 
%evl) ret %v } @@ -1272,9 +1114,7 @@ define @vadd_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1284,9 +1124,7 @@ define @vadd_vi_nxv2i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1296,11 +1134,7 @@ define @vadd_vi_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1322,9 +1156,7 @@ define @vadd_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1348,9 +1180,7 @@ define @vadd_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1360,9 +1190,7 @@ define @vadd_vi_nxv4i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1372,11 +1200,7 @@ define @vadd_vi_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1398,9 +1222,7 @@ define @vadd_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1424,9 +1246,7 @@ define @vadd_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1436,9 +1256,7 @@ define @vadd_vi_nxv8i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call 
@llvm.vp.add.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1448,11 +1266,7 @@ define @vadd_vi_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1474,9 +1288,7 @@ define @vadd_vv_nxv16i32_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1500,9 +1312,7 @@ define @vadd_vx_nxv16i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1512,9 +1322,7 @@ define @vadd_vi_nxv16i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv16i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1524,11 +1332,7 @@ define @vadd_vi_nxv16i32_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv16i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1559,9 +1363,7 @@ define @vadd_vi_nxv32i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1583,11 +1385,7 @@ define @vadd_vi_nxv32i32_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv32i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1621,11 +1419,9 @@ define @vadd_vi_nxv32i32_evl_nx8( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer %evl = call i32 @llvm.vscale.i32() %evl0 = mul i32 %evl, 8 - %v = call @llvm.vp.add.nxv32i32( %va, %vb, %m, i32 %evl0) + %v = call @llvm.vp.add.nxv32i32( %va, splat (i32 -1), %m, i32 %evl0) ret %v } @@ -1659,11 +1455,9 @@ define @vadd_vi_nxv32i32_evl_nx16( %va, < ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret - %elt.head 
= insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer %evl = call i32 @llvm.vscale.i32() %evl0 = mul i32 %evl, 16 - %v = call @llvm.vp.add.nxv32i32( %va, %vb, %m, i32 %evl0) + %v = call @llvm.vp.add.nxv32i32( %va, splat (i32 -1), %m, i32 %evl0) ret %v } @@ -1685,9 +1479,7 @@ define @vadd_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1739,9 +1531,7 @@ define @vadd_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1751,9 +1541,7 @@ define @vadd_vi_nxv1i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1763,11 +1551,7 @@ define @vadd_vi_nxv1i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv1i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1789,9 +1573,7 @@ define @vadd_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1843,9 +1625,7 @@ define @vadd_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1855,9 +1635,7 @@ define @vadd_vi_nxv2i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1867,11 +1645,7 @@ define @vadd_vi_nxv2i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv2i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1893,9 +1667,7 @@ define @vadd_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1947,9 +1719,7 @@ 
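The vadd-vp.ll hunks above are all the same mechanical rewrite: the two-instruction splat idiom (insertelement into a poison vector, then a shufflevector with a zeroinitializer mask) becomes the splat constant shorthand, and the unmasked @llvm.vp.add calls pass splat (i1 true) directly as the mask operand. A minimal sketch of the equivalence, with illustrative function names and the scalable-vector types written out in full (the hunks above elide them):

declare <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i16> @splat_idiom_before(<vscale x 4 x i16> %va, i32 zeroext %evl) {
  ; Old spelling: materialize an all-(-1) vector, then an all-true mask,
  ; one element at a time.
  %elt.head = insertelement <vscale x 4 x i16> poison, i16 -1, i32 0
  %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %v
}

define <vscale x 4 x i16> @splat_idiom_after(<vscale x 4 x i16> %va, i32 zeroext %evl) {
  ; New spelling: the same constants written directly; splat (i1 true)
  ; makes the VP intrinsic behave as unmasked for the first %evl lanes.
  %v = call <vscale x 4 x i16> @llvm.vp.add.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> splat (i16 -1), <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i16> %v
}

Both spellings select the same vadd.vi v8, v8, -1 under the checked vsetvli configuration, which is why the CHECK lines in these hunks are untouched; only the IR input changes.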
define @vadd_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1959,9 +1729,7 @@ define @vadd_vi_nxv4i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1971,11 +1739,7 @@ define @vadd_vi_nxv4i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv4i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1997,9 +1761,7 @@ define @vadd_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -2051,9 +1813,7 @@ define @vadd_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -2063,9 +1823,7 @@ define @vadd_vi_nxv8i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -2075,10 +1833,6 @@ define @vadd_vi_nxv8i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.add.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.add.nxv8i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll index f10954bb02dfe..40d0d9aa9d1d6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll @@ -30,9 +30,7 @@ define @vand_vi_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 -10) ret %vc } @@ -42,9 +40,7 @@ define @vand_vi_nxv1i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 8) ret %vc } @@ -55,9 
+51,7 @@ define @vand_vi_nxv1i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 16) ret %vc } @@ -89,9 +83,7 @@ define @vand_vi_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 -10) ret %vc } @@ -101,9 +93,7 @@ define @vand_vi_nxv2i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 8) ret %vc } @@ -114,9 +104,7 @@ define @vand_vi_nxv2i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 16) ret %vc } @@ -148,9 +136,7 @@ define @vand_vi_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 -10) ret %vc } @@ -160,9 +146,7 @@ define @vand_vi_nxv4i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 8) ret %vc } @@ -173,9 +157,7 @@ define @vand_vi_nxv4i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 16) ret %vc } @@ -207,9 +189,7 @@ define @vand_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 -10) ret %vc } @@ -219,9 +199,7 @@ define @vand_vi_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 8) ret %vc } @@ -232,9 +210,7 @@ define @vand_vi_nxv8i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 16) ret %vc } @@ -266,9 +242,7 @@ define @vand_vi_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 -10) ret %vc } @@ -278,9 +252,7 @@ define @vand_vi_nxv16i8_1( %va) { ; CHECK-NEXT: 
vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 8) ret %vc } @@ -291,9 +263,7 @@ define @vand_vi_nxv16i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 16) ret %vc } @@ -325,9 +295,7 @@ define @vand_vi_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 -10) ret %vc } @@ -337,9 +305,7 @@ define @vand_vi_nxv32i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 8) ret %vc } @@ -350,9 +316,7 @@ define @vand_vi_nxv32i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 16) ret %vc } @@ -384,9 +348,7 @@ define @vand_vi_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 -10) ret %vc } @@ -396,9 +358,7 @@ define @vand_vi_nxv64i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 8) ret %vc } @@ -409,9 +369,7 @@ define @vand_vi_nxv64i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i8 16) ret %vc } @@ -443,9 +401,7 @@ define @vand_vi_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 -10) ret %vc } @@ -455,9 +411,7 @@ define @vand_vi_nxv1i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 8) ret %vc } @@ -468,9 +422,7 @@ define @vand_vi_nxv1i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 16) ret %vc } @@ -502,9 +454,7 @@ define @vand_vi_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; 
CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 -10) ret %vc } @@ -514,9 +464,7 @@ define @vand_vi_nxv2i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 8) ret %vc } @@ -527,9 +475,7 @@ define @vand_vi_nxv2i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 16) ret %vc } @@ -561,9 +507,7 @@ define @vand_vi_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 -10) ret %vc } @@ -573,9 +517,7 @@ define @vand_vi_nxv4i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 8) ret %vc } @@ -586,9 +528,7 @@ define @vand_vi_nxv4i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 16) ret %vc } @@ -620,9 +560,7 @@ define @vand_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 -10) ret %vc } @@ -632,9 +570,7 @@ define @vand_vi_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 8) ret %vc } @@ -645,9 +581,7 @@ define @vand_vi_nxv8i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 16) ret %vc } @@ -679,9 +613,7 @@ define @vand_vi_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 -10) ret %vc } @@ -691,9 +623,7 @@ define @vand_vi_nxv16i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 8) ret %vc } @@ -704,9 +634,7 @@ define @vand_vi_nxv16i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: 
vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 16) ret %vc } @@ -738,9 +666,7 @@ define @vand_vi_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 -10) ret %vc } @@ -750,9 +676,7 @@ define @vand_vi_nxv32i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 8) ret %vc } @@ -763,9 +687,7 @@ define @vand_vi_nxv32i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i16 16) ret %vc } @@ -797,9 +719,7 @@ define @vand_vi_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 -10) ret %vc } @@ -809,9 +729,7 @@ define @vand_vi_nxv1i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 8) ret %vc } @@ -822,9 +740,7 @@ define @vand_vi_nxv1i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 16) ret %vc } @@ -856,9 +772,7 @@ define @vand_vi_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 -10) ret %vc } @@ -868,9 +782,7 @@ define @vand_vi_nxv2i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 8) ret %vc } @@ -881,9 +793,7 @@ define @vand_vi_nxv2i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 16) ret %vc } @@ -915,9 +825,7 @@ define @vand_vi_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 -10) ret %vc } @@ -927,9 +835,7 @@ define @vand_vi_nxv4i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vand.vi v8, 
v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 8) ret %vc } @@ -940,9 +846,7 @@ define @vand_vi_nxv4i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 16) ret %vc } @@ -974,9 +878,7 @@ define @vand_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 -10) ret %vc } @@ -986,9 +888,7 @@ define @vand_vi_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 8) ret %vc } @@ -999,9 +899,7 @@ define @vand_vi_nxv8i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 16) ret %vc } @@ -1033,9 +931,7 @@ define @vand_vi_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 -10) ret %vc } @@ -1045,9 +941,7 @@ define @vand_vi_nxv16i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 8) ret %vc } @@ -1058,9 +952,7 @@ define @vand_vi_nxv16i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i32 16) ret %vc } @@ -1105,9 +997,7 @@ define @vand_vi_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i64 -10) ret %vc } @@ -1117,9 +1007,7 @@ define @vand_vi_nxv1i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i64 8) ret %vc } @@ -1130,9 +1018,7 @@ define @vand_vi_nxv1i64_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i64 16) ret %vc } @@ -1177,9 +1063,7 @@ define @vand_vi_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; 
CHECK-NEXT: ret - %head = insertelement poison, i64 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i64 -10) ret %vc } @@ -1189,9 +1073,7 @@ define @vand_vi_nxv2i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i64 8) ret %vc } @@ -1202,9 +1084,7 @@ define @vand_vi_nxv2i64_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i64 16) ret %vc } @@ -1249,9 +1129,7 @@ define @vand_vi_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i64 -10) ret %vc } @@ -1261,9 +1139,7 @@ define @vand_vi_nxv4i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i64 8) ret %vc } @@ -1274,9 +1150,7 @@ define @vand_vi_nxv4i64_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i64 16) ret %vc } @@ -1321,9 +1195,7 @@ define @vand_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, -10 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -10, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i64 -10) ret %vc } @@ -1333,9 +1205,7 @@ define @vand_vi_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i64 8) ret %vc } @@ -1346,9 +1216,7 @@ define @vand_vi_nxv8i64_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = and %va, %splat + %vc = and %va, splat (i64 16) ret %vc } @@ -1389,9 +1257,7 @@ define @vand_vv_mask_nxv8i32( %va, poison, i32 -1, i32 0 - %allones = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %vb, %allones + %vs = select %mask, %vb, splat (i32 -1) %vc = and %va, %vs ret %vc } @@ -1402,11 +1268,9 @@ define @vand_vx_mask_nxv8i32( %va, i32 sign ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu ; CHECK-NEXT: vand.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 -1, i32 0 - %allones = shufflevector %head1, poison, zeroinitializer %head2 = insertelement poison, i32 %b, i32 0 %splat = shufflevector %head2, poison, zeroinitializer - %vs = select %mask, %splat, %allones + %vs = select %mask, %splat, splat (i32 -1) %vc = and %va, %vs ret %vc } @@ -1417,11 +1281,7 @@ define @vand_vi_mask_nxv8i32( 
%va, poison, i32 -1, i32 0 - %allones = shufflevector %head1, poison, zeroinitializer - %head2 = insertelement poison, i32 7, i32 0 - %splat = shufflevector %head2, poison, zeroinitializer - %vs = select %mask, %splat, %allones + %vs = select %mask, splat (i32 7), splat (i32 -1) %vc = and %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll index 032c3a014eca9..7b4a68d5867f9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll @@ -36,9 +36,7 @@ define @vand_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -62,9 +60,7 @@ define @vand_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -74,9 +70,7 @@ define @vand_vi_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -86,11 +80,7 @@ define @vand_vi_nxv1i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -112,9 +102,7 @@ define @vand_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -138,9 +126,7 @@ define @vand_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -150,9 +136,7 @@ define @vand_vi_nxv2i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -162,11 +146,7 @@ define @vand_vi_nxv2i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -188,9 +168,7 @@ define @vand_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, 
poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -214,9 +192,7 @@ define @vand_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -226,9 +202,7 @@ define @vand_vi_nxv4i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -238,11 +212,7 @@ define @vand_vi_nxv4i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -264,9 +234,7 @@ define @vand_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -290,9 +258,7 @@ define @vand_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -302,9 +268,7 @@ define @vand_vi_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -314,11 +278,7 @@ define @vand_vi_nxv8i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -340,9 +300,7 @@ define @vand_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -366,9 +324,7 @@ define @vand_vx_nxv16i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -378,9 +334,7 @@ define @vand_vi_nxv16i8( %va, 
poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv16i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -390,11 +344,7 @@ define @vand_vi_nxv16i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv16i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -416,9 +366,7 @@ define @vand_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -442,9 +390,7 @@ define @vand_vx_nxv32i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -454,9 +400,7 @@ define @vand_vi_nxv32i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv32i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -466,11 +410,7 @@ define @vand_vi_nxv32i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv32i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -492,9 +432,7 @@ define @vand_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -518,9 +456,7 @@ define @vand_vx_nxv64i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -530,9 +466,7 @@ define @vand_vi_nxv64i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv64i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -542,11 +476,7 @@ define @vand_vi_nxv64i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv64i8( %va, %vb, %m, 
i32 %evl) + %v = call @llvm.vp.and.nxv64i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -568,9 +498,7 @@ define @vand_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -594,9 +522,7 @@ define @vand_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -606,9 +532,7 @@ define @vand_vi_nxv1i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -618,11 +542,7 @@ define @vand_vi_nxv1i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -644,9 +564,7 @@ define @vand_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -670,9 +588,7 @@ define @vand_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -682,9 +598,7 @@ define @vand_vi_nxv2i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -694,11 +608,7 @@ define @vand_vi_nxv2i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -720,9 +630,7 @@ define @vand_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -746,9 +654,7 @@ define @vand_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = 
shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -758,9 +664,7 @@ define @vand_vi_nxv4i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -770,11 +674,7 @@ define @vand_vi_nxv4i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -796,9 +696,7 @@ define @vand_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -822,9 +720,7 @@ define @vand_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -834,9 +730,7 @@ define @vand_vi_nxv8i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -846,11 +740,7 @@ define @vand_vi_nxv8i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -872,9 +762,7 @@ define @vand_vv_nxv14i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv14i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv14i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -898,9 +786,7 @@ define @vand_vx_nxv14i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv14i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv14i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -910,9 +796,7 @@ define @vand_vi_nxv14i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv14i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv14i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -922,11 +806,7 @@ define 
@vand_vi_nxv14i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv14i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv14i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -948,9 +828,7 @@ define @vand_vv_nxv16i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -974,9 +852,7 @@ define @vand_vx_nxv16i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -986,9 +862,7 @@ define @vand_vi_nxv16i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv16i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -998,11 +872,7 @@ define @vand_vi_nxv16i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv16i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -1024,9 +894,7 @@ define @vand_vv_nxv32i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1062,9 +930,7 @@ define @vand_vx_nxv32i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1074,9 +940,7 @@ define @vand_vi_nxv32i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv32i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -1086,11 +950,7 @@ define @vand_vi_nxv32i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = 
call @llvm.vp.and.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv32i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -1112,9 +972,7 @@ define @vand_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1138,9 +996,7 @@ define @vand_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1150,9 +1006,7 @@ define @vand_vi_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i32( %va, splat (i32 4), %m, i32 %evl) ret %v } @@ -1162,11 +1016,7 @@ define @vand_vi_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i32( %va, splat (i32 4), splat (i1 true), i32 %evl) ret %v } @@ -1188,9 +1038,7 @@ define @vand_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1214,9 +1062,7 @@ define @vand_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1226,9 +1072,7 @@ define @vand_vi_nxv2i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i32( %va, splat (i32 4), %m, i32 %evl) ret %v } @@ -1238,11 +1082,7 @@ define @vand_vi_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i32( %va, splat (i32 4), splat (i1 true), i32 %evl) ret %v } @@ -1264,9 +1104,7 @@ define @vand_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1290,9 +1128,7 @@ define @vand_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer 
- %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1302,9 +1138,7 @@ define @vand_vi_nxv4i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i32( %va, splat (i32 4), %m, i32 %evl) ret %v } @@ -1314,11 +1148,7 @@ define @vand_vi_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i32( %va, splat (i32 4), splat (i1 true), i32 %evl) ret %v } @@ -1340,9 +1170,7 @@ define @vand_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1366,9 +1194,7 @@ define @vand_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1378,9 +1204,7 @@ define @vand_vi_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i32( %va, splat (i32 4), %m, i32 %evl) ret %v } @@ -1390,11 +1214,7 @@ define @vand_vi_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i32( %va, splat (i32 4), splat (i1 true), i32 %evl) ret %v } @@ -1416,9 +1236,7 @@ define @vand_vv_nxv16i32_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1442,9 +1260,7 @@ define @vand_vx_nxv16i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1454,9 +1270,7 @@ define @vand_vi_nxv16i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv16i32( %va, 
splat (i32 4), %m, i32 %evl) ret %v } @@ -1466,11 +1280,7 @@ define @vand_vi_nxv16i32_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv16i32( %va, splat (i32 4), splat (i1 true), i32 %evl) ret %v } @@ -1492,9 +1302,7 @@ define @vand_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1546,9 +1354,7 @@ define @vand_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1558,9 +1364,7 @@ define @vand_vi_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i64( %va, splat (i64 4), %m, i32 %evl) ret %v } @@ -1570,11 +1374,7 @@ define @vand_vi_nxv1i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv1i64( %va, splat (i64 4), splat (i1 true), i32 %evl) ret %v } @@ -1596,9 +1396,7 @@ define @vand_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1650,9 +1448,7 @@ define @vand_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1662,9 +1458,7 @@ define @vand_vi_nxv2i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i64( %va, splat (i64 4), %m, i32 %evl) ret %v } @@ -1674,11 +1468,7 @@ define @vand_vi_nxv2i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv2i64( %va, splat (i64 4), splat (i1 true), i32 %evl) ret %v } @@ -1700,9 +1490,7 @@ define 
@vand_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1754,9 +1542,7 @@ define @vand_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1766,9 +1552,7 @@ define @vand_vi_nxv4i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i64( %va, splat (i64 4), %m, i32 %evl) ret %v } @@ -1778,11 +1562,7 @@ define @vand_vi_nxv4i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv4i64( %va, splat (i64 4), splat (i1 true), i32 %evl) ret %v } @@ -1804,9 +1584,7 @@ define @vand_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1858,9 +1636,7 @@ define @vand_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1870,9 +1646,7 @@ define @vand_vi_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i64( %va, splat (i64 4), %m, i32 %evl) ret %v } @@ -1882,10 +1656,6 @@ define @vand_vi_nxv8i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.and.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.and.nxv8i64( %va, splat (i64 4), splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll index f9c53c93472a6..f25a3f937f1b4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vandn-sdnode.ll @@ -17,9 +17,7 @@ define @vandn_vv_nxv1i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %a, %y ret %b } @@ -37,9 +35,7 @@ define @vandn_vv_swapped_nxv1i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b 
= and %y, %a ret %b } @@ -97,9 +93,7 @@ define @vandn_vv_nxv2i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %a, %y ret %b } @@ -117,9 +111,7 @@ define @vandn_vv_swapped_nxv2i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %y, %a ret %b } @@ -177,9 +169,7 @@ define @vandn_vv_nxv4i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %a, %y ret %b } @@ -197,9 +187,7 @@ define @vandn_vv_swapped_nxv4i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %y, %a ret %b } @@ -257,9 +245,7 @@ define @vandn_vv_nxv8i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %a, %y ret %b } @@ -277,9 +263,7 @@ define @vandn_vv_swapped_nxv8i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %y, %a ret %b } @@ -337,9 +321,7 @@ define @vandn_vv_nxv16i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %a, %y ret %b } @@ -357,9 +339,7 @@ define @vandn_vv_swapped_nxv16i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %y, %a ret %b } @@ -417,9 +397,7 @@ define @vandn_vv_nxv32i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %a, %y ret %b } @@ -437,9 +415,7 @@ define @vandn_vv_swapped_nxv32i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %y, %a ret %b } @@ -497,9 +473,7 @@ define @vandn_vv_nxv64i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %a, %y ret %b } @@ -517,9 +491,7 @@ define @vandn_vv_swapped_nxv64i8( %x, poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i8 -1) %b = and %y, %a ret %b } @@ -577,9 +549,7 @@ define @vandn_vv_nxv1i16( %x, poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i16 -1) %b = and %a, %y ret %b } @@ -597,9 +567,7 @@ define @vandn_vv_swapped_nxv1i16( %x, poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i16 -1) %b = and %y, %a ret %b } @@ -657,9 +625,7 @@ define @vandn_vv_nxv2i16( %x, poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i16 -1) %b = and %a, %y ret %b } @@ -677,9 +643,7 @@ define @vandn_vv_swapped_nxv2i16( %x, poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i16 -1) %b = and %y, %a ret %b } @@ -737,9 +701,7 @@ define @vandn_vv_nxv4i16( %x, poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i16 -1) %b = and %a, %y ret %b } @@ -757,9 
+719,7 @@ define @vandn_vv_swapped_nxv4i16( %x, poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i16 -1) %b = and %y, %a ret %b } @@ -817,9 +777,7 @@ define @vandn_vv_nxv8i16( %x, poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i16 -1) %b = and %a, %y ret %b } @@ -837,9 +795,7 @@ define @vandn_vv_swapped_nxv8i16( %x, poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i16 -1) %b = and %y, %a ret %b } @@ -897,9 +853,7 @@ define @vandn_vv_nxv16i16( %x, poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i16 -1) %b = and %a, %y ret %b } @@ -917,9 +871,7 @@ define @vandn_vv_swapped_nxv16i16( %x, poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i16 -1) %b = and %y, %a ret %b } @@ -977,9 +929,7 @@ define @vandn_vv_nxv32i16( %x, poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i16 -1) %b = and %a, %y ret %b } @@ -997,9 +947,7 @@ define @vandn_vv_swapped_nxv32i16( %x, poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i16 -1) %b = and %y, %a ret %b } @@ -1057,9 +1005,7 @@ define @vandn_vv_nxv1i32( %x, poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i32 -1) %b = and %a, %y ret %b } @@ -1077,9 +1023,7 @@ define @vandn_vv_swapped_nxv1i32( %x, poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i32 -1) %b = and %y, %a ret %b } @@ -1137,9 +1081,7 @@ define @vandn_vv_nxv2i32( %x, poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i32 -1) %b = and %a, %y ret %b } @@ -1157,9 +1099,7 @@ define @vandn_vv_swapped_nxv2i32( %x, poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i32 -1) %b = and %y, %a ret %b } @@ -1217,9 +1157,7 @@ define @vandn_vv_nxv4i32( %x, poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i32 -1) %b = and %a, %y ret %b } @@ -1237,9 +1175,7 @@ define @vandn_vv_swapped_nxv4i32( %x, poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i32 -1) %b = and %y, %a ret %b } @@ -1297,9 +1233,7 @@ define @vandn_vv_nxv8i32( %x, poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i32 -1) %b = and %a, %y ret %b } @@ -1317,9 +1251,7 @@ define @vandn_vv_swapped_nxv8i32( %x, poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i32 -1) %b = and %y, %a ret %b } @@ -1377,9 +1309,7 @@ define @vandn_vv_nxv16i32( %x, poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i32 -1) %b = and %a, %y ret %b } @@ -1397,9 +1327,7 @@ define @vandn_vv_swapped_nxv16i32( %x, poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i32 -1) %b = 
and %y, %a ret %b } @@ -1457,9 +1385,7 @@ define @vandn_vv_nxv1i64( %x, poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i64 -1) %b = and %a, %y ret %b } @@ -1477,9 +1403,7 @@ define @vandn_vv_swapped_nxv1i64( %x, poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i64 -1) %b = and %y, %a ret %b } @@ -1597,9 +1521,7 @@ define @vandn_vv_nxv2i64( %x, poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i64 -1) %b = and %a, %y ret %b } @@ -1617,9 +1539,7 @@ define @vandn_vv_swapped_nxv2i64( %x, poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i64 -1) %b = and %y, %a ret %b } @@ -1737,9 +1657,7 @@ define @vandn_vv_nxv4i64( %x, poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i64 -1) %b = and %a, %y ret %b } @@ -1757,9 +1675,7 @@ define @vandn_vv_swapped_nxv4i64( %x, poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i64 -1) %b = and %y, %a ret %b } @@ -1877,9 +1793,7 @@ define @vandn_vv_nxv8i64( %x, poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i64 -1) %b = and %a, %y ret %b } @@ -1897,9 +1811,7 @@ define @vandn_vv_swapped_nxv8i64( %x, poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %a = xor %x, %splat + %a = xor %x, splat (i64 -1) %b = and %y, %a ret %b } diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll index 4de71b6ce06fb..939a45e15c103 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll @@ -48,9 +48,7 @@ define @vfsgnj_vv_nxv1f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.copysign.nxv1f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.copysign.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -94,9 +92,7 @@ define @vfsgnj_vv_nxv2f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.copysign.nxv2f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.copysign.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -140,9 +136,7 @@ define @vfsgnj_vv_nxv4f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.copysign.nxv4f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.copysign.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -186,9 +180,7 @@ define @vfsgnj_vv_nxv8f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.copysign.nxv8f16( %va, %vb, %m, i32 %evl) + %v = call 
@llvm.vp.copysign.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -232,9 +224,7 @@ define @vfsgnj_vv_nxv16f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.copysign.nxv16f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.copysign.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -311,8 +301,6 @@ define @vfsgnj_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -320,6 +308,8 @@ define @vfsgnj_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -348,9 +338,7 @@ define @vfsgnj_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.copysign.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.copysign.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -372,9 +360,7 @@ define @vfsgnj_vv_nxv1f32_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.copysign.nxv1f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.copysign.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -396,9 +382,7 @@ define @vfsgnj_vv_nxv2f32_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.copysign.nxv2f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.copysign.nxv2f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -420,9 +404,7 @@ define @vfsgnj_vv_nxv4f32_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.copysign.nxv4f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.copysign.nxv4f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -444,9 +426,7 @@ define @vfsgnj_vv_nxv8f32_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.copysign.nxv8f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.copysign.nxv8f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -468,9 +448,7 @@ define @vfsgnj_vv_nxv16f32_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector 
%head, poison, zeroinitializer
-  %v = call <vscale x 16 x float> @llvm.vp.copysign.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  %v = call <vscale x 16 x float> @llvm.vp.copysign.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x float> %v
 }

@@ -492,9 +470,7 @@ define @vfsgnj_vv_nxv1f64_unmasked( %
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x double> @llvm.vp.copysign.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x double> @llvm.vp.copysign.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x double> %v
 }

@@ -516,9 +492,7 @@ define @vfsgnj_vv_nxv2f64_unmasked( %
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT:    vfsgnj.vv v8, v8, v10
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x double> @llvm.vp.copysign.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  %v = call <vscale x 2 x double> @llvm.vp.copysign.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x double> %v
 }

@@ -540,9 +514,7 @@ define @vfsgnj_vv_nxv4f64_unmasked( %
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT:    vfsgnj.vv v8, v8, v12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x double> @llvm.vp.copysign.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  %v = call <vscale x 4 x double> @llvm.vp.copysign.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x double> %v
 }

@@ -564,8 +536,6 @@ define @vfsgnj_vv_nxv8f64_unmasked( %
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT:    vfsgnj.vv v8, v8, v16
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %v = call <vscale x 8 x double> @llvm.vp.copysign.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x double> @llvm.vp.copysign.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x double> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
index 0028ac88cc4fe..ef9b2104b2d2d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
@@ -37,9 +37,7 @@ define @vdiv_vi_nxv1i8_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v9, v8, 7
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i8> poison, i8 -7, i32 0
-  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
-  %vc = sdiv <vscale x 1 x i8> %va, %splat
+  %vc = sdiv <vscale x 1 x i8> %va, splat (i8 -7)
   ret <vscale x 1 x i8> %vc
 }

@@ -48,9 +46,7 @@ define @vdiv_vi_nxv1i8_1( %va) {
 ; CHECK-LABEL: vdiv_vi_nxv1i8_1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i8> poison, i8 1, i32 0
-  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
-  %vc = sdiv <vscale x 1 x i8> %va, %splat
+  %vc = sdiv <vscale x 1 x i8> %va, splat (i8 1)
   ret <vscale x 1 x i8> %vc
 }

@@ -61,9 +57,7 @@ define @vdiv_iv_nxv1i8_0( %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i8> poison, i8 0, i32 0
-  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
-  %vc = sdiv <vscale x 1 x i8> %splat, %va
+  %vc = sdiv <vscale x 1 x i8> splat (i8 0), %va
   ret <vscale x 1 x i8> %vc
 }

@@ -100,9 +94,7 @@ define @vdiv_vi_nxv2i8_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v9, v8, 7
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i8> poison, i8 -7, i32 0
-  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
-  %vc = sdiv <vscale x 2 x i8> %va, %splat
+  %vc = sdiv <vscale x 2 x i8> %va, splat (i8 -7)
   ret <vscale x 2 x i8> %vc
 }

@@ -139,9 +131,7 @@ define @vdiv_vi_nxv4i8_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v9, v8, 7
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i8> poison, i8 -7, i32 0
-  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
-  %vc = sdiv <vscale x 4 x i8> %va, %splat
+  %vc = sdiv <vscale x 4 x i8> %va, splat (i8 -7)
   ret <vscale x 4 x i8> %vc
 }
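The hunks above and below all apply the same mechanical rewrite: the two-instruction scalable-vector splat idiom is replaced by the `splat` shorthand constant. A minimal standalone sketch of the before/after pattern, assuming an LLVM recent enough to accept the shorthand syntax; the function names here are invented for illustration and do not appear in the patch:

; Old form: a splat is materialized with insertelement + shufflevector
; and bound to a named value before use.
define <vscale x 4 x i32> @splat_idiom_old(<vscale x 4 x i32> %va) {
  %head = insertelement <vscale x 4 x i32> poison, i32 7, i32 0
  %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %vc = udiv <vscale x 4 x i32> %va, %splat
  ret <vscale x 4 x i32> %vc
}

; New form: the same splat spelled as a first-class constant operand.
define <vscale x 4 x i32> @splat_idiom_new(<vscale x 4 x i32> %va) {
  %vc = udiv <vscale x 4 x i32> %va, splat (i32 7)
  ret <vscale x 4 x i32> %vc
}

The two definitions are semantically identical; the same shorthand also replaces the all-true mask operands of the @llvm.vp.* intrinsic calls in these tests, where `splat (i1 true)` stands in for the former %head/%m insertelement–shufflevector pair.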
@@ -178,9 +168,7 @@ define @vdiv_vi_nxv8i8_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v9, v8, 7
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i8> poison, i8 -7, i32 0
-  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
-  %vc = sdiv <vscale x 8 x i8> %va, %splat
+  %vc = sdiv <vscale x 8 x i8> %va, splat (i8 -7)
   ret <vscale x 8 x i8> %vc
 }

@@ -217,9 +205,7 @@ define @vdiv_vi_nxv16i8_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v10, v8, 7
 ; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 16 x i8> poison, i8 -7, i32 0
-  %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-  %vc = sdiv <vscale x 16 x i8> %va, %splat
+  %vc = sdiv <vscale x 16 x i8> %va, splat (i8 -7)
   ret <vscale x 16 x i8> %vc
 }

@@ -256,9 +242,7 @@ define @vdiv_vi_nxv32i8_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v12, v8, 7
 ; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 32 x i8> poison, i8 -7, i32 0
-  %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
-  %vc = sdiv <vscale x 32 x i8> %va, %splat
+  %vc = sdiv <vscale x 32 x i8> %va, splat (i8 -7)
   ret <vscale x 32 x i8> %vc
 }

@@ -295,9 +279,7 @@ define @vdiv_vi_nxv64i8_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v16, v8, 7
 ; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 64 x i8> poison, i8 -7, i32 0
-  %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
-  %vc = sdiv <vscale x 64 x i8> %va, %splat
+  %vc = sdiv <vscale x 64 x i8> %va, splat (i8 -7)
   ret <vscale x 64 x i8> %vc
 }

@@ -334,9 +316,7 @@ define @vdiv_vi_nxv1i16_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v9, v8, 15
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i16> poison, i16 -7, i32 0
-  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
-  %vc = sdiv <vscale x 1 x i16> %va, %splat
+  %vc = sdiv <vscale x 1 x i16> %va, splat (i16 -7)
   ret <vscale x 1 x i16> %vc
 }

@@ -373,9 +353,7 @@ define @vdiv_vi_nxv2i16_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v9, v8, 15
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i16> poison, i16 -7, i32 0
-  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
-  %vc = sdiv <vscale x 2 x i16> %va, %splat
+  %vc = sdiv <vscale x 2 x i16> %va, splat (i16 -7)
   ret <vscale x 2 x i16> %vc
 }

@@ -412,9 +390,7 @@ define @vdiv_vi_nxv4i16_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v9, v8, 15
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i16> poison, i16 -7, i32 0
-  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
-  %vc = sdiv <vscale x 4 x i16> %va, %splat
+  %vc = sdiv <vscale x 4 x i16> %va, splat (i16 -7)
   ret <vscale x 4 x i16> %vc
 }

@@ -451,9 +427,7 @@ define @vdiv_vi_nxv8i16_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v10, v8, 15
 ; CHECK-NEXT:    vadd.vv v8, v8, v10
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i16> poison, i16 -7, i32 0
-  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-  %vc = sdiv <vscale x 8 x i16> %va, %splat
+  %vc = sdiv <vscale x 8 x i16> %va, splat (i16 -7)
   ret <vscale x 8 x i16> %vc
 }

@@ -490,9 +464,7 @@ define @vdiv_vi_nxv16i16_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v12, v8, 15
 ; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 16 x i16> poison, i16 -7, i32 0
-  %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
-  %vc = sdiv <vscale x 16 x i16> %va, %splat
+  %vc = sdiv <vscale x 16 x i16> %va, splat (i16 -7)
   ret <vscale x 16 x i16> %vc
 }

@@ -529,9 +501,7 @@ define @vdiv_vi_nxv32i16_0( %va) {
 ; CHECK-NEXT:    vsrl.vi v16, v8, 15
 ; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 32 x i16> poison, i16 -7, i32 0
-  %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
-  %vc = sdiv <vscale x 32 x i16> %va, %splat
+  %vc = sdiv <vscale x 32 x i16> %va, splat (i16 -7)
   ret <vscale x 32 x i16> %vc
 }

@@ -581,9 +551,7 @@ define @vdiv_vi_nxv1i32_0( %va) {
 ; RV64-NEXT:    vsrl.vi v9, v8, 31
 ; RV64-NEXT:    vadd.vv v8, v8, v9
 ; RV64-NEXT:    ret
-  %head = insertelement <vscale x 1 x i32> poison, i32 -7, i32 0
-  %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
-  %vc = sdiv <vscale x 1 x i32> %va, %splat
+  %vc = sdiv <vscale x 1 x i32> %va, splat (i32 -7)
   ret <vscale x 1 x i32> %vc
 }

@@ -633,9 +601,7 @@ define @vdiv_vi_nxv2i32_0( %va) {
 ; RV64-NEXT:    vsrl.vi v9, v8, 31
 ; RV64-NEXT:    vadd.vv v8, v8, v9
 ; RV64-NEXT:    ret
-  %head = insertelement <vscale x 2 x i32> poison,
i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sdiv %va, %splat + %vc = sdiv %va, splat (i32 -7) ret %vc } @@ -685,9 +651,7 @@ define @vdiv_vi_nxv4i32_0( %va) { ; RV64-NEXT: vsrl.vi v10, v8, 31 ; RV64-NEXT: vadd.vv v8, v8, v10 ; RV64-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sdiv %va, %splat + %vc = sdiv %va, splat (i32 -7) ret %vc } @@ -737,9 +701,7 @@ define @vdiv_vi_nxv8i32_0( %va) { ; RV64-NEXT: vsrl.vi v12, v8, 31 ; RV64-NEXT: vadd.vv v8, v8, v12 ; RV64-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sdiv %va, %splat + %vc = sdiv %va, splat (i32 -7) ret %vc } @@ -789,9 +751,7 @@ define @vdiv_vi_nxv16i32_0( %va) { ; RV64-NEXT: vsrl.vi v16, v8, 31 ; RV64-NEXT: vadd.vv v8, v8, v16 ; RV64-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sdiv %va, %splat + %vc = sdiv %va, splat (i32 -7) ret %vc } @@ -870,9 +830,7 @@ define @vdiv_vi_nxv1i64_0( %va) { ; RV64-V-NEXT: vsra.vi v8, v8, 1 ; RV64-V-NEXT: vadd.vv v8, v8, v9 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sdiv %va, %splat + %vc = sdiv %va, splat (i64 -7) ret %vc } @@ -951,9 +909,7 @@ define @vdiv_vi_nxv2i64_0( %va) { ; RV64-V-NEXT: vsra.vi v8, v8, 1 ; RV64-V-NEXT: vadd.vv v8, v8, v10 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sdiv %va, %splat + %vc = sdiv %va, splat (i64 -7) ret %vc } @@ -1032,9 +988,7 @@ define @vdiv_vi_nxv4i64_0( %va) { ; RV64-V-NEXT: vsra.vi v8, v8, 1 ; RV64-V-NEXT: vadd.vv v8, v8, v12 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sdiv %va, %splat + %vc = sdiv %va, splat (i64 -7) ret %vc } @@ -1113,9 +1067,7 @@ define @vdiv_vi_nxv8i64_0( %va) { ; RV64-V-NEXT: vsra.vi v8, v8, 1 ; RV64-V-NEXT: vadd.vv v8, v8, v16 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sdiv %va, %splat + %vc = sdiv %va, splat (i64 -7) ret %vc } @@ -1127,9 +1079,7 @@ define @vdiv_vv_mask_nxv8i32( %va, poison, i32 1, i32 0 - %one = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %vb, %one + %vs = select %mask, %vb, splat (i32 1) %vc = sdiv %va, %vs ret %vc } @@ -1142,11 +1092,9 @@ define @vdiv_vx_mask_nxv8i32( %va, i32 sign ; CHECK-NEXT: vmerge.vxm v12, v12, a0, v0 ; CHECK-NEXT: vdiv.vv v8, v8, v12 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 1, i32 0 - %one = shufflevector %head1, poison, zeroinitializer %head2 = insertelement poison, i32 %b, i32 0 %splat = shufflevector %head2, poison, zeroinitializer - %vs = select %mask, %splat, %one + %vs = select %mask, %splat, splat (i32 1) %vc = sdiv %va, %vs ret %vc } @@ -1159,11 +1107,7 @@ define @vdiv_vi_mask_nxv8i32( %va, poison, i32 1, i32 0 - %one = shufflevector %head1, poison, zeroinitializer - %head2 = insertelement poison, i32 7, i32 0 - %splat = shufflevector %head2, poison, zeroinitializer - %vs = select %mask, %splat, %one + %vs = select %mask, splat (i32 7), splat (i32 1) %vc = sdiv %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll index fd951d5cfbffd..26089706cf99e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll @@ -39,9 +39,7 @@ define @vdiv_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -65,9 +63,7 @@ define @vdiv_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -89,9 +85,7 @@ define @vdiv_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -115,9 +109,7 @@ define @vdiv_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -151,9 +143,7 @@ define @vdiv_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -177,9 +167,7 @@ define @vdiv_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -201,9 +189,7 @@ define @vdiv_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -227,9 +213,7 @@ define @vdiv_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -251,9 +235,7 @@ define @vdiv_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -277,9 +259,7 @@ define @vdiv_vx_nxv16i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -301,9 +281,7 @@ define @vdiv_vv_nxv32i8_unmasked( %va, 
poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -327,9 +305,7 @@ define @vdiv_vx_nxv32i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -351,9 +327,7 @@ define @vdiv_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -377,9 +351,7 @@ define @vdiv_vx_nxv64i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -401,9 +373,7 @@ define @vdiv_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -427,9 +397,7 @@ define @vdiv_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -451,9 +419,7 @@ define @vdiv_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -477,9 +443,7 @@ define @vdiv_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -501,9 +465,7 @@ define @vdiv_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -527,9 +489,7 @@ define @vdiv_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -551,9 +511,7 @@ define @vdiv_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v 
= call @llvm.vp.sdiv.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -577,9 +535,7 @@ define @vdiv_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -601,9 +557,7 @@ define @vdiv_vv_nxv16i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -627,9 +581,7 @@ define @vdiv_vx_nxv16i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -651,9 +603,7 @@ define @vdiv_vv_nxv32i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -677,9 +627,7 @@ define @vdiv_vx_nxv32i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -701,9 +649,7 @@ define @vdiv_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -727,9 +673,7 @@ define @vdiv_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -751,9 +695,7 @@ define @vdiv_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -777,9 +719,7 @@ define @vdiv_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv2i32( %va, 
%vb, splat (i1 true), i32 %evl) ret %v } @@ -801,9 +741,7 @@ define @vdiv_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -827,9 +765,7 @@ define @vdiv_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -851,9 +787,7 @@ define @vdiv_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -877,9 +811,7 @@ define @vdiv_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -901,9 +833,7 @@ define @vdiv_vv_nxv16i32_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vdiv.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -927,9 +857,7 @@ define @vdiv_vx_nxv16i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -951,9 +879,7 @@ define @vdiv_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1005,9 +931,7 @@ define @vdiv_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1029,9 +953,7 @@ define @vdiv_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1083,9 +1005,7 @@ define @vdiv_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv2i64( %va, %vb, 
%m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1107,9 +1027,7 @@ define @vdiv_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1161,9 +1079,7 @@ define @vdiv_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1185,9 +1101,7 @@ define @vdiv_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1239,8 +1153,6 @@ define @vdiv_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sdiv.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sdiv.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll index c505cb3d1bbd4..4f2fb937ca73f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll @@ -34,9 +34,7 @@ define @vdivu_vi_nxv1i8_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i8 -7) ret %vc } @@ -45,9 +43,7 @@ define @vdivu_vi_nxv1i8_1( %va) { ; CHECK-LABEL: vdivu_vi_nxv1i8_1: ; CHECK: # %bb.0: ; CHECK-NEXT: ret - %head = insertelement poison, i8 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i8 1) ret %vc } @@ -58,9 +54,7 @@ define @vdivu_iv_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %splat, %va + %vc = udiv splat (i8 0), %va ret %vc } @@ -94,9 +88,7 @@ define @vdivu_vi_nxv2i8_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i8 -7) ret %vc } @@ -130,9 +122,7 @@ define @vdivu_vi_nxv4i8_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i8 -7) ret %vc } @@ -166,9 +156,7 @@ define @vdivu_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i8 -7) ret 
%vc } @@ -202,9 +190,7 @@ define @vdivu_vi_nxv16i8_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i8 -7) ret %vc } @@ -238,9 +224,7 @@ define @vdivu_vi_nxv32i8_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i8 -7) ret %vc } @@ -274,9 +258,7 @@ define @vdivu_vi_nxv64i8_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 5 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i8 -7) ret %vc } @@ -311,9 +293,7 @@ define @vdivu_vi_nxv1i16_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 13 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i16 -7) ret %vc } @@ -348,9 +328,7 @@ define @vdivu_vi_nxv2i16_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 13 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i16 -7) ret %vc } @@ -385,9 +363,7 @@ define @vdivu_vi_nxv4i16_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 13 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i16 -7) ret %vc } @@ -422,9 +398,7 @@ define @vdivu_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 13 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i16 -7) ret %vc } @@ -459,9 +433,7 @@ define @vdivu_vi_nxv16i16_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 13 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i16 -7) ret %vc } @@ -496,9 +468,7 @@ define @vdivu_vi_nxv32i16_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 13 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i16 -7) ret %vc } @@ -533,9 +503,7 @@ define @vdivu_vi_nxv1i32_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 29 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i32 -7) ret %vc } @@ -570,9 +538,7 @@ define @vdivu_vi_nxv2i32_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 29 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i32 -7) ret %vc } @@ -607,9 +573,7 @@ define @vdivu_vi_nxv4i32_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 29 ; 
CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i32 -7) ret %vc } @@ -644,9 +608,7 @@ define @vdivu_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 29 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i32 -7) ret %vc } @@ -681,9 +643,7 @@ define @vdivu_vi_nxv16i32_0( %va) { ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: vsrl.vi v8, v8, 29 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i32 -7) ret %vc } @@ -757,9 +717,7 @@ define @vdivu_vi_nxv1i64_0( %va) { ; RV64-V-NEXT: li a0, 61 ; RV64-V-NEXT: vsrl.vx v8, v8, a0 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i64 -7) ret %vc } @@ -769,9 +727,7 @@ define @vdivu_vi_nxv1i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 1 ; CHECK-NEXT: ret - %head = insertelement poison, i64 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i64 2) ret %vc } @@ -783,9 +739,7 @@ define @vdivu_vi_nxv1i64_2( %va, poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %splat, %vb + %vc = shl splat (i64 16), %vb %vd = udiv %va, %vc ret %vd } @@ -860,9 +814,7 @@ define @vdivu_vi_nxv2i64_0( %va) { ; RV64-V-NEXT: li a0, 61 ; RV64-V-NEXT: vsrl.vx v8, v8, a0 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i64 -7) ret %vc } @@ -872,9 +824,7 @@ define @vdivu_vi_nxv2i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 1 ; CHECK-NEXT: ret - %head = insertelement poison, i64 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i64 2) ret %vc } @@ -886,9 +836,7 @@ define @vdivu_vi_nxv2i64_2( %va, poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %splat, %vb + %vc = shl splat (i64 16), %vb %vd = udiv %va, %vc ret %vd } @@ -963,9 +911,7 @@ define @vdivu_vi_nxv4i64_0( %va) { ; RV64-V-NEXT: li a0, 61 ; RV64-V-NEXT: vsrl.vx v8, v8, a0 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i64 -7) ret %vc } @@ -975,9 +921,7 @@ define @vdivu_vi_nxv4i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 1 ; CHECK-NEXT: ret - %head = insertelement poison, i64 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i64 2) ret %vc } @@ -989,9 +933,7 @@ define @vdivu_vi_nxv4i64_2( %va, poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %splat, %vb + %vc = shl splat (i64 16), %vb %vd = udiv %va, %vc ret %vd } @@ -1066,9 +1008,7 @@ define @vdivu_vi_nxv8i64_0( %va) { ; RV64-V-NEXT: li a0, 61 ; RV64-V-NEXT: vsrl.vx v8, v8, a0 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, 
zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i64 -7) ret %vc } @@ -1078,9 +1018,7 @@ define @vdivu_vi_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 1 ; CHECK-NEXT: ret - %head = insertelement poison, i64 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = udiv %va, %splat + %vc = udiv %va, splat (i64 2) ret %vc } @@ -1092,9 +1030,7 @@ define @vdivu_vi_nxv8i64_2( %va, poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %splat, %vb + %vc = shl splat (i64 16), %vb %vd = udiv %va, %vc ret %vd } @@ -1107,9 +1043,7 @@ define @vdivu_vv_mask_nxv8i32( %va, poison, i32 1, i32 0 - %one = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %vb, %one + %vs = select %mask, %vb, splat (i32 1) %vc = udiv %va, %vs ret %vc } @@ -1122,11 +1056,9 @@ define @vdivu_vx_mask_nxv8i32( %va, i32 sig ; CHECK-NEXT: vmerge.vxm v12, v12, a0, v0 ; CHECK-NEXT: vdivu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 1, i32 0 - %one = shufflevector %head1, poison, zeroinitializer %head2 = insertelement poison, i32 %b, i32 0 %splat = shufflevector %head2, poison, zeroinitializer - %vs = select %mask, %splat, %one + %vs = select %mask, %splat, splat (i32 1) %vc = udiv %va, %vs ret %vc } @@ -1143,11 +1075,7 @@ define @vdivu_vi_mask_nxv8i32( %va, poison, i32 1, i32 0 - %one = shufflevector %head1, poison, zeroinitializer - %head2 = insertelement poison, i32 7, i32 0 - %splat = shufflevector %head2, poison, zeroinitializer - %vs = select %mask, %splat, %one + %vs = select %mask, splat (i32 7), splat (i32 1) %vc = udiv %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll index d6ebf822fd266..f41b885a66eaa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll @@ -41,9 +41,7 @@ define @vdivu_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -67,9 +65,7 @@ define @vdivu_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -91,9 +87,7 @@ define @vdivu_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -117,9 +111,7 @@ define @vdivu_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -153,9 +145,7 @@ define @vdivu_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i8( %va, %b, splat (i1 true), i32 
%evl) ret %v } @@ -179,9 +169,7 @@ define @vdivu_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -203,9 +191,7 @@ define @vdivu_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -229,9 +215,7 @@ define @vdivu_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -253,9 +237,7 @@ define @vdivu_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -279,9 +261,7 @@ define @vdivu_vx_nxv16i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -303,9 +283,7 @@ define @vdivu_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -329,9 +307,7 @@ define @vdivu_vx_nxv32i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -353,9 +329,7 @@ define @vdivu_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -379,9 +353,7 @@ define @vdivu_vx_nxv64i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -403,9 +375,7 @@ define @vdivu_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -429,9 +399,7 @@ define @vdivu_vx_nxv1i16_unmasked( %va, i16 ; 
CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -453,9 +421,7 @@ define @vdivu_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -479,9 +445,7 @@ define @vdivu_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -503,9 +467,7 @@ define @vdivu_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -529,9 +491,7 @@ define @vdivu_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -553,9 +513,7 @@ define @vdivu_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -579,9 +537,7 @@ define @vdivu_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -603,9 +559,7 @@ define @vdivu_vv_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -629,9 +583,7 @@ define @vdivu_vx_nxv16i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -653,9 +605,7 @@ define @vdivu_vv_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.udiv.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -679,9 +629,7 @@ define @vdivu_vx_nxv32i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -703,9 +651,7 @@ define @vdivu_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -729,9 +675,7 @@ define @vdivu_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -753,9 +697,7 @@ define @vdivu_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -779,9 +721,7 @@ define @vdivu_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -803,9 +743,7 @@ define @vdivu_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -829,9 +767,7 @@ define @vdivu_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -853,9 +789,7 @@ define @vdivu_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -879,9 +813,7 @@ define @vdivu_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -903,9 +835,7 @@ define @vdivu_vv_nxv16i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = 
shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -929,9 +859,7 @@ define @vdivu_vx_nxv16i32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -953,9 +881,7 @@ define @vdivu_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1007,9 +933,7 @@ define @vdivu_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1031,9 +955,7 @@ define @vdivu_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1085,9 +1007,7 @@ define @vdivu_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1109,9 +1029,7 @@ define @vdivu_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1163,9 +1081,7 @@ define @vdivu_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1187,9 +1103,7 @@ define @vdivu_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1241,8 +1155,6 @@ define @vdivu_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.udiv.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.udiv.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll index 22f0d8bba2e62..df2bc523cd7a8 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll @@ -46,9 +46,7 @@ define @vfabs_vv_nxv1f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv1f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -90,9 +88,7 @@ define @vfabs_vv_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv2f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -134,9 +130,7 @@ define @vfabs_vv_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv4f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -178,9 +172,7 @@ define @vfabs_vv_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv8f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -222,9 +214,7 @@ define @vfabs_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv16f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -279,8 +269,6 @@ define @vfabs_vv_nxv32f16_unmasked( %va ; ; ZVFHMIN-LABEL: vfabs_vv_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -288,6 +276,8 @@ define @vfabs_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -306,9 +296,7 @@ define @vfabs_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -330,9 +318,7 @@ define @vfabs_vv_nxv1f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv1f32( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv1f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -354,9 +340,7 @@ 
define @vfabs_vv_nxv2f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv2f32( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv2f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -378,9 +362,7 @@ define @vfabs_vv_nxv4f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv4f32( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv4f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -402,9 +384,7 @@ define @vfabs_vv_nxv8f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv8f32( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv8f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -426,9 +406,7 @@ define @vfabs_vv_nxv16f32_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv16f32( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv16f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -450,9 +428,7 @@ define @vfabs_vv_nxv1f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv1f64( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -474,9 +450,7 @@ define @vfabs_vv_nxv2f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv2f64( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -498,9 +472,7 @@ define @vfabs_vv_nxv4f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv4f64( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -522,9 +494,7 @@ define @vfabs_vv_nxv7f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv7f64( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -546,9 +516,7 @@ define @vfabs_vv_nxv8f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv8f64( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -598,8 +566,6 @@ define @vfabs_vv_nxv16f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v 
v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fabs.nxv16f64( %va, %m, i32 %evl) + %v = call @llvm.vp.fabs.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll index 0651438429fde..4065b69e781ad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll @@ -562,9 +562,7 @@ define @vfadd_vv_mask_nxv8f32( %va, poison, float 0.0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %vb, %splat + %vs = select %mask, %vb, splat (float 0.0) %vc = fadd fast %va, %vs ret %vc } @@ -575,11 +573,9 @@ define @vfadd_vf_mask_nxv8f32( %va, flo ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfadd.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret - %head0 = insertelement poison, float 0.0, i32 0 - %splat0 = shufflevector %head0, poison, zeroinitializer %head1 = insertelement poison, float %b, i32 0 %splat1 = shufflevector %head1, poison, zeroinitializer - %vs = select %mask, %splat1, %splat0 + %vs = select %mask, %splat1, splat (float 0.0) %vc = fadd fast %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index 4168f5cd50791..c69a7bc5cece4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -48,9 +48,7 @@ define @vfadd_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv1f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv1f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -131,9 +129,7 @@ define @vfadd_vf_nxv1f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv1f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -160,9 +156,7 @@ define @vfadd_vf_nxv1f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv1f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv1f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -206,9 +200,7 @@ define @vfadd_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv2f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv2f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -262,9 +254,7 @@ define @vfadd_vf_nxv2f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv2f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -308,9 +298,7 @@ define 
@vfadd_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv4f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv4f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -364,9 +352,7 @@ define @vfadd_vf_nxv4f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv4f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -410,9 +396,7 @@ define @vfadd_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv8f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv8f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -466,9 +450,7 @@ define @vfadd_vf_nxv8f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv8f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -512,9 +494,7 @@ define @vfadd_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv16f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv16f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -568,9 +548,7 @@ define @vfadd_vf_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv16f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -647,8 +625,6 @@ define @vfadd_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -656,6 +632,8 @@ define @vfadd_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -684,9 +662,7 @@ define @vfadd_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv32f16( 
%va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv32f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -752,8 +728,6 @@ define @vfadd_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -761,6 +735,8 @@ define @vfadd_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -782,9 +758,7 @@ define @vfadd_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -806,9 +780,7 @@ define @vfadd_vv_nxv1f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv1f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv1f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -832,9 +804,7 @@ define @vfadd_vf_nxv1f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv1f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -856,9 +826,7 @@ define @vfadd_vv_nxv2f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv2f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv2f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -882,9 +850,7 @@ define @vfadd_vf_nxv2f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv2f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv2f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -906,9 +872,7 @@ define @vfadd_vv_nxv4f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv4f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv4f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -932,9 +896,7 @@ define @vfadd_vf_nxv4f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, 
poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv4f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv4f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -956,9 +918,7 @@ define @vfadd_vv_nxv8f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv8f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv8f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -982,9 +942,7 @@ define @vfadd_vf_nxv8f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv8f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv8f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1006,9 +964,7 @@ define @vfadd_vv_nxv16f32_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv16f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv16f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1032,9 +988,7 @@ define @vfadd_vf_nxv16f32_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv16f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv16f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1056,9 +1010,7 @@ define @vfadd_vv_nxv1f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv1f64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv1f64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1082,9 +1034,7 @@ define @vfadd_vf_nxv1f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv1f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv1f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1106,9 +1056,7 @@ define @vfadd_vv_nxv2f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv2f64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv2f64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1132,9 +1080,7 @@ define @vfadd_vf_nxv2f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv2f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv2f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1156,9 +1102,7 @@ define @vfadd_vv_nxv4f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma 
; CHECK-NEXT: vfadd.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv4f64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv4f64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1182,9 +1126,7 @@ define @vfadd_vf_nxv4f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv4f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv4f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1218,9 +1160,7 @@ define @vfadd_vv_nxv8f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfadd.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv8f64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv8f64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1244,8 +1184,6 @@ define @vfadd_vf_nxv8f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fadd.nxv8f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fadd.nxv8f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-vp.ll index 12bfc235c0c6d..be2d576597da2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfclass-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfclass-vp.ll @@ -28,9 +28,7 @@ define @isnan_nxv2f16_unmasked( %x, i32 zer ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %1 = call @llvm.vp.is.fpclass.nxv2f16( %x, i32 3, %m, i32 %evl) ; nan + %1 = call @llvm.vp.is.fpclass.nxv2f16( %x, i32 3, splat (i1 true), i32 %evl) ; nan ret %1 } @@ -58,9 +56,7 @@ define @isnan_nxv2f32_unmasked( %x, i32 ze ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %1 = call @llvm.vp.is.fpclass.nxv2f32( %x, i32 639, %m, i32 %evl) + %1 = call @llvm.vp.is.fpclass.nxv2f32( %x, i32 639, splat (i1 true), i32 %evl) ret %1 } @@ -88,9 +84,7 @@ define @isnan_nxv4f32_unmasked( %x, i32 ze ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %1 = call @llvm.vp.is.fpclass.nxv4f32( %x, i32 3, %m, i32 %evl) ; nan + %1 = call @llvm.vp.is.fpclass.nxv4f32( %x, i32 3, splat (i1 true), i32 %evl) ; nan ret %1 } @@ -116,9 +110,7 @@ define @isnan_nxv8f32_unmasked( %x, i32 ze ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vmseq.vx v0, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %1 = call @llvm.vp.is.fpclass.nxv8f32( %x, i32 2, %m, i32 %evl) + %1 = call @llvm.vp.is.fpclass.nxv8f32( %x, i32 2, splat (i1 true), i32 %evl) ret %1 } @@ -144,9 +136,7 @@ define @isnan_nxv16f32_unmasked( %x, i32 ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v0, v8, a0 ; 
CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %1 = call @llvm.vp.is.fpclass.nxv16f32( %x, i32 1, %m, i32 %evl) + %1 = call @llvm.vp.is.fpclass.nxv16f32( %x, i32 1, splat (i1 true), i32 %evl) ret %1 } @@ -174,9 +164,7 @@ define @isnormal_nxv2f64_unmasked( %x, i3 ; CHECK-NEXT: vand.vx v8, v8, a0 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %1 = call @llvm.vp.is.fpclass.nxv2f64( %x, i32 516, %m, i32 %evl) ; 0x204 = "inf" + %1 = call @llvm.vp.is.fpclass.nxv2f64( %x, i32 516, splat (i1 true), i32 %evl) ; 0x204 = "inf" ret %1 } @@ -202,9 +190,7 @@ define @isposinf_nxv4f64_unmasked( %x, i3 ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vmseq.vx v0, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %1 = call @llvm.vp.is.fpclass.nxv4f64( %x, i32 512, %m, i32 %evl) ; 0x200 = "+inf" + %1 = call @llvm.vp.is.fpclass.nxv4f64( %x, i32 512, splat (i1 true), i32 %evl) ; 0x200 = "+inf" ret %1 } @@ -228,9 +214,7 @@ define @isneginf_nxv8f64_unmasked( %x, i3 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmseq.vi v0, v8, 1 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %1 = call @llvm.vp.is.fpclass.nxv8f64( %x, i32 4, %m, i32 %evl) ; "-inf" + %1 = call @llvm.vp.is.fpclass.nxv8f64( %x, i32 4, splat (i1 true), i32 %evl) ; "-inf" ret %1 } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll index b46b6743505be..7d1d9ec1680c1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll @@ -564,9 +564,7 @@ define @vfdiv_vv_mask_nxv8f32( %va, poison, float 0.0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %vb, %splat + %vs = select %mask, %vb, splat (float 0.0) %vc = fdiv %va, %vs ret %vc } @@ -579,11 +577,9 @@ define @vfdiv_vf_mask_nxv8f32( %va, flo ; CHECK-NEXT: vfmerge.vfm v12, v12, fa0, v0 ; CHECK-NEXT: vfdiv.vv v8, v8, v12 ; CHECK-NEXT: ret - %head0 = insertelement poison, float 0.0, i32 0 - %splat0 = shufflevector %head0, poison, zeroinitializer %head1 = insertelement poison, float %b, i32 0 %splat1 = shufflevector %head1, poison, zeroinitializer - %vs = select %mask, %splat1, %splat0 + %vs = select %mask, %splat1, splat (float 0.0) %vc = fdiv %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll index 396e99bc5e4f5..3ad17e85570a2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll @@ -48,9 +48,7 @@ define @vfdiv_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv1f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv1f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -104,9 +102,7 @@ define @vfdiv_vf_nxv1f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv1f16( %va, %vb, %m, i32 %evl) + %v = 
call @llvm.vp.fdiv.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -150,9 +146,7 @@ define @vfdiv_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv2f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv2f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -206,9 +200,7 @@ define @vfdiv_vf_nxv2f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv2f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -252,9 +244,7 @@ define @vfdiv_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv4f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv4f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -308,9 +298,7 @@ define @vfdiv_vf_nxv4f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv4f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -354,9 +342,7 @@ define @vfdiv_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv8f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv8f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -410,9 +396,7 @@ define @vfdiv_vf_nxv8f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv8f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -456,9 +440,7 @@ define @vfdiv_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv16f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv16f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -512,9 +494,7 @@ define @vfdiv_vf_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv16f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -591,8 +571,6 @@ define @vfdiv_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 
0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -600,6 +578,8 @@ define @vfdiv_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -628,9 +608,7 @@ define @vfdiv_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv32f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv32f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -696,8 +674,6 @@ define @vfdiv_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -705,6 +681,8 @@ define @vfdiv_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -726,9 +704,7 @@ define @vfdiv_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -750,9 +726,7 @@ define @vfdiv_vv_nxv1f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv1f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv1f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -776,9 +750,7 @@ define @vfdiv_vf_nxv1f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv1f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -800,9 +772,7 @@ define @vfdiv_vv_nxv2f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv2f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv2f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -826,9 +796,7 @@ define @vfdiv_vf_nxv2f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector 
%elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv2f32( %va, %vb, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv2f32( %va, %vb, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -850,9 +818,7 @@ define @vfdiv_vv_nxv4f32_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv4f32( %va, %b, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv4f32( %va, %b, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -876,9 +842,7 @@ define @vfdiv_vf_nxv4f32_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv4f32( %va, %vb, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv4f32( %va, %vb, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -900,9 +864,7 @@ define @vfdiv_vv_nxv8f32_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v12
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv8f32( %va, %b, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv8f32( %va, %b, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -926,9 +888,7 @@ define @vfdiv_vf_nxv8f32_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv8f32( %va, %vb, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv8f32( %va, %vb, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -950,9 +910,7 @@ define @vfdiv_vv_nxv16f32_unmasked( %
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v16
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv16f32( %va, %b, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv16f32( %va, %b, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -976,9 +934,7 @@ define @vfdiv_vf_nxv16f32_unmasked( %
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv16f32( %va, %vb, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv16f32( %va, %vb, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1000,9 +956,7 @@ define @vfdiv_vv_nxv1f64_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v9
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv1f64( %va, %b, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv1f64( %va, %b, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1026,9 +980,7 @@ define @vfdiv_vf_nxv1f64_unmasked( %v
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, double %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv1f64( %va, %vb, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv1f64( %va, %vb, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1050,9 +1002,7 @@ define @vfdiv_vv_nxv2f64_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv2f64( %va, %b, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv2f64( %va, %b, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1076,9 +1026,7 @@ define @vfdiv_vf_nxv2f64_unmasked( %v
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, double %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv2f64( %va, %vb, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv2f64( %va, %vb, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1100,9 +1048,7 @@ define @vfdiv_vv_nxv4f64_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v12
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv4f64( %va, %b, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv4f64( %va, %b, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1126,9 +1072,7 @@ define @vfdiv_vf_nxv4f64_unmasked( %v
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, double %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv4f64( %va, %vb, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv4f64( %va, %vb, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1162,9 +1106,7 @@ define @vfdiv_vv_nxv8f64_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfdiv.vv v8, v8, v16
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv8f64( %va, %b, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv8f64( %va, %b, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1188,8 +1130,6 @@ define @vfdiv_vf_nxv8f64_unmasked( %v
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, double %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fdiv.nxv8f64( %va, %vb, %m, i32 %evl)
+  %v = call @llvm.vp.fdiv.nxv8f64( %va, %vb, splat (i1 true), i32 %evl)
   ret %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll
index 1bc0ed4e7513f..a0f269b59bfe5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp-combine.ll
@@ -36,12 +36,8 @@ define @test2( %a,
 ; CHECK-NEXT: vfadd.vf v9, v9, fa5, v0.t
 ; CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t
 ; CHECK-NEXT: ret
-  %elt.head1 = insertelement poison, double 2.0, i32 0
-  %c1 = shufflevector %elt.head1, poison, zeroinitializer
-  %t = call @llvm.vp.fmul.nxv1f64( %a, %c1, %m, i32 %evl)
-  %elt.head2 = insertelement poison, double 4.0, i32 0
-  %c2 = shufflevector %elt.head2, poison, zeroinitializer
-  %v = call fast @llvm.vp.fma.nxv1f64( %a, %c2, %t, %m, i32 %evl)
+  %t = call @llvm.vp.fmul.nxv1f64( %a, splat (double 2.0), %m, i32 %evl)
+  %v = call fast @llvm.vp.fma.nxv1f64( %a, splat (double 4.0), %t, %m, i32 %evl)
   ret %v
 }

@@ -60,11 +56,7 @@ define @test3( %a,
-  %elt.head1 = insertelement poison, double 2.0, i32 0
-  %c1 = shufflevector %elt.head1, poison, zeroinitializer
-  %t = call @llvm.vp.fmul.nxv1f64( %a, %c1, %m, i32 %evl)
-  %elt.head2 = insertelement poison, double 4.0, i32 0
-  %c2 = shufflevector %elt.head2, poison, zeroinitializer
-  %v = call fast @llvm.vp.fma.nxv1f64( %t, %c2, %b, %m, i32 %evl)
+  %t = call @llvm.vp.fmul.nxv1f64( %a, splat (double 2.0), %m, i32 %evl)
+  %v = call fast @llvm.vp.fma.nxv1f64( %t, splat (double 4.0), %b, %m, i32 %evl)
   ret %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
index 9ab907bfcca67..a41c262116136 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
@@ -23,9 +23,7 @@ define @vfma_vv_nxv1f16_unmasked( %va,
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv1f16( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -61,9 +59,7 @@ define @vfma_vf_nxv1f16_unmasked( %va, ha
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv1f16( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -75,9 +71,7 @@ define @vfma_vf_nxv1f16_unmasked_commute(
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv1f16( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -100,9 +94,7 @@ define @vfma_vv_nxv2f16_unmasked( %va,
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv2f16( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -138,9 +130,7 @@ define @vfma_vf_nxv2f16_unmasked( %va, ha
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -152,9 +142,7 @@ define @vfma_vf_nxv2f16_unmasked_commute(
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv2f16( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -177,9 +165,7 @@ define @vfma_vv_nxv4f16_unmasked( %va,
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv4f16( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -215,9 +201,7 @@ define @vfma_vf_nxv4f16_unmasked( %va, ha
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -229,9 +213,7 @@ define @vfma_vf_nxv4f16_unmasked_commute(
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv4f16( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -254,9 +236,7 @@ define @vfma_vv_nxv8f16_unmasked( %va,
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv8f16( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -292,9 +272,7 @@ define @vfma_vf_nxv8f16_unmasked( %va, ha
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -306,9 +284,7 @@ define @vfma_vf_nxv8f16_unmasked_commute(
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv8f16( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -331,9 +307,7 @@ define @vfma_vv_nxv16f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v12, v16
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv16f16( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -369,9 +343,7 @@ define @vfma_vf_nxv16f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -383,9 +355,7 @@ define @vfma_vf_nxv16f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv16f16( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -410,9 +380,7 @@ define @vfma_vv_nxv32f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v16, v24
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv32f16( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv32f16( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -448,9 +416,7 @@ define @vfma_vf_nxv32f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -462,9 +428,7 @@ define @vfma_vf_nxv32f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv32f16( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv32f16( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -487,9 +451,7 @@ define @vfma_vv_nxv1f32_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv1f32( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f32( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -525,9 +487,7 @@ define @vfma_vf_nxv1f32_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -539,9 +499,7 @@ define @vfma_vf_nxv1f32_unmasked_commute(
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv1f32( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f32( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -564,9 +522,7 @@ define @vfma_vv_nxv2f32_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv2f32( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f32( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -602,9 +558,7 @@ define @vfma_vf_nxv2f32_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -616,9 +570,7 @@ define @vfma_vf_nxv2f32_unmasked_commute(
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv2f32( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f32( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -641,9 +593,7 @@ define @vfma_vv_nxv4f32_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v10, v12
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv4f32( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f32( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -679,9 +629,7 @@ define @vfma_vf_nxv4f32_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -693,9 +641,7 @@ define @vfma_vf_nxv4f32_unmasked_commute(
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv4f32( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f32( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -718,9 +664,7 @@ define @vfma_vv_nxv8f32_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v12, v16
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv8f32( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f32( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -756,9 +700,7 @@ define @vfma_vf_nxv8f32_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -770,9 +712,7 @@ define @vfma_vf_nxv8f32_unmasked_commute(
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv8f32( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f32( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -797,9 +737,7 @@ define @vfma_vv_nxv16f32_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v16, v24
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv16f32( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f32( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -835,9 +773,7 @@ define @vfma_vf_nxv16f32_unmasked( %v
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -849,9 +785,7 @@ define @vfma_vf_nxv16f32_unmasked_commute(
   %elt.head = insertelement poison, float %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv16f32( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f32( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -874,9 +808,7 @@ define @vfma_vv_nxv1f64_unmasked( %va
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv1f64( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f64( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -912,9 +844,7 @@ define @vfma_vf_nxv1f64_unmasked( %va
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, double %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -926,9 +856,7 @@ define @vfma_vf_nxv1f64_unmasked_commute(
   %elt.head = insertelement poison, double %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv1f64( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f64( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -951,9 +879,7 @@ define @vfma_vv_nxv2f64_unmasked( %va
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v10, v12
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv2f64( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f64( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -989,9 +915,7 @@ define @vfma_vf_nxv2f64_unmasked( %va
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, double %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1003,9 +927,7 @@ define @vfma_vf_nxv2f64_unmasked_commute(
   %elt.head = insertelement poison, double %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv2f64( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f64( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1028,9 +950,7 @@ define @vfma_vv_nxv4f64_unmasked( %va
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v12, v16
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv4f64( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f64( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1066,9 +986,7 @@ define @vfma_vf_nxv4f64_unmasked( %va
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, double %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1080,9 +998,7 @@ define @vfma_vf_nxv4f64_unmasked_commute(
   %elt.head = insertelement poison, double %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv4f64( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f64( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1107,9 +1023,7 @@ define @vfma_vv_nxv7f64_unmasked( %va
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v16, v24
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv7f64( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv7f64( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1134,9 +1048,7 @@ define @vfma_vv_nxv8f64_unmasked( %va
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v16, v24
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv8f64( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f64( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1172,9 +1084,7 @@ define @vfma_vf_nxv8f64_unmasked( %va
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, double %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1186,9 +1096,7 @@ define @vfma_vf_nxv8f64_unmasked_commute(
   %elt.head = insertelement poison, double %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv8f64( %vb, %va, %vc, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f64( %vb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1359,9 +1267,7 @@ define @vfma_vv_nxv16f64_unmasked(
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %v = call @llvm.vp.fma.nxv16f64( %va, %b, %c, %m, i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f64( %va, %b, %c, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1385,10 +1291,8 @@ define @vfmsub_vv_nxv1f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v9, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %va, %b, %negc, %m, i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %va, %b, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1426,10 +1330,8 @@ define @vfmsub_vf_nxv1f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvc = call @llvm.vp.fneg.nxv1f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %va, %vb, %negvc, %m, i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv1f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %va, %vb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1441,10 +1343,8 @@ define @vfmsub_vf_nxv1f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvc = call @llvm.vp.fneg.nxv1f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %vb, %va, %negvc, %m, i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv1f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %vb, %va, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1479,11 +1379,9 @@ define @vfnmadd_vv_nxv1f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %va, %negb, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv1f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1493,11 +1391,9 @@ define @vfnmadd_vv_nxv1f16_unmasked_commuted(
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %negb, %va, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv1f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1537,11 +1433,9 @@ define @vfnmadd_vf_nxv1f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv1f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %negva, %vb, %negvc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv1f16( %va, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv1f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1553,11 +1447,9 @@ define @vfnmadd_vf_nxv1f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv1f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %vb, %negva, %negvc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv1f16( %va, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv1f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1597,11 +1489,9 @@ define @vfnmadd_vf_nxv1f16_neg_splat_unmasked(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv1f16( %vb, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv1f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %va, %negvb, %negvc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv1f16( %vb, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv1f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1613,11 +1503,9 @@ define @vfnmadd_vf_nxv1f16_neg_splat_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv1f16( %vb, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv1f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %negvb, %va, %negvc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv1f16( %vb, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv1f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1652,11 +1540,9 @@ define @vfnmsub_vv_nxv1f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %va, %negb, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv1f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1666,11 +1552,9 @@ define @vfnmsub_vv_nxv1f16_unmasked_commuted(
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %negb, %va, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv1f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1708,10 +1592,8 @@ define @vfnmsub_vf_nxv1f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %negva, %vb, %vc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv1f16( %va, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %negva, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1723,10 +1605,8 @@ define @vfnmsub_vf_nxv1f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %vb, %negva, %vc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv1f16( %va, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %vb, %negva, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1764,10 +1644,8 @@ define @vfnmsub_vf_nxv1f16_neg_splat_unmasked(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv1f16( %vb, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %va, %negvb, %vc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv1f16( %vb, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %va, %negvb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1779,10 +1657,8 @@ define @vfnmsub_vf_nxv1f16_neg_splat_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv1f16( %vb, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv1f16( %negvb, %va, %vc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv1f16( %vb, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv1f16( %negvb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1806,10 +1682,8 @@ define @vfmsub_vv_nxv2f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v9, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %va, %b, %negc, %m, i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %va, %b, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1847,10 +1721,8 @@ define @vfmsub_vf_nxv2f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvc = call @llvm.vp.fneg.nxv2f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %negvc, %m, i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv2f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1862,10 +1734,8 @@ define @vfmsub_vf_nxv2f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvc = call @llvm.vp.fneg.nxv2f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %vb, %va, %negvc, %m, i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv2f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %vb, %va, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1900,11 +1770,9 @@ define @vfnmadd_vv_nxv2f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv2f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %va, %negb, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv2f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1914,11 +1782,9 @@ define @vfnmadd_vv_nxv2f16_unmasked_commuted(
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv2f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %negb, %va, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv2f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1958,11 +1824,9 @@ define @vfnmadd_vf_nxv2f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv2f16( %va, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv2f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %negva, %vb, %negvc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv2f16( %va, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv2f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -1974,11 +1838,9 @@ define @vfnmadd_vf_nxv2f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv2f16( %va, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv2f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %vb, %negva, %negvc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv2f16( %va, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv2f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2018,11 +1880,9 @@ define @vfnmadd_vf_nxv2f16_neg_splat_unmasked(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv2f16( %vb, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv2f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %va, %negvb, %negvc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv2f16( %vb, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv2f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2034,11 +1894,9 @@ define @vfnmadd_vf_nxv2f16_neg_splat_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv2f16( %vb, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv2f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %negvb, %va, %negvc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv2f16( %vb, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv2f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2073,11 +1931,9 @@ define @vfnmsub_vv_nxv2f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv2f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %va, %negb, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv2f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2087,11 +1943,9 @@ define @vfnmsub_vv_nxv2f16_unmasked_commuted(
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv2f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %negb, %va, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv2f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2129,10 +1983,8 @@ define @vfnmsub_vf_nxv2f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv2f16( %va, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %negva, %vb, %vc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv2f16( %va, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %negva, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2144,10 +1996,8 @@ define @vfnmsub_vf_nxv2f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv2f16( %va, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %vb, %negva, %vc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv2f16( %va, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %vb, %negva, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2185,10 +2035,8 @@ define @vfnmsub_vf_nxv2f16_neg_splat_unmasked(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv2f16( %vb, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %va, %negvb, %vc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv2f16( %vb, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %va, %negvb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2200,10 +2048,8 @@ define @vfnmsub_vf_nxv2f16_neg_splat_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv2f16( %vb, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv2f16( %negvb, %va, %vc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv2f16( %vb, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv2f16( %negvb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2227,10 +2073,8 @@ define @vfmsub_vv_nxv4f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v9, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %va, %b, %negc, %m, i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %va, %b, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2268,10 +2112,8 @@ define @vfmsub_vf_nxv4f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvc = call @llvm.vp.fneg.nxv4f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %negvc, %m, i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv4f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2283,10 +2125,8 @@ define @vfmsub_vf_nxv4f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvc = call @llvm.vp.fneg.nxv4f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %vb, %va, %negvc, %m, i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv4f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %vb, %va, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2321,11 +2161,9 @@ define @vfnmadd_vv_nxv4f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv4f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %va, %negb, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv4f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2335,11 +2173,9 @@ define @vfnmadd_vv_nxv4f16_unmasked_commuted(
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv4f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %negb, %va, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv4f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2379,11 +2215,9 @@ define @vfnmadd_vf_nxv4f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv4f16( %va, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv4f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %negva, %vb, %negvc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv4f16( %va, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv4f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2395,11 +2229,9 @@ define @vfnmadd_vf_nxv4f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv4f16( %va, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv4f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %vb, %negva, %negvc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv4f16( %va, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv4f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2439,11 +2271,9 @@ define @vfnmadd_vf_nxv4f16_neg_splat_unmasked(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv4f16( %vb, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv4f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %va, %negvb, %negvc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv4f16( %vb, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv4f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2455,11 +2285,9 @@ define @vfnmadd_vf_nxv4f16_neg_splat_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv4f16( %vb, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv4f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %negvb, %va, %negvc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv4f16( %vb, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv4f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2494,11 +2322,9 @@ define @vfnmsub_vv_nxv4f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv4f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %va, %negb, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv4f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2508,11 +2334,9 @@ define @vfnmsub_vv_nxv4f16_unmasked_commuted(
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv4f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %negb, %va, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv4f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2550,10 +2374,8 @@ define @vfnmsub_vf_nxv4f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv4f16( %va, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %negva, %vb, %vc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv4f16( %va, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %negva, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2565,10 +2387,8 @@ define @vfnmsub_vf_nxv4f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv4f16( %va, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %vb, %negva, %vc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv4f16( %va, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %vb, %negva, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2606,10 +2426,8 @@ define @vfnmsub_vf_nxv4f16_neg_splat_unmasked(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv4f16( %vb, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %va, %negvb, %vc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv4f16( %vb, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %va, %negvb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2621,10 +2439,8 @@ define @vfnmsub_vf_nxv4f16_neg_splat_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv4f16( %vb, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv4f16( %negvb, %va, %vc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv4f16( %vb, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv4f16( %negvb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2648,10 +2464,8 @@ define @vfmsub_vv_nxv8f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v10, v12
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %va, %b, %negc, %m, i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %va, %b, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2689,10 +2503,8 @@ define @vfmsub_vf_nxv8f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvc = call @llvm.vp.fneg.nxv8f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %negvc, %m, i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv8f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2704,10 +2516,8 @@ define @vfmsub_vf_nxv8f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvc = call @llvm.vp.fneg.nxv8f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %vb, %va, %negvc, %m, i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv8f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %vb, %va, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2742,11 +2552,9 @@ define @vfnmadd_vv_nxv8f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v10, v12
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv8f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %va, %negb, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv8f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2756,11 +2564,9 @@ define @vfnmadd_vv_nxv8f16_unmasked_commuted(
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv8f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %negb, %va, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv8f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2800,11 +2606,9 @@ define @vfnmadd_vf_nxv8f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv8f16( %va, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv8f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %negva, %vb, %negvc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv8f16( %va, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv8f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2816,11 +2620,9 @@ define @vfnmadd_vf_nxv8f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv8f16( %va, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv8f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %vb, %negva, %negvc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv8f16( %va, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv8f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2860,11 +2662,9 @@ define @vfnmadd_vf_nxv8f16_neg_splat_unmasked(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv8f16( %vb, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv8f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %va, %negvb, %negvc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv8f16( %vb, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv8f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2876,11 +2676,9 @@ define @vfnmadd_vf_nxv8f16_neg_splat_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv8f16( %vb, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv8f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %negvb, %va, %negvc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv8f16( %vb, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv8f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2915,11 +2713,9 @@ define @vfnmsub_vv_nxv8f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v10, v12
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv8f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %va, %negb, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv8f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2929,11 +2725,9 @@ define @vfnmsub_vv_nxv8f16_unmasked_commuted(
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv8f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %negb, %va, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv8f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2971,10 +2765,8 @@ define @vfnmsub_vf_nxv8f16_unmasked( %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv8f16( %va, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %negva, %vb, %vc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv8f16( %va, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %negva, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -2986,10 +2778,8 @@ define @vfnmsub_vf_nxv8f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv8f16( %va, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %vb, %negva, %vc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv8f16( %va, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %vb, %negva, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3027,10 +2817,8 @@ define @vfnmsub_vf_nxv8f16_neg_splat_unmasked(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv8f16( %vb, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %va, %negvb, %vc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv8f16( %vb, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %va, %negvb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3042,10 +2830,8 @@ define @vfnmsub_vf_nxv8f16_neg_splat_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv8f16( %vb, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv8f16( %negvb, %va, %vc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv8f16( %vb, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv8f16( %negvb, %va, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3069,10 +2855,8 @@ define @vfmsub_vv_nxv16f16_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v12, v16
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negc = call @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %va, %b, %negc, %m, i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %va, %b, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3110,10 +2894,8 @@ define @vfmsub_vf_nxv16f16_unmasked( %v
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvc = call @llvm.vp.fneg.nxv16f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %negvc, %m, i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv16f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3125,10 +2907,8 @@ define @vfmsub_vf_nxv16f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvc = call @llvm.vp.fneg.nxv16f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %vb, %va, %negvc, %m, i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv16f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %vb, %va, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3163,11 +2943,9 @@ define @vfnmadd_vv_nxv16f16_unmasked( %
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v12, v16
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv16f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %va, %negb, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv16f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3177,11 +2955,9 @@ define @vfnmadd_vv_nxv16f16_unmasked_commuted(
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv16f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %negb, %va, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv16f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3221,11 +2997,9 @@ define @vfnmadd_vf_nxv16f16_unmasked( %
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv16f16( %va, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv16f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %negva, %vb, %negvc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv16f16( %va, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv16f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3237,11 +3011,9 @@ define @vfnmadd_vf_nxv16f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv16f16( %va, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv16f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %vb, %negva, %negvc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv16f16( %va, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv16f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3281,11 +3053,9 @@ define @vfnmadd_vf_nxv16f16_neg_splat_unmasked(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv16f16( %vb, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv16f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %va, %negvb, %negvc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv16f16( %vb, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv16f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3297,11 +3067,9 @@ define @vfnmadd_vf_nxv16f16_neg_splat_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv16f16( %vb, %m, i32 %evl)
-  %negvc = call @llvm.vp.fneg.nxv16f16( %vc, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %negvb, %va, %negvc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv16f16( %vb, splat (i1 true), i32 %evl)
+  %negvc = call @llvm.vp.fneg.nxv16f16( %vc, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3336,11 +3104,9 @@ define @vfnmsub_vv_nxv16f16_unmasked( %
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v12, v16
 ; CHECK-NEXT: ret
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv16f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %va, %negb, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv16f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3350,11 +3116,9 @@ define @vfnmsub_vv_nxv16f16_unmasked_commuted(
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negb = call @llvm.vp.fneg.nxv16f16( %b, %m, i32 %evl)
-  %negc = call @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %negb, %va, %negc, %m, i32 %evl)
+  %negb = call @llvm.vp.fneg.nxv16f16( %b, splat (i1 true), i32 %evl)
+  %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3392,10 +3156,8 @@ define @vfnmsub_vf_nxv16f16_unmasked( %
 ; CHECK-NEXT: ret
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv16f16( %va, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %negva, %vb, %vc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv16f16( %va, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %negva, %vb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3407,10 +3169,8 @@ define @vfnmsub_vf_nxv16f16_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negva = call @llvm.vp.fneg.nxv16f16( %va, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %vb, %negva, %vc, %m, i32 %evl)
+  %negva = call @llvm.vp.fneg.nxv16f16( %va, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %vb, %negva, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3448,10 +3208,8 @@ define @vfnmsub_vf_nxv16f16_neg_splat_unmasked(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison, zeroinitializer
-  %negvb = call @llvm.vp.fneg.nxv16f16( %vb, %m, i32 %evl)
-  %v = call @llvm.vp.fma.nxv16f16( %va, %negvb, %vc, %m, i32 %evl)
+  %negvb = call @llvm.vp.fneg.nxv16f16( %vb, splat (i1 true), i32 %evl)
+  %v = call @llvm.vp.fma.nxv16f16( %va, %negvb, %vc, splat (i1 true), i32 %evl)
   ret %v
 }

@@ -3463,10 +3221,8 @@ define @vfnmsub_vf_nxv16f16_neg_splat_unmasked_commute(
   %elt.head = insertelement poison, half %b, i32 0
   %vb = shufflevector %elt.head, poison, zeroinitializer
-  %head = insertelement poison, i1 true, i32 0
-  %m = shufflevector %head, poison,
zeroinitializer - %negvb = call @llvm.vp.fneg.nxv16f16( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv16f16( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -3492,10 +3248,8 @@ define @vfmsub_vv_nxv32f16_unmasked( %v ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -3533,10 +3287,8 @@ define @vfmsub_vf_nxv32f16_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv32f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -3548,10 +3300,8 @@ define @vfmsub_vf_nxv32f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv32f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -3589,11 +3339,9 @@ define @vfnmadd_vv_nxv32f16_unmasked( % ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv32f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -3604,11 +3352,9 @@ define @vfnmadd_vv_nxv32f16_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv32f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -3648,11 +3394,9 @@ define @vfnmadd_vf_nxv32f16_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv32f16( %va, %m, i32 %evl) - %negvc = call 
@llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv32f16( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv32f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -3664,11 +3408,9 @@ define @vfnmadd_vf_nxv32f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv32f16( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv32f16( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv32f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -3708,11 +3450,9 @@ define @vfnmadd_vf_nxv32f16_neg_splat_unmasked( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv32f16( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv32f16( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv32f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -3724,11 +3464,9 @@ define @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv32f16( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv32f16( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv32f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -3766,11 +3504,9 @@ define @vfnmsub_vv_nxv32f16_unmasked( % ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv32f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -3781,11 +3517,9 @@ define @vfnmsub_vv_nxv32f16_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv32f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( 
%negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -3823,10 +3557,8 @@ define @vfnmsub_vf_nxv32f16_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv32f16( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv32f16( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -3838,10 +3570,8 @@ define @vfnmsub_vf_nxv32f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv32f16( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv32f16( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -3879,10 +3609,8 @@ define @vfnmsub_vf_nxv32f16_neg_splat_unmasked( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv32f16( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv32f16( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -3894,10 +3622,8 @@ define @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv32f16( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv32f16( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -3921,10 +3647,8 @@ define @vfmsub_vv_nxv1f32_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -3962,10 +3686,8 @@ define @vfmsub_vf_nxv1f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv1f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -3977,10 +3699,8 @@ define @vfmsub_vf_nxv1f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = 
shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv1f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4015,11 +3735,9 @@ define @vfnmadd_vv_nxv1f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -4029,11 +3747,9 @@ define @vfnmadd_vv_nxv1f32_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -4073,11 +3789,9 @@ define @vfnmadd_vf_nxv1f32_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f32( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f32( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4089,11 +3803,9 @@ define @vfnmadd_vf_nxv1f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f32( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f32( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4133,11 +3845,9 @@ define @vfnmadd_vf_nxv1f32_neg_splat_unmasked( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f32( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f32( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %va, %negvb, %negvc, splat 
(i1 true), i32 %evl) ret %v } @@ -4149,11 +3859,9 @@ define @vfnmadd_vf_nxv1f32_neg_splat_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f32( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f32( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4188,11 +3896,9 @@ define @vfnmsub_vv_nxv1f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -4202,11 +3908,9 @@ define @vfnmsub_vv_nxv1f32_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -4244,10 +3948,8 @@ define @vfnmsub_vf_nxv1f32_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f32( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f32( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -4259,10 +3961,8 @@ define @vfnmsub_vf_nxv1f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f32( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f32( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -4300,10 +4000,8 @@ define @vfnmsub_vf_nxv1f32_neg_splat_unmasked( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f32( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f32( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } 
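
The hunks in this test file are all the same mechanical rewrite: each unmasked test used to materialize an all-true mask with an insertelement/shufflevector splat and pass it as the VP mask operand, and the updated IR passes the splat (i1 true) constant directly. A minimal sketch of the idiom, assuming a hypothetical <vscale x 8 x half> value %v (the names below are illustrative, not taken from the tests):

; Before: build the all-ones mask explicitly.
%head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
%m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%neg = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %v, <vscale x 8 x i1> %m, i32 %evl)

; After: pass the splat constant expression as the mask.
%neg = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %v, <vscale x 8 x i1> splat (i1 true), i32 %evl)

The CHECK lines (vfmsub.vv, vfnmadd.vv, and so on) are untouched throughout: both forms denote the same all-true mask, so the generated RISC-V code is expected to be identical.
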
@@ -4315,10 +4013,8 @@ define @vfnmsub_vf_nxv1f32_neg_splat_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f32( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f32( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -4342,10 +4038,8 @@ define @vfmsub_vv_nxv2f32_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -4383,10 +4077,8 @@ define @vfmsub_vf_nxv2f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv2f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4398,10 +4090,8 @@ define @vfmsub_vf_nxv2f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv2f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4436,11 +4126,9 @@ define @vfnmadd_vv_nxv2f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -4450,11 +4138,9 @@ define @vfnmadd_vv_nxv2f32_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -4494,11 +4180,9 @@ define @vfnmadd_vf_nxv2f32_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector 
%elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f32( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f32( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4510,11 +4194,9 @@ define @vfnmadd_vf_nxv2f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f32( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f32( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4554,11 +4236,9 @@ define @vfnmadd_vf_nxv2f32_neg_splat_unmasked( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f32( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f32( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4570,11 +4250,9 @@ define @vfnmadd_vf_nxv2f32_neg_splat_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f32( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f32( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4609,11 +4287,9 @@ define @vfnmsub_vv_nxv2f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -4623,11 +4299,9 @@ define @vfnmsub_vv_nxv2f32_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %negb, %va, %negc, %m, i32 %evl) + 
%negb = call @llvm.vp.fneg.nxv2f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -4665,10 +4339,8 @@ define @vfnmsub_vf_nxv2f32_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f32( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f32( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -4680,10 +4352,8 @@ define @vfnmsub_vf_nxv2f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f32( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f32( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -4721,10 +4391,8 @@ define @vfnmsub_vf_nxv2f32_neg_splat_unmasked( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f32( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f32( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -4736,10 +4404,8 @@ define @vfnmsub_vf_nxv2f32_neg_splat_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f32( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f32( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -4763,10 +4429,8 @@ define @vfmsub_vv_nxv4f32_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -4804,10 +4468,8 @@ define @vfmsub_vf_nxv4f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv4f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4819,10 +4481,8 @@ define 
@vfmsub_vf_nxv4f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv4f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4857,11 +4517,9 @@ define @vfnmadd_vv_nxv4f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv4f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv4f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -4871,11 +4529,9 @@ define @vfnmadd_vv_nxv4f32_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv4f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv4f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -4915,11 +4571,9 @@ define @vfnmadd_vf_nxv4f32_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv4f32( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv4f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv4f32( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4931,11 +4585,9 @@ define @vfnmadd_vf_nxv4f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv4f32( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv4f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv4f32( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4975,11 +4627,9 @@ define @vfnmadd_vf_nxv4f32_neg_splat_unmasked( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv4f32( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv4f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call 
@llvm.vp.fneg.nxv4f32( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -4991,11 +4641,9 @@ define @vfnmadd_vf_nxv4f32_neg_splat_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv4f32( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv4f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv4f32( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5030,11 +4678,9 @@ define @vfnmsub_vv_nxv4f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv4f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv4f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5044,11 +4690,9 @@ define @vfnmsub_vv_nxv4f32_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv4f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv4f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5086,10 +4730,8 @@ define @vfnmsub_vf_nxv4f32_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv4f32( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv4f32( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -5101,10 +4743,8 @@ define @vfnmsub_vf_nxv4f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv4f32( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv4f32( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -5142,10 +4782,8 @@ define @vfnmsub_vf_nxv4f32_neg_splat_unmasked( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv4f32( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %va, 
%negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv4f32( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -5157,10 +4795,8 @@ define @vfnmsub_vf_nxv4f32_neg_splat_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv4f32( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv4f32( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -5184,10 +4820,8 @@ define @vfmsub_vv_nxv8f32_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5225,10 +4859,8 @@ define @vfmsub_vf_nxv8f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv8f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5240,10 +4872,8 @@ define @vfmsub_vf_nxv8f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv8f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5278,11 +4908,9 @@ define @vfnmadd_vv_nxv8f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv8f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv8f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5292,11 +4920,9 @@ define @vfnmadd_vv_nxv8f32_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv8f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv8f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %negb, %va, %negc, splat 
(i1 true), i32 %evl) ret %v } @@ -5336,11 +4962,9 @@ define @vfnmadd_vf_nxv8f32_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv8f32( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv8f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv8f32( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5352,11 +4976,9 @@ define @vfnmadd_vf_nxv8f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv8f32( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv8f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv8f32( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5396,11 +5018,9 @@ define @vfnmadd_vf_nxv8f32_neg_splat_unmasked( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv8f32( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv8f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv8f32( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5412,11 +5032,9 @@ define @vfnmadd_vf_nxv8f32_neg_splat_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv8f32( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv8f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv8f32( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5451,11 +5069,9 @@ define @vfnmsub_vv_nxv8f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv8f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv8f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5465,11 +5081,9 @@ define @vfnmsub_vv_nxv8f32_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - 
%negb = call @llvm.vp.fneg.nxv8f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv8f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5507,10 +5121,8 @@ define @vfnmsub_vf_nxv8f32_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv8f32( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv8f32( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -5522,10 +5134,8 @@ define @vfnmsub_vf_nxv8f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv8f32( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv8f32( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -5563,10 +5173,8 @@ define @vfnmsub_vf_nxv8f32_neg_splat_unmasked( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv8f32( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv8f32( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -5578,10 +5186,8 @@ define @vfnmsub_vf_nxv8f32_neg_splat_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv8f32( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv8f32( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -5607,10 +5213,8 @@ define @vfmsub_vv_nxv16f32_unmasked( ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5648,10 +5252,8 @@ define @vfmsub_vf_nxv16f32_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv16f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call 
@llvm.vp.fneg.nxv16f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5663,10 +5265,8 @@ define @vfmsub_vf_nxv16f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv16f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv16f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5704,11 +5304,9 @@ define @vfnmadd_vv_nxv16f32_unmasked( ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv16f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv16f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5719,11 +5317,9 @@ define @vfnmadd_vv_nxv16f32_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv16f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv16f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5763,11 +5359,9 @@ define @vfnmadd_vf_nxv16f32_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv16f32( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv16f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv16f32( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv16f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5779,11 +5373,9 @@ define @vfnmadd_vf_nxv16f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv16f32( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv16f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv16f32( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv16f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5823,11 +5415,9 @@ define @vfnmadd_vf_nxv16f32_neg_splat_unmasked( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call 
@llvm.vp.fneg.nxv16f32( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv16f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv16f32( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv16f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5839,11 +5429,9 @@ define @vfnmadd_vf_nxv16f32_neg_splat_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv16f32( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv16f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv16f32( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv16f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5881,11 +5469,9 @@ define @vfnmsub_vv_nxv16f32_unmasked( ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv16f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv16f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5896,11 +5482,9 @@ define @vfnmsub_vv_nxv16f32_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv16f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv16f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5938,10 +5522,8 @@ define @vfnmsub_vf_nxv16f32_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv16f32( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv16f32( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -5953,10 +5535,8 @@ define @vfnmsub_vf_nxv16f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv16f32( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv16f32( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -5994,10 +5574,8 @@ define @vfnmsub_vf_nxv16f32_neg_splat_unmasked( poison, float %b, i32 0 %vb = shufflevector %elt.head, 
poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv16f32( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv16f32( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6009,10 +5587,8 @@ define @vfnmsub_vf_nxv16f32_neg_splat_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv16f32( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv16f32( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6036,10 +5612,8 @@ define @vfmsub_vv_nxv1f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6077,10 +5651,8 @@ define @vfmsub_vf_nxv1f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv1f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6092,10 +5664,8 @@ define @vfmsub_vf_nxv1f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv1f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6130,11 +5700,9 @@ define @vfnmadd_vv_nxv1f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6144,11 +5712,9 @@ define @vfnmadd_vv_nxv1f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %negb, 
%va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6188,11 +5754,9 @@ define @vfnmadd_vf_nxv1f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6204,11 +5768,9 @@ define @vfnmadd_vf_nxv1f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6248,11 +5810,9 @@ define @vfnmadd_vf_nxv1f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6264,11 +5824,9 @@ define @vfnmadd_vf_nxv1f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6303,11 +5861,9 @@ define @vfnmsub_vv_nxv1f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 true), i32 %evl) + %v = call 
@llvm.vp.fma.nxv1f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6317,11 +5873,9 @@ define @vfnmsub_vv_nxv1f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6359,10 +5913,8 @@ define @vfnmsub_vf_nxv1f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6374,10 +5926,8 @@ define @vfnmsub_vf_nxv1f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6415,10 +5965,8 @@ define @vfnmsub_vf_nxv1f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6430,10 +5978,8 @@ define @vfnmsub_vf_nxv1f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6457,10 +6003,8 @@ define @vfmsub_vv_nxv2f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6498,10 +6042,8 @@ define @vfmsub_vf_nxv2f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 
true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv2f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6513,10 +6055,8 @@ define @vfmsub_vf_nxv2f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv2f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6551,11 +6091,9 @@ define @vfnmadd_vv_nxv2f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6565,11 +6103,9 @@ define @vfnmadd_vv_nxv2f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6609,11 +6145,9 @@ define @vfnmadd_vf_nxv2f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6625,11 +6159,9 @@ define @vfnmadd_vf_nxv2f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6669,11 +6201,9 @@ define @vfnmadd_vf_nxv2f64_neg_splat_unmasked( poison, double %b, i32 
0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6685,11 +6215,9 @@ define @vfnmadd_vf_nxv2f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6724,11 +6252,9 @@ define @vfnmsub_vv_nxv2f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6738,11 +6264,9 @@ define @vfnmsub_vv_nxv2f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6780,10 +6304,8 @@ define @vfnmsub_vf_nxv2f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6795,10 +6317,8 @@ define @vfnmsub_vf_nxv2f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %negva, %vc, splat (i1 true), i32 %evl) 
ret %v } @@ -6836,10 +6356,8 @@ define @vfnmsub_vf_nxv2f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6851,10 +6369,8 @@ define @vfnmsub_vf_nxv2f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6878,10 +6394,8 @@ define @vfmsub_vv_nxv4f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6919,10 +6433,8 @@ define @vfmsub_vf_nxv4f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv4f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6934,10 +6446,8 @@ define @vfmsub_vf_nxv4f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv4f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6972,11 +6482,9 @@ define @vfnmadd_vv_nxv4f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv4f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv4f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6986,11 +6494,9 @@ define @vfnmadd_vv_nxv4f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call 
@llvm.vp.fneg.nxv4f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv4f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7030,11 +6536,9 @@ define @vfnmadd_vf_nxv4f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv4f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv4f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv4f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7046,11 +6550,9 @@ define @vfnmadd_vf_nxv4f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv4f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv4f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv4f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7090,11 +6592,9 @@ define @vfnmadd_vf_nxv4f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv4f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv4f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv4f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7106,11 +6606,9 @@ define @vfnmadd_vf_nxv4f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv4f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv4f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv4f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7145,11 +6643,9 @@ define @vfnmsub_vv_nxv4f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv4f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %va, %negb, %negc, %m, i32 %evl) + %negb = 
call @llvm.vp.fneg.nxv4f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7159,11 +6655,9 @@ define @vfnmsub_vv_nxv4f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv4f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv4f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7201,10 +6695,8 @@ define @vfnmsub_vf_nxv4f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv4f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv4f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -7216,10 +6708,8 @@ define @vfnmsub_vf_nxv4f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv4f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv4f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -7257,10 +6747,8 @@ define @vfnmsub_vf_nxv4f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv4f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv4f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -7272,10 +6760,8 @@ define @vfnmsub_vf_nxv4f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv4f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv4f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -7301,10 +6787,8 @@ define @vfmsub_vv_nxv8f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7342,10 +6826,8 @@ define @vfmsub_vf_nxv8f64_unmasked( % ; CHECK-NEXT: ret 
%elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv8f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7357,10 +6839,8 @@ define @vfmsub_vf_nxv8f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv8f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7398,11 +6878,9 @@ define @vfnmadd_vv_nxv8f64_unmasked( ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv8f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7413,11 +6891,9 @@ define @vfnmadd_vv_nxv8f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv8f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7457,11 +6933,9 @@ define @vfnmadd_vf_nxv8f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv8f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv8f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv8f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7473,11 +6947,9 @@ define @vfnmadd_vf_nxv8f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv8f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv8f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv8f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, 
%negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7517,11 +6989,9 @@ define @vfnmadd_vf_nxv8f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv8f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv8f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv8f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7533,11 +7003,9 @@ define @vfnmadd_vf_nxv8f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv8f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv8f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv8f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7575,11 +7043,9 @@ define @vfnmsub_vv_nxv8f64_unmasked( ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv8f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7590,11 +7056,9 @@ define @vfnmsub_vv_nxv8f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv8f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7632,10 +7096,8 @@ define @vfnmsub_vf_nxv8f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv8f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv8f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -7647,10 +7109,8 @@ define @vfnmsub_vf_nxv8f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv8f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %vb, %negva, %vc, %m, i32 %evl) + 
%negva = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %negva, <vscale x 8 x double> %vc, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x double> %v
 }
@@ -7688,10 +7148,8 @@ define <vscale x 8 x double> @vfnmsub_vf_nxv8f64_neg_splat_unmasked(
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
-  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %negvb = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl)
-  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %negvb, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 %evl)
+  %negvb = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %negvb, <vscale x 8 x double> %vc, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x double> %v
 }
@@ -7703,10 +7161,8 @@ define <vscale x 8 x double> @vfnmsub_vf_nxv8f64_neg_splat_unmasked_commute(
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
-  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %negvb = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl)
-  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %negvb, <vscale x 8 x double> %va, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 %evl)
+  %negvb = call <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %negvb, <vscale x 8 x double> %va, <vscale x 8 x double> %vc, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x double> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll
index 6f1b3986caafe..54855e6152b95 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll
@@ -16,9 +16,7 @@ define <vscale x 1 x half> @vfmacc_vv_nxv1f16(<vscale x 1 x half> %a,
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x half> %c, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x half> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   %u = call <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1> %m, <vscale x 1 x half> %v, <vscale x 1 x half> %c, i32 %evl)
   ret <vscale x 1 x half> %u
 }
@@ -30,10 +28,8 @@ define <vscale x 1 x half> @vfmacc_vv_nxv1f16_unmasked(<vscale x 1 x half> %a, <
 ; CHECK-NEXT: vfmacc.vv v10, v8, v9
 ; CHECK-NEXT: vmv1r.v v8, v10
 ; CHECK-NEXT: ret
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x half> %c, <vscale x 1 x i1> %allones, i32 %evl)
-  %u = call <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1> %allones, <vscale x 1 x half> %v, <vscale x 1 x half> %c, i32 %evl)
+  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x half> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %u = call <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x half> %v, <vscale x 1 x half> %c, i32 %evl)
   ret <vscale x 1 x half> %u
 }
@@ -46,9 +42,7 @@ define <vscale x 1 x half> @vfmacc_vf_nxv1f16(<vscale x 1 x half> %va, half %b,
 ; CHECK-NEXT: ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %c, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   %u = call <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1> %m, <vscale x 1 x half> %v, <vscale x 1 x half> %c, i32 %evl)
   ret <vscale x 1 x half> %u
 }
@@ -62,9 +56,7 @@ define <vscale x 1 x half> @vfmacc_vf_nxv1f16_commute(<vscale x 1 x half> %va, h
 ; CHECK-NEXT: ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x half> %va, <vscale x 1 x half> %c, <vscale x 1 x i1> %allones, i32 %evl)
+  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x half> %va, <vscale x 1 x half> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   %u = call <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1> %m, <vscale x 1 x half> %v, <vscale x 1 x half> %c, i32 %evl)
   ret <vscale x 1 x half> %u
 }
@@ -78,10 +70,8 @@ define <vscale x 1 x half> @vfmacc_vf_nxv1f16_unmasked(<vscale x 1 x half> %va,
 ; CHECK-NEXT: ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones =
shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f16( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -92,9 +82,7 @@ define @vfmacc_vv_nxv1f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f16( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -108,9 +96,7 @@ define @vfmacc_vf_nxv1f16_ta( %va, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f16( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -124,9 +110,7 @@ define @vfmacc_vf_nxv1f16_commute_ta( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f16( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -143,9 +127,7 @@ define @vfmacc_vv_nxv2f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f16( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -157,10 +139,8 @@ define @vfmacc_vv_nxv2f16_unmasked( %a, < ; CHECK-NEXT: vfmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f16( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -173,9 +153,7 @@ define @vfmacc_vf_nxv2f16( %va, half %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -189,9 +167,7 @@ define @vfmacc_vf_nxv2f16_commute( %va, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f16( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -205,10 +181,8 @@ define 
@vfmacc_vf_nxv2f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -219,9 +193,7 @@ define @vfmacc_vv_nxv2f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f16( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -235,9 +207,7 @@ define @vfmacc_vf_nxv2f16_ta( %va, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -251,9 +221,7 @@ define @vfmacc_vf_nxv2f16_commute_ta( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f16( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -270,9 +238,7 @@ define @vfmacc_vv_nxv4f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f16( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -284,10 +250,8 @@ define @vfmacc_vv_nxv4f16_unmasked( %a, < ; CHECK-NEXT: vfmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f16( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -300,9 +264,7 @@ define @vfmacc_vf_nxv4f16( %va, half %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -316,9 +278,7 @@ define @vfmacc_vf_nxv4f16_commute( %va, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f16( 
%vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -332,10 +292,8 @@ define @vfmacc_vf_nxv4f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -346,9 +304,7 @@ define @vfmacc_vv_nxv4f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f16( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -362,9 +318,7 @@ define @vfmacc_vf_nxv4f16_ta( %va, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -378,9 +332,7 @@ define @vfmacc_vf_nxv4f16_commute_ta( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f16( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -397,9 +349,7 @@ define @vfmacc_vv_nxv8f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f16( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -411,10 +361,8 @@ define @vfmacc_vv_nxv8f16_unmasked( %a, < ; CHECK-NEXT: vfmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f16( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -427,9 +375,7 @@ define @vfmacc_vf_nxv8f16( %va, half %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -443,9 +389,7 @@ define @vfmacc_vf_nxv8f16_commute( %va, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, 
i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f16( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -459,10 +403,8 @@ define @vfmacc_vf_nxv8f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -473,9 +415,7 @@ define @vfmacc_vv_nxv8f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f16( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -489,9 +429,7 @@ define @vfmacc_vf_nxv8f16_ta( %va, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -505,9 +443,7 @@ define @vfmacc_vf_nxv8f16_commute_ta( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f16( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -524,9 +460,7 @@ define @vfmacc_vv_nxv16f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f16( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -538,10 +472,8 @@ define @vfmacc_vv_nxv16f16_unmasked( %a ; CHECK-NEXT: vfmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f16( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -554,9 +486,7 @@ define @vfmacc_vf_nxv16f16( %va, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %c, splat 
(i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -570,9 +500,7 @@ define @vfmacc_vf_nxv16f16_commute( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f16( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -586,10 +514,8 @@ define @vfmacc_vf_nxv16f16_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -600,9 +526,7 @@ define @vfmacc_vv_nxv16f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f16( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -616,9 +540,7 @@ define @vfmacc_vf_nxv16f16_ta( %va, hal ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -632,9 +554,7 @@ define @vfmacc_vf_nxv16f16_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f16( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -652,9 +572,7 @@ define @vfmacc_vv_nxv32f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv32f16( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -667,10 +585,8 @@ define @vfmacc_vv_nxv32f16_unmasked( %a ; CHECK-NEXT: vfmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv32f16( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -683,9 +599,7 @@ define @vfmacc_vf_nxv32f16( %va, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = 
insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -699,9 +613,7 @@ define @vfmacc_vf_nxv32f16_commute( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv32f16( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -715,10 +627,8 @@ define @vfmacc_vf_nxv32f16_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -730,9 +640,7 @@ define @vfmacc_vv_nxv32f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv32f16( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -746,9 +654,7 @@ define @vfmacc_vf_nxv32f16_ta( %va, hal ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -762,9 +668,7 @@ define @vfmacc_vf_nxv32f16_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv32f16( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -781,9 +685,7 @@ define @vfmacc_vv_nxv1f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f32( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -795,10 +697,8 @@ define @vfmacc_vv_nxv1f32_unmasked( %a, ; CHECK-NEXT: vfmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f32( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( splat (i1 -1), %v, %c, i32 
%evl) ret %u } @@ -811,9 +711,7 @@ define @vfmacc_vf_nxv1f32( %va, float % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -827,9 +725,7 @@ define @vfmacc_vf_nxv1f32_commute( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f32( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -843,10 +739,8 @@ define @vfmacc_vf_nxv1f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -857,9 +751,7 @@ define @vfmacc_vv_nxv1f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f32( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -873,9 +765,7 @@ define @vfmacc_vf_nxv1f32_ta( %va, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -889,9 +779,7 @@ define @vfmacc_vf_nxv1f32_commute_ta( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f32( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -908,9 +796,7 @@ define @vfmacc_vv_nxv2f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f32( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -922,10 +808,8 @@ define @vfmacc_vv_nxv2f32_unmasked( %a, ; CHECK-NEXT: vfmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f32( %a, %b, %c, %allones, i32 %evl) - 
%u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -938,9 +822,7 @@ define @vfmacc_vf_nxv2f32( %va, float % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -954,9 +836,7 @@ define @vfmacc_vf_nxv2f32_commute( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f32( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -970,10 +850,8 @@ define @vfmacc_vf_nxv2f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -984,9 +862,7 @@ define @vfmacc_vv_nxv2f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f32( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1000,9 +876,7 @@ define @vfmacc_vf_nxv2f32_ta( %va, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1016,9 +890,7 @@ define @vfmacc_vf_nxv2f32_commute_ta( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f32( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1035,9 +907,7 @@ define @vfmacc_vv_nxv4f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f32( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1049,10 +919,8 @@ define @vfmacc_vv_nxv4f32_unmasked( %a, ; CHECK-NEXT: vfmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v 
v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f32( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1065,9 +933,7 @@ define @vfmacc_vf_nxv4f32( %va, float % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1081,9 +947,7 @@ define @vfmacc_vf_nxv4f32_commute( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f32( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1097,10 +961,8 @@ define @vfmacc_vf_nxv4f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1111,9 +973,7 @@ define @vfmacc_vv_nxv4f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f32( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1127,9 +987,7 @@ define @vfmacc_vf_nxv4f32_ta( %va, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1143,9 +1001,7 @@ define @vfmacc_vf_nxv4f32_commute_ta( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f32( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1162,9 +1018,7 @@ define @vfmacc_vv_nxv8f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f32( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %b, %c, splat 
(i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1176,10 +1030,8 @@ define @vfmacc_vv_nxv8f32_unmasked( %a, ; CHECK-NEXT: vfmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f32( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1192,9 +1044,7 @@ define @vfmacc_vf_nxv8f32( %va, float % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1208,9 +1058,7 @@ define @vfmacc_vf_nxv8f32_commute( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f32( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1224,10 +1072,8 @@ define @vfmacc_vf_nxv8f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1238,9 +1084,7 @@ define @vfmacc_vv_nxv8f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f32( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1254,9 +1098,7 @@ define @vfmacc_vf_nxv8f32_ta( %va, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1270,9 +1112,7 @@ define @vfmacc_vf_nxv8f32_commute_ta( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f32( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1290,9 +1130,7 @@ define 
@vfmacc_vv_nxv16f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f32( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1305,10 +1143,8 @@ define @vfmacc_vv_nxv16f32_unmasked( ; CHECK-NEXT: vfmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f32( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1321,9 +1157,7 @@ define @vfmacc_vf_nxv16f32( %va, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1337,9 +1171,7 @@ define @vfmacc_vf_nxv16f32_commute( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f32( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1353,10 +1185,8 @@ define @vfmacc_vf_nxv16f32_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1368,9 +1198,7 @@ define @vfmacc_vv_nxv16f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f32( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1384,9 +1212,7 @@ define @vfmacc_vf_nxv16f32_ta( %va, f ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1400,9 +1226,7 @@ define @vfmacc_vf_nxv16f32_commute_ta( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv16f32( %vb, %va, %c, 
%allones, i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1419,9 +1243,7 @@ define @vfmacc_vv_nxv1f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f64( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1433,10 +1255,8 @@ define @vfmacc_vv_nxv1f64_unmasked( % ; CHECK-NEXT: vfmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f64( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1449,9 +1269,7 @@ define @vfmacc_vf_nxv1f64( %va, doubl ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1465,9 +1283,7 @@ define @vfmacc_vf_nxv1f64_commute( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f64( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1481,10 +1297,8 @@ define @vfmacc_vf_nxv1f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1495,9 +1309,7 @@ define @vfmacc_vv_nxv1f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f64( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1511,9 +1323,7 @@ define @vfmacc_vf_nxv1f64_ta( %va, do ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1527,9 +1337,7 @@ define @vfmacc_vf_nxv1f64_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 
%vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv1f64( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1546,9 +1354,7 @@ define @vfmacc_vv_nxv2f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f64( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1560,10 +1366,8 @@ define @vfmacc_vv_nxv2f64_unmasked( % ; CHECK-NEXT: vfmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f64( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1576,9 +1380,7 @@ define @vfmacc_vf_nxv2f64( %va, doubl ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1592,9 +1394,7 @@ define @vfmacc_vf_nxv2f64_commute( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f64( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1608,10 +1408,8 @@ define @vfmacc_vf_nxv2f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1622,9 +1420,7 @@ define @vfmacc_vv_nxv2f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f64( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1638,9 +1434,7 @@ define @vfmacc_vf_nxv2f64_ta( %va, do ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %va, %vb, %c, splat (i1 -1), 
i32 %evl) %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1654,9 +1448,7 @@ define @vfmacc_vf_nxv2f64_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv2f64( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1673,9 +1465,7 @@ define @vfmacc_vv_nxv4f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f64( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1687,10 +1477,8 @@ define @vfmacc_vv_nxv4f64_unmasked( % ; CHECK-NEXT: vfmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f64( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1703,9 +1491,7 @@ define @vfmacc_vf_nxv4f64( %va, doubl ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1719,9 +1505,7 @@ define @vfmacc_vf_nxv4f64_commute( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f64( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1735,10 +1519,8 @@ define @vfmacc_vf_nxv4f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1749,9 +1531,7 @@ define @vfmacc_vv_nxv4f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f64( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1765,9 +1545,7 @@ define @vfmacc_vf_nxv4f64_ta( %va, do ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement 
poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1781,9 +1559,7 @@ define @vfmacc_vf_nxv4f64_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv4f64( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1801,9 +1577,7 @@ define @vfmacc_vv_nxv8f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f64( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -1816,10 +1590,8 @@ define @vfmacc_vv_nxv8f64_unmasked( % ; CHECK-NEXT: vfmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f64( %a, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1832,9 +1604,7 @@ define @vfmacc_vf_nxv8f64( %va, doubl ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -1848,9 +1618,7 @@ define @vfmacc_vf_nxv8f64_commute( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f64( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -1864,10 +1632,8 @@ define @vfmacc_vf_nxv8f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1879,9 +1645,7 @@ define @vfmacc_vv_nxv8f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f64( %a, %b, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ 
-1895,9 +1659,7 @@ define @vfmacc_vf_nxv8f64_ta( %va, do ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %va, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -1911,9 +1673,7 @@ define @vfmacc_vf_nxv8f64_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %v = call @llvm.vp.fma.nxv8f64( %vb, %va, %c, %allones, i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %va, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) ret %u } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll index 72101d62567b4..7556b3ace5c6c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-vp.ll @@ -48,9 +48,7 @@ define @vfmax_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv1f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -94,9 +92,7 @@ define @vfmax_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv2f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -140,9 +136,7 @@ define @vfmax_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv4f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -186,9 +180,7 @@ define @vfmax_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv8f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -232,9 +224,7 @@ define @vfmax_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv16f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -311,8 +301,6 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: 
vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -320,6 +308,8 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -348,9 +338,7 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -372,9 +360,7 @@ define @vfmax_vv_nxv1f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv1f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -396,9 +382,7 @@ define @vfmax_vv_nxv2f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv2f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv2f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -420,9 +404,7 @@ define @vfmax_vv_nxv4f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv4f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv4f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -444,9 +426,7 @@ define @vfmax_vv_nxv8f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv8f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv8f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -468,9 +448,7 @@ define @vfmax_vv_nxv16f32_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv16f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv16f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -492,9 +470,7 @@ define @vfmax_vv_nxv1f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv1f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv1f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -516,9 +492,7 @@ define @vfmax_vv_nxv2f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = 
shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv2f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv2f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -540,9 +514,7 @@ define @vfmax_vv_nxv4f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv4f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv4f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -564,8 +536,6 @@ define @vfmax_vv_nxv8f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.maxnum.nxv8f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.maxnum.nxv8f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll index 15fa24a35b7d5..755c665376128 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-vp.ll @@ -48,9 +48,7 @@ define @vfmin_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv1f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -94,9 +92,7 @@ define @vfmin_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv2f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -140,9 +136,7 @@ define @vfmin_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv4f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -186,9 +180,7 @@ define @vfmin_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv8f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -232,9 +224,7 @@ define @vfmin_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv16f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -311,8 +301,6 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; 
ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -320,6 +308,8 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -348,9 +338,7 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -372,9 +360,7 @@ define @vfmin_vv_nxv1f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv1f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -396,9 +382,7 @@ define @vfmin_vv_nxv2f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv2f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv2f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -420,9 +404,7 @@ define @vfmin_vv_nxv4f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv4f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv4f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -444,9 +426,7 @@ define @vfmin_vv_nxv8f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv8f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv8f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -468,9 +448,7 @@ define @vfmin_vv_nxv16f32_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv16f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv16f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -492,9 +470,7 @@ define @vfmin_vv_nxv1f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv1f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv1f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -516,9 +492,7 @@ define @vfmin_vv_nxv2f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, 
i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv2f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv2f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -540,9 +514,7 @@ define @vfmin_vv_nxv4f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv4f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv4f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -564,8 +536,6 @@ define @vfmin_vv_nxv8f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.minnum.nxv8f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.minnum.nxv8f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll index 1ce21a1c46331..f1d5562131b8e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll @@ -16,10 +16,8 @@ define @vmfsac_vv_nxv1f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -31,11 +29,9 @@ define @vmfsac_vv_nxv1f16_unmasked( %a, < ; CHECK-NEXT: vfmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -48,10 +44,8 @@ define @vmfsac_vf_nxv1f16( %a, half %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -65,10 +59,8 @@ define @vmfsac_vf_nxv1f16_commute( %a, ha ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %a, %negc, splat (i1 -1), i32 
%evl) %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -82,11 +74,9 @@ define @vmfsac_vf_nxv1f16_unmasked( %a, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -97,10 +87,8 @@ define @vmfsac_vv_nxv1f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -114,10 +102,8 @@ define @vmfsac_vf_nxv1f16_ta( %a, half %b ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -131,10 +117,8 @@ define @vmfsac_vf_nxv1f16_commute_ta( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -151,10 +135,8 @@ define @vmfsac_vv_nxv2f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -166,11 +148,9 @@ define @vmfsac_vv_nxv2f16_unmasked( %a, < ; CHECK-NEXT: vfmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %b, %negc, splat (i1 -1), 
i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -183,10 +163,8 @@ define @vmfsac_vf_nxv2f16( %a, half %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -200,10 +178,8 @@ define @vmfsac_vf_nxv2f16_commute( %a, ha ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -217,11 +193,9 @@ define @vmfsac_vf_nxv2f16_unmasked( %a, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -232,10 +206,8 @@ define @vmfsac_vv_nxv2f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -249,10 +221,8 @@ define @vmfsac_vf_nxv2f16_ta( %a, half %b ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -266,10 +236,8 @@ define @vmfsac_vf_nxv2f16_commute_ta( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %vb, %a, 
%negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -286,10 +254,8 @@ define @vmfsac_vv_nxv4f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -301,11 +267,9 @@ define @vmfsac_vv_nxv4f16_unmasked( %a, < ; CHECK-NEXT: vfmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -318,10 +282,8 @@ define @vmfsac_vf_nxv4f16( %a, half %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -335,10 +297,8 @@ define @vmfsac_vf_nxv4f16_commute( %a, ha ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -352,11 +312,9 @@ define @vmfsac_vf_nxv4f16_unmasked( %a, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -367,10 +325,8 @@ define @vmfsac_vv_nxv4f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call 
@llvm.vp.fma.nxv4f16( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -384,10 +340,8 @@ define @vmfsac_vf_nxv4f16_ta( %a, half %b ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -401,10 +355,8 @@ define @vmfsac_vf_nxv4f16_commute_ta( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -421,10 +373,8 @@ define @vmfsac_vv_nxv8f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -436,11 +386,9 @@ define @vmfsac_vv_nxv8f16_unmasked( %a, < ; CHECK-NEXT: vfmsac.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -453,10 +401,8 @@ define @vmfsac_vf_nxv8f16( %a, half %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -470,10 +416,8 @@ define @vmfsac_vf_nxv8f16_commute( %a, ha ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, 
poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -487,11 +431,9 @@ define @vmfsac_vf_nxv8f16_unmasked( %a, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -502,10 +444,8 @@ define @vmfsac_vv_nxv8f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -519,10 +459,8 @@ define @vmfsac_vf_nxv8f16_ta( %a, half %b ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -536,10 +474,8 @@ define @vmfsac_vf_nxv8f16_commute_ta( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -556,10 +492,8 @@ define @vmfsac_vv_nxv16f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -571,11 +505,9 @@ define @vmfsac_vv_nxv16f16_unmasked( %a ; CHECK-NEXT: vfmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call 
@llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -588,10 +520,8 @@ define @vmfsac_vf_nxv16f16( %a, half %b ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -605,10 +535,8 @@ define @vmfsac_vf_nxv16f16_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -622,11 +550,9 @@ define @vmfsac_vf_nxv16f16_unmasked( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -637,10 +563,8 @@ define @vmfsac_vv_nxv16f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -654,10 +578,8 @@ define @vmfsac_vf_nxv16f16_ta( %a, half ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -671,10 +593,8 @@ define @vmfsac_vf_nxv16f16_commute_ta( ; 
CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -692,10 +612,8 @@ define @vmfsac_vv_nxv32f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -708,11 +626,9 @@ define @vmfsac_vv_nxv32f16_unmasked( %a ; CHECK-NEXT: vfmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -725,10 +641,8 @@ define @vmfsac_vf_nxv32f16( %a, half %b ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -742,10 +656,8 @@ define @vmfsac_vf_nxv32f16_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -759,11 +671,9 @@ define @vmfsac_vf_nxv32f16_unmasked( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call 
@llvm.vp.fma.nxv32f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -775,10 +685,8 @@ define @vmfsac_vv_nxv32f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -792,10 +700,8 @@ define @vmfsac_vf_nxv32f16_ta( %a, half ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -809,10 +715,8 @@ define @vmfsac_vf_nxv32f16_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -829,10 +733,8 @@ define @vmfsac_vv_nxv1f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -844,11 +746,9 @@ define @vmfsac_vv_nxv1f32_unmasked( %a, ; CHECK-NEXT: vfmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -861,10 +761,8 @@ define @vmfsac_vf_nxv1f32( %a, float %b ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %vb, 
%negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -878,10 +776,8 @@ define @vmfsac_vf_nxv1f32_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -895,11 +791,9 @@ define @vmfsac_vf_nxv1f32_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -910,10 +804,8 @@ define @vmfsac_vv_nxv1f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -927,10 +819,8 @@ define @vmfsac_vf_nxv1f32_ta( %a, float ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -944,10 +834,8 @@ define @vmfsac_vf_nxv1f32_commute_ta( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -964,10 +852,8 @@ define @vmfsac_vv_nxv2f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %b, %negc, splat (i1 -1), 
i32 %evl) %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -979,11 +865,9 @@ define @vmfsac_vv_nxv2f32_unmasked( %a, ; CHECK-NEXT: vfmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -996,10 +880,8 @@ define @vmfsac_vf_nxv2f32( %a, float %b ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1013,10 +895,8 @@ define @vmfsac_vf_nxv2f32_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1030,11 +910,9 @@ define @vmfsac_vf_nxv2f32_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1045,10 +923,8 @@ define @vmfsac_vv_nxv2f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1062,10 +938,8 @@ define @vmfsac_vf_nxv2f32_ta( %a, float ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call 
@llvm.vp.fma.nxv2f32( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1079,10 +953,8 @@ define @vmfsac_vf_nxv2f32_commute_ta( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1099,10 +971,8 @@ define @vmfsac_vv_nxv4f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1114,11 +984,9 @@ define @vmfsac_vv_nxv4f32_unmasked( %a, ; CHECK-NEXT: vfmsac.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1131,10 +999,8 @@ define @vmfsac_vf_nxv4f32( %a, float %b ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1148,10 +1014,8 @@ define @vmfsac_vf_nxv4f32_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1165,11 +1029,9 @@ define @vmfsac_vf_nxv4f32_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector 
%splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1180,10 +1042,8 @@ define @vmfsac_vv_nxv4f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1197,10 +1057,8 @@ define @vmfsac_vf_nxv4f32_ta( %a, float ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1214,10 +1072,8 @@ define @vmfsac_vf_nxv4f32_commute_ta( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1234,10 +1090,8 @@ define @vmfsac_vv_nxv8f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1249,11 +1103,9 @@ define @vmfsac_vv_nxv8f32_unmasked( %a, ; CHECK-NEXT: vfmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1266,10 +1118,8 @@ define @vmfsac_vf_nxv8f32( %a, float %b ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 
- %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1283,10 +1133,8 @@ define @vmfsac_vf_nxv8f32_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1300,11 +1148,9 @@ define @vmfsac_vf_nxv8f32_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1315,10 +1161,8 @@ define @vmfsac_vv_nxv8f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1332,10 +1176,8 @@ define @vmfsac_vf_nxv8f32_ta( %a, float ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1349,10 +1191,8 @@ define @vmfsac_vf_nxv8f32_commute_ta( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1370,10 +1210,8 @@ define @vmfsac_vv_nxv16f32( %a, poison, i1 -1, i32 0 - 
%allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1386,11 +1224,9 @@ define @vmfsac_vv_nxv16f32_unmasked( ; CHECK-NEXT: vfmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1403,10 +1239,8 @@ define @vmfsac_vf_nxv16f32( %a, float ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1420,10 +1254,8 @@ define @vmfsac_vf_nxv16f32_commute( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1437,11 +1269,9 @@ define @vmfsac_vf_nxv16f32_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1453,10 +1283,8 @@ define @vmfsac_vv_nxv16f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1470,10 +1298,8 @@ 
define @vmfsac_vf_nxv16f32_ta( %a, fl ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1487,10 +1313,8 @@ define @vmfsac_vf_nxv16f32_commute_ta( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1507,10 +1331,8 @@ define @vmfsac_vv_nxv1f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1522,11 +1344,9 @@ define @vmfsac_vv_nxv1f64_unmasked( % ; CHECK-NEXT: vfmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1539,10 +1359,8 @@ define @vmfsac_vf_nxv1f64( %a, double ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1556,10 +1374,8 @@ define @vmfsac_vf_nxv1f64_commute( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call 
@llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1573,11 +1389,9 @@ define @vmfsac_vf_nxv1f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1588,10 +1402,8 @@ define @vmfsac_vv_nxv1f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1605,10 +1417,8 @@ define @vmfsac_vf_nxv1f64_ta( %a, dou ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1622,10 +1432,8 @@ define @vmfsac_vf_nxv1f64_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1642,10 +1450,8 @@ define @vmfsac_vv_nxv2f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1657,11 +1463,9 @@ define @vmfsac_vv_nxv2f64_unmasked( % ; CHECK-NEXT: vfmsac.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %b, %negc, splat (i1 -1), i32 %evl) 
+ %u = call @llvm.vp.merge.nxv2f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1674,10 +1478,8 @@ define @vmfsac_vf_nxv2f64( %a, double ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1691,10 +1493,8 @@ define @vmfsac_vf_nxv2f64_commute( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1708,11 +1508,9 @@ define @vmfsac_vf_nxv2f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1723,10 +1521,8 @@ define @vmfsac_vv_nxv2f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1740,10 +1536,8 @@ define @vmfsac_vf_nxv2f64_ta( %a, dou ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1757,10 +1551,8 @@ define @vmfsac_vf_nxv2f64_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %vb, %a, %negc, 
%allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1777,10 +1569,8 @@ define @vmfsac_vv_nxv4f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1792,11 +1582,9 @@ define @vmfsac_vv_nxv4f64_unmasked( % ; CHECK-NEXT: vfmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1809,10 +1597,8 @@ define @vmfsac_vf_nxv4f64( %a, double ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1826,10 +1612,8 @@ define @vmfsac_vf_nxv4f64_commute( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1843,11 +1627,9 @@ define @vmfsac_vf_nxv4f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1858,10 +1640,8 @@ define @vmfsac_vv_nxv4f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call 
@llvm.vp.fma.nxv4f64( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1875,10 +1655,8 @@ define @vmfsac_vf_nxv4f64_ta( %a, dou ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1892,10 +1670,8 @@ define @vmfsac_vf_nxv4f64_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1913,10 +1689,8 @@ define @vmfsac_vv_nxv8f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -1929,11 +1703,9 @@ define @vmfsac_vv_nxv8f64_unmasked( % ; CHECK-NEXT: vfmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %a, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1946,10 +1718,8 @@ define @vmfsac_vf_nxv8f64( %a, double ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -1963,10 +1733,8 @@ define @vmfsac_vf_nxv8f64_commute( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, 
poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -1980,11 +1748,9 @@ define @vmfsac_vf_nxv8f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %a, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1996,10 +1762,8 @@ define @vmfsac_vv_nxv8f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %a, %b, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -2013,10 +1777,8 @@ define @vmfsac_vf_nxv8f64_ta( %a, dou ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %a, %vb, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %a, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -2030,10 +1792,8 @@ define @vmfsac_vf_nxv8f64_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %vb, %a, %negc, %allones, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %a, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) ret %u } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll index 518c1eacf401f..f7f5e88bf8712 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll @@ -564,9 +564,7 @@ define @vfmul_vv_mask_nxv8f32( %va, poison, float 0.0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %vb, %splat + %vs = select %mask, %vb, splat (float 0.0) %vc = fmul %va, %vs ret %vc } @@ -579,11 +577,9 @@ define @vfmul_vf_mask_nxv8f32( %va, flo ; CHECK-NEXT: vfmerge.vfm v12, v12, fa0, v0 ; CHECK-NEXT: vfmul.vv v8, v8, v12 ; CHECK-NEXT: ret - %head0 = insertelement poison, float 0.0, i32 0 - %splat0 = shufflevector %head0, poison, 
zeroinitializer %head1 = insertelement poison, float %b, i32 0 %splat1 = shufflevector %head1, poison, zeroinitializer - %vs = select %mask, %splat1, %splat0 + %vs = select %mask, %splat1, splat (float 0.0) %vc = fmul %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll index bb9d3cfed3001..30d5919238cfa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll @@ -48,9 +48,7 @@ define @vfmul_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv1f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv1f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -104,9 +102,7 @@ define @vfmul_vf_nxv1f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv1f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -150,9 +146,7 @@ define @vfmul_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv2f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv2f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -206,9 +200,7 @@ define @vfmul_vf_nxv2f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv2f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -252,9 +244,7 @@ define @vfmul_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv4f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv4f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -308,9 +298,7 @@ define @vfmul_vf_nxv4f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv4f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -354,9 +342,7 @@ define @vfmul_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv8f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv8f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -410,9 +396,7 @@ define @vfmul_vf_nxv8f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - 
%head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv8f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -456,9 +440,7 @@ define @vfmul_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv16f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv16f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -512,9 +494,7 @@ define @vfmul_vf_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv16f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -591,8 +571,6 @@ define @vfmul_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -600,6 +578,8 @@ define @vfmul_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -628,9 +608,7 @@ define @vfmul_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv32f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv32f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -696,8 +674,6 @@ define @vfmul_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -705,6 +681,8 @@ define @vfmul_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -726,9 +704,7 @@ define @vfmul_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -750,9 +726,7 @@ define @vfmul_vv_nxv1f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, 
v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv1f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv1f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -776,9 +750,7 @@ define @vfmul_vf_nxv1f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv1f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -800,9 +772,7 @@ define @vfmul_vv_nxv2f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv2f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv2f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -826,9 +796,7 @@ define @vfmul_vf_nxv2f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv2f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv2f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -850,9 +818,7 @@ define @vfmul_vv_nxv4f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv4f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv4f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -876,9 +842,7 @@ define @vfmul_vf_nxv4f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv4f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv4f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -900,9 +864,7 @@ define @vfmul_vv_nxv8f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv8f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv8f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -926,9 +888,7 @@ define @vfmul_vf_nxv8f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv8f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv8f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -950,9 +910,7 @@ define @vfmul_vv_nxv16f32_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv16f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv16f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -976,9 +934,7 
@@ define @vfmul_vf_nxv16f32_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv16f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv16f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1000,9 +956,7 @@ define @vfmul_vv_nxv1f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv1f64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv1f64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1026,9 +980,7 @@ define @vfmul_vf_nxv1f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv1f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv1f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1050,9 +1002,7 @@ define @vfmul_vv_nxv2f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv2f64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv2f64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1076,9 +1026,7 @@ define @vfmul_vf_nxv2f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv2f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv2f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1100,9 +1048,7 @@ define @vfmul_vv_nxv4f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv4f64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv4f64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1126,9 +1072,7 @@ define @vfmul_vf_nxv4f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv4f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv4f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1162,9 +1106,7 @@ define @vfmul_vv_nxv8f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfmul.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmul.nxv8f64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv8f64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1188,8 +1130,6 @@ define @vfmul_vf_nxv8f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = 
call @llvm.vp.fmul.nxv8f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fmul.nxv8f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll index 582043ffb903f..292f27794f378 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll @@ -23,9 +23,7 @@ define @vfma_vv_nxv1f16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv1f16( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -61,9 +59,7 @@ define @vfma_vf_nxv1f16_unmasked( %va, ha ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv1f16( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -75,9 +71,7 @@ define @vfma_vf_nxv1f16_unmasked_commute( ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv1f16( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -100,9 +94,7 @@ define @vfma_vv_nxv2f16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv2f16( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -138,9 +130,7 @@ define @vfma_vf_nxv2f16_unmasked( %va, ha ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv2f16( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -152,9 +142,7 @@ define @vfma_vf_nxv2f16_unmasked_commute( ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv2f16( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -177,9 +165,7 @@ define @vfma_vv_nxv4f16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv4f16( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f16( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -215,9 +201,7 @@ define @vfma_vf_nxv4f16_unmasked( %va, ha ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv4f16( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f16( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -229,9 +213,7 @@ define @vfma_vf_nxv4f16_unmasked_commute( ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = 
shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv4f16( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f16( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -254,9 +236,7 @@ define @vfma_vv_nxv8f16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv8f16( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f16( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -292,9 +272,7 @@ define @vfma_vf_nxv8f16_unmasked( %va, ha ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv8f16( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f16( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -306,9 +284,7 @@ define @vfma_vf_nxv8f16_unmasked_commute( ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv8f16( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f16( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -331,9 +307,7 @@ define @vfma_vv_nxv16f16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv16f16( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f16( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -369,9 +343,7 @@ define @vfma_vf_nxv16f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv16f16( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f16( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -383,9 +355,7 @@ define @vfma_vf_nxv16f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv16f16( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f16( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -410,9 +380,7 @@ define @vfma_vv_nxv32f16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv32f16( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv32f16( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -448,9 +416,7 @@ define @vfma_vf_nxv32f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv32f16( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv32f16( %va, %vb, %vc, splat (i1 
true), i32 %evl) ret %v } @@ -462,9 +428,7 @@ define @vfma_vf_nxv32f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv32f16( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv32f16( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -487,9 +451,7 @@ define @vfma_vv_nxv1f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv1f32( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f32( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -525,9 +487,7 @@ define @vfma_vf_nxv1f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv1f32( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f32( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -539,9 +499,7 @@ define @vfma_vf_nxv1f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv1f32( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f32( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -564,9 +522,7 @@ define @vfma_vv_nxv2f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv2f32( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f32( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -602,9 +558,7 @@ define @vfma_vf_nxv2f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv2f32( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f32( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -616,9 +570,7 @@ define @vfma_vf_nxv2f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv2f32( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f32( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -641,9 +593,7 @@ define @vfma_vv_nxv4f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv4f32( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f32( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -679,9 +629,7 @@ define @vfma_vf_nxv4f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 
0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv4f32( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f32( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -693,9 +641,7 @@ define @vfma_vf_nxv4f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv4f32( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f32( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -718,9 +664,7 @@ define @vfma_vv_nxv8f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv8f32( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f32( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -756,9 +700,7 @@ define @vfma_vf_nxv8f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv8f32( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f32( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -770,9 +712,7 @@ define @vfma_vf_nxv8f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv8f32( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f32( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -797,9 +737,7 @@ define @vfma_vv_nxv16f32_unmasked( %v ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv16f32( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -835,9 +773,7 @@ define @vfma_vf_nxv16f32_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv16f32( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -849,9 +785,7 @@ define @vfma_vf_nxv16f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv16f32( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -874,9 +808,7 @@ define @vfma_vv_nxv1f64_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv1f64( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -912,9 +844,7 @@ define 
@vfma_vf_nxv1f64_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv1f64( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -926,9 +856,7 @@ define @vfma_vf_nxv1f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv1f64( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -951,9 +879,7 @@ define @vfma_vv_nxv2f64_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv2f64( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -989,9 +915,7 @@ define @vfma_vf_nxv2f64_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv2f64( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -1003,9 +927,7 @@ define @vfma_vf_nxv2f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv2f64( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -1028,9 +950,7 @@ define @vfma_vv_nxv4f64_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv4f64( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -1066,9 +986,7 @@ define @vfma_vf_nxv4f64_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv4f64( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -1080,9 +998,7 @@ define @vfma_vf_nxv4f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv4f64( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -1107,9 +1023,7 @@ define @vfma_vv_nxv7f64_unmasked( %va ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, 
zeroinitializer - %v = call @llvm.vp.fmuladd.nxv7f64( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv7f64( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -1134,9 +1048,7 @@ define @vfma_vv_nxv8f64_unmasked( %va ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv8f64( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -1172,9 +1084,7 @@ define @vfma_vf_nxv8f64_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv8f64( %va, %vb, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %va, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -1186,9 +1096,7 @@ define @vfma_vf_nxv8f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv8f64( %vb, %va, %vc, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %vb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -1359,9 +1267,7 @@ define @vfma_vv_nxv16f64_unmasked( ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fmuladd.nxv16f64( %va, %b, %c, %m, i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f64( %va, %b, %c, splat (i1 true), i32 %evl) ret %v } @@ -1385,10 +1291,8 @@ define @vfmsub_vv_nxv1f16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -1426,10 +1330,8 @@ define @vfmsub_vf_nxv1f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv1f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -1441,10 +1343,8 @@ define @vfmsub_vf_nxv1f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv1f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -1479,11 +1379,9 @@ define @vfnmadd_vv_nxv1f16_unmasked( %va, ; CHECK-NEXT: vsetvli 
zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -1493,11 +1391,9 @@ define @vfnmadd_vv_nxv1f16_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -1537,11 +1433,9 @@ define @vfnmadd_vf_nxv1f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f16( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -1553,11 +1447,9 @@ define @vfnmadd_vf_nxv1f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f16( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -1597,11 +1489,9 @@ define @vfnmadd_vf_nxv1f16_neg_splat_unmasked( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f16( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f16( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -1613,11 +1503,9 @@ define @vfnmadd_vf_nxv1f16_neg_splat_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f16( %vb, %m, i32 %evl) - %negvc = call 
@llvm.vp.fneg.nxv1f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f16( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -1652,11 +1540,9 @@ define @vfnmsub_vv_nxv1f16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -1666,11 +1552,9 @@ define @vfnmsub_vv_nxv1f16_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f16( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -1708,10 +1592,8 @@ define @vfnmsub_vf_nxv1f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f16( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -1723,10 +1605,8 @@ define @vfnmsub_vf_nxv1f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f16( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -1764,10 +1644,8 @@ define @vfnmsub_vf_nxv1f16_neg_splat_unmasked( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f16( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f16( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -1779,10 +1657,8 @@ define @vfnmsub_vf_nxv1f16_neg_splat_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f16( %vb, %m, i32 
%evl) - %v = call @llvm.vp.fmuladd.nxv1f16( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f16( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f16( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -1806,10 +1682,8 @@ define @vfmsub_vv_nxv2f16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -1847,10 +1721,8 @@ define @vfmsub_vf_nxv2f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv2f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -1862,10 +1734,8 @@ define @vfmsub_vf_nxv2f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv2f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -1900,11 +1770,9 @@ define @vfnmadd_vv_nxv2f16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f16( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -1914,11 +1782,9 @@ define @vfnmadd_vv_nxv2f16_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f16( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -1958,11 +1824,9 @@ define @vfnmadd_vf_nxv2f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f16( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f16( %vc, %m, i32 %evl) - %v = call 
@llvm.vp.fmuladd.nxv2f16( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f16( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -1974,11 +1838,9 @@ define @vfnmadd_vf_nxv2f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f16( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f16( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -2018,11 +1880,9 @@ define @vfnmadd_vf_nxv2f16_neg_splat_unmasked( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f16( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f16( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -2034,11 +1894,9 @@ define @vfnmadd_vf_nxv2f16_neg_splat_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f16( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f16( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -2073,11 +1931,9 @@ define @vfnmsub_vv_nxv2f16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f16( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -2087,11 +1943,9 @@ define @vfnmsub_vv_nxv2f16_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f16( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f16( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %negb, %va, %negc, splat (i1 true), i32 %evl) 
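; The rewrite applied throughout these hunks, distilled to a minimal sketch
; (shapes follow the nxv2f16 tests in this file; the value names here are
; illustrative, not taken from any one test). The all-ones VP mask previously
; took two instructions:
;   %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
;   %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
;   %v = call <vscale x 2 x half> @llvm.vp.fmuladd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x half> %vc, <vscale x 2 x i1> %m, i32 %evl)
; and is now a single constant expression in the call itself:
;   %v = call <vscale x 2 x half> @llvm.vp.fmuladd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x half> %vc, <vscale x 2 x i1> splat (i1 true), i32 %evl)
; Hunks that write splat (i1 -1) mean the same thing: for i1, -1 and true are
; the identical all-ones value.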
ret %v } @@ -2129,10 +1983,8 @@ define @vfnmsub_vf_nxv2f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f16( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f16( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -2144,10 +1996,8 @@ define @vfnmsub_vf_nxv2f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f16( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f16( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -2185,10 +2035,8 @@ define @vfnmsub_vf_nxv2f16_neg_splat_unmasked( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f16( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f16( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -2200,10 +2048,8 @@ define @vfnmsub_vf_nxv2f16_neg_splat_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f16( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f16( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f16( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f16( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -2227,10 +2073,8 @@ define @vfmsub_vv_nxv4f16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f16( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f16( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -2268,10 +2112,8 @@ define @vfmsub_vf_nxv4f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv4f16( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f16( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f16( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f16( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -2283,10 +2125,8 @@ define @vfmsub_vf_nxv4f16_unmasked_commute( poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, 
poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv4f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %vb, %va, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv4f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %vb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2321,11 +2161,9 @@ define @vfnmadd_vv_nxv4f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv4f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv4f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2335,11 +2173,9 @@ define @vfnmadd_vv_nxv4f16_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv4f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv4f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2379,11 +2215,9 @@ define @vfnmadd_vf_nxv4f16_unmasked( %va,
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv4f16( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv4f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %negva, %vb, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv4f16( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv4f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2395,11 +2229,9 @@ define @vfnmadd_vf_nxv4f16_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv4f16( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv4f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %vb, %negva, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv4f16( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv4f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2439,11 +2271,9 @@ define @vfnmadd_vf_nxv4f16_neg_splat_unmasked( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv4f16( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv4f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %va, %negvb, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv4f16( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv4f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2455,11 +2285,9 @@ define @vfnmadd_vf_nxv4f16_neg_splat_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv4f16( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv4f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %negvb, %va, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv4f16( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv4f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2494,11 +2322,9 @@ define @vfnmsub_vv_nxv4f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv4f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv4f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2508,11 +2334,9 @@ define @vfnmsub_vv_nxv4f16_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv4f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv4f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv4f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2550,10 +2374,8 @@ define @vfnmsub_vf_nxv4f16_unmasked( %va,
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv4f16( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %negva, %vb, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv4f16( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %negva, %vb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2565,10 +2387,8 @@ define @vfnmsub_vf_nxv4f16_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv4f16( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %vb, %negva, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv4f16( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %vb, %negva, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2606,10 +2426,8 @@ define @vfnmsub_vf_nxv4f16_neg_splat_unmasked( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv4f16( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %va, %negvb, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv4f16( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %va, %negvb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2621,10 +2439,8 @@ define @vfnmsub_vf_nxv4f16_neg_splat_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv4f16( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f16( %negvb, %va, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv4f16( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f16( %negvb, %va, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2648,10 +2464,8 @@ define @vfmsub_vv_nxv8f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v10, v12
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %va, %b, %negc, %m, i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %va, %b, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2689,10 +2503,8 @@ define @vfmsub_vf_nxv8f16_unmasked( %va,
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv8f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %va, %vb, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv8f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %va, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2704,10 +2516,8 @@ define @vfmsub_vf_nxv8f16_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv8f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %vb, %va, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv8f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %vb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2742,11 +2552,9 @@ define @vfnmadd_vv_nxv8f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v10, v12
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv8f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv8f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2756,11 +2564,9 @@ define @vfnmadd_vv_nxv8f16_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv8f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv8f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2800,11 +2606,9 @@ define @vfnmadd_vf_nxv8f16_unmasked( %va,
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv8f16( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv8f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %negva, %vb, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv8f16( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv8f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2816,11 +2620,9 @@ define @vfnmadd_vf_nxv8f16_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv8f16( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv8f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %vb, %negva, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv8f16( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv8f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2860,11 +2662,9 @@ define @vfnmadd_vf_nxv8f16_neg_splat_unmasked( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv8f16( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv8f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %va, %negvb, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv8f16( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv8f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2876,11 +2676,9 @@ define @vfnmadd_vf_nxv8f16_neg_splat_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv8f16( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv8f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %negvb, %va, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv8f16( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv8f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2915,11 +2713,9 @@ define @vfnmsub_vv_nxv8f16_unmasked( %va,
 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v10, v12
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv8f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv8f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2929,11 +2725,9 @@ define @vfnmsub_vv_nxv8f16_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv8f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv8f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv8f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2971,10 +2765,8 @@ define @vfnmsub_vf_nxv8f16_unmasked( %va,
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv8f16( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %negva, %vb, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv8f16( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %negva, %vb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -2986,10 +2778,8 @@ define @vfnmsub_vf_nxv8f16_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv8f16( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %vb, %negva, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv8f16( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %vb, %negva, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3027,10 +2817,8 @@ define @vfnmsub_vf_nxv8f16_neg_splat_unmasked( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv8f16( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %va, %negvb, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv8f16( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %va, %negvb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3042,10 +2830,8 @@ define @vfnmsub_vf_nxv8f16_neg_splat_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv8f16( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f16( %negvb, %va, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv8f16( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f16( %negvb, %va, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3069,10 +2855,8 @@ define @vfmsub_vv_nxv16f16_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v12, v16
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negc = call @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %va, %b, %negc, %m, i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %va, %b, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3110,10 +2894,8 @@ define @vfmsub_vf_nxv16f16_unmasked( %v
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv16f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %va, %vb, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv16f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %va, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3125,10 +2907,8 @@ define @vfmsub_vf_nxv16f16_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv16f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %vb, %va, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv16f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %vb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3163,11 +2943,9 @@ define @vfnmadd_vv_nxv16f16_unmasked( %
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v12, v16
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv16f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv16f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3177,11 +2955,9 @@ define @vfnmadd_vv_nxv16f16_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv16f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv16f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3221,11 +2997,9 @@ define @vfnmadd_vf_nxv16f16_unmasked( %
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv16f16( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv16f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %negva, %vb, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv16f16( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv16f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3237,11 +3011,9 @@ define @vfnmadd_vf_nxv16f16_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv16f16( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv16f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %vb, %negva, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv16f16( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv16f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3281,11 +3053,9 @@ define @vfnmadd_vf_nxv16f16_neg_splat_unmasked( poison, half %b, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv16f16( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv16f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %va, %negvb, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv16f16( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv16f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3297,11 +3067,9 @@ define @vfnmadd_vf_nxv16f16_neg_splat_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv16f16( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv16f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %negvb, %va, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv16f16( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv16f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3336,11 +3104,9 @@ define @vfnmsub_vv_nxv16f16_unmasked( %
 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v12, v16
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv16f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv16f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3350,11 +3116,9 @@ define @vfnmsub_vv_nxv16f16_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv16f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv16f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv16f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3392,10 +3156,8 @@ define @vfnmsub_vf_nxv16f16_unmasked( %
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv16f16( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %negva, %vb, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv16f16( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %negva, %vb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3407,10 +3169,8 @@ define @vfnmsub_vf_nxv16f16_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv16f16( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %vb, %negva, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv16f16( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %vb, %negva, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3448,10 +3208,8 @@ define @vfnmsub_vf_nxv16f16_neg_splat_unmasked( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv16f16( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %va, %negvb, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv16f16( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %va, %negvb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3463,10 +3221,8 @@ define @vfnmsub_vf_nxv16f16_neg_splat_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv16f16( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f16( %negvb, %va, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv16f16( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f16( %negvb, %va, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3492,10 +3248,8 @@ define @vfmsub_vv_nxv32f16_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v16, v24
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %va, %b, %negc, %m, i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %va, %b, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3533,10 +3287,8 @@ define @vfmsub_vf_nxv32f16_unmasked( %v
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %va, %vb, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv32f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %va, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3548,10 +3300,8 @@ define @vfmsub_vf_nxv32f16_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %vb, %va, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv32f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %vb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3589,11 +3339,9 @@ define @vfnmadd_vv_nxv32f16_unmasked( %
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv32f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3604,11 +3352,9 @@ define @vfnmadd_vv_nxv32f16_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv32f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3648,11 +3394,9 @@ define @vfnmadd_vf_nxv32f16_unmasked( %
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv32f16( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %negva, %vb, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv32f16( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv32f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3664,11 +3408,9 @@ define @vfnmadd_vf_nxv32f16_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv32f16( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %vb, %negva, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv32f16( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv32f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3708,11 +3450,9 @@ define @vfnmadd_vf_nxv32f16_neg_splat_unmasked( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv32f16( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %va, %negvb, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv32f16( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv32f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3724,11 +3464,9 @@ define @vfnmadd_vf_nxv32f16_neg_splat_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv32f16( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %negvb, %va, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv32f16( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv32f16( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3766,11 +3504,9 @@ define @vfnmsub_vv_nxv32f16_unmasked( %
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv32f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3781,11 +3517,9 @@ define @vfnmsub_vv_nxv32f16_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv32f16( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3823,10 +3557,8 @@ define @vfnmsub_vf_nxv32f16_unmasked( %
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv32f16( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %negva, %vb, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv32f16( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %negva, %vb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3838,10 +3570,8 @@ define @vfnmsub_vf_nxv32f16_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv32f16( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %vb, %negva, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv32f16( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %vb, %negva, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3879,10 +3609,8 @@ define @vfnmsub_vf_nxv32f16_neg_splat_unmasked( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv32f16( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %va, %negvb, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv32f16( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %va, %negvb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3894,10 +3622,8 @@ define @vfnmsub_vf_nxv32f16_neg_splat_unmasked_commute( poison, half %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv32f16( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv32f16( %negvb, %va, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv32f16( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv32f16( %negvb, %va, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3921,10 +3647,8 @@ define @vfmsub_vv_nxv1f32_unmasked( %va
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v9, v10
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negc = call @llvm.vp.fneg.nxv1f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %va, %b, %negc, %m, i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %va, %b, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3962,10 +3686,8 @@ define @vfmsub_vf_nxv1f32_unmasked( %va
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv1f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %va, %vb, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv1f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %va, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -3977,10 +3699,8 @@ define @vfmsub_vf_nxv1f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv1f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %vb, %va, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv1f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %vb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4015,11 +3735,9 @@ define @vfnmadd_vv_nxv1f32_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv1f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv1f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv1f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4029,11 +3747,9 @@ define @vfnmadd_vv_nxv1f32_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv1f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv1f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv1f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4073,11 +3789,9 @@ define @vfnmadd_vf_nxv1f32_unmasked( %v
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv1f32( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv1f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %negva, %vb, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv1f32( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv1f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4089,11 +3803,9 @@ define @vfnmadd_vf_nxv1f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv1f32( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv1f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %vb, %negva, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv1f32( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv1f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4133,11 +3845,9 @@ define @vfnmadd_vf_nxv1f32_neg_splat_unmasked( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv1f32( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv1f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %va, %negvb, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv1f32( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv1f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4149,11 +3859,9 @@ define @vfnmadd_vf_nxv1f32_neg_splat_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv1f32( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv1f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %negvb, %va, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv1f32( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv1f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4188,11 +3896,9 @@ define @vfnmsub_vv_nxv1f32_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv1f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv1f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv1f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4202,11 +3908,9 @@ define @vfnmsub_vv_nxv1f32_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv1f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv1f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv1f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4244,10 +3948,8 @@ define @vfnmsub_vf_nxv1f32_unmasked( %v
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv1f32( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %negva, %vb, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv1f32( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %negva, %vb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4259,10 +3961,8 @@ define @vfnmsub_vf_nxv1f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv1f32( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %vb, %negva, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv1f32( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %vb, %negva, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4300,10 +4000,8 @@ define @vfnmsub_vf_nxv1f32_neg_splat_unmasked( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv1f32( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %va, %negvb, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv1f32( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %va, %negvb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4315,10 +4013,8 @@ define @vfnmsub_vf_nxv1f32_neg_splat_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv1f32( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv1f32( %negvb, %va, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv1f32( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv1f32( %negvb, %va, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4342,10 +4038,8 @@ define @vfmsub_vv_nxv2f32_unmasked( %va
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v9, v10
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negc = call @llvm.vp.fneg.nxv2f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %va, %b, %negc, %m, i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %va, %b, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4383,10 +4077,8 @@ define @vfmsub_vf_nxv2f32_unmasked( %va
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv2f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %va, %vb, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv2f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %va, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4398,10 +4090,8 @@ define @vfmsub_vf_nxv2f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv2f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %vb, %va, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv2f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %vb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4436,11 +4126,9 @@ define @vfnmadd_vv_nxv2f32_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv2f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv2f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv2f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4450,11 +4138,9 @@ define @vfnmadd_vv_nxv2f32_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv2f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv2f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv2f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4494,11 +4180,9 @@ define @vfnmadd_vf_nxv2f32_unmasked( %v
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv2f32( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv2f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %negva, %vb, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv2f32( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv2f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4510,11 +4194,9 @@ define @vfnmadd_vf_nxv2f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv2f32( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv2f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %vb, %negva, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv2f32( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv2f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4554,11 +4236,9 @@ define @vfnmadd_vf_nxv2f32_neg_splat_unmasked( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv2f32( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv2f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %va, %negvb, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv2f32( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv2f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4570,11 +4250,9 @@ define @vfnmadd_vf_nxv2f32_neg_splat_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv2f32( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv2f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %negvb, %va, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv2f32( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv2f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4609,11 +4287,9 @@ define @vfnmsub_vv_nxv2f32_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v9, v10
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv2f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv2f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv2f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4623,11 +4299,9 @@ define @vfnmsub_vv_nxv2f32_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv2f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv2f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv2f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4665,10 +4339,8 @@ define @vfnmsub_vf_nxv2f32_unmasked( %v
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv2f32( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %negva, %vb, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv2f32( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %negva, %vb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4680,10 +4352,8 @@ define @vfnmsub_vf_nxv2f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv2f32( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %vb, %negva, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv2f32( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %vb, %negva, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4721,10 +4391,8 @@ define @vfnmsub_vf_nxv2f32_neg_splat_unmasked( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv2f32( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %va, %negvb, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv2f32( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %va, %negvb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4736,10 +4404,8 @@ define @vfnmsub_vf_nxv2f32_neg_splat_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv2f32( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv2f32( %negvb, %va, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv2f32( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv2f32( %negvb, %va, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4763,10 +4429,8 @@ define @vfmsub_vv_nxv4f32_unmasked( %va
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v10, v12
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negc = call @llvm.vp.fneg.nxv4f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %va, %b, %negc, %m, i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %va, %b, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4804,10 +4468,8 @@ define @vfmsub_vf_nxv4f32_unmasked( %va
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv4f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %va, %vb, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv4f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %va, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4819,10 +4481,8 @@ define @vfmsub_vf_nxv4f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv4f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %vb, %va, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv4f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %vb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4857,11 +4517,9 @@ define @vfnmadd_vv_nxv4f32_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v10, v12
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv4f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv4f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv4f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4871,11 +4529,9 @@ define @vfnmadd_vv_nxv4f32_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv4f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv4f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv4f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4915,11 +4571,9 @@ define @vfnmadd_vf_nxv4f32_unmasked( %v
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv4f32( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv4f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %negva, %vb, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv4f32( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv4f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4931,11 +4585,9 @@ define @vfnmadd_vf_nxv4f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv4f32( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv4f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %vb, %negva, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv4f32( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv4f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4975,11 +4627,9 @@ define @vfnmadd_vf_nxv4f32_neg_splat_unmasked( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv4f32( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv4f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %va, %negvb, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv4f32( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv4f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -4991,11 +4641,9 @@ define @vfnmadd_vf_nxv4f32_neg_splat_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv4f32( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv4f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %negvb, %va, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv4f32( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv4f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5030,11 +4678,9 @@ define @vfnmsub_vv_nxv4f32_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v10, v12
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv4f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv4f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv4f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5044,11 +4690,9 @@ define @vfnmsub_vv_nxv4f32_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv4f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv4f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv4f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5086,10 +4730,8 @@ define @vfnmsub_vf_nxv4f32_unmasked( %v
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv4f32( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %negva, %vb, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv4f32( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %negva, %vb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5101,10 +4743,8 @@ define @vfnmsub_vf_nxv4f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv4f32( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %vb, %negva, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv4f32( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %vb, %negva, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5142,10 +4782,8 @@ define @vfnmsub_vf_nxv4f32_neg_splat_unmasked( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv4f32( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %va, %negvb, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv4f32( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %va, %negvb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5157,10 +4795,8 @@ define @vfnmsub_vf_nxv4f32_neg_splat_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv4f32( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv4f32( %negvb, %va, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv4f32( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv4f32( %negvb, %va, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5184,10 +4820,8 @@ define @vfmsub_vv_nxv8f32_unmasked( %va
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v12, v16
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negc = call @llvm.vp.fneg.nxv8f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %va, %b, %negc, %m, i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %va, %b, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5225,10 +4859,8 @@ define @vfmsub_vf_nxv8f32_unmasked( %va
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv8f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %va, %vb, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv8f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %va, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5240,10 +4872,8 @@ define @vfmsub_vf_nxv8f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv8f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %vb, %va, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv8f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %vb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5278,11 +4908,9 @@ define @vfnmadd_vv_nxv8f32_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v12, v16
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv8f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv8f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv8f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5292,11 +4920,9 @@ define @vfnmadd_vv_nxv8f32_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv8f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv8f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv8f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5336,11 +4962,9 @@ define @vfnmadd_vf_nxv8f32_unmasked( %v
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv8f32( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv8f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %negva, %vb, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv8f32( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv8f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %negva, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5352,11 +4976,9 @@ define @vfnmadd_vf_nxv8f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv8f32( %va, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv8f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %vb, %negva, %negvc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv8f32( %va, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv8f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %vb, %negva, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5396,11 +5018,9 @@ define @vfnmadd_vf_nxv8f32_neg_splat_unmasked( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv8f32( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv8f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %va, %negvb, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv8f32( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv8f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %va, %negvb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5412,11 +5032,9 @@ define @vfnmadd_vf_nxv8f32_neg_splat_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv8f32( %vb, %m, i32 %evl)
- %negvc = call @llvm.vp.fneg.nxv8f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %negvb, %va, %negvc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv8f32( %vb, splat (i1 true), i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv8f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %negvb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5451,11 +5069,9 @@ define @vfnmsub_vv_nxv8f32_unmasked( %v
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v12, v16
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv8f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv8f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv8f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5465,11 +5081,9 @@ define @vfnmsub_vv_nxv8f32_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv8f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv8f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv8f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5507,10 +5121,8 @@ define @vfnmsub_vf_nxv8f32_unmasked( %v
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv8f32( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %negva, %vb, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv8f32( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %negva, %vb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5522,10 +5134,8 @@ define @vfnmsub_vf_nxv8f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negva = call @llvm.vp.fneg.nxv8f32( %va, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %vb, %negva, %vc, %m, i32 %evl)
+ %negva = call @llvm.vp.fneg.nxv8f32( %va, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %vb, %negva, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5563,10 +5173,8 @@ define @vfnmsub_vf_nxv8f32_neg_splat_unmasked( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv8f32( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %va, %negvb, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv8f32( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %va, %negvb, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5578,10 +5186,8 @@ define @vfnmsub_vf_nxv8f32_neg_splat_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvb = call @llvm.vp.fneg.nxv8f32( %vb, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv8f32( %negvb, %va, %vc, %m, i32 %evl)
+ %negvb = call @llvm.vp.fneg.nxv8f32( %vb, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv8f32( %negvb, %va, %vc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5607,10 +5213,8 @@ define @vfmsub_vv_nxv16f32_unmasked(
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v16, v24
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f32( %va, %b, %negc, %m, i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f32( %va, %b, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5648,10 +5252,8 @@ define @vfmsub_vf_nxv16f32_unmasked(
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv16f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f32( %va, %vb, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv16f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f32( %va, %vb, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5663,10 +5265,8 @@ define @vfmsub_vf_nxv16f32_unmasked_commute( poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv16f32( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f32( %vb, %va, %negvc, %m, i32 %evl)
+ %negvc = call @llvm.vp.fneg.nxv16f32( %vc, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f32( %vb, %va, %negvc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5704,11 +5304,9 @@ define @vfnmadd_vv_nxv16f32_unmasked(
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24
 ; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv16f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f32( %va, %negb, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv16f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f32( %va, %negb, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5719,11 +5317,9 @@ define @vfnmadd_vv_nxv16f32_unmasked_commuted( poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negb = call @llvm.vp.fneg.nxv16f32( %b, %m, i32 %evl)
- %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl)
- %v = call @llvm.vp.fmuladd.nxv16f32( %negb, %va, %negc, %m, i32 %evl)
+ %negb = call @llvm.vp.fneg.nxv16f32( %b, splat (i1 true), i32 %evl)
+ %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 true), i32 %evl)
+ %v = call @llvm.vp.fmuladd.nxv16f32( %negb, %va, %negc, splat (i1 true), i32 %evl)
 ret %v
 }
@@ -5763,11 +5359,9 @@ define @vfnmadd_vf_nxv16f32_unmasked(
 ; CHECK-NEXT: ret
 %elt.head = insertelement poison, float %b, i32 0
 %vb = shufflevector %elt.head, poison, zeroinitializer
- %head = insertelement
poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv16f32( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv16f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv16f32( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv16f32( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv16f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5779,11 +5373,9 @@ define @vfnmadd_vf_nxv16f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv16f32( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv16f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv16f32( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv16f32( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv16f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5823,11 +5415,9 @@ define @vfnmadd_vf_nxv16f32_neg_splat_unmasked( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv16f32( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv16f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv16f32( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv16f32( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv16f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5839,11 +5429,9 @@ define @vfnmadd_vf_nxv16f32_neg_splat_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv16f32( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv16f32( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv16f32( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv16f32( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv16f32( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -5881,11 +5469,9 @@ define @vfnmsub_vv_nxv16f32_unmasked( ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv16f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv16f32( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv16f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5896,11 +5482,9 @@ define @vfnmsub_vv_nxv16f32_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv16f32( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv16f32( %negb, %va, 
%negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv16f32( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -5938,10 +5522,8 @@ define @vfnmsub_vf_nxv16f32_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv16f32( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv16f32( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv16f32( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -5953,10 +5535,8 @@ define @vfnmsub_vf_nxv16f32_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv16f32( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv16f32( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv16f32( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -5994,10 +5574,8 @@ define @vfnmsub_vf_nxv16f32_neg_splat_unmasked( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv16f32( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv16f32( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv16f32( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6009,10 +5587,8 @@ define @vfnmsub_vf_nxv16f32_neg_splat_unmasked_commute( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv16f32( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv16f32( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv16f32( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv16f32( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6036,10 +5612,8 @@ define @vfmsub_vv_nxv1f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv1f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6077,10 +5651,8 @@ define @vfmsub_vf_nxv1f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv1f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %va, %vb, %negvc, 
splat (i1 true), i32 %evl) ret %v } @@ -6092,10 +5664,8 @@ define @vfmsub_vf_nxv1f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv1f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6130,11 +5700,9 @@ define @vfnmadd_vv_nxv1f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6144,11 +5712,9 @@ define @vfnmadd_vv_nxv1f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6188,11 +5754,9 @@ define @vfnmadd_vf_nxv1f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6204,11 +5768,9 @@ define @vfnmadd_vf_nxv1f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6248,11 +5810,9 @@ define @vfnmadd_vf_nxv1f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f64( %vc, %m, i32 %evl) 
- %v = call @llvm.vp.fmuladd.nxv1f64( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6264,11 +5824,9 @@ define @vfnmadd_vf_nxv1f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv1f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv1f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6303,11 +5861,9 @@ define @vfnmsub_vv_nxv1f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v9, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6317,11 +5873,9 @@ define @vfnmsub_vv_nxv1f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv1f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv1f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6359,10 +5913,8 @@ define @vfnmsub_vf_nxv1f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6374,10 +5926,8 @@ define @vfnmsub_vf_nxv1f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv1f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv1f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6415,10 +5965,8 @@ define @vfnmsub_vf_nxv1f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = 
shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6430,10 +5978,8 @@ define @vfnmsub_vf_nxv1f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv1f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv1f64( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv1f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv1f64( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6457,10 +6003,8 @@ define @vfmsub_vv_nxv2f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv2f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6498,10 +6042,8 @@ define @vfmsub_vf_nxv2f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv2f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6513,10 +6055,8 @@ define @vfmsub_vf_nxv2f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv2f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6551,11 +6091,9 @@ define @vfnmadd_vv_nxv2f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6565,11 +6103,9 @@ define @vfnmadd_vv_nxv2f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %negb, %va, %negc, %m, i32 %evl) + %negb 
= call @llvm.vp.fneg.nxv2f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6609,11 +6145,9 @@ define @vfnmadd_vf_nxv2f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6625,11 +6159,9 @@ define @vfnmadd_vf_nxv2f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6669,11 +6201,9 @@ define @vfnmadd_vf_nxv2f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6685,11 +6215,9 @@ define @vfnmadd_vf_nxv2f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv2f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv2f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6724,11 +6252,9 @@ define @vfnmsub_vv_nxv2f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v10, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 true), i32 %evl) + %v = 
call @llvm.vp.fmuladd.nxv2f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6738,11 +6264,9 @@ define @vfnmsub_vv_nxv2f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv2f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv2f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6780,10 +6304,8 @@ define @vfnmsub_vf_nxv2f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6795,10 +6317,8 @@ define @vfnmsub_vf_nxv2f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv2f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv2f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6836,10 +6356,8 @@ define @vfnmsub_vf_nxv2f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6851,10 +6369,8 @@ define @vfnmsub_vf_nxv2f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv2f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv2f64( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv2f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv2f64( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -6878,10 +6394,8 @@ define @vfmsub_vv_nxv4f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv4f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6919,10 +6433,8 @@ define @vfmsub_vf_nxv4f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, 
poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv4f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6934,10 +6446,8 @@ define @vfmsub_vf_nxv4f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv4f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -6972,11 +6482,9 @@ define @vfnmadd_vv_nxv4f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv4f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv4f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -6986,11 +6494,9 @@ define @vfnmadd_vv_nxv4f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv4f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv4f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7030,11 +6536,9 @@ define @vfnmadd_vf_nxv4f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv4f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv4f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv4f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7046,11 +6550,9 @@ define @vfnmadd_vf_nxv4f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv4f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv4f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv4f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %vb, %negva, %negvc, splat (i1 true), i32 
%evl) ret %v } @@ -7090,11 +6592,9 @@ define @vfnmadd_vf_nxv4f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv4f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv4f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv4f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7106,11 +6606,9 @@ define @vfnmadd_vf_nxv4f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv4f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv4f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv4f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv4f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7145,11 +6643,9 @@ define @vfnmsub_vv_nxv4f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v12, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv4f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv4f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7159,11 +6655,9 @@ define @vfnmsub_vv_nxv4f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv4f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv4f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7201,10 +6695,8 @@ define @vfnmsub_vf_nxv4f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv4f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv4f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -7216,10 +6708,8 @@ define @vfnmsub_vf_nxv4f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv4f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %vb, %negva, %vc, %m, i32 
%evl) + %negva = call @llvm.vp.fneg.nxv4f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -7257,10 +6747,8 @@ define @vfnmsub_vf_nxv4f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv4f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv4f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -7272,10 +6760,8 @@ define @vfnmsub_vf_nxv4f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv4f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv4f64( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv4f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv4f64( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } @@ -7301,10 +6787,8 @@ define @vfmsub_vv_nxv8f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %va, %b, %negc, %m, i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %va, %b, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7342,10 +6826,8 @@ define @vfmsub_vf_nxv8f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv8f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %va, %vb, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %va, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7357,10 +6839,8 @@ define @vfmsub_vf_nxv8f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvc = call @llvm.vp.fneg.nxv8f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %vb, %va, %negvc, %m, i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %vb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7398,11 +6878,9 @@ define @vfnmadd_vv_nxv8f64_unmasked( ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv8f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %va, %negb, 
%negc, splat (i1 true), i32 %evl) ret %v } @@ -7413,11 +6891,9 @@ define @vfnmadd_vv_nxv8f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv8f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7457,11 +6933,9 @@ define @vfnmadd_vf_nxv8f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv8f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv8f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %negva, %vb, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv8f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %negva, %vb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7473,11 +6947,9 @@ define @vfnmadd_vf_nxv8f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv8f64( %va, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv8f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %vb, %negva, %negvc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv8f64( %va, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %vb, %negva, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7517,11 +6989,9 @@ define @vfnmadd_vf_nxv8f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv8f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv8f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %va, %negvb, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv8f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %va, %negvb, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7533,11 +7003,9 @@ define @vfnmadd_vf_nxv8f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv8f64( %vb, %m, i32 %evl) - %negvc = call @llvm.vp.fneg.nxv8f64( %vc, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %negvb, %va, %negvc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv8f64( %vb, splat (i1 true), i32 %evl) + %negvc = call @llvm.vp.fneg.nxv8f64( %vc, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %negvb, %va, %negvc, splat (i1 true), i32 %evl) ret %v } @@ -7575,11 +7043,9 @@ define @vfnmsub_vv_nxv8f64_unmasked( ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - 
%m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %va, %negb, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv8f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %va, %negb, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7590,11 +7056,9 @@ define @vfnmsub_vv_nxv8f64_unmasked_commuted( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %negb, %va, %negc, %m, i32 %evl) + %negb = call @llvm.vp.fneg.nxv8f64( %b, splat (i1 true), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %negb, %va, %negc, splat (i1 true), i32 %evl) ret %v } @@ -7632,10 +7096,8 @@ define @vfnmsub_vf_nxv8f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv8f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %negva, %vb, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv8f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %negva, %vb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -7647,10 +7109,8 @@ define @vfnmsub_vf_nxv8f64_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negva = call @llvm.vp.fneg.nxv8f64( %va, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %vb, %negva, %vc, %m, i32 %evl) + %negva = call @llvm.vp.fneg.nxv8f64( %va, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %vb, %negva, %vc, splat (i1 true), i32 %evl) ret %v } @@ -7688,10 +7148,8 @@ define @vfnmsub_vf_nxv8f64_neg_splat_unmasked( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv8f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %va, %negvb, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv8f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %va, %negvb, %vc, splat (i1 true), i32 %evl) ret %v } @@ -7703,9 +7161,7 @@ define @vfnmsub_vf_nxv8f64_neg_splat_unmasked_commute( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %negvb = call @llvm.vp.fneg.nxv8f64( %vb, %m, i32 %evl) - %v = call @llvm.vp.fmuladd.nxv8f64( %negvb, %va, %vc, %m, i32 %evl) + %negvb = call @llvm.vp.fneg.nxv8f64( %vb, splat (i1 true), i32 %evl) + %v = call @llvm.vp.fmuladd.nxv8f64( %negvb, %va, %vc, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll index 237ef11d154ba..c00433eba5481 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: 
sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin \
 ; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(
@@ -528,3 +528,123 @@ entry:
 
   ret %a
 }
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
+  <vscale x 1 x bfloat>,
+  bfloat,
+  iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.v.f_f_nxv1bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
+    <vscale x 1 x bfloat> undef,
+    bfloat %0,
+    iXLen %1)
+
+  ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
+  <vscale x 2 x bfloat>,
+  bfloat,
+  iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmv.v.f_f_nxv2bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
+    <vscale x 2 x bfloat> undef,
+    bfloat %0,
+    iXLen %1)
+
+  ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
+  <vscale x 4 x bfloat>,
+  bfloat,
+  iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmv.v.f_f_nxv4bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
+    <vscale x 4 x bfloat> undef,
+    bfloat %0,
+    iXLen %1)
+
+  ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
+  <vscale x 8 x bfloat>,
+  bfloat,
+  iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmv.v.f_f_nxv8bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
+    <vscale x 8 x bfloat> undef,
+    bfloat %0,
+    iXLen %1)
+
+  ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
+  <vscale x 16 x bfloat>,
+  bfloat,
+  iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmv.v.f_f_nxv16bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
+    <vscale x 16 x bfloat> undef,
+    bfloat %0,
+    iXLen %1)
+
+  ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
+  <vscale x 32 x bfloat>,
+  bfloat,
+  iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmv.v.f_f_nxv32bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv32bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
+    <vscale x 32 x bfloat> undef,
+    bfloat %0,
+    iXLen %1)
+
+  ret <vscale x 32 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
index ef08865100f14..1db5fa1720a27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
@@ -46,9 +46,7 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i
 ; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
 ; ZVFHMIN-NEXT:    ret
-  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 1 x i1> %head,
poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv1f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -90,9 +88,7 @@ define @vfneg_vv_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv2f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -134,9 +130,7 @@ define @vfneg_vv_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv4f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -178,9 +172,7 @@ define @vfneg_vv_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv8f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -222,9 +214,7 @@ define @vfneg_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv16f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -279,8 +269,6 @@ define @vfneg_vv_nxv32f16_unmasked( %va ; ; ZVFHMIN-LABEL: vfneg_vv_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -288,6 +276,8 @@ define @vfneg_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -306,9 +296,7 @@ define @vfneg_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -330,9 +318,7 @@ define @vfneg_vv_nxv1f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv1f32( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv1f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -354,9 +340,7 @@ define @vfneg_vv_nxv2f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv2f32( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv2f32( %va, splat 
(i1 true), i32 %evl) ret %v } @@ -378,9 +362,7 @@ define @vfneg_vv_nxv4f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv4f32( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv4f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -402,9 +384,7 @@ define @vfneg_vv_nxv8f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv8f32( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv8f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -426,9 +406,7 @@ define @vfneg_vv_nxv16f32_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv16f32( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv16f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -450,9 +428,7 @@ define @vfneg_vv_nxv1f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv1f64( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -474,9 +450,7 @@ define @vfneg_vv_nxv2f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv2f64( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -498,9 +472,7 @@ define @vfneg_vv_nxv4f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv4f64( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -522,9 +494,7 @@ define @vfneg_vv_nxv7f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv7f64( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -546,9 +516,7 @@ define @vfneg_vv_nxv8f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv8f64( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -598,8 +566,6 @@ define @vfneg_vv_nxv16f64_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfneg.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fneg.nxv16f64( %va, %m, i32 %evl) + %v = call @llvm.vp.fneg.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll
index e642e89b3dff0..ee3ed603ff6d7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll
@@ -16,11 +16,9 @@ define <vscale x 1 x half> @vfnmacc_vv_nxv1f16(<vscale x 1 x half> %a,
 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %nega = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %allones, i32 %evl)
-  %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %nega, <vscale x 1 x half> %b, <vscale x 1 x half> %negc, <vscale x 1 x i1> %allones, i32 %evl)
+  %nega = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %nega, <vscale x 1 x half> %b, <vscale x 1 x half> %negc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   %u = call <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1> %m, <vscale x 1 x half> %v, <vscale x 1 x half> %c, i32 %evl)
   ret <vscale x 1 x half> %u
 }
@@ -32,12 +30,10 @@ define <vscale x 1 x half> @vfnmacc_vv_nxv1f16_unmasked(<vscale x 1 x half> %a,
 ; CHECK-NEXT:    vfnmacc.vv v10, v8, v9
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %nega = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %allones, i32 %evl)
-  %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %nega, <vscale x 1 x half> %b, <vscale x 1 x half> %negc, <vscale x 1 x i1> %allones, i32 %evl)
-  %u = call <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1> %allones, <vscale x 1 x half> %v, <vscale x 1 x half> %c, i32 %evl)
+  %nega = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %nega, <vscale x 1 x half> %b, <vscale x 1 x half> %negc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %u = call <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x half> %v, <vscale x 1 x half> %c, i32 %evl)
   ret <vscale x 1 x half> %u
 }
 
@@ -50,11 +46,9 @@ define <vscale x 1 x half> @vfnmacc_vf_nxv1f16(<vscale x 1 x half> %a, half %b,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %nega = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %allones, i32 %evl)
-  %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %nega, <vscale x 1 x half> %vb, <vscale x 1 x half> %negc, <vscale x 1 x i1> %allones, i32 %evl)
+  %nega = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %nega, <vscale x 1 x half> %vb, <vscale x 1 x half> %negc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   %u = call <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1> %m, <vscale x 1 x half> %v, <vscale x 1 x half> %c, i32 %evl)
   ret <vscale x 1 x half> %u
 }
@@ -68,11 +62,9 @@ define <vscale x 1 x half> @vfnmacc_vf_nxv1f16_commute(<vscale x 1 x half> %a, h
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %nega = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %allones, i32 %evl)
-  %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x half> %nega, <vscale x 1 x half> %negc, <vscale x 1 x i1> %allones, i32 %evl)
+  %nega = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x half> %nega, <vscale x 1 x half> %negc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   %u = call <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1> %m, <vscale x 1 x half> %v, <vscale x 1 x half> %c, i32 %evl)
   ret <vscale x 1 x half> %u
 }
@@ -86,12 +78,10 @@ define <vscale x 1 x half> @vfnmacc_vf_nxv1f16_unmasked(<vscale x 1 x half> %a,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
   %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %nega = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %allones, i32 %evl)
-  %negc = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %c, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x half> @llvm.vp.fma.nxv1f16(
%nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -102,11 +92,9 @@ define @vfnmacc_vv_nxv1f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -120,11 +108,9 @@ define @vfnmacc_vf_nxv1f16_ta( %a, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -138,11 +124,9 @@ define @vfnmacc_vf_nxv1f16_commute_ta( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -159,11 +143,9 @@ define @vfnmacc_vv_nxv2f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -175,12 +157,10 @@ define @vfnmacc_vv_nxv2f16_unmasked( %a, ; CHECK-NEXT: vfnmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %nega, 
%b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -193,11 +173,9 @@ define @vfnmacc_vf_nxv2f16( %a, half %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -211,11 +189,9 @@ define @vfnmacc_vf_nxv2f16_commute( %a, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -229,12 +205,10 @@ define @vfnmacc_vf_nxv2f16_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -245,11 +219,9 @@ define @vfnmacc_vv_nxv2f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -263,11 +235,9 @@ define @vfnmacc_vf_nxv2f16_ta( %a, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat 
= insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -281,11 +251,9 @@ define @vfnmacc_vf_nxv2f16_commute_ta( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -302,11 +270,9 @@ define @vfnmacc_vv_nxv4f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -318,12 +284,10 @@ define @vfnmacc_vv_nxv4f16_unmasked( %a, ; CHECK-NEXT: vfnmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -336,11 +300,9 @@ define @vfnmacc_vf_nxv4f16( %a, half %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -354,11 +316,9 @@ define 
@vfnmacc_vf_nxv4f16_commute( %a, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -372,12 +332,10 @@ define @vfnmacc_vf_nxv4f16_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -388,11 +346,9 @@ define @vfnmacc_vv_nxv4f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -406,11 +362,9 @@ define @vfnmacc_vf_nxv4f16_ta( %a, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -424,11 +378,9 @@ define @vfnmacc_vf_nxv4f16_commute_ta( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f16( %c, splat 
(i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -445,11 +397,9 @@ define @vfnmacc_vv_nxv8f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -461,12 +411,10 @@ define @vfnmacc_vv_nxv8f16_unmasked( %a, ; CHECK-NEXT: vfnmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -479,11 +427,9 @@ define @vfnmacc_vf_nxv8f16( %a, half %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -497,11 +443,9 @@ define @vfnmacc_vf_nxv8f16_commute( %a, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -515,12 +459,10 @@ define @vfnmacc_vf_nxv8f16_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %nega, 
%vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -531,11 +473,9 @@ define @vfnmacc_vv_nxv8f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -549,11 +489,9 @@ define @vfnmacc_vf_nxv8f16_ta( %a, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -567,11 +505,9 @@ define @vfnmacc_vf_nxv8f16_commute_ta( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -588,11 +524,9 @@ define @vfnmacc_vv_nxv16f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -604,12 +538,10 @@ define @vfnmacc_vv_nxv16f16_unmasked( % ; CHECK-NEXT: vfnmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( 
%nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -622,11 +554,9 @@ define @vfnmacc_vf_nxv16f16( %a, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -640,11 +570,9 @@ define @vfnmacc_vf_nxv16f16_commute( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -658,12 +586,10 @@ define @vfnmacc_vf_nxv16f16_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -674,11 +600,9 @@ define @vfnmacc_vv_nxv16f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -692,11 +616,9 @@ define @vfnmacc_vf_nxv16f16_ta( %a, hal ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector 
%elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -710,11 +632,9 @@ define @vfnmacc_vf_nxv16f16_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -732,11 +652,9 @@ define @vfnmacc_vv_nxv32f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -749,12 +667,10 @@ define @vfnmacc_vv_nxv32f16_unmasked( % ; CHECK-NEXT: vfnmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -767,11 +683,9 @@ define @vfnmacc_vf_nxv32f16( %a, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32f16( 
%m, %v, %c, i32 %evl) ret %u } @@ -785,11 +699,9 @@ define @vfnmacc_vf_nxv32f16_commute( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -803,12 +715,10 @@ define @vfnmacc_vf_nxv32f16_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -820,11 +730,9 @@ define @vfnmacc_vv_nxv32f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -838,11 +746,9 @@ define @vfnmacc_vf_nxv32f16_ta( %a, hal ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -856,11 +762,9 @@ define @vfnmacc_vf_nxv32f16_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv32f16( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call 
@llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv32f16( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -877,11 +781,9 @@ define @vfnmacc_vv_nxv1f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -893,12 +795,10 @@ define @vfnmacc_vv_nxv1f32_unmasked( %a ; CHECK-NEXT: vfnmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -911,11 +811,9 @@ define @vfnmacc_vf_nxv1f32( %a, float % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -929,11 +827,9 @@ define @vfnmacc_vf_nxv1f32_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -947,12 +843,10 @@ define @vfnmacc_vf_nxv1f32_unmasked( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - 
%negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -963,11 +857,9 @@ define @vfnmacc_vv_nxv1f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -981,11 +873,9 @@ define @vfnmacc_vf_nxv1f32_ta( %a, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -999,11 +889,9 @@ define @vfnmacc_vf_nxv1f32_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -1020,11 +908,9 @@ define @vfnmacc_vv_nxv2f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1036,12 +922,10 @@ define @vfnmacc_vv_nxv2f32_unmasked( %a ; CHECK-NEXT: vfnmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %negc = 
call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1054,11 +938,9 @@ define @vfnmacc_vf_nxv2f32( %a, float % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1072,11 +954,9 @@ define @vfnmacc_vf_nxv2f32_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1090,12 +970,10 @@ define @vfnmacc_vf_nxv2f32_unmasked( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1106,11 +984,9 @@ define @vfnmacc_vv_nxv2f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1124,11 +1000,9 @@ define @vfnmacc_vf_nxv2f32_ta( %a, floa ; CHECK-NEXT: ret %elt.head = 
insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1142,11 +1016,9 @@ define @vfnmacc_vf_nxv2f32_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1163,11 +1035,9 @@ define @vfnmacc_vv_nxv4f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1179,12 +1049,10 @@ define @vfnmacc_vv_nxv4f32_unmasked( %a ; CHECK-NEXT: vfnmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1197,11 +1065,9 @@ define @vfnmacc_vf_nxv4f32( %a, float % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = 
call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1215,11 +1081,9 @@ define @vfnmacc_vf_nxv4f32_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1233,12 +1097,10 @@ define @vfnmacc_vf_nxv4f32_unmasked( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1249,11 +1111,9 @@ define @vfnmacc_vv_nxv4f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1267,11 +1127,9 @@ define @vfnmacc_vf_nxv4f32_ta( %a, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1285,11 +1143,9 @@ define @vfnmacc_vf_nxv4f32_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call 
@llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1306,11 +1162,9 @@ define @vfnmacc_vv_nxv8f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1322,12 +1176,10 @@ define @vfnmacc_vv_nxv8f32_unmasked( %a ; CHECK-NEXT: vfnmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1340,11 +1192,9 @@ define @vfnmacc_vf_nxv8f32( %a, float % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1358,11 +1208,9 @@ define @vfnmacc_vf_nxv8f32_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1376,12 +1224,10 @@ define @vfnmacc_vf_nxv8f32_unmasked( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 
%evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1392,11 +1238,9 @@ define @vfnmacc_vv_nxv8f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1410,11 +1254,9 @@ define @vfnmacc_vf_nxv8f32_ta( %a, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1428,11 +1270,9 @@ define @vfnmacc_vf_nxv8f32_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1450,11 +1290,9 @@ define @vfnmacc_vv_nxv16f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1467,12 +1305,10 @@ define @vfnmacc_vv_nxv16f32_unmasked( ; CHECK-NEXT: vfnmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, 
%allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1485,11 +1321,9 @@ define @vfnmacc_vf_nxv16f32( %a, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1503,11 +1337,9 @@ define @vfnmacc_vf_nxv16f32_commute( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1521,12 +1353,10 @@ define @vfnmacc_vf_nxv16f32_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1538,11 +1368,9 @@ define @vfnmacc_vv_nxv16f32_ta( %a, < ; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %negc, splat 
(i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1556,11 +1384,9 @@ define @vfnmacc_vf_nxv16f32_ta( %a, f ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1574,11 +1400,9 @@ define @vfnmacc_vf_nxv16f32_commute_ta( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1595,11 +1419,9 @@ define @vfnmacc_vv_nxv1f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1611,12 +1433,10 @@ define @vfnmacc_vv_nxv1f64_unmasked( ; CHECK-NEXT: vfnmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1629,11 +1449,9 @@ define @vfnmacc_vf_nxv1f64( %a, doubl ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %negc 
= call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1647,11 +1465,9 @@ define @vfnmacc_vf_nxv1f64_commute( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1665,12 +1481,10 @@ define @vfnmacc_vf_nxv1f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1681,11 +1495,9 @@ define @vfnmacc_vv_nxv1f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1699,11 +1511,9 @@ define @vfnmacc_vf_nxv1f64_ta( %a, do ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1717,11 +1527,9 @@ define @vfnmacc_vf_nxv1f64_commute_ta( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) 
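The recurring change across these hunks is purely syntactic: the two-instruction all-true mask idiom is folded into the `splat` constant expression that the new IR uses. A minimal before/after sketch follows; the vector types are inferred from the `.nxv1f64` intrinsic suffix (the extraction dropped the angle-bracketed types above), so treat them as an assumption rather than a quote from the diff:

  ; old idiom: build an all-true mask with insertelement + shufflevector
  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %nega = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x i1> %allones, i32 %evl)

  ; new idiom: the same all-true mask as a single splat constant
  %nega = call <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)

Both forms spell an all-true predicate, so the CHECK lines and the generated vfnmacc/vfnmsac sequences are unchanged; only the test IR gets shorter. Note also that the `_ta` variants pick the final result with `llvm.vp.select` (tail-agnostic), while the others use `llvm.vp.merge`, whose lanes at or beyond %evl keep %c; the mask rewrite is identical in both.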
- %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1738,11 +1546,9 @@ define @vfnmacc_vv_nxv2f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1754,12 +1560,10 @@ define @vfnmacc_vv_nxv2f64_unmasked( ; CHECK-NEXT: vfnmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1772,11 +1576,9 @@ define @vfnmacc_vf_nxv2f64( %a, doubl ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1790,11 +1592,9 @@ define @vfnmacc_vf_nxv2f64_commute( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1808,12 +1608,10 @@ define @vfnmacc_vf_nxv2f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, 
poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1824,11 +1622,9 @@ define @vfnmacc_vv_nxv2f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1842,11 +1638,9 @@ define @vfnmacc_vf_nxv2f64_ta( %a, do ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1860,11 +1654,9 @@ define @vfnmacc_vf_nxv2f64_commute_ta( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1881,11 +1673,9 @@ define @vfnmacc_vv_nxv4f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1897,12 +1687,10 @@ define @vfnmacc_vv_nxv4f64_unmasked( ; CHECK-NEXT: vfnmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call 
@llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1915,11 +1703,9 @@ define @vfnmacc_vf_nxv4f64( %a, doubl ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1933,11 +1719,9 @@ define @vfnmacc_vf_nxv4f64_commute( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1951,12 +1735,10 @@ define @vfnmacc_vf_nxv4f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1967,11 +1749,9 @@ define @vfnmacc_vv_nxv4f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1985,11 +1765,9 @@ define 
@vfnmacc_vf_nxv4f64_ta( %a, do ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -2003,11 +1781,9 @@ define @vfnmacc_vf_nxv4f64_commute_ta( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -2025,11 +1801,9 @@ define @vfnmacc_vv_nxv8f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -2042,12 +1816,10 @@ define @vfnmacc_vv_nxv8f64_unmasked( ; CHECK-NEXT: vfnmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -2060,11 +1832,9 @@ define @vfnmacc_vf_nxv8f64( %a, doubl ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %negc, splat (i1 -1), 
i32 %evl) %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -2078,11 +1848,9 @@ define @vfnmacc_vf_nxv8f64_commute( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -2096,12 +1864,10 @@ define @vfnmacc_vf_nxv8f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -2113,11 +1879,9 @@ define @vfnmacc_vv_nxv8f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -2131,11 +1895,9 @@ define @vfnmacc_vf_nxv8f64_ta( %a, do ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -2149,11 +1911,9 @@ define @vfnmacc_vf_nxv8f64_commute_ta( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %negc, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 
-1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) ret %u } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll index d906b6450a93d..14dba24daf5ff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll @@ -16,10 +16,8 @@ define @vfnmsac_vv_nxv1f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -31,11 +29,9 @@ define @vfnmsac_vv_nxv1f16_unmasked( %a, ; CHECK-NEXT: vfnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -48,10 +44,8 @@ define @vfnmsac_vf_nxv1f16( %a, half %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -65,10 +59,8 @@ define @vfnmsac_vf_nxv1f16_commute( %a, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -82,11 +74,9 @@ define @vfnmsac_vf_nxv1f16_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ 
-97,10 +87,8 @@ define @vfnmsac_vv_nxv1f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -114,10 +102,8 @@ define @vfnmsac_vf_nxv1f16_ta( %a, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -131,10 +117,8 @@ define @vfnmsac_vf_nxv1f16_commute_ta( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f16( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f16( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f16( %m, %v, %c, i32 %evl) ret %u } @@ -151,10 +135,8 @@ define @vfnmsac_vv_nxv2f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -166,11 +148,9 @@ define @vfnmsac_vv_nxv2f16_unmasked( %a, ; CHECK-NEXT: vfnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -183,10 +163,8 @@ define @vfnmsac_vf_nxv2f16( %a, half %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -200,10 +178,8 @@ define @vfnmsac_vf_nxv2f16_commute( %a, 
h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -217,11 +193,9 @@ define @vfnmsac_vf_nxv2f16_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -232,10 +206,8 @@ define @vfnmsac_vv_nxv2f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -249,10 +221,8 @@ define @vfnmsac_vf_nxv2f16_ta( %a, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -266,10 +236,8 @@ define @vfnmsac_vf_nxv2f16_commute_ta( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f16( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f16( %m, %v, %c, i32 %evl) ret %u } @@ -286,10 +254,8 @@ define @vfnmsac_vv_nxv4f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -301,11 +267,9 @@ define @vfnmsac_vv_nxv4f16_unmasked( %a, ; CHECK-NEXT: 
vfnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -318,10 +282,8 @@ define @vfnmsac_vf_nxv4f16( %a, half %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -335,10 +297,8 @@ define @vfnmsac_vf_nxv4f16_commute( %a, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -352,11 +312,9 @@ define @vfnmsac_vf_nxv4f16_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -367,10 +325,8 @@ define @vfnmsac_vv_nxv4f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -384,10 +340,8 @@ define @vfnmsac_vf_nxv4f16_ta( %a, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( 
%nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -401,10 +355,8 @@ define @vfnmsac_vf_nxv4f16_commute_ta( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f16( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f16( %m, %v, %c, i32 %evl) ret %u } @@ -421,10 +373,8 @@ define @vfnmsac_vv_nxv8f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -436,11 +386,9 @@ define @vfnmsac_vv_nxv8f16_unmasked( %a, ; CHECK-NEXT: vfnmsac.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -453,10 +401,8 @@ define @vfnmsac_vf_nxv8f16( %a, half %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -470,10 +416,8 @@ define @vfnmsac_vf_nxv8f16_commute( %a, h ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -487,11 +431,9 @@ define @vfnmsac_vf_nxv8f16_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %c, %allones, i32 %evl) - 
%u = call @llvm.vp.merge.nxv8f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -502,10 +444,8 @@ define @vfnmsac_vv_nxv8f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -519,10 +459,8 @@ define @vfnmsac_vf_nxv8f16_ta( %a, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -536,10 +474,8 @@ define @vfnmsac_vf_nxv8f16_commute_ta( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f16( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f16( %m, %v, %c, i32 %evl) ret %u } @@ -556,10 +492,8 @@ define @vfnmsac_vv_nxv16f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -571,11 +505,9 @@ define @vfnmsac_vv_nxv16f16_unmasked( % ; CHECK-NEXT: vfnmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -588,10 +520,8 @@ define @vfnmsac_vf_nxv16f16( %a, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( 
%nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -605,10 +535,8 @@ define @vfnmsac_vf_nxv16f16_commute( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -622,11 +550,9 @@ define @vfnmsac_vf_nxv16f16_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -637,10 +563,8 @@ define @vfnmsac_vv_nxv16f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -654,10 +578,8 @@ define @vfnmsac_vf_nxv16f16_ta( %a, hal ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -671,10 +593,8 @@ define @vfnmsac_vf_nxv16f16_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f16( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f16( %m, %v, %c, i32 %evl) ret %u } @@ -692,10 +612,8 @@ define @vfnmsac_vv_nxv32f16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %v = call 
@llvm.vp.fma.nxv32f16( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -708,11 +626,9 @@ define @vfnmsac_vv_nxv32f16_unmasked( % ; CHECK-NEXT: vfnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -725,10 +641,8 @@ define @vfnmsac_vf_nxv32f16( %a, half % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -742,10 +656,8 @@ define @vfnmsac_vf_nxv32f16_commute( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -759,11 +671,9 @@ define @vfnmsac_vf_nxv32f16_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32f16( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32f16( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -775,10 +685,8 @@ define @vfnmsac_vv_nxv32f16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -792,10 +700,8 @@ define @vfnmsac_vf_nxv32f16_ta( %a, hal ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, 
poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -809,10 +715,8 @@ define @vfnmsac_vf_nxv32f16_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv32f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv32f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv32f16( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32f16( %m, %v, %c, i32 %evl) ret %u } @@ -829,10 +733,8 @@ define @vfnmsac_vv_nxv1f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -844,11 +746,9 @@ define @vfnmsac_vv_nxv1f32_unmasked( %a ; CHECK-NEXT: vfnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -861,10 +761,8 @@ define @vfnmsac_vf_nxv1f32( %a, float % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -878,10 +776,8 @@ define @vfnmsac_vf_nxv1f32_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -895,11 +791,9 @@ define 
@vfnmsac_vf_nxv1f32_unmasked( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -910,10 +804,8 @@ define @vfnmsac_vv_nxv1f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -927,10 +819,8 @@ define @vfnmsac_vf_nxv1f32_ta( %a, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -944,10 +834,8 @@ define @vfnmsac_vf_nxv1f32_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -964,10 +852,8 @@ define @vfnmsac_vv_nxv2f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -979,11 +865,9 @@ define @vfnmsac_vv_nxv2f32_unmasked( %a ; CHECK-NEXT: vfnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -996,10 
+880,8 @@ define @vfnmsac_vf_nxv2f32( %a, float % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1013,10 +895,8 @@ define @vfnmsac_vf_nxv2f32_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1030,11 +910,9 @@ define @vfnmsac_vf_nxv2f32_unmasked( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1045,10 +923,8 @@ define @vfnmsac_vv_nxv2f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1062,10 +938,8 @@ define @vfnmsac_vf_nxv2f32_ta( %a, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1079,10 +953,8 @@ define @vfnmsac_vf_nxv2f32_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %a, splat (i1 -1), i32 %evl) + %v = 
call @llvm.vp.fma.nxv2f32( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f32( %m, %v, %c, i32 %evl) ret %u } @@ -1099,10 +971,8 @@ define @vfnmsac_vv_nxv4f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1114,11 +984,9 @@ define @vfnmsac_vv_nxv4f32_unmasked( %a ; CHECK-NEXT: vfnmsac.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1131,10 +999,8 @@ define @vfnmsac_vf_nxv4f32( %a, float % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1148,10 +1014,8 @@ define @vfnmsac_vf_nxv4f32_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1165,11 +1029,9 @@ define @vfnmsac_vf_nxv4f32_unmasked( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1180,10 +1042,8 @@ define @vfnmsac_vv_nxv4f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat 
(i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1197,10 +1057,8 @@ define @vfnmsac_vf_nxv4f32_ta( %a, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1214,10 +1072,8 @@ define @vfnmsac_vf_nxv4f32_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f32( %m, %v, %c, i32 %evl) ret %u } @@ -1234,10 +1090,8 @@ define @vfnmsac_vv_nxv8f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1249,11 +1103,9 @@ define @vfnmsac_vv_nxv8f32_unmasked( %a ; CHECK-NEXT: vfnmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1266,10 +1118,8 @@ define @vfnmsac_vf_nxv8f32( %a, float % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1283,10 +1133,8 @@ define @vfnmsac_vf_nxv8f32_commute( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %v = call 
@llvm.vp.fma.nxv8f32( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1300,11 +1148,9 @@ define @vfnmsac_vf_nxv8f32_unmasked( %a ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1315,10 +1161,8 @@ define @vfnmsac_vv_nxv8f32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1332,10 +1176,8 @@ define @vfnmsac_vf_nxv8f32_ta( %a, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1349,10 +1191,8 @@ define @vfnmsac_vf_nxv8f32_commute_ta( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f32( %m, %v, %c, i32 %evl) ret %u } @@ -1370,10 +1210,8 @@ define @vfnmsac_vv_nxv16f32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1386,11 +1224,9 @@ define @vfnmsac_vv_nxv16f32_unmasked( ; CHECK-NEXT: vfnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, 
%allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1403,10 +1239,8 @@ define @vfnmsac_vf_nxv16f32( %a, floa ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1420,10 +1254,8 @@ define @vfnmsac_vf_nxv16f32_commute( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1437,11 +1269,9 @@ define @vfnmsac_vf_nxv16f32_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16f32( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1453,10 +1283,8 @@ define @vfnmsac_vv_nxv16f32_ta( %a, < ; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1470,10 +1298,8 @@ define @vfnmsac_vf_nxv16f32_ta( %a, f ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1487,10 +1313,8 @@ define 
@vfnmsac_vf_nxv16f32_commute_ta( poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv16f32( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16f32( %m, %v, %c, i32 %evl) ret %u } @@ -1507,10 +1331,8 @@ define @vfnmsac_vv_nxv1f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1522,11 +1344,9 @@ define @vfnmsac_vv_nxv1f64_unmasked( ; CHECK-NEXT: vfnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1539,10 +1359,8 @@ define @vfnmsac_vf_nxv1f64( %a, doubl ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1556,10 +1374,8 @@ define @vfnmsac_vf_nxv1f64_commute( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1573,11 +1389,9 @@ define @vfnmsac_vf_nxv1f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, 
%c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1588,10 +1402,8 @@ define @vfnmsac_vv_nxv1f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1605,10 +1417,8 @@ define @vfnmsac_vf_nxv1f64_ta( %a, do ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1622,10 +1432,8 @@ define @vfnmsac_vf_nxv1f64_commute_ta( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv1f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1f64( %m, %v, %c, i32 %evl) ret %u } @@ -1642,10 +1450,8 @@ define @vfnmsac_vv_nxv2f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1657,11 +1463,9 @@ define @vfnmsac_vv_nxv2f64_unmasked( ; CHECK-NEXT: vfnmsac.vv v12, v8, v10 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1674,10 +1478,8 @@ define @vfnmsac_vf_nxv2f64( %a, doubl ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 
%evl) ret %u } @@ -1691,10 +1493,8 @@ define @vfnmsac_vf_nxv2f64_commute( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1708,11 +1508,9 @@ define @vfnmsac_vf_nxv2f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1723,10 +1521,8 @@ define @vfnmsac_vv_nxv2f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1740,10 +1536,8 @@ define @vfnmsac_vf_nxv2f64_ta( %a, do ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1757,10 +1551,8 @@ define @vfnmsac_vf_nxv2f64_commute_ta( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv2f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2f64( %m, %v, %c, i32 %evl) ret %u } @@ -1777,10 +1569,8 @@ define @vfnmsac_vv_nxv4f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1792,11 +1582,9 @@ define 
@vfnmsac_vv_nxv4f64_unmasked( ; CHECK-NEXT: vfnmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1809,10 +1597,8 @@ define @vfnmsac_vf_nxv4f64( %a, doubl ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1826,10 +1612,8 @@ define @vfnmsac_vf_nxv4f64_commute( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1843,11 +1627,9 @@ define @vfnmsac_vf_nxv4f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1858,10 +1640,8 @@ define @vfnmsac_vv_nxv4f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1875,10 +1655,8 @@ define @vfnmsac_vf_nxv4f64_ta( %a, do ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), 
i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1892,10 +1670,8 @@ define @vfnmsac_vf_nxv4f64_commute_ta( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv4f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4f64( %m, %v, %c, i32 %evl) ret %u } @@ -1913,10 +1689,8 @@ define @vfnmsac_vv_nxv8f64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -1929,11 +1703,9 @@ define @vfnmsac_vv_nxv8f64_unmasked( ; CHECK-NEXT: vfnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1946,10 +1718,8 @@ define @vfnmsac_vf_nxv8f64( %a, doubl ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -1963,10 +1733,8 @@ define @vfnmsac_vf_nxv8f64_commute( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -1980,11 +1748,9 @@ define @vfnmsac_vf_nxv8f64_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %c, %allones, i32 %evl) 
- %u = call @llvm.vp.merge.nxv8f64( %allones, %v, %c, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8f64( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -1996,10 +1762,8 @@ define @vfnmsac_vv_nxv8f64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %b, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -2013,10 +1777,8 @@ define @vfnmsac_vf_nxv8f64_ta( %a, do ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vb, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) ret %u } @@ -2030,10 +1792,8 @@ define @vfnmsac_vf_nxv8f64_commute_ta( poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %nega = call @llvm.vp.fneg.nxv8f64( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %c, %allones, i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vb, %nega, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8f64( %m, %v, %c, i32 %evl) ret %u } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll index d7bb1ad7726db..876f8d9456386 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll @@ -26,9 +26,7 @@ define @vfrdiv_vf_nxv1f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv1f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv1f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -54,9 +52,7 @@ define @vfrdiv_vf_nxv2f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv2f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv2f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -82,9 +78,7 @@ define @vfrdiv_vf_nxv4f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv4f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv4f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -110,9 +104,7 @@ define @vfrdiv_vf_nxv8f16_unmasked( %va, ; CHECK-NEXT: ret 
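; A minimal sketch of the rewrite applied throughout these test diffs; the
; <vscale x 4 x i1> mask width below is chosen only for illustration, and the
; value names %head/%m mirror the surrounding tests. The old two-instruction
; all-ones-mask idiom
;   %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
;   %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
; and the inline shorthand constant
;   splat (i1 true)
; denote the same all-true mask operand; `splat (i1 -1)` is the equivalent
; spelling used in the vfnmsac tests above.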
%elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv8f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv8f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -138,9 +130,7 @@ define @vfrdiv_vf_nxv16f16_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv16f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv16f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -166,9 +156,7 @@ define @vfrdiv_vf_nxv32f16_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv32f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv32f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -194,9 +182,7 @@ define @vfrdiv_vf_nxv1f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv1f32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv1f32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -222,9 +208,7 @@ define @vfrdiv_vf_nxv2f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv2f32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv2f32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -250,9 +234,7 @@ define @vfrdiv_vf_nxv4f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv4f32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv4f32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -278,9 +260,7 @@ define @vfrdiv_vf_nxv8f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv8f32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv8f32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -306,9 +286,7 @@ define @vfrdiv_vf_nxv16f32_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv16f32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv16f32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -334,9 +312,7 @@ define @vfrdiv_vf_nxv1f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, 
poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv1f64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv1f64( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -362,9 +338,7 @@ define @vfrdiv_vf_nxv2f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv2f64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv2f64( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -390,9 +364,7 @@ define @vfrdiv_vf_nxv4f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv4f64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv4f64( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -418,8 +390,6 @@ define @vfrdiv_vf_nxv8f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fdiv.nxv8f64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fdiv.nxv8f64( %vb, %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll index 9e1c719e3bfa6..bd941dc1a7772 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll @@ -26,9 +26,7 @@ define @vfrsub_vf_nxv1f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv1f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv1f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -54,9 +52,7 @@ define @vfrsub_vf_nxv2f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv2f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv2f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -82,9 +78,7 @@ define @vfrsub_vf_nxv4f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv4f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv4f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -110,9 +104,7 @@ define @vfrsub_vf_nxv8f16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv8f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv8f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -138,9 +130,7 @@ define @vfrsub_vf_nxv16f16_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement 
poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv16f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv16f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -166,9 +156,7 @@ define @vfrsub_vf_nxv32f16_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv32f16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv32f16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -194,9 +182,7 @@ define @vfrsub_vf_nxv1f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv1f32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv1f32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -222,9 +208,7 @@ define @vfrsub_vf_nxv2f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv2f32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv2f32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -250,9 +234,7 @@ define @vfrsub_vf_nxv4f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv4f32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv4f32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -278,9 +260,7 @@ define @vfrsub_vf_nxv8f32_unmasked( %va ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv8f32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv8f32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -306,9 +286,7 @@ define @vfrsub_vf_nxv16f32_unmasked( ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv16f32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv16f32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -334,9 +312,7 @@ define @vfrsub_vf_nxv1f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv1f64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv1f64( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -362,9 +338,7 @@ define @vfrsub_vf_nxv2f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv2f64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv2f64( %vb, %va, 
splat (i1 true), i32 %evl) ret %v } @@ -390,9 +364,7 @@ define @vfrsub_vf_nxv4f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv4f64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv4f64( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -418,8 +390,6 @@ define @vfrsub_vf_nxv8f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv8f64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv8f64( %vb, %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll index b13d221e00e63..d6caad15e40a2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll @@ -46,9 +46,7 @@ define @vfsqrt_vv_nxv1f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv1f16( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -90,9 +88,7 @@ define @vfsqrt_vv_nxv2f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv2f16( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -134,9 +130,7 @@ define @vfsqrt_vv_nxv4f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv4f16( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -178,9 +172,7 @@ define @vfsqrt_vv_nxv8f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv8f16( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -222,9 +214,7 @@ define @vfsqrt_vv_nxv16f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv16f16( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -279,8 +269,6 @@ define @vfsqrt_vv_nxv32f16_unmasked( %v ; ; ZVFHMIN-LABEL: vfsqrt_vv_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -288,6 +276,8 @@ define @vfsqrt_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; 
ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -306,9 +296,7 @@ define @vfsqrt_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv32f16( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv32f16( %va, splat (i1 true), i32 %evl) ret %v } @@ -330,9 +318,7 @@ define @vfsqrt_vv_nxv1f32_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv1f32( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv1f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -354,9 +340,7 @@ define @vfsqrt_vv_nxv2f32_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv2f32( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv2f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -378,9 +362,7 @@ define @vfsqrt_vv_nxv4f32_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv4f32( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv4f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -402,9 +384,7 @@ define @vfsqrt_vv_nxv8f32_unmasked( %va ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv8f32( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv8f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -426,9 +406,7 @@ define @vfsqrt_vv_nxv16f32_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv16f32( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv16f32( %va, splat (i1 true), i32 %evl) ret %v } @@ -450,9 +428,7 @@ define @vfsqrt_vv_nxv1f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv1f64( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -474,9 +450,7 @@ define @vfsqrt_vv_nxv2f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv2f64( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -498,9 +472,7 @@ define @vfsqrt_vv_nxv4f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, 
zeroinitializer - %v = call @llvm.vp.sqrt.nxv4f64( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -522,9 +494,7 @@ define @vfsqrt_vv_nxv7f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv7f64( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -546,9 +516,7 @@ define @vfsqrt_vv_nxv8f64_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsqrt.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv8f64( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v } @@ -598,8 +566,6 @@ define @vfsqrt_vv_nxv16f64_unmasked( poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sqrt.nxv16f64( %va, %m, i32 %evl) + %v = call @llvm.vp.sqrt.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll index b8b95ad21de60..b7941c17dab5c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll @@ -562,9 +562,7 @@ define @vfsub_vv_mask_nxv8f32( %va, poison, float 0.0, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %vb, %splat + %vs = select %mask, %vb, splat (float 0.0) %vc = fsub fast %va, %vs ret %vc } @@ -575,11 +573,9 @@ define @vfsub_vf_mask_nxv8f32( %va, flo ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsub.vf v8, v8, fa0, v0.t ; CHECK-NEXT: ret - %head0 = insertelement poison, float 0.0, i32 0 - %splat0 = shufflevector %head0, poison, zeroinitializer %head1 = insertelement poison, float %b, i32 0 %splat1 = shufflevector %head1, poison, zeroinitializer - %vs = select %mask, %splat1, %splat0 + %vs = select %mask, %splat1, splat (float 0.0) %vc = fsub fast %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index 010b133e51b1f..2eae18d7cc493 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -48,9 +48,7 @@ define @vfsub_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv1f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv1f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -104,9 +102,7 @@ define @vfsub_vf_nxv1f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv1f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv1f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -150,9 +146,7 @@ define @vfsub_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv2f16( %va, %b, %m, i32 
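The splat shorthand is not limited to i1 masks: the vfsub-sdnode.ll hunks use it for a floating-point zero as well. With the types written out, the rewritten select is, in sketch form:

    ; Lane-wise select between %vb and 0.0, then a fast-math subtract;
    ; splat (float 0.0) replaces the old insertelement/shufflevector zero splat.
    %vs = select <vscale x 8 x i1> %mask, <vscale x 8 x float> %vb, <vscale x 8 x float> splat (float 0.0)
    %vc = fsub fast <vscale x 8 x float> %va, %vs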
%evl) + %v = call @llvm.vp.fsub.nxv2f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -206,9 +200,7 @@ define @vfsub_vf_nxv2f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv2f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv2f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -252,9 +244,7 @@ define @vfsub_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv4f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv4f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -308,9 +298,7 @@ define @vfsub_vf_nxv4f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv4f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv4f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -354,9 +342,7 @@ define @vfsub_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv8f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv8f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -410,9 +396,7 @@ define @vfsub_vf_nxv8f16_unmasked( %va, h ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv8f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv8f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -456,9 +440,7 @@ define @vfsub_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv16f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv16f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -512,9 +494,7 @@ define @vfsub_vf_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv16f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -591,8 +571,6 @@ define @vfsub_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: sub sp, sp, a1 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -600,6 +578,8 @@ define @vfsub_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, 
a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -628,9 +608,7 @@ define @vfsub_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv32f16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv32f16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -696,8 +674,6 @@ define @vfsub_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 -; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -705,6 +681,8 @@ define @vfsub_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi a4, a4, -1 ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma +; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma @@ -726,9 +704,7 @@ define @vfsub_vf_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv32f16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv32f16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -750,9 +726,7 @@ define @vfsub_vv_nxv1f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv1f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv1f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -776,9 +750,7 @@ define @vfsub_vf_nxv1f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv1f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -800,9 +772,7 @@ define @vfsub_vv_nxv2f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv2f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv2f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -826,9 +796,7 @@ define @vfsub_vf_nxv2f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv2f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv2f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -850,9 +818,7 @@ define @vfsub_vv_nxv4f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; 
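For reading these hunks: a vector-predication intrinsic applies its operation only to lanes that are both below the explicit vector length %evl and enabled in the mask; the remaining result lanes are undefined. With an all-true mask the call is simply the plain operation on the first %evl lanes, which is what the _unmasked tests exercise. A lane-wise reading of a representative call, assuming the usual nxv2f32 to <vscale x 2 x float> correspondence:

    %v = call <vscale x 2 x float> @llvm.vp.fsub.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 %evl)
    ; for each lane i:
    ;   if i < %evl and %m[i] is set:  %v[i] = %x[i] - %y[i]
    ;   otherwise:                     %v[i] is undefined

The only codegen delta in the nxv32f16 hunks is scheduling: the vsetvli/vmset.m pair that materializes the all-ones mask register now issues after the scalar length arithmetic rather than before it; the instruction count is unchanged.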
CHECK-NEXT: vfsub.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv4f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv4f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -876,9 +842,7 @@ define @vfsub_vf_nxv4f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv4f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv4f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -900,9 +864,7 @@ define @vfsub_vv_nxv8f32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv8f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv8f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -926,9 +888,7 @@ define @vfsub_vf_nxv8f32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv8f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv8f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -950,9 +910,7 @@ define @vfsub_vv_nxv16f32_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv16f32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv16f32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -976,9 +934,7 @@ define @vfsub_vf_nxv16f32_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv16f32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv16f32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1000,9 +956,7 @@ define @vfsub_vv_nxv1f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv1f64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv1f64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1026,9 +980,7 @@ define @vfsub_vf_nxv1f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv1f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv1f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1050,9 +1002,7 @@ define @vfsub_vv_nxv2f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv2f64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv2f64( %va, %b, splat (i1 true), i32 
%evl) ret %v } @@ -1076,9 +1026,7 @@ define @vfsub_vf_nxv2f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv2f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv2f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1100,9 +1048,7 @@ define @vfsub_vv_nxv4f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv4f64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv4f64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1126,9 +1072,7 @@ define @vfsub_vf_nxv4f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv4f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv4f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1162,9 +1106,7 @@ define @vfsub_vv_nxv8f64_unmasked( %v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsub.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv8f64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv8f64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1188,8 +1130,6 @@ define @vfsub_vf_nxv8f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.fsub.nxv8f64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.fsub.nxv8f64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll index 8a484c7f6b776..fab76ac564581 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll @@ -51,11 +51,9 @@ define @vfmacc_vv_nxv1f32_unmasked( %a, ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v8, v11, v10 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %aext, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -76,11 +74,9 @@ define @vfmacc_vv_nxv1f32_tu( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, splat (i1 -1), i32 %evl) + %v = 
call @llvm.vp.fma.nxv1f32( %aext, %bext, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -132,12 +128,10 @@ define @vfmacc_vv_nxv1f32_unmasked_tu( % ; ZVFHMIN-NEXT: vfmacc.vv v10, v11, v8 ; ZVFHMIN-NEXT: vmv1r.v v8, v10 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %aext, %bext, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %aext, %bext, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -223,11 +217,9 @@ define @vfmacc_vf_nxv1f32_unmasked( %va, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %vaext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %va, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vaext, %vbext, %vc, %allones, i32 %evl) + %vaext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %va, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vaext, %vbext, %vc, splat (i1 -1), i32 %evl) ret %v } @@ -255,11 +247,9 @@ define @vfmacc_vf_nxv1f32_tu( %va, half ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %vaext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %va, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vaext, %vbext, %vc, %allones, i32 %evl) + %vaext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %va, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vaext, %vbext, %vc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %vc, i32 %evl) ret %u } @@ -288,11 +278,9 @@ define @vfmacc_vf_nxv1f32_commute_tu( %v ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %vaext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %va, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vbext, %vaext, %vc, %allones, i32 %evl) + %vaext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %va, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vbext, %vaext, %vc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %vc, i32 %evl) ret %u } @@ -321,12 +309,10 @@ define @vfmacc_vf_nxv1f32_unmasked_tu( % ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - 
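The vfwmacc-vp.ll tests check that the widening multiply-accumulate is still matched after the splat rewrite: both half operands are extended with llvm.vp.fpext and fed, together with the float accumulator, to llvm.vp.fma, and with an all-true mask (splat (i1 -1) and splat (i1 true) denote the same all-ones value) this folds to a single vfwmacc.vv. The _tu variants additionally route the result through llvm.vp.merge so lanes at and past %evl keep the accumulator, selecting the tail-undisturbed form. The unmasked pattern in sketch form, types written out:

    %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
    %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
    ; %v = %aext * %bext + %c on the first %evl lanes -> vfwmacc.vv
    %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)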
%splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %vaext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %va, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %vaext, %vbext, %vc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %vc, i32 %evl) + %vaext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %va, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %vaext, %vbext, %vc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( splat (i1 -1), %v, %vc, i32 %evl) ret %u } @@ -374,11 +360,9 @@ define @vfmacc_vv_nxv2f32_unmasked( %a, ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v8, v11, v10 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %aext, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -434,11 +418,9 @@ define @vfmacc_vf_nxv2f32_unmasked( %va, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %vaext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %va, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %vb, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %vaext, %vbext, %vc, %allones, i32 %evl) + %vaext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %va, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %vb, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %vaext, %vbext, %vc, splat (i1 -1), i32 %evl) ret %v } @@ -488,11 +470,9 @@ define @vfmacc_vv_nxv4f32_unmasked( %a, ; ZVFHMIN-NEXT: vfmadd.vv v12, v14, v10 ; ZVFHMIN-NEXT: vmv.v.v v8, v12 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %aext, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -548,11 +528,9 @@ define @vfmacc_vf_nxv4f32_unmasked( %va, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %vaext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %va, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %vb, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %vaext, %vbext, %vc, %allones, i32 %evl) + %vaext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %va, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %vb, splat (i1 
-1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %vaext, %vbext, %vc, splat (i1 -1), i32 %evl) ret %v } @@ -602,11 +580,9 @@ define @vfmacc_vv_nxv8f32_unmasked( %a, ; ZVFHMIN-NEXT: vfmadd.vv v16, v20, v12 ; ZVFHMIN-NEXT: vmv.v.v v8, v16 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %aext, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -662,11 +638,9 @@ define @vfmacc_vf_nxv8f32_unmasked( %va, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %vaext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %va, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %vb, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %vaext, %vbext, %vc, %allones, i32 %evl) + %vaext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %va, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %vb, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %vaext, %vbext, %vc, splat (i1 -1), i32 %evl) ret %v } @@ -730,11 +704,9 @@ define @vfmacc_vv_nxv16f32_unmasked( % ; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v16 ; ZVFHMIN-NEXT: vmv.v.v v8, v24 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %aext, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -790,11 +762,9 @@ define @vfmacc_vf_nxv16f32_unmasked( % ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %vaext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %va, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %vb, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %vaext, %vbext, %vc, %allones, i32 %evl) + %vaext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %va, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %vb, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %vaext, %vbext, %vc, splat (i1 -1), i32 %evl) ret %v } @@ -824,11 +794,9 @@ define @vfmacc_vv_nxv1f64_unmasked( %a ; CHECK-NEXT: vfwmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %aext, %bext, %c, %allones, i32 %evl) + %aext = call 
@llvm.vp.fpext.nxv1f64.nxv1f32( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %aext, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -856,11 +824,9 @@ define @vfmacc_vf_nxv1f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %vaext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %va, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %vb, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %vaext, %vbext, %vc, %allones, i32 %evl) + %vaext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %va, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %vb, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %vaext, %vbext, %vc, splat (i1 -1), i32 %evl) ret %v } @@ -890,11 +856,9 @@ define @vfmacc_vv_nxv2f64_unmasked( %a ; CHECK-NEXT: vfwmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %aext, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -922,11 +886,9 @@ define @vfmacc_vf_nxv2f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %vaext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %va, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %vb, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %vaext, %vbext, %vc, %allones, i32 %evl) + %vaext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %va, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %vb, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %vaext, %vbext, %vc, splat (i1 -1), i32 %evl) ret %v } @@ -956,11 +918,9 @@ define @vfmacc_vv_nxv4f64_unmasked( %a ; CHECK-NEXT: vfwmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %aext, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -988,11 +948,9 @@ define @vfmacc_vf_nxv4f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %vaext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %va, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %vb, 
%allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %vaext, %vbext, %vc, %allones, i32 %evl) + %vaext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %va, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %vb, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %vaext, %vbext, %vc, splat (i1 -1), i32 %evl) ret %v } @@ -1022,11 +980,9 @@ define @vfmacc_vv_nxv8f64_unmasked( %a ; CHECK-NEXT: vfwmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %aext, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -1054,11 +1010,9 @@ define @vfmacc_vf_nxv8f64_unmasked( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %vaext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %va, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %vb, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %vaext, %vbext, %vc, %allones, i32 %evl) + %vaext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %va, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %vb, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %vaext, %vbext, %vc, splat (i1 -1), i32 %evl) ret %v } @@ -1090,11 +1044,9 @@ define @vfmacc_vv_nxv1f64_nxv1f16_unmasked( poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f64.nxv1f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f64.nxv1f16( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f64.nxv1f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f64.nxv1f16( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %aext, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -1126,11 +1078,9 @@ define @vfmacc_vv_nxv2f64_nxv2f16_unmasked( poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f64.nxv2f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv2f64.nxv2f16( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f64.nxv2f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv2f64.nxv2f16( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %aext, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -1162,11 +1112,9 @@ define @vfmacc_vv_nxv4f64_nxv4f16_unmasked( poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f64.nxv4f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv4f64.nxv4f16( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f64.nxv4f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv4f64.nxv4f16( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %aext, 
%bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -1198,11 +1146,9 @@ define @vfmacc_vv_nxv8f64_nxv8f16_unmasked( poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f64.nxv8f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv8f64.nxv8f16( %b, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %aext, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f64.nxv8f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv8f64.nxv8f16( %b, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %aext, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -1243,9 +1189,7 @@ define @vfmacc_squared_nxv1f32_unmasked( ; ZVFHMIN-NEXT: vfmadd.vv v9, v9, v10 ; ZVFHMIN-NEXT: vmv1r.v v8, v9 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %aext, %aext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %aext, %aext, %c, splat (i1 -1), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll index 92ad961999ddc..4cd9b8bc2cded 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-vp.ll @@ -52,12 +52,10 @@ define @vmfsac_vv_nxv1f32_unmasked( %a, ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfmsub.vv v8, v11, v10 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %aext, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %aext, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -78,12 +76,10 @@ define @vmfsac_vv_nxv1f32_tu( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %aext, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %aext, %bext, %negc, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1f32( %m, %v, %c, i32 %evl) ret %u } @@ -105,13 +101,11 @@ define @vmfsac_vv_nxv1f32_unmasked_tu( % ; ZVFHMIN-NEXT: vfmsac.vv v10, v11, v8 ; ZVFHMIN-NEXT: vmv1r.v v8, v10 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = 
call @llvm.vp.fma.nxv1f32( %aext, %bext, %negc, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1f32( %allones, %v, %c, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %aext, %bext, %negc, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1f32( splat (i1 -1), %v, %c, i32 %evl) ret %u } @@ -138,8 +132,6 @@ define @vmfsac_vf_nxv1f32( %a, half %b, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %m, i32 %evl) %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv1f32( %c, %m, i32 %evl) @@ -201,12 +193,10 @@ define @vmfsac_vf_nxv1f32_unmasked( %a, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %aext, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %aext, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -253,12 +243,10 @@ define @vmfsac_vv_nxv2f32_unmasked( %a, ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfmsub.vv v8, v11, v10 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %b, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %aext, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %b, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %aext, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -285,8 +273,6 @@ define @vmfsac_vf_nxv2f32( %a, half %b, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, %m, i32 %evl) %vbext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %vb, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv2f32( %c, %m, i32 %evl) @@ -348,12 +334,10 @@ define @vmfsac_vf_nxv2f32_unmasked( %a, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, %allones, 
i32 %evl) - %vbext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %vb, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %aext, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %vb, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %aext, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -402,12 +386,10 @@ define @vmfsac_vv_nxv4f32_unmasked( %a, ; ZVFHMIN-NEXT: vfmsub.vv v12, v14, v10 ; ZVFHMIN-NEXT: vmv.v.v v8, v12 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %b, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %aext, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %b, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %aext, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -434,8 +416,6 @@ define @vmfsac_vf_nxv4f32( %a, half %b, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, %m, i32 %evl) %vbext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %vb, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv4f32( %c, %m, i32 %evl) @@ -497,12 +477,10 @@ define @vmfsac_vf_nxv4f32_unmasked( %a, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %vb, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %aext, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %vb, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %aext, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -551,12 +529,10 @@ define @vmfsac_vv_nxv8f32_unmasked( %a, ; ZVFHMIN-NEXT: vfmsub.vv v16, v20, v12 ; ZVFHMIN-NEXT: vmv.v.v v8, v16 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %b, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %aext, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %b, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %aext, %bext, %negc, splat (i1 
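vfwmsac-vp.ll covers the subtract form of the same pattern: the accumulator is negated with llvm.vp.fneg before the fma, so the computed value is a*b - c and the expected instruction becomes vfwmsac.vv (the ZVFHMIN run lines widen explicitly and match vfmsub.vv/vfmsac.vv instead, as the CHECK prefixes above show). Relative to the vfwmacc sketch the only structural difference is the extra negation:

    %negc = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
    ; fma(%aext, %bext, -%c) = %aext * %bext - %c -> vfwmsac.vv
    %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %negc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)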
-1), i32 %evl) ret %v } @@ -583,8 +559,6 @@ define @vmfsac_vf_nxv8f32( %a, half %b, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, %m, i32 %evl) %vbext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %vb, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f32( %c, %m, i32 %evl) @@ -646,11 +620,9 @@ define @vmfsac_vf_nxv8f32_unmasked( %a, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %vb, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %aext, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %vb, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %aext, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll index 3a03f0d65273e..ca0bbfd65ca29 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll @@ -52,13 +52,11 @@ define @vfnmacc_vv_nxv1f32_unmasked( %a, ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfnmadd.vv v8, v11, v10 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv1f32( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -149,13 +147,11 @@ define @vfnmacc_vf_nxv1f32_unmasked( %a, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv1f32( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f32( %c, splat (i1 -1), i32 %evl) + %v 
= call @llvm.vp.fma.nxv1f32( %nega, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -203,13 +199,11 @@ define @vfnmacc_vv_nxv2f32_unmasked( %a, ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfnmadd.vv v8, v11, v10 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv2f32( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -300,13 +294,11 @@ define @vfnmacc_vf_nxv2f32_unmasked( %a, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv2f32( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -356,13 +348,11 @@ define @vfnmacc_vv_nxv4f32_unmasked( %a, ; ZVFHMIN-NEXT: vfnmadd.vv v12, v14, v10 ; ZVFHMIN-NEXT: vmv.v.v v8, v12 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv4f32( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -453,13 +443,11 @@ define @vfnmacc_vf_nxv4f32_unmasked( %a, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %vb, %allones, i32 %evl) - %nega = call 
@llvm.vp.fneg.nxv4f32( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -509,13 +497,11 @@ define @vfnmacc_vv_nxv8f32_unmasked( %a, ; ZVFHMIN-NEXT: vfnmadd.vv v16, v20, v12 ; ZVFHMIN-NEXT: vmv.v.v v8, v16 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv8f32( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -606,13 +592,11 @@ define @vfnmacc_vf_nxv8f32_unmasked( %a, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv8f32( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -676,13 +660,11 @@ define @vfnmacc_vv_nxv16f32_unmasked( ; ZVFHMIN-NEXT: vfnmadd.vv v24, v0, v16 ; ZVFHMIN-NEXT: vmv.v.v v8, v24 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv16f32( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %bext, %negc, splat (i1 -1), i32 
%evl) ret %v } @@ -773,13 +755,11 @@ define @vfnmacc_vf_nxv16f32_unmasked( ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv16f32( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv16f32( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv16f32( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -809,13 +789,11 @@ define @vfnmacc_vv_nxv1f64_unmasked( % ; CHECK-NEXT: vfwnmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv1f64( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -863,13 +841,11 @@ define @vfnmacc_vf_nxv1f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv1f64( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -899,13 +875,11 @@ define @vfnmacc_vv_nxv2f64_unmasked( % ; CHECK-NEXT: vfwnmacc.vv v10, v8, v9 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv2f64( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 
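vfwnmacc-vp.ll negates both the first product operand and the accumulator, computing -(a*b) - c, which the CHECK lines match to vfwnmacc.vv. The same shape once more, with one additional llvm.vp.fneg:

    %nega = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
    %negc = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
    ; fma(-%aext, %bext, -%c) = -(%aext * %bext) - %c -> vfwnmacc.vv
    %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %nega, <vscale x 1 x float> %bext, <vscale x 1 x float> %negc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)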
%evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -953,13 +927,11 @@ define @vfnmacc_vf_nxv2f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv2f64( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -989,13 +961,11 @@ define @vfnmacc_vv_nxv4f64_unmasked( % ; CHECK-NEXT: vfwnmacc.vv v12, v8, v10 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv4f64( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -1043,13 +1013,11 @@ define @vfnmacc_vf_nxv4f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv4f64( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -1079,13 +1047,11 @@ define @vfnmacc_vv_nxv8f64_unmasked( % ; CHECK-NEXT: 
vfwnmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv8f64( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -1133,13 +1099,11 @@ define @vfnmacc_vf_nxv8f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv8f64( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %vbext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vbext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -1173,13 +1137,11 @@ define @vfnmacc_vv_nxv1f64_nxv1f16_unmasked( poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f64.nxv1f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f64.nxv1f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv1f64( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv1f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f64.nxv1f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f64.nxv1f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv1f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -1213,13 +1175,11 @@ define @vfnmacc_vv_nxv2f64_nxv2f16_unmasked( poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f64.nxv2f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv2f64.nxv2f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv2f64( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv2f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f64.nxv2f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv2f64.nxv2f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv2f64( %c, splat 
(i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -1253,13 +1213,11 @@ define @vfnmacc_vv_nxv4f64_nxv4f16_unmasked( poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f64.nxv4f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv4f64.nxv4f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv4f64( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv4f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f64.nxv4f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv4f64.nxv4f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv4f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } @@ -1293,12 +1251,10 @@ define @vfnmacc_vv_nxv8f64_nxv4f16_unmasked( poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f64.nxv8f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv8f64.nxv8f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv8f64( %aext, %allones, i32 %evl) - %negc = call @llvm.vp.fneg.nxv8f64( %c, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %bext, %negc, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f64.nxv8f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv8f64.nxv8f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %aext, splat (i1 -1), i32 %evl) + %negc = call @llvm.vp.fneg.nxv8f64( %c, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %bext, %negc, splat (i1 -1), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll index a8cc0ce92aa16..2797ca2eb3163 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll @@ -51,12 +51,10 @@ define @vfnmsac_vv_nxv1f32_unmasked( %a, ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfnmsub.vv v8, v11, v10 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv1f32( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -144,12 +142,10 @@ define @vfnmsac_vf_nxv1f32_unmasked( %a, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv1f32( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f32( %nega, %vbext, %c, %allones, i32 %evl) + %aext = call 
@llvm.vp.fpext.nxv1f32.nxv1f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv1f32.nxv1f16( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f32( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f32( %nega, %vbext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -196,12 +192,10 @@ define @vfnmsac_vv_nxv2f32_unmasked( %a, ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfnmsub.vv v8, v11, v10 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv2f32( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -289,12 +283,10 @@ define @vfnmsac_vf_nxv2f32_unmasked( %a, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv2f32( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f32( %nega, %vbext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv2f32.nxv2f16( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f32( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f32( %nega, %vbext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -343,12 +335,10 @@ define @vfnmsac_vv_nxv4f32_unmasked( %a, ; ZVFHMIN-NEXT: vfnmsub.vv v12, v14, v10 ; ZVFHMIN-NEXT: vmv.v.v v8, v12 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv4f32( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -436,12 +426,10 @@ define @vfnmsac_vf_nxv4f32_unmasked( %a, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv4f32( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f32( %nega, %vbext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f32.nxv4f16( %a, splat (i1 -1), i32 %evl) + %vbext 
= call @llvm.vp.fpext.nxv4f32.nxv4f16( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f32( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f32( %nega, %vbext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -490,12 +478,10 @@ define @vfnmsac_vv_nxv8f32_unmasked( %a, ; ZVFHMIN-NEXT: vfnmsub.vv v16, v20, v12 ; ZVFHMIN-NEXT: vmv.v.v v8, v16 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv8f32( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -583,12 +569,10 @@ define @vfnmsac_vf_nxv8f32_unmasked( %a, ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv8f32( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f32( %nega, %vbext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv8f32.nxv8f16( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f32( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f32( %nega, %vbext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -651,12 +635,10 @@ define @vfnmsac_vv_nxv16f32_unmasked( ; ZVFHMIN-NEXT: vfnmsub.vv v24, v0, v16 ; ZVFHMIN-NEXT: vmv.v.v v8, v24 ; ZVFHMIN-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv16f32( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv16f32( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -744,12 +726,10 @@ define @vfnmsac_vf_nxv16f32_unmasked( ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv16f32( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv16f32( %nega, %vbext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv16f32.nxv16f16( %vb, splat (i1 -1), i32 %evl) + 
%nega = call @llvm.vp.fneg.nxv16f32( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv16f32( %nega, %vbext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -778,12 +758,10 @@ define @vfnmsac_vv_nxv1f64_unmasked( % ; CHECK-NEXT: vfwnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv1f64( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -828,12 +806,10 @@ define @vfnmsac_vf_nxv1f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv1f64( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv1f64( %nega, %vbext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv1f64.nxv1f32( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv1f64( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv1f64( %nega, %vbext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -862,12 +838,10 @@ define @vfnmsac_vv_nxv2f64_unmasked( % ; CHECK-NEXT: vfwnmsac.vv v10, v8, v9 ; CHECK-NEXT: vmv2r.v v8, v10 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv2f64( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -912,12 +886,10 @@ define @vfnmsac_vf_nxv2f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv2f64( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv2f64( %nega, %vbext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv2f64.nxv2f32( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv2f64( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv2f64( %nega, 
%vbext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -946,12 +918,10 @@ define @vfnmsac_vv_nxv4f64_unmasked( % ; CHECK-NEXT: vfwnmsac.vv v12, v8, v10 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv4f64( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -996,12 +966,10 @@ define @vfnmsac_vf_nxv4f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv4f64( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv4f64( %nega, %vbext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv4f64.nxv4f32( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv4f64( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv4f64( %nega, %vbext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -1030,12 +998,10 @@ define @vfnmsac_vv_nxv8f64_unmasked( % ; CHECK-NEXT: vfwnmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %a, %allones, i32 %evl) - %bext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %b, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv8f64( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %bext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %a, splat (i1 -1), i32 %evl) + %bext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %b, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %bext, %c, splat (i1 -1), i32 %evl) ret %v } @@ -1080,11 +1046,9 @@ define @vfnmsac_vf_nxv8f64_unmasked( % ; CHECK-NEXT: ret %elt.head = insertelement poison, float %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %aext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %a, %allones, i32 %evl) - %vbext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %vb, %allones, i32 %evl) - %nega = call @llvm.vp.fneg.nxv8f64( %aext, %allones, i32 %evl) - %v = call @llvm.vp.fma.nxv8f64( %nega, %vbext, %c, %allones, i32 %evl) + %aext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %a, splat (i1 -1), i32 %evl) + %vbext = call @llvm.vp.fpext.nxv8f64.nxv8f32( %vb, splat (i1 -1), i32 %evl) + %nega = call @llvm.vp.fneg.nxv8f64( %aext, splat (i1 -1), i32 %evl) + %v = call @llvm.vp.fma.nxv8f64( %nega, %vbext, %c, splat (i1 -1), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll index 797bd4125f481..5aba7ef4cc5b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll @@ -16,10 +16,8 @@ define @vmacc_vv_nxv1i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i8( %m, %y, %c, i32 %evl) ret %u } @@ -31,11 +29,9 @@ define @vmacc_vv_nxv1i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -48,10 +44,8 @@ define @vmacc_vx_nxv1i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i8( %m, %y, %c, i32 %evl) ret %u } @@ -65,11 +59,9 @@ define @vmacc_vx_nxv1i8_unmasked( %a, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -80,10 +72,8 @@ define @vmacc_vv_nxv1i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i8( %m, %y, %c, i32 %evl) ret %u } @@ -97,10 +87,8 @@ define @vmacc_vx_nxv1i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i8( %m, %y, %c, i32 %evl) ret %u } @@ -117,10 +105,8 @@ define @vmacc_vv_nxv2i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call 
@llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i8( %m, %y, %c, i32 %evl) ret %u } @@ -132,11 +118,9 @@ define @vmacc_vv_nxv2i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -149,10 +133,8 @@ define @vmacc_vx_nxv2i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i8( %m, %y, %c, i32 %evl) ret %u } @@ -166,11 +148,9 @@ define @vmacc_vx_nxv2i8_unmasked( %a, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -181,10 +161,8 @@ define @vmacc_vv_nxv2i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i8( %m, %y, %c, i32 %evl) ret %u } @@ -198,10 +176,8 @@ define @vmacc_vx_nxv2i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i8( %m, %y, %c, i32 %evl) ret %u } @@ -218,10 +194,8 @@ define @vmacc_vv_nxv4i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i8( %m, %y, %c, i32 %evl) ret %u } @@ 
-233,11 +207,9 @@ define @vmacc_vv_nxv4i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -250,10 +222,8 @@ define @vmacc_vx_nxv4i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i8( %m, %y, %c, i32 %evl) ret %u } @@ -267,11 +237,9 @@ define @vmacc_vx_nxv4i8_unmasked( %a, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -282,10 +250,8 @@ define @vmacc_vv_nxv4i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i8( %m, %y, %c, i32 %evl) ret %u } @@ -299,10 +265,8 @@ define @vmacc_vx_nxv4i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i8( %m, %y, %c, i32 %evl) ret %u } @@ -319,10 +283,8 @@ define @vmacc_vv_nxv8i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i8( %m, %y, %c, i32 %evl) ret %u } @@ -334,11 +296,9 @@ define @vmacc_vv_nxv8i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i8( %allones, %y, %c, i32 
%evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -351,10 +311,8 @@ define @vmacc_vx_nxv8i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i8( %m, %y, %c, i32 %evl) ret %u } @@ -368,11 +326,9 @@ define @vmacc_vx_nxv8i8_unmasked( %a, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -383,10 +339,8 @@ define @vmacc_vv_nxv8i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i8( %m, %y, %c, i32 %evl) ret %u } @@ -400,10 +354,8 @@ define @vmacc_vx_nxv8i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i8( %m, %y, %c, i32 %evl) ret %u } @@ -420,10 +372,8 @@ define @vmacc_vv_nxv16i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i8( %m, %y, %c, i32 %evl) ret %u } @@ -435,11 +385,9 @@ define @vmacc_vv_nxv16i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -452,10 +400,8 @@ define @vmacc_vx_nxv16i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = 
shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i8( %m, %y, %c, i32 %evl) ret %u } @@ -469,11 +415,9 @@ define @vmacc_vx_nxv16i8_unmasked( %a, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -484,10 +428,8 @@ define @vmacc_vv_nxv16i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i8( %m, %y, %c, i32 %evl) ret %u } @@ -501,10 +443,8 @@ define @vmacc_vx_nxv16i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i8( %m, %y, %c, i32 %evl) ret %u } @@ -521,10 +461,8 @@ define @vmacc_vv_nxv32i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32i8( %m, %y, %c, i32 %evl) ret %u } @@ -536,11 +474,9 @@ define @vmacc_vv_nxv32i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -553,10 +489,8 @@ define @vmacc_vx_nxv32i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) 
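; --- Editorial sketch, not part of the patch --------------------------------
; Every hunk in these RVV test updates applies one mechanical rewrite: the
; insertelement/shufflevector pair that materialized an all-true mask for the
; VP intrinsics is replaced by the equivalent `splat (i1 -1)` vector constant.
; A condensed before/after follows, with hypothetical function names and the
; nxv1i8 element count chosen only for brevity. As in the surrounding vmacc
; hunks, the unmasked variants also pass the same constant as the vp.merge
; mask.
declare <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)

define <vscale x 1 x i8> @sketch_unmasked_before(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, i32 zeroext %evl) {
  ; Old idiom: broadcast i1 -1 (all ones) across the scalable mask vector.
  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> %allones, i32 %evl)
  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> %allones, i32 %evl)
  %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %allones, <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
  ret <vscale x 1 x i8> %u
}

define <vscale x 1 x i8> @sketch_unmasked_after(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i8> %c, i32 zeroext %evl) {
  ; New spelling: the splat constant expresses the mask inline, leaving no
  ; temporaries in the test body.
  %x = call <vscale x 1 x i8> @llvm.vp.mul.nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %x, <vscale x 1 x i8> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i8> %y, <vscale x 1 x i8> %c, i32 %evl)
  ret <vscale x 1 x i8> %u
}
; The CHECK lines appear in these hunks only as unchanged context: both
; spellings denote the same all-ones mask, so the emitted vmacc.vv/vmacc.vx
; sequences are identical.
; ----------------------------------------------------------------------------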
+ %x = call @llvm.vp.mul.nxv32i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32i8( %m, %y, %c, i32 %evl) ret %u } @@ -570,11 +504,9 @@ define @vmacc_vx_nxv32i8_unmasked( %a, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -585,10 +517,8 @@ define @vmacc_vv_nxv32i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32i8( %m, %y, %c, i32 %evl) ret %u } @@ -602,10 +532,8 @@ define @vmacc_vx_nxv32i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32i8( %m, %y, %c, i32 %evl) ret %u } @@ -623,10 +551,8 @@ define @vmacc_vv_nxv64i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv64i8( %m, %y, %c, i32 %evl) ret %u } @@ -639,11 +565,9 @@ define @vmacc_vv_nxv64i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv64i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv64i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -656,10 +580,8 @@ define @vmacc_vx_nxv64i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv64i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv64i8( %m, %y, %c, i32 %evl) ret %u } @@ -673,11 +595,9 @@ define @vmacc_vx_nxv64i8_unmasked( %a, i8 % ; CHECK-NEXT: 
ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv64i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv64i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv64i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -689,10 +609,8 @@ define @vmacc_vv_nxv64i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv64i8( %m, %y, %c, i32 %evl) ret %u } @@ -706,10 +624,8 @@ define @vmacc_vx_nxv64i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv64i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv64i8( %m, %y, %c, i32 %evl) ret %u } @@ -726,10 +642,8 @@ define @vmacc_vv_nxv1i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i16( %m, %y, %c, i32 %evl) ret %u } @@ -741,11 +655,9 @@ define @vmacc_vv_nxv1i16_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -758,10 +670,8 @@ define @vmacc_vx_nxv1i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i16( %m, %y, %c, i32 %evl) ret %u } @@ -775,11 +685,9 @@ define @vmacc_vx_nxv1i16_unmasked( %a, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) - 
%y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -790,10 +698,8 @@ define @vmacc_vv_nxv1i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i16( %m, %y, %c, i32 %evl) ret %u } @@ -807,10 +713,8 @@ define @vmacc_vx_nxv1i16_ta( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i16( %m, %y, %c, i32 %evl) ret %u } @@ -827,10 +731,8 @@ define @vmacc_vv_nxv2i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i16( %m, %y, %c, i32 %evl) ret %u } @@ -842,11 +744,9 @@ define @vmacc_vv_nxv2i16_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -859,10 +759,8 @@ define @vmacc_vx_nxv2i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i16( %m, %y, %c, i32 %evl) ret %u } @@ -876,11 +774,9 @@ define @vmacc_vx_nxv2i16_unmasked( %a, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call 
@llvm.vp.merge.nxv2i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -891,10 +787,8 @@ define @vmacc_vv_nxv2i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i16( %m, %y, %c, i32 %evl) ret %u } @@ -908,10 +802,8 @@ define @vmacc_vx_nxv2i16_ta( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i16( %m, %y, %c, i32 %evl) ret %u } @@ -928,10 +820,8 @@ define @vmacc_vv_nxv4i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i16( %m, %y, %c, i32 %evl) ret %u } @@ -943,11 +833,9 @@ define @vmacc_vv_nxv4i16_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -960,10 +848,8 @@ define @vmacc_vx_nxv4i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i16( %m, %y, %c, i32 %evl) ret %u } @@ -977,11 +863,9 @@ define @vmacc_vx_nxv4i16_unmasked( %a, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -992,10 +876,8 @@ define @vmacc_vv_nxv4i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) - %y = call 
@llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i16( %m, %y, %c, i32 %evl) ret %u } @@ -1009,10 +891,8 @@ define @vmacc_vx_nxv4i16_ta( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i16( %m, %y, %c, i32 %evl) ret %u } @@ -1029,10 +909,8 @@ define @vmacc_vv_nxv8i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i16( %m, %y, %c, i32 %evl) ret %u } @@ -1044,11 +922,9 @@ define @vmacc_vv_nxv8i16_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1061,10 +937,8 @@ define @vmacc_vx_nxv8i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i16( %m, %y, %c, i32 %evl) ret %u } @@ -1078,11 +952,9 @@ define @vmacc_vx_nxv8i16_unmasked( %a, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1093,10 +965,8 @@ define @vmacc_vv_nxv8i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i16( %m, %y, %c, i32 %evl) ret %u } @@ -1110,10 +980,8 
@@ define @vmacc_vx_nxv8i16_ta( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i16( %m, %y, %c, i32 %evl) ret %u } @@ -1130,10 +998,8 @@ define @vmacc_vv_nxv16i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i16( %m, %y, %c, i32 %evl) ret %u } @@ -1145,11 +1011,9 @@ define @vmacc_vv_nxv16i16_unmasked( %a, < ; CHECK-NEXT: vmacc.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1162,10 +1026,8 @@ define @vmacc_vx_nxv16i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i16( %m, %y, %c, i32 %evl) ret %u } @@ -1179,11 +1041,9 @@ define @vmacc_vx_nxv16i16_unmasked( %a, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1194,10 +1054,8 @@ define @vmacc_vv_nxv16i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i16( %m, %y, %c, i32 %evl) ret %u } @@ -1211,10 +1069,8 @@ define @vmacc_vx_nxv16i16_ta( %a, i16 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = 
shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i16( %m, %y, %c, i32 %evl) ret %u } @@ -1232,10 +1088,8 @@ define @vmacc_vv_nxv32i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32i16( %m, %y, %c, i32 %evl) ret %u } @@ -1248,11 +1102,9 @@ define @vmacc_vv_nxv32i16_unmasked( %a, < ; CHECK-NEXT: vmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1265,10 +1117,8 @@ define @vmacc_vx_nxv32i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32i16( %m, %y, %c, i32 %evl) ret %u } @@ -1282,11 +1132,9 @@ define @vmacc_vx_nxv32i16_unmasked( %a, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1298,10 +1146,8 @@ define @vmacc_vv_nxv32i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32i16( %m, %y, %c, i32 %evl) ret %u } @@ -1315,10 +1161,8 @@ define @vmacc_vx_nxv32i16_ta( %a, i16 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = 
insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32i16( %m, %y, %c, i32 %evl) ret %u } @@ -1335,10 +1179,8 @@ define @vmacc_vv_nxv1i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i32( %m, %y, %c, i32 %evl) ret %u } @@ -1350,11 +1192,9 @@ define @vmacc_vv_nxv1i32_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1367,10 +1207,8 @@ define @vmacc_vx_nxv1i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i32( %m, %y, %c, i32 %evl) ret %u } @@ -1384,11 +1222,9 @@ define @vmacc_vx_nxv1i32_unmasked( %a, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1399,10 +1235,8 @@ define @vmacc_vv_nxv1i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i32( %m, %y, %c, i32 %evl) ret %u } @@ -1416,10 +1250,8 @@ define @vmacc_vx_nxv1i32_ta( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, 
%vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i32( %m, %y, %c, i32 %evl) ret %u } @@ -1436,10 +1268,8 @@ define @vmacc_vv_nxv2i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i32( %m, %y, %c, i32 %evl) ret %u } @@ -1451,11 +1281,9 @@ define @vmacc_vv_nxv2i32_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1468,10 +1296,8 @@ define @vmacc_vx_nxv2i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i32( %m, %y, %c, i32 %evl) ret %u } @@ -1485,11 +1311,9 @@ define @vmacc_vx_nxv2i32_unmasked( %a, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1500,10 +1324,8 @@ define @vmacc_vv_nxv2i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i32( %m, %y, %c, i32 %evl) ret %u } @@ -1517,10 +1339,8 @@ define @vmacc_vx_nxv2i32_ta( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i32( %m, %y, %c, i32 %evl) ret %u } @@ -1537,10 +1357,8 @@ define @vmacc_vv_nxv4i32( %a, poison, i1 -1, i32 0 - %allones = 
shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i32( %m, %y, %c, i32 %evl) ret %u } @@ -1552,11 +1370,9 @@ define @vmacc_vv_nxv4i32_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1569,10 +1385,8 @@ define @vmacc_vx_nxv4i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i32( %m, %y, %c, i32 %evl) ret %u } @@ -1586,11 +1400,9 @@ define @vmacc_vx_nxv4i32_unmasked( %a, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1601,10 +1413,8 @@ define @vmacc_vv_nxv4i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i32( %m, %y, %c, i32 %evl) ret %u } @@ -1618,10 +1428,8 @@ define @vmacc_vx_nxv4i32_ta( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i32( %m, %y, %c, i32 %evl) ret %u } @@ -1638,10 +1446,8 @@ define @vmacc_vv_nxv8i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call 
@llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i32( %m, %y, %c, i32 %evl) ret %u } @@ -1653,11 +1459,9 @@ define @vmacc_vv_nxv8i32_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1670,10 +1474,8 @@ define @vmacc_vx_nxv8i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i32( %m, %y, %c, i32 %evl) ret %u } @@ -1687,11 +1489,9 @@ define @vmacc_vx_nxv8i32_unmasked( %a, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1702,10 +1502,8 @@ define @vmacc_vv_nxv8i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i32( %m, %y, %c, i32 %evl) ret %u } @@ -1719,10 +1517,8 @@ define @vmacc_vx_nxv8i32_ta( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i32( %m, %y, %c, i32 %evl) ret %u } @@ -1740,10 +1536,8 @@ define @vmacc_vv_nxv16i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i32( %m, %y, %c, i32 %evl) ret %u } @@ -1756,11 +1550,9 @@ define @vmacc_vv_nxv16i32_unmasked( %a, < ; CHECK-NEXT: vmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; 
CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1773,10 +1565,8 @@ define @vmacc_vx_nxv16i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i32( %m, %y, %c, i32 %evl) ret %u } @@ -1790,11 +1580,9 @@ define @vmacc_vx_nxv16i32_unmasked( %a, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1806,10 +1594,8 @@ define @vmacc_vv_nxv16i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i32( %m, %y, %c, i32 %evl) ret %u } @@ -1823,10 +1609,8 @@ define @vmacc_vx_nxv16i32_ta( %a, i32 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i32( %m, %y, %c, i32 %evl) ret %u } @@ -1843,10 +1627,8 @@ define @vmacc_vv_nxv1i64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i64( %m, %y, %c, i32 %evl) ret %u } @@ -1858,11 +1640,9 @@ define @vmacc_vv_nxv1i64_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) - %y = 
call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1890,10 +1670,8 @@ define @vmacc_vx_nxv1i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i64( %m, %y, %c, i32 %evl) ret %u } @@ -1922,11 +1700,9 @@ define @vmacc_vx_nxv1i64_unmasked( %a, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1937,10 +1713,8 @@ define @vmacc_vv_nxv1i64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i64( %m, %y, %c, i32 %evl) ret %u } @@ -1969,10 +1743,8 @@ define @vmacc_vx_nxv1i64_ta( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i64( %m, %y, %c, i32 %evl) ret %u } @@ -1989,10 +1761,8 @@ define @vmacc_vv_nxv2i64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i64( %m, %y, %c, i32 %evl) ret %u } @@ -2004,11 +1774,9 @@ define @vmacc_vv_nxv2i64_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) + %u = 
call @llvm.vp.merge.nxv2i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -2036,10 +1804,8 @@ define @vmacc_vx_nxv2i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i64( %m, %y, %c, i32 %evl) ret %u } @@ -2068,11 +1834,9 @@ define @vmacc_vx_nxv2i64_unmasked( %a, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -2083,10 +1847,8 @@ define @vmacc_vv_nxv2i64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i64( %m, %y, %c, i32 %evl) ret %u } @@ -2115,10 +1877,8 @@ define @vmacc_vx_nxv2i64_ta( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i64( %m, %y, %c, i32 %evl) ret %u } @@ -2135,10 +1895,8 @@ define @vmacc_vv_nxv4i64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i64( %m, %y, %c, i32 %evl) ret %u } @@ -2150,11 +1908,9 @@ define @vmacc_vv_nxv4i64_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -2182,10 +1938,8 @@ define @vmacc_vx_nxv4i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - 
%allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i64( %m, %y, %c, i32 %evl) ret %u } @@ -2214,11 +1968,9 @@ define @vmacc_vx_nxv4i64_unmasked( %a, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -2229,10 +1981,8 @@ define @vmacc_vv_nxv4i64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i64( %m, %y, %c, i32 %evl) ret %u } @@ -2261,10 +2011,8 @@ define @vmacc_vx_nxv4i64_ta( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i64( %m, %y, %c, i32 %evl) ret %u } @@ -2282,10 +2030,8 @@ define @vmacc_vv_nxv8i64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i64( %m, %y, %c, i32 %evl) ret %u } @@ -2298,11 +2044,9 @@ define @vmacc_vv_nxv8i64_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -2330,10 +2074,8 @@ define @vmacc_vx_nxv8i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call 
@llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl)
%u = call @llvm.vp.merge.nxv8i64( %m, %y, %c, i32 %evl)
ret %u
}
@@ -2362,11 +2104,9 @@ define @vmacc_vx_nxv8i64_unmasked( %a, i64
; RV64-NEXT: ret
%elt.head = insertelement poison, i64 %b, i32 0
%vb = shufflevector %elt.head, poison, zeroinitializer
- %splat = insertelement poison, i1 -1, i32 0
- %allones = shufflevector %splat, poison, zeroinitializer
- %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl)
- %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl)
- %u = call @llvm.vp.merge.nxv8i64( %allones, %y, %c, i32 %evl)
+ %x = call @llvm.vp.mul.nxv8i64( %a, %vb, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl)
+ %u = call @llvm.vp.merge.nxv8i64( splat (i1 -1), %y, %c, i32 %evl)
ret %u
}
@@ -2378,10 +2118,8 @@ define @vmacc_vv_nxv8i64_ta( %a, poison, i1 -1, i32 0
- %allones = shufflevector %splat, poison, zeroinitializer
- %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl)
- %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl)
+ %x = call @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl)
%u = call @llvm.vp.select.nxv8i64( %m, %y, %c, i32 %evl)
ret %u
}
@@ -2410,10 +2148,8 @@ define @vmacc_vx_nxv8i64_ta( %a, i64 %b, poison, i64 %b, i32 0
%vb = shufflevector %elt.head, poison, zeroinitializer
- %splat = insertelement poison, i1 -1, i32 0
- %allones = shufflevector %splat, poison, zeroinitializer
- %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl)
- %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl)
+ %x = call @llvm.vp.mul.nxv8i64( %a, %vb, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl)
%u = call @llvm.vp.select.nxv8i64( %m, %y, %c, i32 %evl)
ret %u
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
index 6a6d7d2a41424..0322c1ab9f631 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-vp.ll
@@ -17,10 +17,8 @@ define @vmadd_vv_nxv1i8( %a, poison, i1 -1, i32 0
- %allones = shufflevector %splat, poison, zeroinitializer
- %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl)
- %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl)
+ %x = call @llvm.vp.mul.nxv1i8( %a, %b, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl)
%u = call @llvm.vp.merge.nxv1i8( %m, %y, %a, i32 %evl)
ret %u
}
@@ -33,11 +31,9 @@ define @vmadd_vv_nxv1i8_unmasked( %a, poison, i1 -1, i32 0
- %allones = shufflevector %splat, poison, zeroinitializer
- %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl)
- %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl)
- %u = call @llvm.vp.merge.nxv1i8( %allones, %y, %a, i32 %evl)
+ %x = call @llvm.vp.mul.nxv1i8( %a, %b, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl)
+ %u = call @llvm.vp.merge.nxv1i8( splat (i1 -1), %y, %a, i32 %evl)
ret %u
}
@@ -49,10 +45,8 @@ define @vmadd_vx_nxv1i8( %a, i8 %b, poison, i8 %b, i32 0
%vb = shufflevector %elt.head, poison, zeroinitializer
- %splat = insertelement poison, i1 -1, i32 0
- %allones = shufflevector %splat, poison, zeroinitializer
- %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl)
- %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl)
+ %x = call @llvm.vp.mul.nxv1i8( %a, %vb, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl)
%u = call
@llvm.vp.merge.nxv1i8( %m, %y, %a, i32 %evl) ret %u } @@ -65,11 +59,9 @@ define @vmadd_vx_nxv1i8_unmasked( %a, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i8( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -80,10 +72,8 @@ define @vmadd_vv_nxv1i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i8( %m, %y, %a, i32 %evl) ret %u } @@ -97,10 +87,8 @@ define @vmadd_vx_nxv1i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i8( %m, %y, %a, i32 %evl) ret %u } @@ -118,10 +106,8 @@ define @vmadd_vv_nxv2i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i8( %m, %y, %a, i32 %evl) ret %u } @@ -134,11 +120,9 @@ define @vmadd_vv_nxv2i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i8( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -150,10 +134,8 @@ define @vmadd_vx_nxv2i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i8( %m, %y, %a, i32 %evl) ret %u } @@ -166,11 +148,9 @@ define @vmadd_vx_nxv2i8_unmasked( %a, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, 
poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i8( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -181,10 +161,8 @@ define @vmadd_vv_nxv2i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i8( %m, %y, %a, i32 %evl) ret %u } @@ -198,10 +176,8 @@ define @vmadd_vx_nxv2i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i8( %m, %y, %a, i32 %evl) ret %u } @@ -219,10 +195,8 @@ define @vmadd_vv_nxv4i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i8( %m, %y, %a, i32 %evl) ret %u } @@ -235,11 +209,9 @@ define @vmadd_vv_nxv4i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i8( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -251,10 +223,8 @@ define @vmadd_vx_nxv4i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i8( %m, %y, %a, i32 %evl) ret %u } @@ -267,11 +237,9 @@ define @vmadd_vx_nxv4i8_unmasked( %a, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i8( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, 
%c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -282,10 +250,8 @@ define @vmadd_vv_nxv4i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i8( %m, %y, %a, i32 %evl) ret %u } @@ -299,10 +265,8 @@ define @vmadd_vx_nxv4i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i8( %m, %y, %a, i32 %evl) ret %u } @@ -320,10 +284,8 @@ define @vmadd_vv_nxv8i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i8( %m, %y, %a, i32 %evl) ret %u } @@ -336,11 +298,9 @@ define @vmadd_vv_nxv8i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i8( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -352,10 +312,8 @@ define @vmadd_vx_nxv8i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i8( %m, %y, %a, i32 %evl) ret %u } @@ -368,11 +326,9 @@ define @vmadd_vx_nxv8i8_unmasked( %a, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i8( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -383,10 +339,8 @@ define @vmadd_vv_nxv8i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) - %y = call 
@llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i8( %m, %y, %a, i32 %evl) ret %u } @@ -400,10 +354,8 @@ define @vmadd_vx_nxv8i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i8( %m, %y, %a, i32 %evl) ret %u } @@ -421,10 +373,8 @@ define @vmadd_vv_nxv16i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i8( %m, %y, %a, i32 %evl) ret %u } @@ -437,11 +387,9 @@ define @vmadd_vv_nxv16i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i8( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -453,10 +401,8 @@ define @vmadd_vx_nxv16i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i8( %m, %y, %a, i32 %evl) ret %u } @@ -469,11 +415,9 @@ define @vmadd_vx_nxv16i8_unmasked( %a, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i8( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -484,10 +428,8 @@ define @vmadd_vv_nxv16i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i8( %m, %y, %a, i32 %evl) ret %u } @@ -501,10 +443,8 @@ define 
@vmadd_vx_nxv16i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i8( %m, %y, %a, i32 %evl) ret %u } @@ -522,10 +462,8 @@ define @vmadd_vv_nxv32i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32i8( %m, %y, %a, i32 %evl) ret %u } @@ -538,11 +476,9 @@ define @vmadd_vv_nxv32i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32i8( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -554,10 +490,8 @@ define @vmadd_vx_nxv32i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32i8( %m, %y, %a, i32 %evl) ret %u } @@ -570,11 +504,9 @@ define @vmadd_vx_nxv32i8_unmasked( %a, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32i8( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -585,10 +517,8 @@ define @vmadd_vv_nxv32i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32i8( %m, %y, %a, i32 %evl) ret %u } @@ -602,10 +532,8 @@ define @vmadd_vx_nxv32i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) - 
%y = call @llvm.vp.add.nxv32i8( %x, %c, %allones, i32 %evl)
+ %x = call @llvm.vp.mul.nxv32i8( %a, %vb, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv32i8( %x, %c, splat (i1 -1), i32 %evl)
%u = call @llvm.vp.select.nxv32i8( %m, %y, %a, i32 %evl)
ret %u
}
@@ -624,10 +552,8 @@ define @vmadd_vv_nxv64i8( %a, poison, i1 -1, i32 0
- %allones = shufflevector %splat, poison, zeroinitializer
- %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl)
- %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl)
+ %x = call @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl)
%u = call @llvm.vp.merge.nxv64i8( %m, %y, %a, i32 %evl)
ret %u
}
@@ -641,11 +567,9 @@ define @vmadd_vv_nxv64i8_unmasked( %a, poison, i1 -1, i32 0
- %allones = shufflevector %splat, poison, zeroinitializer
- %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl)
- %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl)
- %u = call @llvm.vp.merge.nxv64i8( %allones, %y, %a, i32 %evl)
+ %x = call @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl)
+ %u = call @llvm.vp.merge.nxv64i8( splat (i1 -1), %y, %a, i32 %evl)
ret %u
}
@@ -657,10 +581,8 @@ define @vmadd_vx_nxv64i8( %a, i8 %b, poison, i8 %b, i32 0
%vb = shufflevector %elt.head, poison, zeroinitializer
- %splat = insertelement poison, i1 -1, i32 0
- %allones = shufflevector %splat, poison, zeroinitializer
- %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl)
- %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl)
+ %x = call @llvm.vp.mul.nxv64i8( %a, %vb, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl)
%u = call @llvm.vp.merge.nxv64i8( %m, %y, %a, i32 %evl)
ret %u
}
@@ -673,11 +595,9 @@ define @vmadd_vx_nxv64i8_unmasked( %a, i8 %
; CHECK-NEXT: ret
%elt.head = insertelement poison, i8 %b, i32 0
%vb = shufflevector %elt.head, poison, zeroinitializer
- %splat = insertelement poison, i1 -1, i32 0
- %allones = shufflevector %splat, poison, zeroinitializer
- %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl)
- %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl)
- %u = call @llvm.vp.merge.nxv64i8( %allones, %y, %a, i32 %evl)
+ %x = call @llvm.vp.mul.nxv64i8( %a, %vb, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl)
+ %u = call @llvm.vp.merge.nxv64i8( splat (i1 -1), %y, %a, i32 %evl)
ret %u
}
@@ -689,10 +609,8 @@ define @vmadd_vv_nxv64i8_ta( %a, poison, i1 -1, i32 0
- %allones = shufflevector %splat, poison, zeroinitializer
- %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl)
- %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl)
+ %x = call @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl)
%u = call @llvm.vp.select.nxv64i8( %m, %y, %a, i32 %evl)
ret %u
}
@@ -706,10 +624,8 @@ define @vmadd_vx_nxv64i8_ta( %a, i8 %b, poison, i8 %b, i32 0
%vb = shufflevector %elt.head, poison, zeroinitializer
- %splat = insertelement poison, i1 -1, i32 0
- %allones = shufflevector %splat, poison, zeroinitializer
- %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl)
- %y = call @llvm.vp.add.nxv64i8( %x, %c, %allones, i32 %evl)
+ %x = call @llvm.vp.mul.nxv64i8( %a, %vb, splat (i1 -1), i32 %evl)
+ %y = call @llvm.vp.add.nxv64i8( %x, %c, splat (i1 -1), i32 %evl)
%u = call @llvm.vp.select.nxv64i8( %m, %y, %a, i32 %evl)
ret %u
}
@@ -727,10 +643,8
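; NOTE: two boundaries of this rewrite are visible in the hunks around here,
; sketched with assumed <vscale x 1 x i8> types for illustration. First, only
; the constant all-ones mask becomes splat (i1 -1); the scalar-operand
; broadcast in the vmadd_vx/vmacc_vx tests splats the runtime value %b, which
; is not a constant, so it keeps the two-instruction form:
;
;   %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
;   %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
;
; Second, the _unmasked variants also pass the constant as the @llvm.vp.merge
; mask:
;
;   %u = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i8> %y, <vscale x 1 x i8> %a, i32 %evl)
;
; while the _ta (tail-agnostic) variants select with the real mask %m via
; @llvm.vp.select, so the select's mask operand is unchanged by this diff.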
@@ define @vmadd_vv_nxv1i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i16( %m, %y, %a, i32 %evl) ret %u } @@ -743,11 +657,9 @@ define @vmadd_vv_nxv1i16_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i16( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -759,10 +671,8 @@ define @vmadd_vx_nxv1i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i16( %m, %y, %a, i32 %evl) ret %u } @@ -775,11 +685,9 @@ define @vmadd_vx_nxv1i16_unmasked( %a, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i16( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -790,10 +698,8 @@ define @vmadd_vv_nxv1i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i16( %m, %y, %a, i32 %evl) ret %u } @@ -807,10 +713,8 @@ define @vmadd_vx_nxv1i16_ta( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i16( %m, %y, %a, i32 %evl) ret %u } @@ -828,10 +732,8 @@ define @vmadd_vv_nxv2i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, 
%b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i16( %m, %y, %a, i32 %evl) ret %u } @@ -844,11 +746,9 @@ define @vmadd_vv_nxv2i16_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i16( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i16( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -860,10 +760,8 @@ define @vmadd_vx_nxv2i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i16( %m, %y, %a, i32 %evl) ret %u } @@ -876,11 +774,9 @@ define @vmadd_vx_nxv2i16_unmasked( %a, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i16( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i16( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -891,10 +787,8 @@ define @vmadd_vv_nxv2i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i16( %m, %y, %a, i32 %evl) ret %u } @@ -908,10 +802,8 @@ define @vmadd_vx_nxv2i16_ta( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i16( %m, %y, %a, i32 %evl) ret %u } @@ -929,10 +821,8 @@ define @vmadd_vv_nxv4i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i16( %m, %y, %a, i32 %evl) ret %u } @@ -945,11 +835,9 @@ define @vmadd_vv_nxv4i16_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector 
%splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i16( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -961,10 +849,8 @@ define @vmadd_vx_nxv4i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i16( %m, %y, %a, i32 %evl) ret %u } @@ -977,11 +863,9 @@ define @vmadd_vx_nxv4i16_unmasked( %a, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i16( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -992,10 +876,8 @@ define @vmadd_vv_nxv4i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i16( %m, %y, %a, i32 %evl) ret %u } @@ -1009,10 +891,8 @@ define @vmadd_vx_nxv4i16_ta( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i16( %m, %y, %a, i32 %evl) ret %u } @@ -1030,10 +910,8 @@ define @vmadd_vv_nxv8i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i16( %m, %y, %a, i32 %evl) ret %u } @@ -1046,11 +924,9 @@ define @vmadd_vv_nxv8i16_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i16( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %b, splat (i1 
-1), i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1062,10 +938,8 @@ define @vmadd_vx_nxv8i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i16( %m, %y, %a, i32 %evl) ret %u } @@ -1078,11 +952,9 @@ define @vmadd_vx_nxv8i16_unmasked( %a, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i16( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1093,10 +965,8 @@ define @vmadd_vv_nxv8i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i16( %m, %y, %a, i32 %evl) ret %u } @@ -1110,10 +980,8 @@ define @vmadd_vx_nxv8i16_ta( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i16( %m, %y, %a, i32 %evl) ret %u } @@ -1131,10 +999,8 @@ define @vmadd_vv_nxv16i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i16( %m, %y, %a, i32 %evl) ret %u } @@ -1147,11 +1013,9 @@ define @vmadd_vv_nxv16i16_unmasked( %a, < ; CHECK-NEXT: vsetvli zero, zero, e16, m4, tu, ma ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i16( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( splat (i1 
-1), %y, %a, i32 %evl) ret %u } @@ -1163,10 +1027,8 @@ define @vmadd_vx_nxv16i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i16( %m, %y, %a, i32 %evl) ret %u } @@ -1179,11 +1041,9 @@ define @vmadd_vx_nxv16i16_unmasked( %a, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i16( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1194,10 +1054,8 @@ define @vmadd_vv_nxv16i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i16( %m, %y, %a, i32 %evl) ret %u } @@ -1211,10 +1069,8 @@ define @vmadd_vx_nxv16i16_ta( %a, i16 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i16( %m, %y, %a, i32 %evl) ret %u } @@ -1233,10 +1089,8 @@ define @vmadd_vv_nxv32i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32i16( %m, %y, %a, i32 %evl) ret %u } @@ -1250,11 +1104,9 @@ define @vmadd_vv_nxv32i16_unmasked( %a, < ; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, ma ; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32i16( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1266,10 +1118,8 @@ 
define @vmadd_vx_nxv32i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32i16( %m, %y, %a, i32 %evl) ret %u } @@ -1282,11 +1132,9 @@ define @vmadd_vx_nxv32i16_unmasked( %a, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32i16( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1298,10 +1146,8 @@ define @vmadd_vv_nxv32i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32i16( %m, %y, %a, i32 %evl) ret %u } @@ -1315,10 +1161,8 @@ define @vmadd_vx_nxv32i16_ta( %a, i16 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv32i16( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv32i16( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32i16( %m, %y, %a, i32 %evl) ret %u } @@ -1336,10 +1180,8 @@ define @vmadd_vv_nxv1i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i32( %m, %y, %a, i32 %evl) ret %u } @@ -1352,11 +1194,9 @@ define @vmadd_vv_nxv1i32_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i32( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1368,10 +1208,8 @@ define @vmadd_vx_nxv1i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, 
poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i32( %m, %y, %a, i32 %evl) ret %u } @@ -1384,11 +1222,9 @@ define @vmadd_vx_nxv1i32_unmasked( %a, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i32( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1399,10 +1235,8 @@ define @vmadd_vv_nxv1i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i32( %m, %y, %a, i32 %evl) ret %u } @@ -1416,10 +1250,8 @@ define @vmadd_vx_nxv1i32_ta( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i32( %m, %y, %a, i32 %evl) ret %u } @@ -1437,10 +1269,8 @@ define @vmadd_vv_nxv2i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i32( %m, %y, %a, i32 %evl) ret %u } @@ -1453,11 +1283,9 @@ define @vmadd_vv_nxv2i32_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i32( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1469,10 +1297,8 @@ define @vmadd_vx_nxv2i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, 
splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i32( %m, %y, %a, i32 %evl) ret %u } @@ -1485,11 +1311,9 @@ define @vmadd_vx_nxv2i32_unmasked( %a, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i32( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1500,10 +1324,8 @@ define @vmadd_vv_nxv2i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i32( %m, %y, %a, i32 %evl) ret %u } @@ -1517,10 +1339,8 @@ define @vmadd_vx_nxv2i32_ta( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i32( %m, %y, %a, i32 %evl) ret %u } @@ -1538,10 +1358,8 @@ define @vmadd_vv_nxv4i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i32( %m, %y, %a, i32 %evl) ret %u } @@ -1554,11 +1372,9 @@ define @vmadd_vv_nxv4i32_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i32( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1570,10 +1386,8 @@ define @vmadd_vx_nxv4i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i32( %m, %y, %a, i32 %evl) ret %u } @@ -1586,11 +1400,9 @@ define @vmadd_vx_nxv4i32_unmasked( %a, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, 
zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i32( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1601,10 +1413,8 @@ define @vmadd_vv_nxv4i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i32( %m, %y, %a, i32 %evl) ret %u } @@ -1618,10 +1428,8 @@ define @vmadd_vx_nxv4i32_ta( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i32( %m, %y, %a, i32 %evl) ret %u } @@ -1639,10 +1447,8 @@ define @vmadd_vv_nxv8i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i32( %m, %y, %a, i32 %evl) ret %u } @@ -1655,11 +1461,9 @@ define @vmadd_vv_nxv8i32_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i32( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1671,10 +1475,8 @@ define @vmadd_vx_nxv8i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i32( %m, %y, %a, i32 %evl) ret %u } @@ -1687,11 +1489,9 @@ define @vmadd_vx_nxv8i32_unmasked( %a, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) - %u = call 
@llvm.vp.merge.nxv8i32( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1702,10 +1502,8 @@ define @vmadd_vv_nxv8i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i32( %m, %y, %a, i32 %evl) ret %u } @@ -1719,10 +1517,8 @@ define @vmadd_vx_nxv8i32_ta( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i32( %m, %y, %a, i32 %evl) ret %u } @@ -1741,10 +1537,8 @@ define @vmadd_vv_nxv16i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i32( %m, %y, %a, i32 %evl) ret %u } @@ -1758,11 +1552,9 @@ define @vmadd_vv_nxv16i32_unmasked( %a, < ; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma ; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i32( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1774,10 +1566,8 @@ define @vmadd_vx_nxv16i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i32( %m, %y, %a, i32 %evl) ret %u } @@ -1790,11 +1580,9 @@ define @vmadd_vx_nxv16i32_unmasked( %a, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i32( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, splat (i1 -1), i32 
%evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1806,10 +1594,8 @@ define @vmadd_vv_nxv16i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i32( %m, %y, %a, i32 %evl) ret %u } @@ -1823,10 +1609,8 @@ define @vmadd_vx_nxv16i32_ta( %a, i32 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv16i32( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv16i32( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i32( %m, %y, %a, i32 %evl) ret %u } @@ -1844,10 +1628,8 @@ define @vmadd_vv_nxv1i64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i64( %m, %y, %a, i32 %evl) ret %u } @@ -1860,11 +1642,9 @@ define @vmadd_vv_nxv1i64_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i64( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1892,10 +1672,8 @@ define @vmadd_vx_nxv1i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i64( %m, %y, %a, i32 %evl) ret %u } @@ -1924,11 +1702,9 @@ define @vmadd_vx_nxv1i64_unmasked( %a, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i64( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -1939,10 +1715,8 @@ define @vmadd_vv_nxv1i64_ta( 
%a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i64( %m, %y, %a, i32 %evl) ret %u } @@ -1971,10 +1745,8 @@ define @vmadd_vx_nxv1i64_ta( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv1i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv1i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i64( %m, %y, %a, i32 %evl) ret %u } @@ -1992,10 +1764,8 @@ define @vmadd_vv_nxv2i64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i64( %m, %y, %a, i32 %evl) ret %u } @@ -2008,11 +1778,9 @@ define @vmadd_vv_nxv2i64_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i64( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -2040,10 +1808,8 @@ define @vmadd_vx_nxv2i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i64( %m, %y, %a, i32 %evl) ret %u } @@ -2072,11 +1838,9 @@ define @vmadd_vx_nxv2i64_unmasked( %a, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i64( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -2087,10 +1851,8 @@ define @vmadd_vv_nxv2i64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %b, splat (i1 
-1), i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i64( %m, %y, %a, i32 %evl) ret %u } @@ -2119,10 +1881,8 @@ define @vmadd_vx_nxv2i64_ta( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv2i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv2i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i64( %m, %y, %a, i32 %evl) ret %u } @@ -2140,10 +1900,8 @@ define @vmadd_vv_nxv4i64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i64( %m, %y, %a, i32 %evl) ret %u } @@ -2156,11 +1914,9 @@ define @vmadd_vv_nxv4i64_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i64( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -2188,10 +1944,8 @@ define @vmadd_vx_nxv4i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i64( %m, %y, %a, i32 %evl) ret %u } @@ -2220,11 +1974,9 @@ define @vmadd_vx_nxv4i64_unmasked( %a, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i64( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -2235,10 +1987,8 @@ define @vmadd_vv_nxv4i64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i64( %m, %y, %a, i32 %evl) ret %u } @@ -2267,10 +2017,8 @@ define @vmadd_vx_nxv4i64_ta( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector 
%elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv4i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv4i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i64( %m, %y, %a, i32 %evl) ret %u } @@ -2289,10 +2037,8 @@ define @vmadd_vv_nxv8i64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i64( %m, %y, %a, i32 %evl) ret %u } @@ -2306,11 +2052,9 @@ define @vmadd_vv_nxv8i64_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i64( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -2338,10 +2082,8 @@ define @vmadd_vx_nxv8i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i64( %m, %y, %a, i32 %evl) ret %u } @@ -2370,11 +2112,9 @@ define @vmadd_vx_nxv8i64_unmasked( %a, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i64( %allones, %y, %a, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( splat (i1 -1), %y, %a, i32 %evl) ret %u } @@ -2386,10 +2126,8 @@ define @vmadd_vv_nxv8i64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i64( %m, %y, %a, i32 %evl) ret %u } @@ -2418,10 +2156,8 @@ define @vmadd_vx_nxv8i64_ta( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.add.nxv8i64( %x, %c, %allones, i32 
%evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.add.nxv8i64( %x, %c, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i64( %m, %y, %a, i32 %evl) ret %u } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll index d243c89958c54..91ea33733560b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll @@ -159,9 +159,7 @@ define @vmnand_vv_nxv1i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -172,9 +170,7 @@ define @vmnand_vv_nxv2i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -185,9 +181,7 @@ define @vmnand_vv_nxv4i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -198,9 +192,7 @@ define @vmnand_vv_nxv8i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -211,9 +203,7 @@ define @vmnand_vv_nxv16i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -224,9 +214,7 @@ define @vmnor_vv_nxv1i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -237,9 +225,7 @@ define @vmnor_vv_nxv2i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -250,9 +236,7 @@ define @vmnor_vv_nxv4i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -263,9 +247,7 @@ define @vmnor_vv_nxv8i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -276,9 +258,7 @@ define @vmnor_vv_nxv16i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -289,9 +269,7 @@ define @vmxnor_vv_nxv1i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -302,9 +280,7 @@ define @vmxnor_vv_nxv2i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -315,9 +291,7 @@ define @vmxnor_vv_nxv4i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -328,9 +302,7 @@ define @vmxnor_vv_nxv8i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, 
zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -341,9 +313,7 @@ define @vmxnor_vv_nxv16i1( %va, %va, %vb - %head = insertelement poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vc, %splat + %not = xor %vc, splat (i1 1) ret %not } @@ -353,9 +323,7 @@ define @vmandn_vv_nxv1i1( %va, poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vb, %splat + %not = xor %vb, splat (i1 1) %vc = and %va, %not ret %vc } @@ -366,9 +334,7 @@ define @vmandn_vv_nxv2i1( %va, poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vb, %splat + %not = xor %vb, splat (i1 1) %vc = and %va, %not ret %vc } @@ -379,9 +345,7 @@ define @vmandn_vv_nxv4i1( %va, poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vb, %splat + %not = xor %vb, splat (i1 1) %vc = and %va, %not ret %vc } @@ -392,9 +356,7 @@ define @vmandn_vv_nxv8i1( %va, poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vb, %splat + %not = xor %vb, splat (i1 1) %vc = and %va, %not ret %vc } @@ -405,9 +367,7 @@ define @vmandn_vv_nxv16i1( %va, poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vb, %splat + %not = xor %vb, splat (i1 1) %vc = and %va, %not ret %vc } @@ -418,9 +378,7 @@ define @vmorn_vv_nxv1i1( %va, poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vb, %splat + %not = xor %vb, splat (i1 1) %vc = or %va, %not ret %vc } @@ -431,9 +389,7 @@ define @vmorn_vv_nxv2i1( %va, poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vb, %splat + %not = xor %vb, splat (i1 1) %vc = or %va, %not ret %vc } @@ -444,9 +400,7 @@ define @vmorn_vv_nxv4i1( %va, poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vb, %splat + %not = xor %vb, splat (i1 1) %vc = or %va, %not ret %vc } @@ -457,9 +411,7 @@ define @vmorn_vv_nxv8i1( %va, poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vb, %splat + %not = xor %vb, splat (i1 1) %vc = or %va, %not ret %vc } @@ -470,9 +422,7 @@ define @vmorn_vv_nxv16i1( %va, poison, i1 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %not = xor %vb, %splat + %not = xor %vb, splat (i1 1) %vc = or %va, %not ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll index 1247f3d29c099..52720755dd5b5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll @@ -33,10 +33,8 @@ define @vmax_vi_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -71,10 +69,8 @@ define @vmax_vi_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -109,10 +105,8 @@ define @vmax_vi_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli 
a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -147,10 +141,8 @@ define @vmax_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -185,10 +177,8 @@ define @vmax_vi_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -223,10 +213,8 @@ define @vmax_vi_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -261,10 +249,8 @@ define @vmax_vi_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -299,10 +285,8 @@ define @vmax_vi_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -337,10 +321,8 @@ define @vmax_vi_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -375,10 +357,8 @@ define @vmax_vi_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -413,10 +393,8 @@ define @vmax_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i16 -3) + %vc = select %cmp, %va, 
splat (i16 -3) ret %vc } @@ -451,10 +429,8 @@ define @vmax_vi_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -489,10 +465,8 @@ define @vmax_vi_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -527,10 +501,8 @@ define @vmax_vi_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -565,10 +537,8 @@ define @vmax_vi_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -603,10 +573,8 @@ define @vmax_vi_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -641,10 +609,8 @@ define @vmax_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -679,10 +645,8 @@ define @vmax_vi_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -730,10 +694,8 @@ define @vmax_vi_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -781,10 +743,8 @@ define @vmax_vi_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - 
%cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -832,10 +792,8 @@ define @vmax_vi_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -883,9 +841,7 @@ define @vmax_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vmax.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp sgt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp sgt %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll index c69f5fdb5b711..a35fc874065a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -39,9 +39,7 @@ define @vmax_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -77,9 +75,7 @@ define @vmax_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -101,9 +97,7 @@ define @vmax_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -127,9 +121,7 @@ define @vmax_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -151,9 +143,7 @@ define @vmax_vv_nxv3i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv3i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv3i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -177,9 +167,7 @@ define @vmax_vx_nxv3i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv3i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -201,9 +189,7 @@ define @vmax_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -227,9 +213,7 @@ define 
@vmax_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -251,9 +235,7 @@ define @vmax_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -277,9 +259,7 @@ define @vmax_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -301,9 +281,7 @@ define @vmax_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -327,9 +305,7 @@ define @vmax_vx_nxv16i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -351,9 +327,7 @@ define @vmax_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -377,9 +351,7 @@ define @vmax_vx_nxv32i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -401,9 +373,7 @@ define @vmax_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -427,9 +397,7 @@ define @vmax_vx_nxv64i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -485,9 +453,7 @@ define @vmax_vx_nxv128i8_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv128i8( %va, 
%vb, splat (i1 true), i32 %evl) ret %v } @@ -509,9 +475,7 @@ define @vmax_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -535,9 +499,7 @@ define @vmax_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -559,9 +521,7 @@ define @vmax_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -585,9 +545,7 @@ define @vmax_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -609,9 +567,7 @@ define @vmax_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -635,9 +591,7 @@ define @vmax_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -659,9 +613,7 @@ define @vmax_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -685,9 +637,7 @@ define @vmax_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -709,9 +659,7 @@ define @vmax_vv_nxv16i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -735,9 +683,7 @@ define @vmax_vx_nxv16i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv16i16( %va, %vb, %m, 
i32 %evl) + %v = call @llvm.vp.smax.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -759,9 +705,7 @@ define @vmax_vv_nxv32i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -785,9 +729,7 @@ define @vmax_vx_nxv32i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -809,9 +751,7 @@ define @vmax_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -835,9 +775,7 @@ define @vmax_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -859,9 +797,7 @@ define @vmax_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -885,9 +821,7 @@ define @vmax_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -909,9 +843,7 @@ define @vmax_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -935,9 +867,7 @@ define @vmax_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -959,9 +889,7 @@ define @vmax_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -985,9 +913,7 @@ define @vmax_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, 
zeroinitializer - %v = call @llvm.vp.smax.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1009,9 +935,7 @@ define @vmax_vv_nxv16i32_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmax.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1035,9 +959,7 @@ define @vmax_vx_nxv16i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1094,9 +1016,7 @@ define @vmax_vx_nxv32i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv32i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1194,9 +1114,7 @@ define @vmax_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1248,9 +1166,7 @@ define @vmax_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1272,9 +1188,7 @@ define @vmax_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1326,9 +1240,7 @@ define @vmax_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1350,9 +1262,7 @@ define @vmax_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1404,9 +1314,7 @@ define @vmax_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1428,9 +1336,7 @@ define 
@vmax_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1482,8 +1388,6 @@ define @vmax_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smax.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smax.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll index a6693fad8dd5f..8eb70fbc91fa5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll @@ -33,10 +33,8 @@ define @vmax_vi_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -71,10 +69,8 @@ define @vmax_vi_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -109,10 +105,8 @@ define @vmax_vi_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -147,10 +141,8 @@ define @vmax_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -185,10 +177,8 @@ define @vmax_vi_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -223,10 +213,8 @@ define @vmax_vi_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -261,10 +249,8 @@ define @vmax_vi_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = 
shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -299,10 +285,8 @@ define @vmax_vi_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -337,10 +321,8 @@ define @vmax_vi_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -375,10 +357,8 @@ define @vmax_vi_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -413,10 +393,8 @@ define @vmax_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -451,10 +429,8 @@ define @vmax_vi_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -489,10 +465,8 @@ define @vmax_vi_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -527,10 +501,8 @@ define @vmax_vi_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -565,10 +537,8 @@ define @vmax_vi_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -603,10 +573,8 @@ define @vmax_vi_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a1, 
zero, e32, m2, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -641,10 +609,8 @@ define @vmax_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -679,10 +645,8 @@ define @vmax_vi_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -730,10 +694,8 @@ define @vmax_vi_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -781,10 +743,8 @@ define @vmax_vi_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -832,10 +792,8 @@ define @vmax_vi_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -883,10 +841,8 @@ define @vmax_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ugt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ugt %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -928,9 +884,7 @@ define @vmax_vi_mask_nxv8i32( %va, poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %splat, zeroinitializer + %vs = select %mask, splat (i32 -3), zeroinitializer %cmp = icmp ugt %va, %vs %vc = select %cmp, %va, %vs ret %vc diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll index a7fce573da9fe..1f620a44dbbc8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll @@ -41,9 +41,7 @@ define @vmaxu_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv1i8( %va, %b, splat (i1 true), 
i32 %evl) ret %v } @@ -79,9 +77,7 @@ define @vmaxu_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -103,9 +99,7 @@ define @vmaxu_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -129,9 +123,7 @@ define @vmaxu_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -153,9 +145,7 @@ define @vmaxu_vv_nxv3i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv3i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv3i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -179,9 +169,7 @@ define @vmaxu_vx_nxv3i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv3i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -203,9 +191,7 @@ define @vmaxu_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -229,9 +215,7 @@ define @vmaxu_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -253,9 +237,7 @@ define @vmaxu_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -279,9 +261,7 @@ define @vmaxu_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -303,9 +283,7 @@ define @vmaxu_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -329,9 +307,7 @@ define @vmaxu_vx_nxv16i8_unmasked( %va, i8 ; 
CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -353,9 +329,7 @@ define @vmaxu_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -379,9 +353,7 @@ define @vmaxu_vx_nxv32i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -403,9 +375,7 @@ define @vmaxu_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -429,9 +399,7 @@ define @vmaxu_vx_nxv64i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -487,9 +455,7 @@ define @vmaxu_vx_nxv128i8_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv128i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -511,9 +477,7 @@ define @vmaxu_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -537,9 +501,7 @@ define @vmaxu_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -561,9 +523,7 @@ define @vmaxu_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -587,9 +547,7 @@ define @vmaxu_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) 
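Every unmasked hunk in these -vp test files is the same mechanical rewrite: the two-instruction all-true mask (an insertelement into a poison vector, then a zeroinitializer shufflevector that broadcasts lane 0) collapses into the splat (i1 true) constant expression. A reduced before/after sketch of that pattern follows; @old_mask and @new_mask are illustrative names, not functions from this patch.

; Before: the all-true mask for an "unmasked" @llvm.vp.* call is built in two steps.
define <vscale x 2 x i16> @old_mask(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, i32 zeroext %evl) {
  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %v = call <vscale x 2 x i16> @llvm.vp.umax.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i16> %v
}

; After: the splat constant spells the same all-true mask inline.
define <vscale x 2 x i16> @new_mask(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, i32 zeroext %evl) {
  %v = call <vscale x 2 x i16> @llvm.vp.umax.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 2 x i16> %v
}

declare <vscale x 2 x i16> @llvm.vp.umax.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, i32)

Both forms fold to the same unmasked vmaxu.vv selection, which is why the CHECK lines in these hunks are untouched.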
ret %v } @@ -611,9 +569,7 @@ define @vmaxu_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -637,9 +593,7 @@ define @vmaxu_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -661,9 +615,7 @@ define @vmaxu_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -687,9 +639,7 @@ define @vmaxu_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -711,9 +661,7 @@ define @vmaxu_vv_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -737,9 +685,7 @@ define @vmaxu_vx_nxv16i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -761,9 +707,7 @@ define @vmaxu_vv_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -787,9 +731,7 @@ define @vmaxu_vx_nxv32i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -811,9 +753,7 @@ define @vmaxu_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -837,9 +777,7 @@ define @vmaxu_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 
0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -861,9 +799,7 @@ define @vmaxu_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -887,9 +823,7 @@ define @vmaxu_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -911,9 +845,7 @@ define @vmaxu_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -937,9 +869,7 @@ define @vmaxu_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -961,9 +891,7 @@ define @vmaxu_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -987,9 +915,7 @@ define @vmaxu_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1011,9 +937,7 @@ define @vmaxu_vv_nxv16i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmaxu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1037,9 +961,7 @@ define @vmaxu_vx_nxv16i32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1096,9 +1018,7 @@ define @vmaxu_vx_nxv32i32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv32i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1196,9 +1116,7 
@@ define @vmaxu_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1250,9 +1168,7 @@ define @vmaxu_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1274,9 +1190,7 @@ define @vmaxu_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1328,9 +1242,7 @@ define @vmaxu_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1352,9 +1264,7 @@ define @vmaxu_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1406,9 +1316,7 @@ define @vmaxu_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1430,9 +1338,7 @@ define @vmaxu_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1484,8 +1390,6 @@ define @vmaxu_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umax.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umax.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll index 7405282a07b70..7f526a21deac1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll @@ -33,10 +33,8 @@ define @vmin_vi_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -71,10 +69,8 @@ define @vmin_vi_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, 
a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -109,10 +105,8 @@ define @vmin_vi_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -147,10 +141,8 @@ define @vmin_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -185,10 +177,8 @@ define @vmin_vi_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -223,10 +213,8 @@ define @vmin_vi_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -261,10 +249,8 @@ define @vmin_vi_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -299,10 +285,8 @@ define @vmin_vi_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -337,10 +321,8 @@ define @vmin_vi_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -375,10 +357,8 @@ define @vmin_vi_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -413,10 +393,8 @@ define 
@vmin_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -451,10 +429,8 @@ define @vmin_vi_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -489,10 +465,8 @@ define @vmin_vi_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -527,10 +501,8 @@ define @vmin_vi_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -565,10 +537,8 @@ define @vmin_vi_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -603,10 +573,8 @@ define @vmin_vi_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -641,10 +609,8 @@ define @vmin_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -679,10 +645,8 @@ define @vmin_vi_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -730,10 +694,8 @@ define @vmin_vi_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, 
%splat + %cmp = icmp slt %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -781,10 +743,8 @@ define @vmin_vi_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -832,10 +792,8 @@ define @vmin_vi_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -883,9 +841,7 @@ define @vmin_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vmin.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp slt %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp slt %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll index 95c5cda5e988e..8fabf93356aeb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -39,9 +39,7 @@ define @vmin_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -77,9 +75,7 @@ define @vmin_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -101,9 +97,7 @@ define @vmin_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -127,9 +121,7 @@ define @vmin_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -151,9 +143,7 @@ define @vmin_vv_nxv3i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv3i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv3i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -177,9 +167,7 @@ define @vmin_vx_nxv3i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call 
@@ -201,9 +189,7 @@ define <vscale x 4 x i8> @vmin_vv_nxv4i8_unmasked(<vscale x 4 x i8> %va, <vscale x
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmin.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x i8> @llvm.vp.smin.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> %m, i32 %evl)
+  %v = call <vscale x 4 x i8> @llvm.vp.smin.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x i8> %v
 }
 
@@ -227,9 +213,7 @@ define <vscale x 4 x i8> @vmin_vx_nxv4i8_unmasked(<vscale x 4 x i8> %va, i8 %b,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
-  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x i8> @llvm.vp.smin.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
+  %v = call <vscale x 4 x i8> @llvm.vp.smin.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x i8> %v
 }
 
@@ -251,9 +235,7 @@ define <vscale x 8 x i8> @vmin_vv_nxv8i8_unmasked(<vscale x 8 x i8> %va, <vscale x
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
 ; CHECK-NEXT:    vmin.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %v = call <vscale x 8 x i8> @llvm.vp.smin.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i8> @llvm.vp.smin.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %b, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x i8> %v
 }
 
@@ -277,9 +259,7 @@ define <vscale x 8 x i8> @vmin_vx_nxv8i8_unmasked(<vscale x 8 x i8> %va, i8 %b,
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
-  %head = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %v = call <vscale x 8 x i8> @llvm.vp.smin.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 %evl)
+  %v = call <vscale x 8 x i8> @llvm.vp.smin.nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 8 x i8> %v
 }
 
@@ -301,9 +281,7 @@ define <vscale x 16 x i8> @vmin_vv_nxv16i8_unmasked(<vscale x 16 x i8> %va, <vscale x
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
 ; CHECK-NEXT:    vmin.vv v8, v8, v10
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
-  %v = call <vscale x 16 x i8> @llvm.vp.smin.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> %m, i32 %evl)
+  %v = call <vscale x 16 x i8> @llvm.vp.smin.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %b, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x i8> %v
 }
 
@@ -327,9 +305,7 @@ define <vscale x 16 x i8> @vmin_vx_nxv16i8_unmasked(<vscale x 16 x i8> %va, i8 %
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 16 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-  %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
-  %v = call <vscale x 16 x i8> @llvm.vp.smin.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 %evl)
+  %v = call <vscale x 16 x i8> @llvm.vp.smin.nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 16 x i8> %v
 }
 
@@ -351,9 +327,7 @@ define <vscale x 32 x i8> @vmin_vv_nxv32i8_unmasked(<vscale x 32 x i8> %va, <vscale x
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 ; CHECK-NEXT:    vmin.vv v8, v8, v12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
-  %v = call <vscale x 32 x i8> @llvm.vp.smin.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> %m, i32 %evl)
+  %v = call <vscale x 32 x i8> @llvm.vp.smin.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %b, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x i8> %v
 }
 
@@ -377,9 +351,7 @@ define <vscale x 32 x i8> @vmin_vx_nxv32i8_unmasked(<vscale x 32 x i8> %va, i8 %
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 32 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
-  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
-  %v = call <vscale x 32 x i8> @llvm.vp.smin.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 %evl)
+  %v = call <vscale x 32 x i8> @llvm.vp.smin.nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 32 x i8> %v
 }
 
@@ -401,9 +373,7 @@ define <vscale x 64 x i8> @vmin_vv_nxv64i8_unmasked(<vscale x 64 x i8> %va, <vscale x
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
 ; CHECK-NEXT:    vmin.vv v8, v8, v16
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
-  %v = call <vscale x 64 x i8> @llvm.vp.smin.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> %m, i32 %evl)
+  %v = call <vscale x 64 x i8> @llvm.vp.smin.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %b, <vscale x 64 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 64 x i8> %v
 }
 
@@ -427,9 +397,7 @@ define <vscale x 64 x i8> @vmin_vx_nxv64i8_unmasked(<vscale x 64 x i8> %va, i8 %
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 64 x i8> poison, i8 %b, i32 0
   %vb = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
-  %head = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 64 x i1> %head, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
-  %v = call <vscale x 64 x i8> @llvm.vp.smin.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 %evl)
+  %v = call <vscale x 64 x i8> @llvm.vp.smin.nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 64 x i8> %v
 }
 
@@ -485,9 +453,7 @@ 
define @vmin_vx_nxv128i8_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv128i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -509,9 +475,7 @@ define @vmin_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -535,9 +499,7 @@ define @vmin_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -559,9 +521,7 @@ define @vmin_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -585,9 +545,7 @@ define @vmin_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -609,9 +567,7 @@ define @vmin_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -635,9 +591,7 @@ define @vmin_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -659,9 +613,7 @@ define @vmin_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -685,9 +637,7 @@ define @vmin_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -709,9 +659,7 @@ define @vmin_vv_nxv16i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv16i16( %va, %b, splat 
(i1 true), i32 %evl) ret %v } @@ -735,9 +683,7 @@ define @vmin_vx_nxv16i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -759,9 +705,7 @@ define @vmin_vv_nxv32i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -785,9 +729,7 @@ define @vmin_vx_nxv32i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -809,9 +751,7 @@ define @vmin_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -835,9 +775,7 @@ define @vmin_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -859,9 +797,7 @@ define @vmin_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -885,9 +821,7 @@ define @vmin_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -909,9 +843,7 @@ define @vmin_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -935,9 +867,7 @@ define @vmin_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -959,9 +889,7 @@ define @vmin_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv8i32( %va, %b, %m, i32 
%evl) + %v = call @llvm.vp.smin.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -985,9 +913,7 @@ define @vmin_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1009,9 +935,7 @@ define @vmin_vv_nxv16i32_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmin.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1035,9 +959,7 @@ define @vmin_vx_nxv16i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1094,9 +1016,7 @@ define @vmin_vx_nxv32i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv32i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1194,9 +1114,7 @@ define @vmin_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1248,9 +1166,7 @@ define @vmin_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1272,9 +1188,7 @@ define @vmin_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1326,9 +1240,7 @@ define @vmin_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1350,9 +1262,7 @@ define @vmin_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1404,9 +1314,7 @@ define @vmin_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 
%b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1428,9 +1336,7 @@ define @vmin_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1482,8 +1388,6 @@ define @vmin_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.smin.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.smin.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll index 2fbe87742a4c4..d22a7dcccf0ad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll @@ -33,10 +33,8 @@ define @vmin_vi_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -71,10 +69,8 @@ define @vmin_vi_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -109,10 +105,8 @@ define @vmin_vi_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -147,10 +141,8 @@ define @vmin_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -185,10 +177,8 @@ define @vmin_vi_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -223,10 +213,8 @@ define @vmin_vi_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - 
%vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -261,10 +249,8 @@ define @vmin_vi_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i8 -3) + %vc = select %cmp, %va, splat (i8 -3) ret %vc } @@ -299,10 +285,8 @@ define @vmin_vi_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -337,10 +321,8 @@ define @vmin_vi_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -375,10 +357,8 @@ define @vmin_vi_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -413,10 +393,8 @@ define @vmin_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -451,10 +429,8 @@ define @vmin_vi_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -489,10 +465,8 @@ define @vmin_vi_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i16 -3) + %vc = select %cmp, %va, splat (i16 -3) ret %vc } @@ -527,10 +501,8 @@ define @vmin_vi_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -565,10 +537,8 @@ define @vmin_vi_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head 
= insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -603,10 +573,8 @@ define @vmin_vi_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -641,10 +609,8 @@ define @vmin_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -679,10 +645,8 @@ define @vmin_vi_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i32 -3) + %vc = select %cmp, %va, splat (i32 -3) ret %vc } @@ -730,10 +694,8 @@ define @vmin_vi_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -781,10 +743,8 @@ define @vmin_vi_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -832,10 +792,8 @@ define @vmin_vi_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -883,10 +841,8 @@ define @vmin_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vminu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %cmp = icmp ult %va, %splat - %vc = select %cmp, %va, %splat + %cmp = icmp ult %va, splat (i64 -3) + %vc = select %cmp, %va, splat (i64 -3) ret %vc } @@ -898,9 +854,7 @@ define @vmin_vv_mask_nxv8i32( %va, poison, i32 -1, i32 0 - %max = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %vb, %max + %vs = select %mask, %vb, splat (i32 -1) %cmp = icmp ult %va, %vs %vc = select %cmp, %va, %vs ret %vc @@ -914,11 +868,9 @@ define @vmin_vx_mask_nxv8i32( %va, i32 sign ; CHECK-NEXT: vmerge.vxm v12, v12, a0, v0 ; CHECK-NEXT: vminu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head0 = 
insertelement poison, i32 -1, i32 0 - %max = shufflevector %head0, poison, zeroinitializer %head1 = insertelement poison, i32 %b, i32 0 %splat = shufflevector %head1, poison, zeroinitializer - %vs = select %mask, %splat, %max + %vs = select %mask, %splat, splat (i32 -1) %cmp = icmp ult %va, %vs %vc = select %cmp, %va, %vs ret %vc @@ -932,11 +884,7 @@ define @vmin_vi_mask_nxv8i32( %va, poison, i32 -1, i32 0 - %max = shufflevector %head0, poison, zeroinitializer - %head1 = insertelement poison, i32 -3, i32 0 - %splat = shufflevector %head1, poison, zeroinitializer - %vs = select %mask, %splat, %max + %vs = select %mask, splat (i32 -3), splat (i32 -1) %cmp = icmp ult %va, %vs %vc = select %cmp, %va, %vs ret %vc diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll index d3d5d6ece9b41..8ec85e545a0f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll @@ -41,9 +41,7 @@ define @vminu_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -79,9 +77,7 @@ define @vminu_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -103,9 +99,7 @@ define @vminu_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -129,9 +123,7 @@ define @vminu_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -153,9 +145,7 @@ define @vminu_vv_nxv3i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv3i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv3i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -179,9 +169,7 @@ define @vminu_vx_nxv3i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv3i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -203,9 +191,7 @@ define @vminu_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -229,9 +215,7 @@ define @vminu_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, 
poison, zeroinitializer - %v = call @llvm.vp.umin.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -253,9 +237,7 @@ define @vminu_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -279,9 +261,7 @@ define @vminu_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -303,9 +283,7 @@ define @vminu_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -329,9 +307,7 @@ define @vminu_vx_nxv16i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -353,9 +329,7 @@ define @vminu_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -379,9 +353,7 @@ define @vminu_vx_nxv32i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -403,9 +375,7 @@ define @vminu_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -429,9 +399,7 @@ define @vminu_vx_nxv64i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -487,9 +455,7 @@ define @vminu_vx_nxv128i8_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv128i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -511,9 +477,7 @@ define @vminu_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv1i16( %va, %b, %m, i32 %evl) 
+ %v = call @llvm.vp.umin.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -537,9 +501,7 @@ define @vminu_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -561,9 +523,7 @@ define @vminu_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -587,9 +547,7 @@ define @vminu_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -611,9 +569,7 @@ define @vminu_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -637,9 +593,7 @@ define @vminu_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -661,9 +615,7 @@ define @vminu_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -687,9 +639,7 @@ define @vminu_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -711,9 +661,7 @@ define @vminu_vv_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -737,9 +685,7 @@ define @vminu_vx_nxv16i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -761,9 +707,7 @@ define @vminu_vv_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vminu.vv v8, 
v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -787,9 +731,7 @@ define @vminu_vx_nxv32i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -811,9 +753,7 @@ define @vminu_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -837,9 +777,7 @@ define @vminu_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -861,9 +799,7 @@ define @vminu_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -887,9 +823,7 @@ define @vminu_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -911,9 +845,7 @@ define @vminu_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -937,9 +869,7 @@ define @vminu_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -961,9 +891,7 @@ define @vminu_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -987,9 +915,7 @@ define @vminu_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1011,9 +937,7 @@ define @vminu_vv_nxv16i32_unmasked( %va, ; CHECK-NEXT: 
vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vminu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1037,9 +961,7 @@ define @vminu_vx_nxv16i32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1096,9 +1018,7 @@ define @vminu_vx_nxv32i32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv32i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1196,9 +1116,7 @@ define @vminu_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1250,9 +1168,7 @@ define @vminu_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1274,9 +1190,7 @@ define @vminu_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1328,9 +1242,7 @@ define @vminu_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1352,9 +1264,7 @@ define @vminu_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1406,9 +1316,7 @@ define @vminu_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1430,9 +1338,7 @@ define @vminu_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ 
-1484,8 +1390,6 @@ define @vminu_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.umin.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.umin.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll index 3e14058210e51..1a6d5a1d0029d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll @@ -34,9 +34,7 @@ define @vmul_vi_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i8 -7) ret %vc } @@ -69,9 +67,7 @@ define @vmul_vi_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i8 -7) ret %vc } @@ -104,9 +100,7 @@ define @vmul_vi_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i8 -7) ret %vc } @@ -139,9 +133,7 @@ define @vmul_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i8 -7) ret %vc } @@ -174,9 +166,7 @@ define @vmul_vi_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i8 -7) ret %vc } @@ -209,9 +199,7 @@ define @vmul_vi_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i8 -7) ret %vc } @@ -244,9 +232,7 @@ define @vmul_vi_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i8 -7) ret %vc } @@ -279,9 +265,7 @@ define @vmul_vi_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i16 -7) ret %vc } @@ -314,9 +298,7 @@ define @vmul_vi_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i16 -7) ret %vc } @@ -349,9 +331,7 @@ define 
@vmul_vi_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i16 -7) ret %vc } @@ -384,9 +364,7 @@ define @vmul_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i16 -7) ret %vc } @@ -419,9 +397,7 @@ define @vmul_vi_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i16 -7) ret %vc } @@ -454,9 +430,7 @@ define @vmul_vi_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i16 -7) ret %vc } @@ -489,9 +463,7 @@ define @vmul_vi_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i32 -7) ret %vc } @@ -524,9 +496,7 @@ define @vmul_vi_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i32 -7) ret %vc } @@ -559,9 +529,7 @@ define @vmul_vi_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i32 -7) ret %vc } @@ -594,9 +562,7 @@ define @vmul_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i32 -7) ret %vc } @@ -629,9 +595,7 @@ define @vmul_vi_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i32 -7) ret %vc } @@ -677,9 +641,7 @@ define @vmul_vi_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i64 -7) ret %vc } @@ -689,9 +651,7 @@ define @vmul_vi_nxv1i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i64 2) ret %vc } @@ -701,9 +661,7 @@ define @vmul_vi_nxv1i64_2( 
%va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 4 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i64 16) ret %vc } @@ -749,9 +707,7 @@ define @vmul_vi_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i64 -7) ret %vc } @@ -761,9 +717,7 @@ define @vmul_vi_nxv2i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i64 2) ret %vc } @@ -773,9 +727,7 @@ define @vmul_vi_nxv2i64_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 4 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i64 16) ret %vc } @@ -821,9 +773,7 @@ define @vmul_vi_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i64 -7) ret %vc } @@ -833,9 +783,7 @@ define @vmul_vi_nxv4i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i64 2) ret %vc } @@ -845,9 +793,7 @@ define @vmul_vi_nxv4i64_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 4 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i64 16) ret %vc } @@ -893,9 +839,7 @@ define @vmul_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vmul.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i64 -7) ret %vc } @@ -905,9 +849,7 @@ define @vmul_vi_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 2, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i64 2) ret %vc } @@ -917,9 +859,7 @@ define @vmul_vi_nxv8i64_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 4 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = mul %va, %splat + %vc = mul %va, splat (i64 16) ret %vc } @@ -967,9 +907,7 @@ define @vmul_vv_mask_nxv8i32( %va, poison, i32 1, i32 0 - %one = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %vb, %one + %vs = select %mask, %vb, splat (i32 1) %vc = mul %va, %vs ret %vc } @@ -980,11 +918,9 @@ define @vmul_vx_mask_nxv8i32( %va, i32 sign ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu ; CHECK-NEXT: vmul.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret 
- %head1 = insertelement poison, i32 1, i32 0 - %one = shufflevector %head1, poison, zeroinitializer %head2 = insertelement poison, i32 %b, i32 0 %splat = shufflevector %head2, poison, zeroinitializer - %vs = select %mask, %splat, %one + %vs = select %mask, %splat, splat (i32 1) %vc = mul %va, %vs ret %vc } @@ -996,11 +932,7 @@ define @vmul_vi_mask_nxv8i32( %va, poison, i32 1, i32 0 - %one = shufflevector %head1, poison, zeroinitializer - %head2 = insertelement poison, i32 7, i32 0 - %splat = shufflevector %head2, poison, zeroinitializer - %vs = select %mask, %splat, %one + %vs = select %mask, splat (i32 7), splat (i32 1) %vc = mul %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll index 30ff90d8aa481..24d1768da1027 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll @@ -36,9 +36,7 @@ define @vmul_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -62,9 +60,7 @@ define @vmul_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -86,9 +82,7 @@ define @vmul_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -112,9 +106,7 @@ define @vmul_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -136,9 +128,7 @@ define @vmul_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -162,9 +152,7 @@ define @vmul_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -186,9 +174,7 @@ define @vmul_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -212,9 +198,7 @@ define @vmul_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv8i8( %va, %vb, %m, i32 %evl) + %v = 
call @llvm.vp.mul.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -236,9 +220,7 @@ define @vmul_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -262,9 +244,7 @@ define @vmul_vx_nxv16i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -286,9 +266,7 @@ define @vmul_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -312,9 +290,7 @@ define @vmul_vx_nxv32i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -336,9 +312,7 @@ define @vmul_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -362,9 +336,7 @@ define @vmul_vx_nxv64i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -386,9 +358,7 @@ define @vmul_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -412,9 +382,7 @@ define @vmul_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -436,9 +404,7 @@ define @vmul_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -462,9 +428,7 @@ define @vmul_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -486,9 +450,7 @@ define 
@vmul_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -512,9 +474,7 @@ define @vmul_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -536,9 +496,7 @@ define @vmul_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -562,9 +520,7 @@ define @vmul_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -586,9 +542,7 @@ define @vmul_vv_nxv16i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vmul.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -612,9 +566,7 @@ define @vmul_vx_nxv16i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -636,9 +588,7 @@ define @vmul_vv_nxv32i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -662,9 +612,7 @@ define @vmul_vx_nxv32i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -686,9 +634,7 @@ define @vmul_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -712,9 +658,7 @@ define @vmul_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v 
= call @llvm.vp.mul.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -736,9 +680,7 @@ define @vmul_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -762,9 +704,7 @@ define @vmul_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -786,9 +726,7 @@ define @vmul_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -812,9 +750,7 @@ define @vmul_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -836,9 +772,7 @@ define @vmul_vv_nxv7i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv7i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv7i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -862,9 +796,7 @@ define @vmul_vx_nxv7i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv7i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv7i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -886,9 +818,7 @@ define @vmul_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -912,9 +842,7 @@ define @vmul_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -936,9 +864,7 @@ define @vmul_vv_nxv16i32_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -974,9 +900,7 @@ define @vmul_vx_nxv16i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, 
poison, zeroinitializer - %v = call @llvm.vp.mul.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -998,9 +922,7 @@ define @vmul_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1052,9 +974,7 @@ define @vmul_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1076,9 +996,7 @@ define @vmul_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1130,9 +1048,7 @@ define @vmul_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1154,9 +1070,7 @@ define @vmul_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1208,9 +1122,7 @@ define @vmul_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1232,9 +1144,7 @@ define @vmul_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1286,8 +1196,6 @@ define @vmul_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.mul.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.mul.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll index 0fda7909df313..253cfb040308b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll @@ -17,9 +17,7 @@ define @srem_eq_fold_nxv4i8( %va) { ; CHECK-NEXT: vor.vv v8, v9, v8 ; CHECK-NEXT: vmsleu.vx v0, v8, a0 ; CHECK-NEXT: ret - %head_six = insertelement poison, i8 6, i32 0 - %splat_six = shufflevector %head_six, poison, zeroinitializer - %rem = srem %va, %splat_six + %rem = srem %va, splat (i8 6) %cc = icmp eq %rem, zeroinitializer ret %cc @@ -34,9 +32,7 
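A note on the recurring change in the hunks above: the three-instruction splat idiom (insertelement into poison, then a zeroinitializer shufflevector) is collapsed into the splat constant expression. A minimal before/after sketch with the scalable-vector types written out in full — the nxv4i8 variant is shown as a representative case, and the function names here are illustrative, not from the patch:

; Before: all-true mask materialized with insertelement + shufflevector.
define <vscale x 4 x i8> @mul_unmasked_old(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, i32 zeroext %evl) {
  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %v
}

; After: the same all-true mask as a single splat constant.
define <vscale x 4 x i8> @mul_unmasked_new(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, i32 zeroext %evl) {
  %v = call <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 4 x i8> %v
}

declare <vscale x 4 x i8> @llvm.vp.mul.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)

The srem_eq_fold_nxv4i8 hunk just above is the same rewrite applied to a non-mask constant; with the elided types restored it reads:

define <vscale x 4 x i1> @srem_eq_fold_nxv4i8(<vscale x 4 x i8> %va) {
  %rem = srem <vscale x 4 x i8> %va, splat (i8 6)
  %cc = icmp eq <vscale x 4 x i8> %rem, zeroinitializer
  ret <vscale x 4 x i1> %cc
}

Note that the runtime scalar splats (%elt.head/%vb built from an i8 %b argument) are deliberately left untouched throughout the patch: splat constants only cover compile-time-constant elements.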
@@ define @vmulh_vv_nxv1i32( %va, %vb to %vd = sext %va to %ve = mul %vc, %vd - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vf = lshr %ve, %splat + %vf = lshr %ve, splat (i64 32) %vg = trunc %vf to ret %vg } @@ -52,9 +48,7 @@ define @vmulh_vx_nxv1i32( %va, i32 %x) { %vb = sext %splat1 to %vc = sext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -66,14 +60,10 @@ define @vmulh_vi_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 -7, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = sext %splat1 to + %vb = sext splat (i32 -7) to %vc = sext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -85,14 +75,10 @@ define @vmulh_vi_nxv1i32_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 16, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = sext %splat1 to + %vb = sext splat (i32 16) to %vc = sext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -106,9 +92,7 @@ define @vmulh_vv_nxv2i32( %va, %vb to %vd = sext %va to %ve = mul %vc, %vd - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vf = lshr %ve, %splat + %vf = lshr %ve, splat (i64 32) %vg = trunc %vf to ret %vg } @@ -124,9 +108,7 @@ define @vmulh_vx_nxv2i32( %va, i32 %x) { %vb = sext %splat1 to %vc = sext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -138,14 +120,10 @@ define @vmulh_vi_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 -7, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = sext %splat1 to + %vb = sext splat (i32 -7) to %vc = sext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -157,14 +135,10 @@ define @vmulh_vi_nxv2i32_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 16, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = sext %splat1 to + %vb = sext splat (i32 16) to %vc = sext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -178,9 +152,7 @@ define @vmulh_vv_nxv4i32( %va, %vb to %vd = sext %va to %ve = mul %vc, %vd - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, 
zeroinitializer - %vf = lshr %ve, %splat + %vf = lshr %ve, splat (i64 32) %vg = trunc %vf to ret %vg } @@ -196,9 +168,7 @@ define @vmulh_vx_nxv4i32( %va, i32 %x) { %vb = sext %splat1 to %vc = sext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -210,14 +180,10 @@ define @vmulh_vi_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 -7, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = sext %splat1 to + %vb = sext splat (i32 -7) to %vc = sext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -229,14 +195,10 @@ define @vmulh_vi_nxv4i32_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 16, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = sext %splat1 to + %vb = sext splat (i32 16) to %vc = sext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -250,9 +212,7 @@ define @vmulh_vv_nxv8i32( %va, %vb to %vd = sext %va to %ve = mul %vc, %vd - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vf = lshr %ve, %splat + %vf = lshr %ve, splat (i64 32) %vg = trunc %vf to ret %vg } @@ -268,9 +228,7 @@ define @vmulh_vx_nxv8i32( %va, i32 %x) { %vb = sext %splat1 to %vc = sext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -282,14 +240,10 @@ define @vmulh_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 -7, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = sext %splat1 to + %vb = sext splat (i32 -7) to %vc = sext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -301,14 +255,10 @@ define @vmulh_vi_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vmulh.vx v8, v8, a0 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 16, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = sext %splat1 to + %vb = sext splat (i32 16) to %vc = sext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll index 5354c17fd2a7d..3fd7f5be860cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll @@ -11,9 +11,7 @@ define @vmulhu_vv_nxv1i32( %va, %vb to %vd = zext %va to %ve = mul %vc, %vd - %head = 
insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vf = lshr %ve, %splat + %vf = lshr %ve, splat (i64 32) %vg = trunc %vf to ret %vg } @@ -29,9 +27,7 @@ define @vmulhu_vx_nxv1i32( %va, i32 %x) { %vb = zext %splat1 to %vc = zext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -43,14 +39,10 @@ define @vmulhu_vi_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 -7, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = zext %splat1 to + %vb = zext splat (i32 -7) to %vc = zext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -68,14 +60,10 @@ define @vmulhu_vi_nxv1i32_1( %va) { ; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; RV64-NEXT: vmulhu.vx v8, v8, a0 ; RV64-NEXT: ret - %head1 = insertelement poison, i32 16, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = zext %splat1 to + %vb = zext splat (i32 16) to %vc = zext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -89,9 +77,7 @@ define @vmulhu_vv_nxv2i32( %va, %vb to %vd = zext %va to %ve = mul %vc, %vd - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vf = lshr %ve, %splat + %vf = lshr %ve, splat (i64 32) %vg = trunc %vf to ret %vg } @@ -107,9 +93,7 @@ define @vmulhu_vx_nxv2i32( %va, i32 %x) { %vb = zext %splat1 to %vc = zext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -121,14 +105,10 @@ define @vmulhu_vi_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 -7, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = zext %splat1 to + %vb = zext splat (i32 -7) to %vc = zext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -146,14 +126,10 @@ define @vmulhu_vi_nxv2i32_1( %va) { ; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; RV64-NEXT: vmulhu.vx v8, v8, a0 ; RV64-NEXT: ret - %head1 = insertelement poison, i32 16, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = zext %splat1 to + %vb = zext splat (i32 16) to %vc = zext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -167,9 +143,7 @@ define @vmulhu_vv_nxv4i32( %va, %vb to %vd = zext %va to %ve = mul %vc, %vd - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vf = lshr %ve, %splat + %vf = lshr %ve, splat (i64 32) %vg = trunc %vf to ret 
%vg } @@ -185,9 +159,7 @@ define @vmulhu_vx_nxv4i32( %va, i32 %x) { %vb = zext %splat1 to %vc = zext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -199,14 +171,10 @@ define @vmulhu_vi_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 -7, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = zext %splat1 to + %vb = zext splat (i32 -7) to %vc = zext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -224,14 +192,10 @@ define @vmulhu_vi_nxv4i32_1( %va) { ; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; RV64-NEXT: vmulhu.vx v8, v8, a0 ; RV64-NEXT: ret - %head1 = insertelement poison, i32 16, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = zext %splat1 to + %vb = zext splat (i32 16) to %vc = zext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -245,9 +209,7 @@ define @vmulhu_vv_nxv8i32( %va, %vb to %vd = zext %va to %ve = mul %vc, %vd - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vf = lshr %ve, %splat + %vf = lshr %ve, splat (i64 32) %vg = trunc %vf to ret %vg } @@ -263,9 +225,7 @@ define @vmulhu_vx_nxv8i32( %va, i32 %x) { %vb = zext %splat1 to %vc = zext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -277,14 +237,10 @@ define @vmulhu_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vmulhu.vx v8, v8, a0 ; CHECK-NEXT: ret - %head1 = insertelement poison, i32 -7, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = zext %splat1 to + %vb = zext splat (i32 -7) to %vc = zext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } @@ -302,14 +258,10 @@ define @vmulhu_vi_nxv8i32_1( %va) { ; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; RV64-NEXT: vmulhu.vx v8, v8, a0 ; RV64-NEXT: ret - %head1 = insertelement poison, i32 16, i32 0 - %splat1 = shufflevector %head1, poison, zeroinitializer - %vb = zext %splat1 to + %vb = zext splat (i32 16) to %vc = zext %va to %vd = mul %vb, %vc - %head2 = insertelement poison, i64 32, i32 0 - %splat2 = shufflevector %head2, poison, zeroinitializer - %ve = lshr %vd, %splat2 + %ve = lshr %vd, splat (i64 32) %vf = trunc %ve to ret %vf } diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir index 4fa29e174602d..5bb6ce250e8db 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir @@ -8,7 +8,6 @@ tracksRegLiveness: true body: | bb.0: liveins: $x14, $x16 - ; 82 = e32,m4 ; CHECK-LABEL: name: copy_different_lmul ; CHECK: liveins: $x14, $x16 ; CHECK-NEXT: {{ $}} @@ -25,7 +24,6 @@ 
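The vmulh-sdnode.ll and vmulhu-sdnode.ll hunks above make the same substitution for sign- and zero-extended constant operands. A self-contained sketch with the stripped scalable types restored, assuming the nxv1i32 signed case (the function name is illustrative):

define <vscale x 1 x i32> @mulh_const_sketch(<vscale x 1 x i32> %va) {
  ; splat (i32 -7) stands in for the removed insertelement/shufflevector pair
  %vb = sext <vscale x 1 x i32> splat (i32 -7) to <vscale x 1 x i64>
  %vc = sext <vscale x 1 x i32> %va to <vscale x 1 x i64>
  %vd = mul <vscale x 1 x i64> %vb, %vc
  ; shifting by splat (i64 32) extracts the high half of the widened product
  %ve = lshr <vscale x 1 x i64> %vd, splat (i64 32)
  %vf = trunc <vscale x 1 x i64> %ve to <vscale x 1 x i32>
  ret <vscale x 1 x i32> %vf
}

The vmulhu tests are identical except that both extensions are zext, which is what lets the backend select vmulhu.vx rather than vmulh.vx, as the CHECK lines above show.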
tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x14, $x16
-    ; 82 = e32,m4
     ; CHECK-LABEL: name: copy_convert_to_vmv_v_v
     ; CHECK: liveins: $x14, $x16
     ; CHECK-NEXT: {{  $}}
@@ -42,7 +40,6 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x14
-    ; 82 = e32,m4
     ; CHECK-LABEL: name: copy_convert_to_vmv_v_i
     ; CHECK: liveins: $x14
     ; CHECK-NEXT: {{  $}}
@@ -59,7 +56,6 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x14, $x16
-    ; 82 = e32,m4
     ; CHECK-LABEL: name: copy_from_whole_load_store
     ; CHECK: liveins: $x14, $x16
     ; CHECK-NEXT: {{  $}}
@@ -76,7 +72,6 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x14, $x16
-    ; 82 = e32,m4
     ; CHECK-LABEL: name: copy_with_vleff
     ; CHECK: liveins: $x14, $x16
     ; CHECK-NEXT: {{  $}}
@@ -95,8 +90,6 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x14, $x16, $x17, $x18
-    ; 82 = e32,m4
-    ; 73 = e16,m2
     ; CHECK-LABEL: name: copy_with_vsetvl_x0_x0_1
     ; CHECK: liveins: $x14, $x16, $x17, $x18
     ; CHECK-NEXT: {{  $}}
@@ -121,8 +114,6 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x14, $x16, $x17, $x18
-    ; 82 = e32,m4
-    ; 73 = e16,m2
     ; CHECK-LABEL: name: copy_with_vsetvl_x0_x0_2
     ; CHECK: liveins: $x14, $x16, $x17, $x18
     ; CHECK-NEXT: {{  $}}
@@ -147,8 +138,6 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x14, $x16, $x17, $x18
-    ; 82 = e32,m4
-    ; 73 = e16,m2
     ; CHECK-LABEL: name: copy_with_vsetvl_x0_x0_3
     ; CHECK: liveins: $x14, $x16, $x17, $x18
     ; CHECK-NEXT: {{  $}}
@@ -169,7 +158,6 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x16, $x17
-    ; 73 = e16,m2
     ; CHECK-LABEL: name: copy_subregister
     ; CHECK: liveins: $x16, $x17
     ; CHECK-NEXT: {{  $}}
@@ -191,8 +179,6 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x14, $x16
-    ; 82 = e32,m4
-    ; 74 = e16,m4
     ; CHECK-LABEL: name: copy_with_different_vlmax
     ; CHECK: liveins: $x14, $x16
     ; CHECK-NEXT: {{  $}}
@@ -231,7 +217,6 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x14, $x16
-    ; 80 = e32,m1
     ; CHECK-LABEL: name: copy_zvlsseg_reg
     ; CHECK: liveins: $x14, $x16
     ; CHECK-NEXT: {{  $}}
@@ -248,14 +233,12 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x14, $x16
-    ; 80 = e32,m1
     ; CHECK-LABEL: name: copy_zvlsseg_reg_2
     ; CHECK: liveins: $x14, $x16
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 80 /* e32, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
     ; CHECK-NEXT: $v8_v9 = PseudoVLSEG2E32_V_M1 undef $v8_v9, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
-    ; CHECK-NEXT: $v10 = PseudoVMV_V_V_M1 undef $v10, $v8, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
-    ; CHECK-NEXT: $v11 = PseudoVMV_V_V_M1 undef $v11, $v9, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+    ; CHECK-NEXT: $v10m2 = VMV2R_V $v8m2
     $x15 = PseudoVSETVLI $x14, 80, implicit-def $vl, implicit-def $vtype
     $v8_v9 = PseudoVLSEG2E32_V_M1 undef $v8_v9, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype
     $v10_v11 = COPY $v8_v9
@@ -266,7 +249,6 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x14, $x16
-    ; 87 = e32,mf2
     ; CHECK-LABEL: name: copy_fractional_lmul
     ; CHECK: liveins: $x14, $x16
     ; CHECK-NEXT: {{  $}}
@@ -283,7 +265,6 @@ tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $x12, $x14, $x16
-    ; 80 = e32,m1
     ; CHECK-LABEL: name: copy_implicit_def
     ; CHECK: liveins: $x12, $x14, $x16
     ; CHECK-NEXT: {{  $}}
@@ -291,14 +272,7 @@ body: |
     ; CHECK-NEXT: $v8_v9_v10_v11_v12_v13_v14_v15 = PseudoVLSEG8E32_V_M1 undef $v8_v9_v10_v11_v12_v13_v14_v15, killed $x12, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
     ; CHECK-NEXT: $x0 = PseudoVSETIVLI 10, 80 /* e32, m1, ta, mu */,
implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: $v15 = PseudoVLE32_V_M1 undef $v15, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype, implicit killed $v8_v9_v10_v11_v12_v13_v14_v15, implicit-def $v8_v9_v10_v11_v12_v13_v14_v15 - ; CHECK-NEXT: $v24 = VMV1R_V killed $v8 - ; CHECK-NEXT: $v25 = VMV1R_V killed $v9 - ; CHECK-NEXT: $v26 = VMV1R_V killed $v10 - ; CHECK-NEXT: $v27 = VMV1R_V killed $v11 - ; CHECK-NEXT: $v28 = VMV1R_V killed $v12 - ; CHECK-NEXT: $v29 = VMV1R_V killed $v13 - ; CHECK-NEXT: $v30 = VMV1R_V killed $v14 - ; CHECK-NEXT: $v31 = VMV1R_V killed $v15 + ; CHECK-NEXT: $v24m8 = VMV8R_V killed $v8m8 $x0 = PseudoVSETVLI $x14, 80, implicit-def $vl, implicit-def $vtype $v8_v9_v10_v11_v12_v13_v14_v15 = PseudoVLSEG8E32_V_M1 undef $v8_v9_v10_v11_v12_v13_v14_v15, killed $x12, $noreg, 5, 0, implicit $vl, implicit $vtype $x0 = PseudoVSETIVLI 10, 80, implicit-def $vl, implicit-def $vtype diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll index 564f278372cc0..f958fe815caaa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll @@ -16,10 +16,8 @@ define @vnmsac_vv_nxv1i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i8( %m, %y, %c, i32 %evl) ret %u } @@ -31,11 +29,9 @@ define @vnmsac_vv_nxv1i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -48,10 +44,8 @@ define @vnmsac_vx_nxv1i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i8( %m, %y, %c, i32 %evl) ret %u } @@ -65,11 +59,9 @@ define @vnmsac_vx_nxv1i8_unmasked( %a, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -80,10 +72,8 @@ define @vnmsac_vv_nxv1i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( 
%a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i8( %m, %y, %c, i32 %evl) ret %u } @@ -97,10 +87,8 @@ define @vnmsac_vx_nxv1i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i8( %m, %y, %c, i32 %evl) ret %u } @@ -117,10 +105,8 @@ define @vnmsac_vv_nxv2i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i8( %m, %y, %c, i32 %evl) ret %u } @@ -132,11 +118,9 @@ define @vnmsac_vv_nxv2i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -149,10 +133,8 @@ define @vnmsac_vx_nxv2i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i8( %m, %y, %c, i32 %evl) ret %u } @@ -166,11 +148,9 @@ define @vnmsac_vx_nxv2i8_unmasked( %a, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -181,10 +161,8 @@ define @vnmsac_vv_nxv2i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i8( %m, %y, %c, i32 %evl) ret %u } @@ -198,10 +176,8 @@ 
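The vnmsac-vp.ll hunks fold the all-ones mask the same way inside a multiply-subtract-blend chain. Written out with the types that the flattening dropped, assuming the nxv2i8 case (the function name is illustrative):

define <vscale x 2 x i8> @vnmsac_sketch(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i8> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
  ; x = a * b and y = c - x, both under an all-ones VP mask (i1 -1 == i1 true)
  %x = call <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %y = call <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8> %c, <vscale x 2 x i8> %x, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ; tail-undisturbed tests blend with vp.merge; the _ta variants use vp.select,
  ; and the _unmasked ones pass splat (i1 -1) as the blend mask as well
  %u = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %y, <vscale x 2 x i8> %c, i32 %evl)
  ret <vscale x 2 x i8> %u
}

declare <vscale x 2 x i8> @llvm.vp.mul.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i8> @llvm.vp.sub.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)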
define @vnmsac_vx_nxv2i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i8( %m, %y, %c, i32 %evl) ret %u } @@ -218,10 +194,8 @@ define @vnmsac_vv_nxv4i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i8( %m, %y, %c, i32 %evl) ret %u } @@ -233,11 +207,9 @@ define @vnmsac_vv_nxv4i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -250,10 +222,8 @@ define @vnmsac_vx_nxv4i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i8( %m, %y, %c, i32 %evl) ret %u } @@ -267,11 +237,9 @@ define @vnmsac_vx_nxv4i8_unmasked( %a, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -282,10 +250,8 @@ define @vnmsac_vv_nxv4i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i8( %m, %y, %c, i32 %evl) ret %u } @@ -299,10 +265,8 @@ define @vnmsac_vx_nxv4i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i8( %a, %vb, %allones, i32 %evl) - %y = call 
@llvm.vp.sub.nxv4i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i8( %m, %y, %c, i32 %evl) ret %u } @@ -319,10 +283,8 @@ define @vnmsac_vv_nxv8i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i8( %m, %y, %c, i32 %evl) ret %u } @@ -334,11 +296,9 @@ define @vnmsac_vv_nxv8i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -351,10 +311,8 @@ define @vnmsac_vx_nxv8i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i8( %m, %y, %c, i32 %evl) ret %u } @@ -368,11 +326,9 @@ define @vnmsac_vx_nxv8i8_unmasked( %a, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -383,10 +339,8 @@ define @vnmsac_vv_nxv8i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i8( %m, %y, %c, i32 %evl) ret %u } @@ -400,10 +354,8 @@ define @vnmsac_vx_nxv8i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i8( %m, %y, %c, i32 %evl) ret %u } @@ -420,10 +372,8 @@ define @vnmsac_vv_nxv16i8( %a, poison, 
i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i8( %m, %y, %c, i32 %evl) ret %u } @@ -435,11 +385,9 @@ define @vnmsac_vv_nxv16i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -452,10 +400,8 @@ define @vnmsac_vx_nxv16i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i8( %m, %y, %c, i32 %evl) ret %u } @@ -469,11 +415,9 @@ define @vnmsac_vx_nxv16i8_unmasked( %a, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -484,10 +428,8 @@ define @vnmsac_vv_nxv16i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i8( %m, %y, %c, i32 %evl) ret %u } @@ -501,10 +443,8 @@ define @vnmsac_vx_nxv16i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i8( %m, %y, %c, i32 %evl) ret %u } @@ -521,10 +461,8 @@ define @vnmsac_vv_nxv32i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv32i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call 
@llvm.vp.sub.nxv32i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32i8( %m, %y, %c, i32 %evl) ret %u } @@ -536,11 +474,9 @@ define @vnmsac_vv_nxv32i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv32i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv32i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -553,10 +489,8 @@ define @vnmsac_vx_nxv32i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv32i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv32i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32i8( %m, %y, %c, i32 %evl) ret %u } @@ -570,11 +504,9 @@ define @vnmsac_vx_nxv32i8_unmasked( %a, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv32i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv32i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -585,10 +517,8 @@ define @vnmsac_vv_nxv32i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv32i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv32i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32i8( %m, %y, %c, i32 %evl) ret %u } @@ -602,10 +532,8 @@ define @vnmsac_vx_nxv32i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv32i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv32i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32i8( %m, %y, %c, i32 %evl) ret %u } @@ -623,10 +551,8 @@ define @vnmsac_vv_nxv64i8( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv64i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv64i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv64i8( %m, %y, %c, i32 %evl) ret %u } @@ -639,11 +565,9 @@ define @vnmsac_vv_nxv64i8_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call 
@llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv64i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv64i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv64i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv64i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -656,10 +580,8 @@ define @vnmsac_vx_nxv64i8( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv64i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv64i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv64i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv64i8( %m, %y, %c, i32 %evl) ret %u } @@ -673,11 +595,9 @@ define @vnmsac_vx_nxv64i8_unmasked( %a, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv64i8( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv64i8( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv64i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv64i8( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv64i8( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -689,10 +609,8 @@ define @vnmsac_vv_nxv64i8_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv64i8( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv64i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv64i8( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv64i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv64i8( %m, %y, %c, i32 %evl) ret %u } @@ -706,10 +624,8 @@ define @vnmsac_vx_nxv64i8_ta( %a, i8 %b, poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv64i8( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv64i8( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv64i8( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv64i8( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv64i8( %m, %y, %c, i32 %evl) ret %u } @@ -726,10 +642,8 @@ define @vnmsac_vv_nxv1i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i16( %m, %y, %c, i32 %evl) ret %u } @@ -741,11 +655,9 @@ define @vnmsac_vv_nxv1i16_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i16( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call 
@llvm.vp.sub.nxv1i16( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -758,10 +670,8 @@ define @vnmsac_vx_nxv1i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i16( %m, %y, %c, i32 %evl) ret %u } @@ -775,11 +685,9 @@ define @vnmsac_vx_nxv1i16_unmasked( %a, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i16( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i16( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -790,10 +698,8 @@ define @vnmsac_vv_nxv1i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i16( %m, %y, %c, i32 %evl) ret %u } @@ -807,10 +713,8 @@ define @vnmsac_vx_nxv1i16_ta( %a, i16 %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i16( %m, %y, %c, i32 %evl) ret %u } @@ -827,10 +731,8 @@ define @vnmsac_vv_nxv2i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i16( %m, %y, %c, i32 %evl) ret %u } @@ -842,11 +744,9 @@ define @vnmsac_vv_nxv2i16_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i16( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i16( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -859,10 +759,8 @@ define @vnmsac_vx_nxv2i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = 
shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i16( %m, %y, %c, i32 %evl) ret %u } @@ -876,11 +774,9 @@ define @vnmsac_vx_nxv2i16_unmasked( %a, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i16( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i16( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -891,10 +787,8 @@ define @vnmsac_vv_nxv2i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i16( %m, %y, %c, i32 %evl) ret %u } @@ -908,10 +802,8 @@ define @vnmsac_vx_nxv2i16_ta( %a, i16 %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i16( %m, %y, %c, i32 %evl) ret %u } @@ -928,10 +820,8 @@ define @vnmsac_vv_nxv4i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i16( %m, %y, %c, i32 %evl) ret %u } @@ -943,11 +833,9 @@ define @vnmsac_vv_nxv4i16_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i16( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i16( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -960,10 +848,8 @@ define @vnmsac_vx_nxv4i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) - %y 
= call @llvm.vp.sub.nxv4i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i16( %m, %y, %c, i32 %evl) ret %u } @@ -977,11 +863,9 @@ define @vnmsac_vx_nxv4i16_unmasked( %a, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i16( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i16( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -992,10 +876,8 @@ define @vnmsac_vv_nxv4i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i16( %m, %y, %c, i32 %evl) ret %u } @@ -1009,10 +891,8 @@ define @vnmsac_vx_nxv4i16_ta( %a, i16 %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i16( %m, %y, %c, i32 %evl) ret %u } @@ -1029,10 +909,8 @@ define @vnmsac_vv_nxv8i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i16( %m, %y, %c, i32 %evl) ret %u } @@ -1044,11 +922,9 @@ define @vnmsac_vv_nxv8i16_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i16( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i16( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1061,10 +937,8 @@ define @vnmsac_vx_nxv8i16( %a, i16 %b, poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i16( %c, %x, splat (i1 -1), i32 %evl) %u = call 
@llvm.vp.merge.nxv8i16( %m, %y, %c, i32 %evl) ret %u } @@ -1078,11 +952,9 @@ define @vnmsac_vx_nxv8i16_unmasked( %a, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i16( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i16( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1093,10 +965,8 @@ define @vnmsac_vv_nxv8i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i16( %m, %y, %c, i32 %evl) ret %u } @@ -1110,10 +980,8 @@ define @vnmsac_vx_nxv8i16_ta( %a, i16 %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i16( %m, %y, %c, i32 %evl) ret %u } @@ -1130,10 +998,8 @@ define @vnmsac_vv_nxv16i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i16( %m, %y, %c, i32 %evl) ret %u } @@ -1145,11 +1011,9 @@ define @vnmsac_vv_nxv16i16_unmasked( %a, ; CHECK-NEXT: vnmsac.vv v16, v8, v12 ; CHECK-NEXT: vmv4r.v v8, v16 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i16( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i16( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1162,10 +1026,8 @@ define @vnmsac_vx_nxv16i16( %a, i16 %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i16( %m, %y, %c, i32 
%evl) ret %u } @@ -1179,11 +1041,9 @@ define @vnmsac_vx_nxv16i16_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i16( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i16( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1194,10 +1054,8 @@ define @vnmsac_vv_nxv16i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i16( %m, %y, %c, i32 %evl) ret %u } @@ -1211,10 +1069,8 @@ define @vnmsac_vx_nxv16i16_ta( %a, i16 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i16( %m, %y, %c, i32 %evl) ret %u } @@ -1232,10 +1088,8 @@ define @vnmsac_vv_nxv32i16( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv32i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv32i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32i16( %m, %y, %c, i32 %evl) ret %u } @@ -1248,11 +1102,9 @@ define @vnmsac_vv_nxv32i16_unmasked( %a, ; CHECK-NEXT: vnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv32i16( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv32i16( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1265,10 +1117,8 @@ define @vnmsac_vx_nxv32i16( %a, i16 %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv32i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv32i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv32i16( %m, %y, %c, i32 %evl) ret %u } @@ 
-1282,11 +1132,9 @@ define @vnmsac_vx_nxv32i16_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv32i16( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv32i16( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv32i16( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv32i16( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1298,10 +1146,8 @@ define @vnmsac_vv_nxv32i16_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv32i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv32i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32i16( %m, %y, %c, i32 %evl) ret %u } @@ -1315,10 +1161,8 @@ define @vnmsac_vx_nxv32i16_ta( %a, i16 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv32i16( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv32i16( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv32i16( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv32i16( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv32i16( %m, %y, %c, i32 %evl) ret %u } @@ -1335,10 +1179,8 @@ define @vnmsac_vv_nxv1i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i32( %m, %y, %c, i32 %evl) ret %u } @@ -1350,11 +1192,9 @@ define @vnmsac_vv_nxv1i32_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i32( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i32( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1367,10 +1207,8 @@ define @vnmsac_vx_nxv1i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i32( %m, %y, %c, i32 %evl) ret %u } @@ -1384,11 +1222,9 @@ define @vnmsac_vx_nxv1i32_unmasked( %a, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = 
insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i32( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i32( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1399,10 +1235,8 @@ define @vnmsac_vv_nxv1i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i32( %m, %y, %c, i32 %evl) ret %u } @@ -1416,10 +1250,8 @@ define @vnmsac_vx_nxv1i32_ta( %a, i32 %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i32( %m, %y, %c, i32 %evl) ret %u } @@ -1436,10 +1268,8 @@ define @vnmsac_vv_nxv2i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i32( %m, %y, %c, i32 %evl) ret %u } @@ -1451,11 +1281,9 @@ define @vnmsac_vv_nxv2i32_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i32( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i32( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1468,10 +1296,8 @@ define @vnmsac_vx_nxv2i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i32( %m, %y, %c, i32 %evl) ret %u } @@ -1485,11 +1311,9 @@ define @vnmsac_vx_nxv2i32_unmasked( %a, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i32( %c, %x, %allones, 
i32 %evl) - %u = call @llvm.vp.merge.nxv2i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i32( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1500,10 +1324,8 @@ define @vnmsac_vv_nxv2i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i32( %m, %y, %c, i32 %evl) ret %u } @@ -1517,10 +1339,8 @@ define @vnmsac_vx_nxv2i32_ta( %a, i32 %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i32( %m, %y, %c, i32 %evl) ret %u } @@ -1537,10 +1357,8 @@ define @vnmsac_vv_nxv4i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i32( %m, %y, %c, i32 %evl) ret %u } @@ -1552,11 +1370,9 @@ define @vnmsac_vv_nxv4i32_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i32( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i32( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1569,10 +1385,8 @@ define @vnmsac_vx_nxv4i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i32( %m, %y, %c, i32 %evl) ret %u } @@ -1586,11 +1400,9 @@ define @vnmsac_vx_nxv4i32_unmasked( %a, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i32( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i32( %c, %x, splat (i1 -1), i32 %evl) + 
%u = call @llvm.vp.merge.nxv4i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1601,10 +1413,8 @@ define @vnmsac_vv_nxv4i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i32( %m, %y, %c, i32 %evl) ret %u } @@ -1618,10 +1428,8 @@ define @vnmsac_vx_nxv4i32_ta( %a, i32 %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i32( %m, %y, %c, i32 %evl) ret %u } @@ -1638,10 +1446,8 @@ define @vnmsac_vv_nxv8i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i32( %m, %y, %c, i32 %evl) ret %u } @@ -1653,11 +1459,9 @@ define @vnmsac_vv_nxv8i32_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i32( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i32( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1670,10 +1474,8 @@ define @vnmsac_vx_nxv8i32( %a, i32 %b, poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i32( %m, %y, %c, i32 %evl) ret %u } @@ -1687,11 +1489,9 @@ define @vnmsac_vx_nxv8i32_unmasked( %a, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i32( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i32( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1702,10 +1502,8 @@ define @vnmsac_vv_nxv8i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, 
zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i32( %m, %y, %c, i32 %evl) ret %u } @@ -1719,10 +1517,8 @@ define @vnmsac_vx_nxv8i32_ta( %a, i32 %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i32( %m, %y, %c, i32 %evl) ret %u } @@ -1740,10 +1536,8 @@ define @vnmsac_vv_nxv16i32( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i32( %m, %y, %c, i32 %evl) ret %u } @@ -1756,11 +1550,9 @@ define @vnmsac_vv_nxv16i32_unmasked( %a, ; CHECK-NEXT: vnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i32( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i32( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1773,10 +1565,8 @@ define @vnmsac_vx_nxv16i32( %a, i32 %b, < ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv16i32( %m, %y, %c, i32 %evl) ret %u } @@ -1790,11 +1580,9 @@ define @vnmsac_vx_nxv16i32_unmasked( %a, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i32( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv16i32( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i32( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv16i32( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1806,10 +1594,8 @@ define @vnmsac_vv_nxv16i32_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call 
@llvm.vp.mul.nxv16i32( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i32( %m, %y, %c, i32 %evl) ret %u } @@ -1823,10 +1609,8 @@ define @vnmsac_vx_nxv16i32_ta( %a, i32 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv16i32( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv16i32( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv16i32( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv16i32( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv16i32( %m, %y, %c, i32 %evl) ret %u } @@ -1843,10 +1627,8 @@ define @vnmsac_vv_nxv1i64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i64( %m, %y, %c, i32 %evl) ret %u } @@ -1858,11 +1640,9 @@ define @vnmsac_vv_nxv1i64_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i64( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i64( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1890,10 +1670,8 @@ define @vnmsac_vx_nxv1i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv1i64( %m, %y, %c, i32 %evl) ret %u } @@ -1922,11 +1700,9 @@ define @vnmsac_vx_nxv1i64_unmasked( %a, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i64( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv1i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i64( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv1i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -1937,10 +1713,8 @@ define @vnmsac_vv_nxv1i64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call 
@llvm.vp.sub.nxv1i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i64( %m, %y, %c, i32 %evl) ret %u } @@ -1969,10 +1743,8 @@ define @vnmsac_vx_nxv1i64_ta( %a, i64 %b, < ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv1i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv1i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv1i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv1i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv1i64( %m, %y, %c, i32 %evl) ret %u } @@ -1989,10 +1761,8 @@ define @vnmsac_vv_nxv2i64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i64( %m, %y, %c, i32 %evl) ret %u } @@ -2004,11 +1774,9 @@ define @vnmsac_vv_nxv2i64_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i64( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i64( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -2036,10 +1804,8 @@ define @vnmsac_vx_nxv2i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv2i64( %m, %y, %c, i32 %evl) ret %u } @@ -2068,11 +1834,9 @@ define @vnmsac_vx_nxv2i64_unmasked( %a, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i64( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv2i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i64( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv2i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -2083,10 +1847,8 @@ define @vnmsac_vv_nxv2i64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i64( %m, %y, %c, i32 %evl) ret %u } @@ -2115,10 +1877,8 @@ define @vnmsac_vx_nxv2i64_ta( %a, i64 %b, < ; RV64-NEXT: ret 
%elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv2i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv2i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv2i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv2i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv2i64( %m, %y, %c, i32 %evl) ret %u } @@ -2135,10 +1895,8 @@ define @vnmsac_vv_nxv4i64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i64( %m, %y, %c, i32 %evl) ret %u } @@ -2150,11 +1908,9 @@ define @vnmsac_vv_nxv4i64_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i64( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i64( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -2182,10 +1938,8 @@ define @vnmsac_vx_nxv4i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv4i64( %m, %y, %c, i32 %evl) ret %u } @@ -2214,11 +1968,9 @@ define @vnmsac_vx_nxv4i64_unmasked( %a, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i64( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv4i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i64( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv4i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -2229,10 +1981,8 @@ define @vnmsac_vv_nxv4i64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv4i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i64( %m, %y, %c, i32 %evl) ret %u } @@ -2261,10 +2011,8 @@ define @vnmsac_vx_nxv4i64_ta( %a, i64 %b, < ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x 
= call @llvm.vp.mul.nxv4i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv4i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv4i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv4i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv4i64( %m, %y, %c, i32 %evl) ret %u } @@ -2282,10 +2030,8 @@ define @vnmsac_vv_nxv8i64( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i64( %m, %y, %c, i32 %evl) ret %u } @@ -2298,11 +2044,9 @@ define @vnmsac_vv_nxv8i64_unmasked( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i64( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i64( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -2330,10 +2074,8 @@ define @vnmsac_vx_nxv8i64( %a, i64 %b, poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.merge.nxv8i64( %m, %y, %c, i32 %evl) ret %u } @@ -2362,11 +2104,9 @@ define @vnmsac_vx_nxv8i64_unmasked( %a, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i64( %c, %x, %allones, i32 %evl) - %u = call @llvm.vp.merge.nxv8i64( %allones, %y, %c, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i64( %c, %x, splat (i1 -1), i32 %evl) + %u = call @llvm.vp.merge.nxv8i64( splat (i1 -1), %y, %c, i32 %evl) ret %u } @@ -2378,10 +2118,8 @@ define @vnmsac_vv_nxv8i64_ta( %a, poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %b, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %b, splat (i1 -1), i32 %evl) + %y = call @llvm.vp.sub.nxv8i64( %c, %x, splat (i1 -1), i32 %evl) %u = call @llvm.vp.select.nxv8i64( %m, %y, %c, i32 %evl) ret %u } @@ -2410,10 +2148,8 @@ define @vnmsac_vx_nxv8i64_ta( %a, i64 %b, < ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %splat = insertelement poison, i1 -1, i32 0 - %allones = shufflevector %splat, poison, zeroinitializer - %x = call @llvm.vp.mul.nxv8i64( %a, %vb, %allones, i32 %evl) - %y = call @llvm.vp.sub.nxv8i64( %c, %x, %allones, i32 %evl) + %x = call @llvm.vp.mul.nxv8i64( %a, %vb, splat (i1 -1), i32 %evl) + %y = call 
<vscale x 8 x i64> @llvm.vp.sub.nxv8i64(<vscale x 8 x i64> %c, <vscale x 8 x i64> %x, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
   %u = call <vscale x 8 x i64> @llvm.vp.select.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %y, <vscale x 8 x i64> %c, i32 %evl)
   ret <vscale x 8 x i64> %u
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
index 13b7a8e1991ac..7b97ca3c0090e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
@@ -119,9 +119,7 @@ define <vscale x 1 x i32> @vnsra_wi_i32_nxv1i32_sext(<vscale x 1 x i64> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
-  %vb = sext <vscale x 1 x i32> %splat to <vscale x 1 x i64>
+  %vb = sext <vscale x 1 x i32> splat (i32 15) to <vscale x 1 x i64>
   %x = ashr <vscale x 1 x i64> %va, %vb
   %y = trunc <vscale x 1 x i64> %x to <vscale x 1 x i32>
   ret <vscale x 1 x i32> %y
@@ -192,9 +190,7 @@ define <vscale x 2 x i32> @vnsra_wi_i32_nxv2i32_sext(<vscale x 2 x i64> %va) {
 ; CHECK-NEXT:    vnsrl.wi v10, v8, 15
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-  %vb = sext <vscale x 2 x i32> %splat to <vscale x 2 x i64>
+  %vb = sext <vscale x 2 x i32> splat (i32 15) to <vscale x 2 x i64>
   %x = ashr <vscale x 2 x i64> %va, %vb
   %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
   ret <vscale x 2 x i32> %y
@@ -265,9 +261,7 @@ define <vscale x 4 x i32> @vnsra_wi_i32_nxv4i32_sext(<vscale x 4 x i64> %va) {
 ; CHECK-NEXT:    vnsrl.wi v12, v8, 15
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-  %vb = sext <vscale x 4 x i32> %splat to <vscale x 4 x i64>
+  %vb = sext <vscale x 4 x i32> splat (i32 15) to <vscale x 4 x i64>
   %x = ashr <vscale x 4 x i64> %va, %vb
   %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %y
@@ -338,9 +332,7 @@ define <vscale x 8 x i32> @vnsra_wi_i32_nxv8i32_sext(<vscale x 8 x i64> %va) {
 ; CHECK-NEXT:    vnsrl.wi v16, v8, 15
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
-  %vb = sext <vscale x 8 x i32> %splat to <vscale x 8 x i64>
+  %vb = sext <vscale x 8 x i32> splat (i32 15) to <vscale x 8 x i64>
   %x = ashr <vscale x 8 x i64> %va, %vb
   %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i32>
   ret <vscale x 8 x i32> %y
@@ -406,9 +398,7 @@ define <vscale x 1 x i32> @vnsra_wi_i32_nxv1i32_zext(<vscale x 1 x i64> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
-  %vb = zext <vscale x 1 x i32> %splat to <vscale x 1 x i64>
+  %vb = zext <vscale x 1 x i32> splat (i32 15) to <vscale x 1 x i64>
   %x = ashr <vscale x 1 x i64> %va, %vb
   %y = trunc <vscale x 1 x i64> %x to <vscale x 1 x i32>
   ret <vscale x 1 x i32> %y
@@ -479,9 +469,7 @@ define <vscale x 2 x i32> @vnsra_wi_i32_nxv2i32_zext(<vscale x 2 x i64> %va) {
 ; CHECK-NEXT:    vnsrl.wi v10, v8, 15
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-  %vb = zext <vscale x 2 x i32> %splat to <vscale x 2 x i64>
+  %vb = zext <vscale x 2 x i32> splat (i32 15) to <vscale x 2 x i64>
   %x = ashr <vscale x 2 x i64> %va, %vb
   %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
   ret <vscale x 2 x i32> %y
@@ -552,9 +540,7 @@ define <vscale x 4 x i32> @vnsra_wi_i32_nxv4i32_zext(<vscale x 4 x i64> %va) {
 ; CHECK-NEXT:    vnsrl.wi v12, v8, 15
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-  %vb = zext <vscale x 4 x i32> %splat to <vscale x 4 x i64>
+  %vb = zext <vscale x 4 x i32> splat (i32 15) to <vscale x 4 x i64>
   %x = ashr <vscale x 4 x i64> %va, %vb
   %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %y
@@ -625,9 +611,7 @@ define <vscale x 8 x i32> @vnsra_wi_i32_nxv8i32_zext(<vscale x 8 x i64> %va) {
 ; CHECK-NEXT:    vnsrl.wi v16, v8, 15
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
-  %vb = zext <vscale x 8 x i32> %splat to <vscale x 8 x i64>
+  %vb = zext <vscale x 8 x i32> splat (i32 15) to <vscale x 8 x i64>
   %x = ashr <vscale x 8 x i64> %va, %vb
   %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i32>
   ret <vscale x 8 x i32> %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-vp.ll
index 18f743482a561..cb7a020d0b964 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsra-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-vp.ll
@@ -12,10 +12,8 @@ define @vsra_vv_nxv1i16( %a,
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %v = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   %vr = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i16> %vr
 }
@@ -27,11 +25,9 @@ define @vsra_vv_nxv1i16_unmasked( %a,
-  %head = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
-  %vr = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %v, <vscale x 1 x i1> %allones, i32 %evl)
+  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %v = call <vscale x 1 x i32> @llvm.vp.ashr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %vr = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %v, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   ret <vscale x 1 x i16> %vr
 }
 
@@ -45,10 +41,8 @@ define @vsra_vv_nxv1i64( %a,
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i32> %vr
 }
@@ -59,10 +53,8 @@ define @vsra_vv_nxv1i64_unmasked( %a,
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> %allones, i32 %evl)
-  %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> %allones, i32 %evl)
+  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   ret <vscale x 1 x i32> %vr
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
index 4c1e53cf9a01b..2b912662b4b94 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
@@ -119,9 +119,7 @@ define <vscale x 1 x i32> @vnsrl_wi_i32_nxv1i32_sext(<vscale x 1 x i64> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
-  %vb = sext <vscale x 1 x i32> %splat to <vscale x 1 x i64>
+  %vb = sext <vscale x 1 x i32> splat (i32 15) to <vscale x 1 x i64>
   %x = lshr <vscale x 1 x i64> %va, %vb
   %y = trunc <vscale x 1 x i64> %x to <vscale x 1 x i32>
   ret <vscale x 1 x i32> %y
@@ -192,9 +190,7 @@ define <vscale x 2 x i32> @vnsrl_wi_i32_nxv2i32_sext(<vscale x 2 x i64> %va) {
 ; CHECK-NEXT:    vnsrl.wi v10, v8, 15
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-  %vb = sext <vscale x 2 x i32> %splat to <vscale x 2 x i64>
+  %vb = sext <vscale x 2 x i32> splat (i32 15) to <vscale x 2 x i64>
   %x = lshr <vscale x 2 x i64> %va, %vb
   %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
   ret <vscale x 2 x i32> %y
@@ -265,9 +261,7 @@ define <vscale x 4 x i32> @vnsrl_wi_i32_nxv4i32_sext(<vscale x 4 x i64> %va) {
 ; CHECK-NEXT:    vnsrl.wi v12, v8, 15
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-  %vb = sext <vscale x 4 x i32> %splat to <vscale x 4 x i64>
+  %vb = sext <vscale x 4 x i32> splat (i32 15) to <vscale x 4 x i64>
   %x = lshr <vscale x 4 x i64> %va, %vb
   %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %y
@@ -338,9 +332,7 @@ define <vscale x 8 x i32> @vnsrl_wi_i32_nxv8i32_sext(<vscale x 8 x i64> %va) {
 ; CHECK-NEXT:    vnsrl.wi v16, v8, 15
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
-  %vb = sext <vscale x 8 x i32> %splat to <vscale x 8 x i64>
+  %vb = sext <vscale x 8 x i32> splat (i32 15) to <vscale x 8 x i64>
   %x = lshr <vscale x 8 x i64> %va, %vb
   %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i32>
   ret <vscale x 8 x i32> %y
@@ -406,9 +398,7 @@ define <vscale x 1 x i32> @vnsrl_wi_i32_nxv1i32_zext(<vscale x 1 x i64> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vnsrl.wi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
-  %vb = zext <vscale x 1 x i32> %splat to <vscale x 1 x i64>
+  %vb = zext <vscale x 1 x i32> splat (i32 15) to <vscale x 1 x i64>
   %x = lshr <vscale x 1 x i64> %va, %vb
   %y = trunc <vscale x 1 x i64> %x to <vscale x 1 x i32>
   ret <vscale x 1 x i32> %y
@@ -479,9 +469,7 @@ define <vscale x 2 x i32> @vnsrl_wi_i32_nxv2i32_zext(<vscale x 2 x i64> %va) {
 ; CHECK-NEXT:    vnsrl.wi v10, v8, 15
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
-  %vb = zext <vscale x 2 x i32> %splat to <vscale x 2 x i64>
+  %vb = zext <vscale x 2 x i32> splat (i32 15) to <vscale x 2 x i64>
   %x = lshr <vscale x 2 x i64> %va, %vb
   %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
   ret <vscale x 2 x i32> %y
@@ -552,9 +540,7 @@ define <vscale x 4 x i32> @vnsrl_wi_i32_nxv4i32_zext(<vscale x 4 x i64> %va) {
 ; CHECK-NEXT:    vnsrl.wi v12, v8, 15
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-  %vb = zext <vscale x 4 x i32> %splat to <vscale x 4 x i64>
+  %vb = zext <vscale x 4 x i32> splat (i32 15) to <vscale x 4 x i64>
   %x = lshr <vscale x 4 x i64> %va, %vb
   %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i32>
   ret <vscale x 4 x i32> %y
@@ -625,9 +611,7 @@ define <vscale x 8 x i32> @vnsrl_wi_i32_nxv8i32_zext(<vscale x 8 x i64> %va) {
 ; CHECK-NEXT:    vnsrl.wi v16, v8, 15
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i32> poison, i32 15, i32 0
-  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
-  %vb = zext <vscale x 8 x i32> %splat to <vscale x 8 x i64>
+  %vb = zext <vscale x 8 x i32> splat (i32 15) to <vscale x 8 x i64>
   %x = lshr <vscale x 8 x i64> %va, %vb
   %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i32>
   ret <vscale x 8 x i32> %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-vp.ll
index 059d5bf3e095b..e6e86011745b4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsrl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-vp.ll
@@ -12,10 +12,8 @@ define @vsra_vv_nxv1i16( %a,
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x i32> @llvm.vp.lshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %v = call <vscale x 1 x i32> @llvm.vp.lshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   %vr = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %v, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i16> %vr
 }
@@ -27,11 +25,9 @@ define @vsra_vv_nxv1i16_unmasked( %a,
-  %head = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x i32> @llvm.vp.lshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
-  %vr = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %v, <vscale x 1 x i1> %allones, i32 %evl)
+  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %v = call <vscale x 1 x i32> @llvm.vp.lshr.nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %bext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %vr = call <vscale x 1 x i16> @llvm.vp.trunc.nxv1i16.nxv1i32(<vscale x 1 x i32> %v, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   ret <vscale x 1 x i16> %vr
 }
 
@@ -45,10 +41,8 @@ define @vsra_vv_nxv1i64( %a,
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> %allones, i32 %evl)
+  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i32> %vr
 }
@@ -59,10 +53,8 @@ define @vsra_vv_nxv1i64_unmasked( %a,
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> %allones, i32 %evl)
-  %v = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> %allones, i32 %evl)
-  %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> %allones, i32 %evl)
+  %bext = call <vscale x 1 x i64> @llvm.vp.sext.nxv1i64.nxv1i32(<vscale x 1 x i32> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %v = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %bext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %vr = call <vscale x 1 x i32> @llvm.vp.trunc.nxv1i32.nxv1i64(<vscale x 1 x i64> %v, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
   ret <vscale x 1 x i32> %vr
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
index 356467698d941..fbbd71cb35445 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
@@ -20,9 +20,7 @@ define <vscale x 1 x i8> @vor_vx_nxv1i8_0(<vscale x 1 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, -12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i8> poison, i8 -12, i32 0
-  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
-  %vc = or <vscale x 1 x i8> %va, %splat
+  %vc = or <vscale x 1 x i8> %va, splat (i8 -12)
   ret <vscale x 1 x i8> %vc
 }
 
@@ -32,9 +30,7 @@ define <vscale x 1 x i8> @vor_vx_nxv1i8_1(<vscale x 1 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i8> poison, i8 15, i32 0
-  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
-  %vc = or <vscale x 1 x i8> %va, %splat
+  %vc = or <vscale x 1 x i8> %va, splat (i8 15)
   ret <vscale x 1 x i8> %vc
 }
 
@@ -45,9 +41,7 @@ define <vscale x 1 x i8> @vor_vx_nxv1i8_2(<vscale x 1 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vor.vx v8, v8, a0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i8> poison, i8 16, i32 0
-  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
-  %vc = or <vscale x 1 x i8> %va, %splat
+  %vc = or <vscale x 1 x i8> %va, splat (i8 16)
   ret <vscale x 1 x i8> %vc
 }
 
@@ -69,9 +63,7 @@ define <vscale x 2 x i8> @vor_vx_nxv2i8_0(<vscale x 2 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, -12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i8> poison, i8 -12, i32 0
-  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
-  %vc = or <vscale x 2 x i8> %va, %splat
+  %vc = or <vscale x 2 x i8> %va, splat (i8 -12)
   ret <vscale x 2 x i8> %vc
 }
 
@@ -81,9 +73,7 @@ define <vscale x 2 x i8> @vor_vx_nxv2i8_1(<vscale x 2 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i8> poison, i8 15, i32 0
-  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
-  %vc = or <vscale x 2 x i8> %va, %splat
+  %vc = or <vscale x 2 x i8> %va, splat (i8 15)
   ret <vscale x 2 x i8> %vc
 }
 
@@ -94,9 +84,7 @@ define <vscale x 2 x i8> @vor_vx_nxv2i8_2(<vscale x 2 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vor.vx v8, v8, a0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i8> poison, i8 16, i32 0
-  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
-  %vc = or <vscale x 2 x i8> %va, %splat
+  %vc = or <vscale x 2 x i8> %va, splat (i8 16)
   ret <vscale x 2 x i8> %vc
 }
 
@@ -118,9 +106,7 @@ define <vscale x 4 x i8> @vor_vx_nxv4i8_0(<vscale x 4 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, -12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i8> poison, i8 -12, i32 0
-  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
-  %vc = or <vscale x 4 x i8> %va, %splat
+  %vc = or <vscale x 4 x i8> %va, splat (i8 -12)
   ret <vscale x 4 x i8> %vc
 }
 
@@ -130,9 +116,7 @@ define <vscale x 4 x i8> @vor_vx_nxv4i8_1(<vscale x 4 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i8> poison, i8 15, i32 0
-  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
-  %vc = or <vscale x 4 x i8> %va, %splat
+  %vc = or <vscale x 4 x i8> %va, splat (i8 15)
   ret <vscale x 4 x i8> %vc
 }
 
@@ -143,9 +127,7 @@ define <vscale x 4 x i8> @vor_vx_nxv4i8_2(<vscale x 4 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vor.vx v8, v8, a0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i8> poison, i8 16, i32 0
-  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
-  %vc = or <vscale x 4 x i8> %va, %splat
+  %vc = or <vscale x 4 x i8> %va, splat (i8 16)
   ret <vscale x 4 x i8> %vc
 }
 
@@ -167,9 +149,7 @@ define <vscale x 8 x i8> @vor_vx_nxv8i8_0(<vscale x 8 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, -12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i8> poison, i8 -12, i32 0
-  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
-  %vc = or <vscale x 8 x i8> %va, %splat
+  %vc = or <vscale x 8 x i8> %va, splat (i8 -12)
   ret <vscale x 8 x i8> %vc
 }
 
@@ -179,9 +159,7 @@ define <vscale x 8 x i8> @vor_vx_nxv8i8_1(<vscale x 8 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i8> poison, i8 15, i32 0
-  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
-  %vc = or <vscale x 8 x i8> %va, %splat
+  %vc = or <vscale x 8 x i8> %va, splat (i8 15)
   ret <vscale x 8 x i8> %vc
 }
 
@@ -192,9 +170,7 @@ define <vscale x 8 x i8> @vor_vx_nxv8i8_2(<vscale x 8 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vor.vx v8, v8, a0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i8> poison, i8 16, i32 0
-  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
-  %vc = or <vscale x 8 x i8> %va, %splat
+  %vc = or <vscale x 8 x i8> %va, splat (i8 16)
   ret <vscale x 8 x i8> %vc
 }
 
@@ -216,9 +192,7 @@ define <vscale x 16 x i8> @vor_vx_nxv16i8_0(<vscale x 16 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, -12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 16 x i8> poison, i8 -12, i32 0
-  %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-  %vc = or <vscale x 16 x i8> %va, %splat
+  %vc = or <vscale x 16 x i8> %va, splat (i8 -12)
   ret <vscale x 16 x i8> %vc
 }
 
@@ -228,9 +202,7 @@ define <vscale x 16 x i8> @vor_vx_nxv16i8_1(<vscale x 16 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 16 x i8> poison, i8 15, i32 0
-  %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-  %vc = or <vscale x 16 x i8> %va, %splat
+  %vc = or <vscale x 16 x i8> %va, splat (i8 15)
   ret <vscale x 16 x i8> %vc
 }
 
@@ -241,9 +213,7 @@ define <vscale x 16 x i8> @vor_vx_nxv16i8_2(<vscale x 16 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vor.vx v8, v8, a0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 16 x i8> poison, i8 16, i32 0
-  %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
-  %vc = or <vscale x 16 x i8> %va, %splat
+  %vc = or <vscale x 16 x i8> %va, splat (i8 16)
   ret <vscale x 16 x i8> %vc
 }
 
@@ -265,9 +235,7 @@ define <vscale x 32 x i8> @vor_vx_nxv32i8_0(<vscale x 32 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, -12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 32 x i8> poison, i8 -12, i32 0
-  %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
-  %vc = or <vscale x 32 x i8> %va, %splat
+  %vc = or <vscale x 32 x i8> %va, splat (i8 -12)
   ret <vscale x 32 x i8> %vc
 }
 
@@ -277,9 +245,7 @@ define <vscale x 32 x i8> @vor_vx_nxv32i8_1(<vscale x 32 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 32 x i8> poison, i8 15, i32 0
-  %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
-  %vc = or <vscale x 32 x i8> %va, %splat
+  %vc = or <vscale x 32 x i8> %va, splat (i8 15)
   ret <vscale x 32 x i8> %vc
 }
 
@@ -290,9 +256,7 @@ define <vscale x 32 x i8> @vor_vx_nxv32i8_2(<vscale x 32 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vor.vx v8, v8, a0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 32 x i8> poison, i8 16, i32 0
-  %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
-  %vc = or <vscale x 32 x i8> %va, %splat
+  %vc = or <vscale x 32 x i8> %va, splat (i8 16)
   ret <vscale x 32 x i8> %vc
 }
 
@@ -314,9 +278,7 @@ define <vscale x 64 x i8> @vor_vx_nxv64i8_0(<vscale x 64 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, -12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 64 x i8> poison, i8 -12, i32 0
-  %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
-  %vc = or <vscale x 64 x i8> %va, %splat
+  %vc = or <vscale x 64 x i8> %va, splat (i8 -12)
   ret <vscale x 64 x i8> %vc
 }
 
@@ -326,9 +288,7 @@ define <vscale x 64 x i8> @vor_vx_nxv64i8_1(<vscale x 64 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m8, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 64 x i8> poison, i8 15, i32 0
-  %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
-  %vc = or <vscale x 64 x i8> %va, %splat
+  %vc = or <vscale x 64 x i8> %va, splat (i8 15)
   ret <vscale x 64 x i8> %vc
 }
 
@@ -339,9 +299,7 @@ define <vscale x 64 x i8> @vor_vx_nxv64i8_2(<vscale x 64 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m8, ta, ma
 ; CHECK-NEXT:    vor.vx v8, v8, a0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 64 x i8> poison, i8 16, i32 0
-  %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
-  %vc = or <vscale x 64 x i8> %va, %splat
+  %vc = or <vscale x 64 x i8> %va, splat (i8 16)
   ret <vscale x 64 x i8> %vc
 }
 
@@ -363,9 +321,7 @@ define <vscale x 1 x i16> @vor_vx_nxv1i16_0(<vscale x 1 x i16> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, -12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i16> poison, i16 -12, i32 0
-  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
-  %vc = or <vscale x 1 x i16> %va, %splat
+  %vc = or <vscale x 1 x i16> %va, splat (i16 -12)
   ret <vscale x 1 x i16> %vc
 }
 
@@ -375,9 +331,7 @@ define <vscale x 1 x i16> @vor_vx_nxv1i16_1(<vscale x 1 x i16> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i16> poison, i16 15, i32 0
-  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
-  %vc = or <vscale x 1 x i16> %va, %splat
+  %vc = or <vscale x 1 x i16> %va, splat (i16 15)
   ret <vscale x 1 x i16> %vc
 }
 
@@ -388,9 +342,7 @@ define <vscale x 1 x i16> @vor_vx_nxv1i16_2(<vscale x 1 x i16> %va) {
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
 ; CHECK-NEXT:    vor.vx v8, v8, a0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i16> poison, i16 16, i32 0
-  %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
-  %vc = or <vscale x 1 x i16> %va, %splat
+  %vc = or <vscale x 1 x i16> %va, splat (i16 16)
   ret <vscale x 1 x i16> %vc
 }
 
@@ -412,9 +364,7 @@ define <vscale x 2 x i16> @vor_vx_nxv2i16_0(<vscale x 2 x i16> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, -12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i16> poison, i16 -12, i32 0
-  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
-  %vc = or <vscale x 2 x i16> %va, %splat
+  %vc = or <vscale x 2 x i16> %va, splat (i16 -12)
   ret <vscale x 2 x i16> %vc
 }
 
@@ -424,9 +374,7 @@ define <vscale x 2 x i16> @vor_vx_nxv2i16_1(<vscale x 2 x i16> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i16> poison, i16 15, i32 0
-  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
-  %vc = or <vscale x 2 x i16> %va, %splat
+  %vc = or <vscale x 2 x i16> %va, splat (i16 15)
   ret <vscale x 2 x i16> %vc
 }
 
@@ -437,9 +385,7 @@ define <vscale x 2 x i16> @vor_vx_nxv2i16_2(<vscale x 2 x i16> %va) {
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
 ; CHECK-NEXT:    vor.vx v8, v8, a0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i16> poison, i16 16, i32 0
-  %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
-  %vc = or <vscale x 2 x i16> %va, %splat
+  %vc = or <vscale x 2 x i16> %va, splat (i16 16)
   ret <vscale x 2 x i16> %vc
 }
 
@@ -461,9 +407,7 @@ define <vscale x 4 x i16> @vor_vx_nxv4i16_0(<vscale x 4 x i16> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, -12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i16> poison, i16 -12, i32 0
-  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
-  %vc = or <vscale x 4 x i16> %va, %splat
+  %vc = or <vscale x 4 x i16> %va, splat (i16 -12)
   ret <vscale x 4 x i16> %vc
 }
 
@@ -473,9 +417,7 @@ define <vscale x 4 x i16> @vor_vx_nxv4i16_1(<vscale x 4 x i16> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i16> poison, i16 15, i32 0
-  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
-  %vc = or <vscale x 4 x i16> %va, %splat
+  %vc = or <vscale x 4 x i16> %va, splat (i16 15)
   ret <vscale x 4 x i16> %vc
 }
 
@@ -486,9 +428,7 @@ define <vscale x 4 x i16> @vor_vx_nxv4i16_2(<vscale x 4 x i16> %va) {
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vor.vx v8, v8, a0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i16> poison, i16 16, i32 0
-  %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
-  %vc = or <vscale x 4 x i16> %va, %splat
+  %vc = or <vscale x 4 x i16> %va, splat (i16 16)
   ret <vscale x 4 x i16> %vc
 }
 
@@ -510,9 +450,7 @@ define <vscale x 8 x i16> @vor_vx_nxv8i16_0(<vscale x 8 x i16> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, -12
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i16> poison, i16 -12, i32 0
-  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-  %vc = or <vscale x 8 x i16> %va, %splat
+  %vc = or <vscale x 8 x i16> %va, splat (i16 -12)
   ret <vscale x 8 x i16> %vc
 }
 
@@ -522,9 +460,7 @@ define <vscale x 8 x i16> @vor_vx_nxv8i16_1(<vscale x 8 x i16> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vor.vi v8, v8, 15
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i16> poison, i16 15, i32 0
-  %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
-  %vc = or <vscale x 8 x i16> %va, %splat
+  %vc = or <vscale x 8 x i16> %va, splat (i16 15)
   ret <vscale x 8 x i16> %vc
 }
 
@@ -535,9 +471,7 @@ define <vscale x 8 x i16> @vor_vx_nxv8i16_2(<vscale x 8 x i16> %va) {
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
 ;
CHECK-NEXT: vor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i16 16) ret %vc } @@ -559,9 +493,7 @@ define @vor_vx_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, -12 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -12, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i16 -12) ret %vc } @@ -571,9 +503,7 @@ define @vor_vx_nxv16i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i16 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i16 15) ret %vc } @@ -584,9 +514,7 @@ define @vor_vx_nxv16i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i16 16) ret %vc } @@ -608,9 +536,7 @@ define @vor_vx_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, -12 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -12, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i16 -12) ret %vc } @@ -620,9 +546,7 @@ define @vor_vx_nxv32i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i16 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i16 15) ret %vc } @@ -633,9 +557,7 @@ define @vor_vx_nxv32i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i16 16) ret %vc } @@ -657,9 +579,7 @@ define @vor_vx_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, -12 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -12, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 -12) ret %vc } @@ -669,9 +589,7 @@ define @vor_vx_nxv1i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i32 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 15) ret %vc } @@ -682,9 +600,7 @@ define @vor_vx_nxv1i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 16) ret %vc } @@ -706,9 +622,7 @@ define @vor_vx_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, -12 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -12, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 -12) ret %vc } @@ -718,9 +632,7 @@ define @vor_vx_nxv2i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 15 ; CHECK-NEXT: 
ret - %head = insertelement poison, i32 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 15) ret %vc } @@ -731,9 +643,7 @@ define @vor_vx_nxv2i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 16) ret %vc } @@ -755,9 +665,7 @@ define @vor_vx_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, -12 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -12, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 -12) ret %vc } @@ -767,9 +675,7 @@ define @vor_vx_nxv4i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i32 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 15) ret %vc } @@ -780,9 +686,7 @@ define @vor_vx_nxv4i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 16) ret %vc } @@ -804,9 +708,7 @@ define @vor_vx_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, -12 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -12, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 -12) ret %vc } @@ -816,9 +718,7 @@ define @vor_vx_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i32 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 15) ret %vc } @@ -829,9 +729,7 @@ define @vor_vx_nxv8i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 16) ret %vc } @@ -853,9 +751,7 @@ define @vor_vx_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, -12 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -12, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 -12) ret %vc } @@ -865,9 +761,7 @@ define @vor_vx_nxv16i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i32 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 15) ret %vc } @@ -878,9 +772,7 @@ define @vor_vx_nxv16i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i32 16) ret %vc } @@ -915,9 +807,7 @@ define @vor_vx_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, -12 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -12, i32 0 - 
%splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 -12) ret %vc } @@ -927,9 +817,7 @@ define @vor_vx_nxv1i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 15) ret %vc } @@ -940,9 +828,7 @@ define @vor_vx_nxv1i64_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 16) ret %vc } @@ -977,9 +863,7 @@ define @vor_vx_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, -12 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -12, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 -12) ret %vc } @@ -989,9 +873,7 @@ define @vor_vx_nxv2i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 15) ret %vc } @@ -1002,9 +884,7 @@ define @vor_vx_nxv2i64_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 16) ret %vc } @@ -1039,9 +919,7 @@ define @vor_vx_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, -12 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -12, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 -12) ret %vc } @@ -1051,9 +929,7 @@ define @vor_vx_nxv4i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 15) ret %vc } @@ -1064,9 +940,7 @@ define @vor_vx_nxv4i64_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 16) ret %vc } @@ -1101,9 +975,7 @@ define @vor_vx_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, -12 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -12, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 -12) ret %vc } @@ -1113,9 +985,7 @@ define @vor_vx_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 15, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 15) ret %vc } @@ -1126,9 +996,7 @@ define @vor_vx_nxv8i64_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, 
zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 16) ret %vc } @@ -1139,9 +1007,7 @@ define @vor_vx_nxv8i64_3( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv.v.i v8, -1 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = or %va, %splat + %vc = or %va, splat (i64 -1) ret %vc } @@ -1206,9 +1072,7 @@ define @vor_vi_mask_nxv8i32( %va, poison, i32 7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %splat, zeroinitializer + %vs = select %mask, splat (i32 7), zeroinitializer %vc = or %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll index ccab5e40d450f..b9388e5879704 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll @@ -36,9 +36,7 @@ define @vor_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -62,9 +60,7 @@ define @vor_vx_nxv1i8_unmasked( %va, i8 %b, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -74,9 +70,7 @@ define @vor_vi_nxv1i8( %va, ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -86,11 +80,7 @@ define @vor_vi_nxv1i8_unmasked( %va, i32 zero ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -112,9 +102,7 @@ define @vor_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -138,9 +126,7 @@ define @vor_vx_nxv2i8_unmasked( %va, i8 %b, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -150,9 +136,7 @@ define @vor_vi_nxv2i8( %va, ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -162,11 +146,7 @@ define 
@vor_vi_nxv2i8_unmasked( %va, i32 zero ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -188,9 +168,7 @@ define @vor_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -214,9 +192,7 @@ define @vor_vx_nxv4i8_unmasked( %va, i8 %b, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -226,9 +202,7 @@ define @vor_vi_nxv4i8( %va, ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -238,11 +212,7 @@ define @vor_vi_nxv4i8_unmasked( %va, i32 zero ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -264,9 +234,7 @@ define @vor_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -290,9 +258,7 @@ define @vor_vx_nxv8i8_unmasked( %va, i8 %b, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -302,9 +268,7 @@ define @vor_vi_nxv8i8( %va, ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5, v0.t ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -314,11 +278,7 @@ define @vor_vi_nxv8i8_unmasked( %va, i32 zero ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i8( %va, %vb, %m, i32 %evl) 
+ %v = call @llvm.vp.or.nxv8i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -340,9 +300,7 @@ define @vor_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -366,9 +324,7 @@ define @vor_vx_nxv16i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -378,9 +334,7 @@ define @vor_vi_nxv16i8( %va, poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv16i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -390,11 +344,7 @@ define @vor_vi_nxv16i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv16i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -416,9 +366,7 @@ define @vor_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -442,9 +390,7 @@ define @vor_vx_nxv32i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -454,9 +400,7 @@ define @vor_vi_nxv32i8( %va, poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv32i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -466,11 +410,7 @@ define @vor_vi_nxv32i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv32i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -492,9 +432,7 @@ define @vor_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -518,9 +456,7 @@ define @vor_vx_nxv64i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = 
call @llvm.vp.or.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -530,9 +466,7 @@ define @vor_vi_nxv64i8( %va, poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv64i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -542,11 +476,7 @@ define @vor_vi_nxv64i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv64i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -568,9 +498,7 @@ define @vor_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -594,9 +522,7 @@ define @vor_vx_nxv1i16_unmasked( %va, i16 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -606,9 +532,7 @@ define @vor_vi_nxv1i16( %va, poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -618,11 +542,7 @@ define @vor_vi_nxv1i16_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i16( %va, splat (i16 5), splat (i1 true), i32 %evl) ret %v } @@ -644,9 +564,7 @@ define @vor_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -670,9 +588,7 @@ define @vor_vx_nxv2i16_unmasked( %va, i16 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -682,9 +598,7 @@ define @vor_vi_nxv2i16( %va, poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -694,11 +608,7 @@ define @vor_vi_nxv2i16_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, 
poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i16( %va, splat (i16 5), splat (i1 true), i32 %evl) ret %v } @@ -720,9 +630,7 @@ define @vor_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -746,9 +654,7 @@ define @vor_vx_nxv4i16_unmasked( %va, i16 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -758,9 +664,7 @@ define @vor_vi_nxv4i16( %va, poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -770,11 +674,7 @@ define @vor_vi_nxv4i16_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i16( %va, splat (i16 5), splat (i1 true), i32 %evl) ret %v } @@ -796,9 +696,7 @@ define @vor_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -822,9 +720,7 @@ define @vor_vx_nxv8i16_unmasked( %va, i16 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -834,9 +730,7 @@ define @vor_vi_nxv8i16( %va, poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -846,11 +740,7 @@ define @vor_vi_nxv8i16_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i16( %va, splat (i16 5), splat (i1 true), i32 %evl) ret %v } @@ -872,9 +762,7 @@ define @vor_vv_nxv16i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -898,9 +786,7 @@ define @vor_vx_nxv16i16_unmasked( %va, i1 ; CHECK-NEXT: ret %elt.head = 
insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -910,9 +796,7 @@ define @vor_vi_nxv16i16( %va, poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv16i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -922,11 +806,7 @@ define @vor_vi_nxv16i16_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv16i16( %va, splat (i16 5), splat (i1 true), i32 %evl) ret %v } @@ -948,9 +828,7 @@ define @vor_vv_nxv32i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -974,9 +852,7 @@ define @vor_vx_nxv32i16_unmasked( %va, i1 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -986,9 +862,7 @@ define @vor_vi_nxv32i16( %va, poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv32i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -998,11 +872,7 @@ define @vor_vi_nxv32i16_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv32i16( %va, splat (i16 5), splat (i1 true), i32 %evl) ret %v } @@ -1024,9 +894,7 @@ define @vor_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1050,9 +918,7 @@ define @vor_vx_nxv1i32_unmasked( %va, i32 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1062,9 +928,7 @@ define @vor_vi_nxv1i32( %va, poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1074,11 +938,7 @@ define 
@vor_vi_nxv1i32_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i32( %va, splat (i32 5), splat (i1 true), i32 %evl) ret %v } @@ -1100,9 +960,7 @@ define @vor_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1138,9 +996,7 @@ define @vor_vx_nxv2i32_unmasked( %va, i32 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1152,9 +1008,7 @@ define @vor_vx_nxv2i32_unmasked_commute( %v ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -1164,9 +1018,7 @@ define @vor_vi_nxv2i32( %va, poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1176,11 +1028,7 @@ define @vor_vi_nxv2i32_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i32( %va, splat (i32 5), splat (i1 true), i32 %evl) ret %v } @@ -1202,9 +1050,7 @@ define @vor_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1228,9 +1074,7 @@ define @vor_vx_nxv4i32_unmasked( %va, i32 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1240,9 +1084,7 @@ define @vor_vi_nxv4i32( %va, poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1252,11 +1094,7 @@ define @vor_vi_nxv4i32_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = 
shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i32( %va, splat (i32 5), splat (i1 true), i32 %evl) ret %v } @@ -1278,9 +1116,7 @@ define @vor_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1304,9 +1140,7 @@ define @vor_vx_nxv8i32_unmasked( %va, i32 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1316,9 +1150,7 @@ define @vor_vi_nxv8i32( %va, poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1328,11 +1160,7 @@ define @vor_vi_nxv8i32_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i32( %va, splat (i32 5), splat (i1 true), i32 %evl) ret %v } @@ -1354,9 +1182,7 @@ define @vor_vv_nxv10i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv10i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv10i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1380,9 +1206,7 @@ define @vor_vx_nxv10i32_unmasked( %va, i3 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv10i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv10i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1392,9 +1216,7 @@ define @vor_vi_nxv10i32( %va, poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv10i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv10i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1404,11 +1226,7 @@ define @vor_vi_nxv10i32_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv10i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv10i32( %va, splat (i32 5), splat (i1 true), i32 %evl) ret %v } @@ -1430,9 +1248,7 @@ define @vor_vv_nxv16i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1456,9 +1272,7 @@ define 
@vor_vx_nxv16i32_unmasked( %va, i3 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1468,9 +1282,7 @@ define @vor_vi_nxv16i32( %va, poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv16i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1480,11 +1292,7 @@ define @vor_vi_nxv16i32_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv16i32( %va, splat (i32 5), splat (i1 true), i32 %evl) ret %v } @@ -1506,9 +1314,7 @@ define @vor_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1560,9 +1366,7 @@ define @vor_vx_nxv1i64_unmasked( %va, i64 % ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1572,9 +1376,7 @@ define @vor_vi_nxv1i64( %va, poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -1584,11 +1386,7 @@ define @vor_vi_nxv1i64_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv1i64( %va, splat (i64 5), splat (i1 true), i32 %evl) ret %v } @@ -1610,9 +1408,7 @@ define @vor_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1664,9 +1460,7 @@ define @vor_vx_nxv2i64_unmasked( %va, i64 % ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1676,9 +1470,7 @@ define @vor_vi_nxv2i64( %va, poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i64( %va, 
splat (i64 5), %m, i32 %evl) ret %v } @@ -1688,11 +1480,7 @@ define @vor_vi_nxv2i64_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv2i64( %va, splat (i64 5), splat (i1 true), i32 %evl) ret %v } @@ -1714,9 +1502,7 @@ define @vor_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1768,9 +1554,7 @@ define @vor_vx_nxv4i64_unmasked( %va, i64 % ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1780,9 +1564,7 @@ define @vor_vi_nxv4i64( %va, poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -1792,11 +1574,7 @@ define @vor_vi_nxv4i64_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv4i64( %va, splat (i64 5), splat (i1 true), i32 %evl) ret %v } @@ -1818,9 +1596,7 @@ define @vor_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1872,9 +1648,7 @@ define @vor_vx_nxv8i64_unmasked( %va, i64 % ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1884,9 +1658,7 @@ define @vor_vi_nxv8i64( %va, poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -1896,10 +1668,6 @@ define @vor_vi_nxv8i64_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vor.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.or.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.or.nxv8i64( %va, splat (i64 5), splat (i1 true), i32 %evl) ret %v } diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll index cc13a97ddce0e..136f6e7bc9990 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll @@ -26,10 +26,8 @@ define <2 x double> @test_vp_reverse_v2f64(<2 x double> %src, i32 zeroext %evl) ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> undef, i1 1, i32 0 - %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer - %dst = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> %src, <2 x i1> %allones, i32 %evl) + %dst = call <2 x double> @llvm.experimental.vp.reverse.v2f64(<2 x double> %src, <2 x i1> splat (i1 1), i32 %evl) ret <2 x double> %dst } @@ -57,10 +55,8 @@ define <4 x float> @test_vp_reverse_v4f32(<4 x float> %src, i32 zeroext %evl) { ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> undef, i1 1, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer - %dst = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> %src, <4 x i1> %allones, i32 %evl) + %dst = call <4 x float> @llvm.experimental.vp.reverse.v4f32(<4 x float> %src, <4 x i1> splat (i1 1), i32 %evl) ret <4 x float> %dst } diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll index adc1ca6c85868..b235990ab5dd0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll @@ -25,10 +25,8 @@ define @test_vp_reverse_nxv1f64( %src ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv1f64( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv1f64( %src, splat (i1 1), i32 %evl) ret %dst } @@ -56,10 +54,8 @@ define @test_vp_reverse_nxv2f32( %src, ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv2f32( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv2f32( %src, splat (i1 1), i32 %evl) ret %dst } @@ -87,10 +83,8 @@ define @test_vp_reverse_nxv2f64( %src ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv2f64( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv2f64( %src, splat (i1 1), i32 %evl) ret %dst } @@ -118,10 +112,8 @@ define @test_vp_reverse_nxv4f32( %src, ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv4f32( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv4f32( %src, splat (i1 1), i32 %evl) ret %dst } @@ -149,10 +141,8 @@ define @test_vp_reverse_nxv4f64( %src ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, 
i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv4f64( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv4f64( %src, splat (i1 1), i32 %evl) ret %dst } @@ -180,10 +170,8 @@ define @test_vp_reverse_nxv8f32( %src, ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv8f32( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv8f32( %src, splat (i1 1), i32 %evl) ret %dst } @@ -211,10 +199,8 @@ define @test_vp_reverse_nxv8f64( %src ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv8f64( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv8f64( %src, splat (i1 1), i32 %evl) ret %dst } @@ -242,10 +228,8 @@ define @test_vp_reverse_nxv16f32( %sr ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv16f32( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv16f32( %src, splat (i1 1), i32 %evl) ret %dst } diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll index d7fc8838f430d..27f16f0285e12 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll @@ -26,10 +26,8 @@ define <2 x i64> @test_vp_reverse_v2i64(<2 x i64> %src, i32 zeroext %evl) { ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> undef, i1 1, i32 0 - %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer - %dst = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> %src, <2 x i1> %allones, i32 %evl) + %dst = call <2 x i64> @llvm.experimental.vp.reverse.v2i64(<2 x i64> %src, <2 x i1> splat (i1 1), i32 %evl) ret <2 x i64> %dst } @@ -57,10 +55,8 @@ define <4 x i32> @test_vp_reverse_v4i32(<4 x i32> %src, i32 zeroext %evl) { ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> undef, i1 1, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer - %dst = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> %src, <4 x i1> %allones, i32 %evl) + %dst = call <4 x i32> @llvm.experimental.vp.reverse.v4i32(<4 x i32> %src, <4 x i1> splat (i1 1), i32 %evl) ret <4 x i32> %dst } @@ -88,10 +84,8 @@ define <8 x i16> @test_vp_reverse_v8i16(<8 x i16> %src, i32 zeroext %evl) { ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> undef, i1 1, i32 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer - %dst = call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> %src, <8 x i1> %allones, i32 %evl) + %dst = call <8 x i16> @llvm.experimental.vp.reverse.v8i16(<8 x i16> %src, <8 x i1> splat (i1 1), i32 %evl) ret <8 x i16> %dst } @@ -121,10 +115,8 @@ define <16 x i8> @test_vp_reverse_v16i8(<16 x i8> %src, i32 zeroext 
%evl) { ; CHECK-NEXT: vrgatherei16.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> undef, i1 1, i32 0 - %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer - %dst = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> %src, <16 x i1> %allones, i32 %evl) + %dst = call <16 x i8> @llvm.experimental.vp.reverse.v16i8(<16 x i8> %src, <16 x i1> splat (i1 1), i32 %evl) ret <16 x i8> %dst } diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll index 47df1b005a0f8..8b1660283cb7d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll @@ -25,10 +25,8 @@ define @test_vp_reverse_nxv1i64( %src, i32 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv1i64( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv1i64( %src, splat (i1 1), i32 %evl) ret %dst } @@ -56,10 +54,8 @@ define @test_vp_reverse_nxv2i32( %src, i32 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv2i32( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv2i32( %src, splat (i1 1), i32 %evl) ret %dst } @@ -87,10 +83,8 @@ define @test_vp_reverse_nxv4i16( %src, i32 ; CHECK-NEXT: vrgather.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv4i16( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv4i16( %src, splat (i1 1), i32 %evl) ret %dst } @@ -120,10 +114,8 @@ define @test_vp_reverse_nxv8i8( %src, i32 zer ; CHECK-NEXT: vrgatherei16.vv v9, v8, v10 ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv8i8( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv8i8( %src, splat (i1 1), i32 %evl) ret %dst } @@ -151,10 +143,8 @@ define @test_vp_reverse_nxv2i64( %src, i32 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv2i64( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv2i64( %src, splat (i1 1), i32 %evl) ret %dst } @@ -182,10 +172,8 @@ define @test_vp_reverse_nxv4i32( %src, i32 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv4i32( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv4i32( %src, splat (i1 1), i32 %evl) ret %dst } @@ -213,10 +201,8 @@ define @test_vp_reverse_nxv8i16( %src, i32 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call 
@llvm.experimental.vp.reverse.nxv8i16( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv8i16( %src, splat (i1 1), i32 %evl) ret %dst } @@ -246,10 +232,8 @@ define @test_vp_reverse_nxv16i8( %src, i32 ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12 ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv16i8( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv16i8( %src, splat (i1 1), i32 %evl) ret %dst } @@ -277,10 +261,8 @@ define @test_vp_reverse_nxv4i64( %src, i32 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv4i64( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv4i64( %src, splat (i1 1), i32 %evl) ret %dst } @@ -308,10 +290,8 @@ define @test_vp_reverse_nxv8i32( %src, i32 ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv8i32( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv8i32( %src, splat (i1 1), i32 %evl) ret %dst } @@ -339,10 +319,8 @@ define @test_vp_reverse_nxv16i16( %src, i ; CHECK-NEXT: vrgather.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv16i16( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv16i16( %src, splat (i1 1), i32 %evl) ret %dst } @@ -372,10 +350,8 @@ define @test_vp_reverse_nxv32i8( %src, i32 ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv32i8( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv32i8( %src, splat (i1 1), i32 %evl) ret %dst } @@ -403,10 +379,8 @@ define @test_vp_reverse_nxv8i64( %src, i32 ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv8i64( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv8i64( %src, splat (i1 1), i32 %evl) ret %dst } @@ -434,10 +408,8 @@ define @test_vp_reverse_nxv16i32( %src, i ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv16i32( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv16i32( %src, splat (i1 1), i32 %evl) ret %dst } @@ -465,10 +437,8 @@ define @test_vp_reverse_nxv32i16( %src, i ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv32i16( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv32i16( %src, splat (i1 1), i32 %evl) ret 
%dst } @@ -510,10 +480,8 @@ define @test_vp_reverse_nxv64i8( %src, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v24, a1 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv64i8( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv64i8( %src, splat (i1 1), i32 %evl) ret %dst } @@ -561,10 +529,8 @@ define @test_vp_reverse_nxv128i8( %src, i ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv128i8( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv128i8( %src, splat (i1 1), i32 %evl) ret %dst } diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll index fd608c858650e..a30ebf2d33b50 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll @@ -34,10 +34,8 @@ define <2 x i1> @test_vp_reverse_v2i1(<2 x i1> %src, i32 zeroext %evl) { ; CHECK-NEXT: vrgatherei16.vv v10, v9, v8 ; CHECK-NEXT: vmsne.vi v0, v10, 0 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> undef, i1 1, i32 0 - %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer - %dst = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> %src, <2 x i1> %allones, i32 %evl) + %dst = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> %src, <2 x i1> splat (i1 1), i32 %evl) ret <2 x i1> %dst } @@ -73,10 +71,8 @@ define <4 x i1> @test_vp_reverse_v4i1(<4 x i1> %src, i32 zeroext %evl) { ; CHECK-NEXT: vrgatherei16.vv v10, v9, v8 ; CHECK-NEXT: vmsne.vi v0, v10, 0 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> undef, i1 1, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer - %dst = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> %src, <4 x i1> %allones, i32 %evl) + %dst = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> %src, <4 x i1> splat (i1 1), i32 %evl) ret <4 x i1> %dst } @@ -112,10 +108,8 @@ define <8 x i1> @test_vp_reverse_v8i1(<8 x i1> %src, i32 zeroext %evl) { ; CHECK-NEXT: vrgatherei16.vv v10, v9, v8 ; CHECK-NEXT: vmsne.vi v0, v10, 0 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> undef, i1 1, i32 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer - %dst = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> %src, <8 x i1> %allones, i32 %evl) + %dst = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> %src, <8 x i1> splat (i1 1), i32 %evl) ret <8 x i1> %dst } @@ -151,10 +145,8 @@ define <16 x i1> @test_vp_reverse_v16i1(<16 x i1> %src, i32 zeroext %evl) { ; CHECK-NEXT: vrgatherei16.vv v11, v10, v8 ; CHECK-NEXT: vmsne.vi v0, v11, 0 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> undef, i1 1, i32 0 - %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer - %dst = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> %src, <16 x i1> %allones, i32 %evl) + %dst = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> %src, <16 x i1> splat (i1 1), i32 %evl) ret <16 x i1> %dst } diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll index 
29917141fffed..ceb6a164e20df 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll @@ -33,10 +33,8 @@ define @test_vp_reverse_nxv1i1( %src, i32 zer ; CHECK-NEXT: vrgatherei16.vv v10, v9, v8 ; CHECK-NEXT: vmsne.vi v0, v10, 0 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv1i1( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv1i1( %src, splat (i1 1), i32 %evl) ret %dst } @@ -72,10 +70,8 @@ define @test_vp_reverse_nxv2i1( %src, i32 zer ; CHECK-NEXT: vrgatherei16.vv v10, v9, v8 ; CHECK-NEXT: vmsne.vi v0, v10, 0 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv2i1( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv2i1( %src, splat (i1 1), i32 %evl) ret %dst } @@ -111,10 +107,8 @@ define @test_vp_reverse_nxv4i1( %src, i32 zer ; CHECK-NEXT: vrgatherei16.vv v10, v9, v8 ; CHECK-NEXT: vmsne.vi v0, v10, 0 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv4i1( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv4i1( %src, splat (i1 1), i32 %evl) ret %dst } @@ -150,10 +144,8 @@ define @test_vp_reverse_nxv8i1( %src, i32 zer ; CHECK-NEXT: vrgatherei16.vv v11, v10, v8 ; CHECK-NEXT: vmsne.vi v0, v11, 0 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv8i1( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv8i1( %src, splat (i1 1), i32 %evl) ret %dst } @@ -190,10 +182,8 @@ define @test_vp_reverse_nxv16i1( %src, i32 ; CHECK-NEXT: vrgatherei16.vv v14, v12, v8 ; CHECK-NEXT: vmsne.vi v0, v14, 0 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv16i1( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv16i1( %src, splat (i1 1), i32 %evl) ret %dst } @@ -230,10 +220,8 @@ define @test_vp_reverse_nxv32i1( %src, i32 ; CHECK-NEXT: vrgatherei16.vv v20, v16, v8 ; CHECK-NEXT: vmsne.vi v0, v20, 0 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv32i1( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv32i1( %src, splat (i1 1), i32 %evl) ret %dst } @@ -285,10 +273,8 @@ define @test_vp_reverse_nxv64i1( %src, i32 ; CHECK-NEXT: vslidedown.vx v8, v16, a1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %dst = call @llvm.experimental.vp.reverse.nxv64i1( %src, %allones, i32 %evl) + %dst = call @llvm.experimental.vp.reverse.nxv64i1( %src, splat (i1 1), i32 %evl) ret %dst } diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-fixed-vectors.ll index f7c8c251e197b..494bf46050ccb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-fixed-vectors.ll @@ -19,10 +19,8 @@ define <2 x i64> @test_vp_splice_v2i64(<2 x i64> %va, <2 x i64> %vb, i32 zeroext ; 
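; The vp.splice tests beginning here use the same splat (i1 1) mask rewrite.
; For reference, the intrinsic takes two vectors, a signed immediate offset,
; the mask, and one EVL per input; roughly, the result is the first %evla
; active lanes of %va followed by the first %evlb lanes of %vb, slid down by
; the offset, and a negative offset keeps only the trailing |offset| active
; lanes of %va. A sketch mirroring test_vp_splice_v2i64, hypothetical name:

declare <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64>, <2 x i64>, i32, <2 x i1>, i32, i32)

define <2 x i64> @sketch_splice(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) {
  %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
  ret <2 x i64> %v
}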
CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> undef, i1 1, i32 0 - %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <2 x i64> %v } @@ -35,10 +33,8 @@ define <2 x i64> @test_vp_splice_v2i64_negative_offset(<2 x i64> %va, <2 x i64> ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 5 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> undef, i1 1, i32 0 - %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer - %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 -5, <2 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 -5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <2 x i64> %v } @@ -64,10 +60,8 @@ define <4 x i32> @test_vp_splice_v4i32(<4 x i32> %va, <4 x i32> %vb, i32 zeroext ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> undef, i1 1, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <4 x i32> %v } @@ -80,10 +74,8 @@ define <4 x i32> @test_vp_splice_v4i32_negative_offset(<4 x i32> %va, <4 x i32> ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 5 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> undef, i1 1, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer - %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 -5, <4 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 -5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <4 x i32> %v } @@ -109,10 +101,8 @@ define <8 x i16> @test_vp_splice_v8i16(<8 x i16> %va, <8 x i16> %vb, i32 zeroext ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> undef, i1 1, i32 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <8 x i16> %v } @@ -125,10 +115,8 @@ define <8 x i16> @test_vp_splice_v8i16_negative_offset(<8 x i16> %va, <8 x i16> ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 5 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> undef, i1 1, i32 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer - %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 -5, <8 x i1> %allones, i32 %evla, i32 %evlb) + %v = call 
<8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 -5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <8 x i16> %v } @@ -154,10 +142,8 @@ define <16 x i8> @test_vp_splice_v16i8(<16 x i8> %va, <16 x i8> %vb, i32 zeroext ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> undef, i1 1, i32 0 - %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <16 x i8> %v } @@ -170,10 +156,8 @@ define <16 x i8> @test_vp_splice_v16i8_negative_offset(<16 x i8> %va, <16 x i8> ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 5 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> undef, i1 1, i32 0 - %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer - %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 -5, <16 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 -5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <16 x i8> %v } @@ -199,10 +183,8 @@ define <2 x double> @test_vp_splice_v2f64(<2 x double> %va, <2 x double> %vb, i3 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> undef, i1 1, i32 0 - %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 5, <2 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <2 x double> %v } @@ -215,10 +197,8 @@ define <2 x double> @test_vp_splice_v2f64_negative_offset(<2 x double> %va, <2 x ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 5 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> undef, i1 1, i32 0 - %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer - %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 -5, <2 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 -5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <2 x double> %v } @@ -244,10 +224,8 @@ define <4 x float> @test_vp_splice_v4f32(<4 x float> %va, <4 x float> %vb, i32 z ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> undef, i1 1, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 5, <4 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <4 x float> %v } @@ -260,10 +238,8 @@ define <4 x float> @test_vp_splice_v4f32_negative_offset(<4 x float> %va, <4 x f ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: 
vslideup.vi v8, v9, 5 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> undef, i1 1, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer - %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 -5, <4 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 -5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <4 x float> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll index 9579973aee0d6..ce0ae2022885a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll @@ -26,10 +26,8 @@ define <2 x i1> @test_vp_splice_v2i1(<2 x i1> %va, <2 x i1> %vb, i32 zeroext %ev ; CHECK-NEXT: vslideup.vx v9, v8, a0 ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> undef, i1 1, i32 0 - %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer - %v = call <2 x i1> @llvm.experimental.vp.splice.v2i1(<2 x i1> %va, <2 x i1> %vb, i32 5, <2 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <2 x i1> @llvm.experimental.vp.splice.v2i1(<2 x i1> %va, <2 x i1> %vb, i32 5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <2 x i1> %v } @@ -52,10 +50,8 @@ define <2 x i1> @test_vp_splice_v2i1_negative_offset(<2 x i1> %va, <2 x i1> %vb, ; CHECK-NEXT: vslideup.vi v9, v8, 5 ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: ret - %head = insertelement <2 x i1> undef, i1 1, i32 0 - %allones = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer - %v = call <2 x i1> @llvm.experimental.vp.splice.v2i1(<2 x i1> %va, <2 x i1> %vb, i32 -5, <2 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <2 x i1> @llvm.experimental.vp.splice.v2i1(<2 x i1> %va, <2 x i1> %vb, i32 -5, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <2 x i1> %v } @@ -103,10 +99,8 @@ define <4 x i1> @test_vp_splice_v4i1(<4 x i1> %va, <4 x i1> %vb, i32 zeroext %ev ; CHECK-NEXT: vslideup.vx v9, v8, a0 ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> undef, i1 1, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer - %v = call <4 x i1> @llvm.experimental.vp.splice.v4i1(<4 x i1> %va, <4 x i1> %vb, i32 5, <4 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <4 x i1> @llvm.experimental.vp.splice.v4i1(<4 x i1> %va, <4 x i1> %vb, i32 5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <4 x i1> %v } @@ -129,10 +123,8 @@ define <4 x i1> @test_vp_splice_v4i1_negative_offset(<4 x i1> %va, <4 x i1> %vb, ; CHECK-NEXT: vslideup.vi v9, v8, 5 ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: ret - %head = insertelement <4 x i1> undef, i1 1, i32 0 - %allones = shufflevector <4 x i1> %head, <4 x i1> undef, <4 x i32> zeroinitializer - %v = call <4 x i1> @llvm.experimental.vp.splice.v4i1(<4 x i1> %va, <4 x i1> %vb, i32 -5, <4 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <4 x i1> @llvm.experimental.vp.splice.v4i1(<4 x i1> %va, <4 x i1> %vb, i32 -5, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <4 x i1> %v } @@ -180,10 +172,8 @@ define <8 x i1> @test_vp_splice_v8i1(<8 x i1> %va, <8 x i1> %vb, i32 zeroext %ev ; CHECK-NEXT: vslideup.vx v9, v8, a0 ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> undef, i1 1, i32 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> 
zeroinitializer - %v = call <8 x i1> @llvm.experimental.vp.splice.v8i1(<8 x i1> %va, <8 x i1> %vb, i32 5, <8 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <8 x i1> @llvm.experimental.vp.splice.v8i1(<8 x i1> %va, <8 x i1> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <8 x i1> %v } @@ -206,10 +196,8 @@ define <8 x i1> @test_vp_splice_v8i1_negative_offset(<8 x i1> %va, <8 x i1> %vb, ; CHECK-NEXT: vslideup.vi v9, v8, 5 ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: ret - %head = insertelement <8 x i1> undef, i1 1, i32 0 - %allones = shufflevector <8 x i1> %head, <8 x i1> undef, <8 x i32> zeroinitializer - %v = call <8 x i1> @llvm.experimental.vp.splice.v8i1(<8 x i1> %va, <8 x i1> %vb, i32 -5, <8 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <8 x i1> @llvm.experimental.vp.splice.v8i1(<8 x i1> %va, <8 x i1> %vb, i32 -5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <8 x i1> %v } @@ -257,10 +245,8 @@ define <16 x i1> @test_vp_splice_v16i1(<16 x i1> %va, <16 x i1> %vb, i32 zeroext ; CHECK-NEXT: vslideup.vx v9, v8, a0 ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> undef, i1 1, i32 0 - %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer - %v = call <16 x i1> @llvm.experimental.vp.splice.v16i1(<16 x i1> %va, <16 x i1> %vb, i32 5, <16 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <16 x i1> @llvm.experimental.vp.splice.v16i1(<16 x i1> %va, <16 x i1> %vb, i32 5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <16 x i1> %v } @@ -283,10 +269,8 @@ define <16 x i1> @test_vp_splice_v16i1_negative_offset(<16 x i1> %va, <16 x i1> ; CHECK-NEXT: vslideup.vi v9, v8, 5 ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: ret - %head = insertelement <16 x i1> undef, i1 1, i32 0 - %allones = shufflevector <16 x i1> %head, <16 x i1> undef, <16 x i32> zeroinitializer - %v = call <16 x i1> @llvm.experimental.vp.splice.v16i1(<16 x i1> %va, <16 x i1> %vb, i32 -5, <16 x i1> %allones, i32 %evla, i32 %evlb) + %v = call <16 x i1> @llvm.experimental.vp.splice.v16i1(<16 x i1> %va, <16 x i1> %vb, i32 -5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <16 x i1> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll index 4eaadb3c24fbb..668cff2342936 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll @@ -29,10 +29,8 @@ define @test_vp_splice_nxv1i1( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv1i1( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv1i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -55,10 +53,8 @@ define @test_vp_splice_nxv1i1_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv1i1( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv1i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -106,10 +102,8 @@ define @test_vp_splice_nxv2i1( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv2i1( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv2i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -132,10 +126,8 @@ define @test_vp_splice_nxv2i1_negative_offset( 
undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv2i1( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv2i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -183,10 +175,8 @@ define @test_vp_splice_nxv4i1( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv4i1( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv4i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -209,10 +199,8 @@ define @test_vp_splice_nxv4i1_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv4i1( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv4i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -260,10 +248,8 @@ define @test_vp_splice_nxv8i1( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv8i1( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv8i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -286,10 +272,8 @@ define @test_vp_splice_nxv8i1_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv8i1( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv8i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -337,10 +321,8 @@ define @test_vp_splice_nxv16i1( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv16i1( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv16i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -363,10 +345,8 @@ define @test_vp_splice_nxv16i1_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv16i1( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv16i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -415,10 +395,8 @@ define @test_vp_splice_nxv32i1( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv32i1( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv32i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -441,10 +419,8 @@ define @test_vp_splice_nxv32i1_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv32i1( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv32i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -493,10 +469,8 @@ define @test_vp_splice_nxv64i1( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv64i1( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv64i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -519,10 +493,8 @@ define @test_vp_splice_nxv64i1_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, 
undef, zeroinitializer - %v = call @llvm.experimental.vp.splice.nxv64i1( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv64i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll index 7d85370e390b0..a4f91c3e7c99e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll @@ -23,10 +23,7 @@ define @test_vp_splice_nxv2i64( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv2i64( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv2i64( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -39,10 +36,7 @@ define @test_vp_splice_nxv2i64_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv2i64( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv2i64( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -68,10 +62,7 @@ define @test_vp_splice_nxv1i64( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv1i64( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv1i64( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -84,10 +75,7 @@ define @test_vp_splice_nxv1i64_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv1i64( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv1i64( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -113,10 +101,7 @@ define @test_vp_splice_nxv2i32( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv2i32( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv2i32( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -129,10 +114,7 @@ define @test_vp_splice_nxv2i32_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv2i32( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv2i32( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -158,10 +140,7 @@ define @test_vp_splice_nxv4i16( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv4i16( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv4i16( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -174,10 +153,7 @@ define @test_vp_splice_nxv4i16_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv4i16( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv4i16( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -203,10 +179,7 @@ define @test_vp_splice_nxv8i8( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv8i8( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call 
@llvm.experimental.vp.splice.nxv8i8( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -219,10 +192,7 @@ define @test_vp_splice_nxv8i8_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv8i8( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv8i8( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -248,10 +218,7 @@ define @test_vp_splice_nxv1f64( %va, ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: ret - %head = insertelement undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv1f64( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv1f64( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -264,10 +231,7 @@ define @test_vp_splice_nxv1f64_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv1f64( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv1f64( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -293,10 +257,7 @@ define @test_vp_splice_nxv2f32( %va, undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv2f32( %va, %vb, i32 5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv2f32( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } @@ -309,10 +270,7 @@ define @test_vp_splice_nxv2f32_negative_offset( undef, i1 1, i32 0 - %allones = shufflevector %head, undef, zeroinitializer - - %v = call @llvm.experimental.vp.splice.nxv2f32( %va, %vb, i32 -5, %allones, i32 %evla, i32 %evlb) + %v = call @llvm.experimental.vp.splice.nxv2f32( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index a5c305d5ac822..c86fee6305931 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -204,9 +204,7 @@ define @vpgather_truemask_nxv4i8( %ptrs, i32 ; RV64-NEXT: vluxei64.v v12, (zero), v8 ; RV64-NEXT: vmv1r.v v8, v12 ; RV64-NEXT: ret - %mhead = insertelement poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - %v = call @llvm.vp.gather.nxv4i8.nxv4p0( %ptrs, %mtrue, i32 %evl) + %v = call @llvm.vp.gather.nxv4i8.nxv4p0( %ptrs, splat (i1 1), i32 %evl) ret %v } @@ -500,9 +498,7 @@ define @vpgather_truemask_nxv4i16( %ptrs, i ; RV64-NEXT: vluxei64.v v12, (zero), v8 ; RV64-NEXT: vmv.v.v v8, v12 ; RV64-NEXT: ret - %mhead = insertelement poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - %v = call @llvm.vp.gather.nxv4i16.nxv4p0( %ptrs, %mtrue, i32 %evl) + %v = call @llvm.vp.gather.nxv4i16.nxv4p0( %ptrs, splat (i1 1), i32 %evl) ret %v } @@ -729,9 +725,7 @@ define @vpgather_truemask_nxv4i32( %ptrs, i ; RV64-NEXT: vluxei64.v v12, (zero), v8 ; RV64-NEXT: vmv.v.v v8, v12 ; RV64-NEXT: ret - %mhead = insertelement poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - %v = call @llvm.vp.gather.nxv4i32.nxv4p0( %ptrs, %mtrue, i32 %evl) + %v = call @llvm.vp.gather.nxv4i32.nxv4p0( %ptrs, splat (i1 1), i32 %evl) ret %v } @@ -988,9 +982,7 @@ define @vpgather_truemask_nxv4i64( %ptrs, i ; RV64-NEXT: vsetvli 
zero, a0, e64, m4, ta, ma ; RV64-NEXT: vluxei64.v v8, (zero), v8 ; RV64-NEXT: ret - %mhead = insertelement poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - %v = call @llvm.vp.gather.nxv4i64.nxv4p0( %ptrs, %mtrue, i32 %evl) + %v = call @llvm.vp.gather.nxv4i64.nxv4p0( %ptrs, splat (i1 1), i32 %evl) ret %v } @@ -1319,9 +1311,7 @@ define @vpgather_truemask_nxv4f16( %ptrs, ; RV64-NEXT: vluxei64.v v12, (zero), v8 ; RV64-NEXT: vmv.v.v v8, v12 ; RV64-NEXT: ret - %mhead = insertelement poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - %v = call @llvm.vp.gather.nxv4f16.nxv4p0( %ptrs, %mtrue, i32 %evl) + %v = call @llvm.vp.gather.nxv4f16.nxv4p0( %ptrs, splat (i1 1), i32 %evl) ret %v } @@ -1506,9 +1496,7 @@ define @vpgather_truemask_nxv4f32( %ptrs, ; RV64-NEXT: vluxei64.v v12, (zero), v8 ; RV64-NEXT: vmv.v.v v8, v12 ; RV64-NEXT: ret - %mhead = insertelement poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - %v = call @llvm.vp.gather.nxv4f32.nxv4p0( %ptrs, %mtrue, i32 %evl) + %v = call @llvm.vp.gather.nxv4f32.nxv4p0( %ptrs, splat (i1 1), i32 %evl) ret %v } @@ -1765,9 +1753,7 @@ define @vpgather_truemask_nxv4f64( %ptrs ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV64-NEXT: vluxei64.v v8, (zero), v8 ; RV64-NEXT: ret - %mhead = insertelement poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - %v = call @llvm.vp.gather.nxv4f64.nxv4p0( %ptrs, %mtrue, i32 %evl) + %v = call @llvm.vp.gather.nxv4f64.nxv4p0( %ptrs, splat (i1 1), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index c203fcb903e56..f07c16476c56a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -22,9 +22,7 @@ define @vpload_nxv1i8_allones_mask(ptr %ptr, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.vp.load.nxv1i8.p0(ptr %ptr, %b, i32 %evl) + %load = call @llvm.vp.load.nxv1i8.p0(ptr %ptr, splat (i1 true), i32 %evl) ret %load } @@ -82,9 +80,7 @@ define @vpload_nxv8i8_allones_mask(ptr %ptr, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.vp.load.nxv8i8.p0(ptr %ptr, %b, i32 %evl) + %load = call @llvm.vp.load.nxv8i8.p0(ptr %ptr, splat (i1 true), i32 %evl) ret %load } @@ -118,9 +114,7 @@ define @vpload_nxv2i16_allones_mask(ptr %ptr, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.vp.load.nxv2i16.p0(ptr %ptr, %b, i32 %evl) + %load = call @llvm.vp.load.nxv2i16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret %load } @@ -190,9 +184,7 @@ define @vpload_nxv4i32_allones_mask(ptr %ptr, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.vp.load.nxv4i32.p0(ptr %ptr, %b, i32 %evl) + %load = call @llvm.vp.load.nxv4i32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret %load } @@ -226,9 +218,7 @@ define @vpload_nxv1i64_allones_mask(ptr %ptr, i32 
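; The *_allones_mask loads below show why these tests exist: with the mask
; folded to the splat (i1 true) constant, the VP load lowers to a plain
; unmasked vle, and only the EVL still participates via vsetvli. Sketch with
; a hypothetical name, nxv1i8 chosen for illustration:

declare <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @sketch_allones_load(ptr %ptr, i32 zeroext %evl) {
  ; Per the CHECK lines: vsetvli zero, a1, e8, mf8, ta, ma ; vle8.v v8, (a0)
  %load = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i8> %load
}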
zeroext %ev ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.vp.load.nxv1i64.p0(ptr %ptr, %b, i32 %evl) + %load = call @llvm.vp.load.nxv1i64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret %load } @@ -298,9 +288,7 @@ define @vpload_nxv2f16_allones_mask(ptr %ptr, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.vp.load.nxv2f16.p0(ptr %ptr, %b, i32 %evl) + %load = call @llvm.vp.load.nxv2f16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret %load } @@ -382,9 +370,7 @@ define @vpload_nxv8f32_allones_mask(ptr %ptr, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.vp.load.nxv8f32.p0(ptr %ptr, %b, i32 %evl) + %load = call @llvm.vp.load.nxv8f32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret %load } @@ -430,9 +416,7 @@ define @vpload_nxv4f64_allones_mask(ptr %ptr, i32 zeroext ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call @llvm.vp.load.nxv4f64.p0(ptr %ptr, %b, i32 %evl) + %load = call @llvm.vp.load.nxv4f64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret %load } diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll index 4f67aac2d2d2b..76efdda15bf77 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -66,9 +66,7 @@ define @vpmerge_vi_nxv1i8( %vb, poison, i8 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv1i8( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv1i8( %m, splat (i8 2), %vb, i32 %evl) ret %v } @@ -103,9 +101,7 @@ define @vpmerge_vi_nxv2i8( %vb, poison, i8 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv2i8( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv2i8( %m, splat (i8 2), %vb, i32 %evl) ret %v } @@ -140,9 +136,7 @@ define @vpmerge_vi_nxv3i8( %vb, poison, i8 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv3i8( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv3i8( %m, splat (i8 2), %vb, i32 %evl) ret %v } @@ -177,9 +171,7 @@ define @vpmerge_vi_nxv4i8( %vb, poison, i8 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv4i8( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv4i8( %m, splat (i8 2), %vb, i32 %evl) ret %v } @@ -214,9 +206,7 @@ define @vpmerge_vi_nxv8i7( %vb, poison, i7 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv8i7( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv8i7( %m, splat (i7 2), %vb, i32 %evl) ret %v } @@ -251,9 +241,7 @@ define @vpmerge_vi_nxv8i8( %vb, poison, i8 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv8i8( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv8i8( %m, splat (i8 2), %vb, i32 %evl) ret %v } @@ -288,9 +276,7 @@ define @vpmerge_vi_nxv16i8( %vb, poison, i8 2, i32 0 - %va = 
shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv16i8( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv16i8( %m, splat (i8 2), %vb, i32 %evl) ret %v } @@ -325,9 +311,7 @@ define @vpmerge_vi_nxv32i8( %vb, poison, i8 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv32i8( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv32i8( %m, splat (i8 2), %vb, i32 %evl) ret %v } @@ -362,9 +346,7 @@ define @vpmerge_vi_nxv64i8( %vb, poison, i8 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv64i8( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv64i8( %m, splat (i8 2), %vb, i32 %evl) ret %v } @@ -464,9 +446,7 @@ define @vpmerge_vi_nxv128i8( %vb, poison, i8 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv128i8( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv128i8( %m, splat (i8 2), %vb, i32 %evl) ret %v } @@ -501,9 +481,7 @@ define @vpmerge_vi_nxv1i16( %vb, poison, i16 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv1i16( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv1i16( %m, splat (i16 2), %vb, i32 %evl) ret %v } @@ -538,9 +516,7 @@ define @vpmerge_vi_nxv2i16( %vb, poison, i16 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv2i16( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv2i16( %m, splat (i16 2), %vb, i32 %evl) ret %v } @@ -575,9 +551,7 @@ define @vpmerge_vi_nxv4i16( %vb, poison, i16 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv4i16( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv4i16( %m, splat (i16 2), %vb, i32 %evl) ret %v } @@ -612,9 +586,7 @@ define @vpmerge_vi_nxv8i16( %vb, poison, i16 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv8i16( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv8i16( %m, splat (i16 2), %vb, i32 %evl) ret %v } @@ -649,9 +621,7 @@ define @vpmerge_vi_nxv16i16( %vb, poison, i16 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv16i16( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv16i16( %m, splat (i16 2), %vb, i32 %evl) ret %v } @@ -686,9 +656,7 @@ define @vpmerge_vi_nxv32i16( %vb, poison, i16 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv32i16( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv32i16( %m, splat (i16 2), %vb, i32 %evl) ret %v } @@ -723,9 +691,7 @@ define @vpmerge_vi_nxv1i32( %vb, poison, i32 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv1i32( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv1i32( %m, splat (i32 2), %vb, i32 %evl) ret %v } @@ -760,9 +726,7 @@ define @vpmerge_vi_nxv2i32( %vb, poison, i32 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv2i32( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv2i32( %m, splat (i32 2), %vb, i32 %evl) ret %v } @@ -797,9 +761,7 @@ define @vpmerge_vi_nxv4i32( %vb, poison, i32 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv4i32( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv4i32( %m, splat (i32 2), %vb, i32 %evl) ret %v } @@ -834,9 +796,7 @@ define @vpmerge_vi_nxv8i32( %vb, poison, i32 2, i32 0 - %va = shufflevector %elt.head, poison, 
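; In the vpmerge_vi tests the splat is a value rather than a mask: the old
; insertelement/shufflevector pair built the constant-2 true operand of
; vp.merge. For lanes below %evl the result takes 2 where %m is set and %vb
; otherwise; lanes at or past %evl keep %vb. Sketch, hypothetical name:

declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)

define <vscale x 1 x i8> @sketch_vpmerge_vi(<vscale x 1 x i1> %m, <vscale x 1 x i8> %vb, i32 zeroext %evl) {
  %v = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> splat (i8 2), <vscale x 1 x i8> %vb, i32 %evl)
  ret <vscale x 1 x i8> %v
}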
zeroinitializer - %v = call @llvm.vp.merge.nxv8i32( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv8i32( %m, splat (i32 2), %vb, i32 %evl) ret %v } @@ -871,9 +831,7 @@ define @vpmerge_vi_nxv16i32( %vb, poison, i32 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv16i32( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv16i32( %m, splat (i32 2), %vb, i32 %evl) ret %v } @@ -920,9 +878,7 @@ define @vpmerge_vi_nxv1i64( %vb, poison, i64 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv1i64( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv1i64( %m, splat (i64 2), %vb, i32 %evl) ret %v } @@ -969,9 +925,7 @@ define @vpmerge_vi_nxv2i64( %vb, poison, i64 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv2i64( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv2i64( %m, splat (i64 2), %vb, i32 %evl) ret %v } @@ -1018,9 +972,7 @@ define @vpmerge_vi_nxv4i64( %vb, poison, i64 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv4i64( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv4i64( %m, splat (i64 2), %vb, i32 %evl) ret %v } @@ -1067,9 +1019,7 @@ define @vpmerge_vi_nxv8i64( %vb, poison, i64 2, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv8i64( %m, %va, %vb, i32 %evl) + %v = call @llvm.vp.merge.nxv8i64( %m, splat (i64 2), %vb, i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll index a907e149b167f..38e4aab4deb34 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -145,9 +145,7 @@ define void @vpscatter_truemask_nxv4i8( %val, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - call void @llvm.vp.scatter.nxv4i8.nxv4p0( %val, %ptrs, %mtrue, i32 %evl) + call void @llvm.vp.scatter.nxv4i8.nxv4p0( %val, %ptrs, splat (i1 1), i32 %evl) ret void } @@ -302,9 +300,7 @@ define void @vpscatter_truemask_nxv4i16( %val, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - call void @llvm.vp.scatter.nxv4i16.nxv4p0( %val, %ptrs, %mtrue, i32 %evl) + call void @llvm.vp.scatter.nxv4i16.nxv4p0( %val, %ptrs, splat (i1 1), i32 %evl) ret void } @@ -604,9 +600,7 @@ define void @vpscatter_truemask_nxv4i32( %val, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - call void @llvm.vp.scatter.nxv4i32.nxv4p0( %val, %ptrs, %mtrue, i32 %evl) + call void @llvm.vp.scatter.nxv4i32.nxv4p0( %val, %ptrs, splat (i1 1), i32 %evl) ret void } @@ -858,9 +852,7 @@ define void @vpscatter_truemask_nxv4i64( %val, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - call void @llvm.vp.scatter.nxv4i64.nxv4p0( %val, %ptrs, %mtrue, i32 %evl) + call void @llvm.vp.scatter.nxv4i64.nxv4p0( %val, %ptrs, splat (i1 1), i32 %evl) ret void } @@ -1180,9 +1172,7 @@ define void @vpscatter_truemask_nxv4f16( %val, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - call void @llvm.vp.scatter.nxv4f16.nxv4p0( %val, %ptrs, %mtrue, i32 %evl) + call void @llvm.vp.scatter.nxv4f16.nxv4p0( %val, %ptrs, splat (i1 1), i32 %evl) ret void } @@ -1361,9 +1351,7 @@ define void @vpscatter_truemask_nxv4f32( %val, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - call void @llvm.vp.scatter.nxv4f32.nxv4p0( %val, %ptrs, %mtrue, i32 
%evl) + call void @llvm.vp.scatter.nxv4f32.nxv4p0( %val, %ptrs, splat (i1 1), i32 %evl) ret void } @@ -1615,9 +1603,7 @@ define void @vpscatter_truemask_nxv4f64( %val, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - call void @llvm.vp.scatter.nxv4f64.nxv4p0( %val, %ptrs, %mtrue, i32 %evl) + call void @llvm.vp.scatter.nxv4f64.nxv4p0( %val, %ptrs, splat (i1 1), i32 %evl) ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll index 8b27a61e243db..c12fc0497742a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -358,9 +358,7 @@ define void @vpstore_nxv1i8_allones_mask( %val, ptr %ptr, i32 z ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - call void @llvm.vp.store.nxv1i8.p0( %val, ptr %ptr, %b, i32 %evl) + call void @llvm.vp.store.nxv1i8.p0( %val, ptr %ptr, splat (i1 true), i32 %evl) ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll index 1eafb3bdfed2c..3a6ae5fdb2107 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll @@ -39,9 +39,7 @@ define @vrem_vi_nxv1i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i8 -7) ret %vc } @@ -93,9 +91,7 @@ define @vrem_vi_nxv2i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i8 -7) ret %vc } @@ -147,9 +143,7 @@ define @vrem_vi_nxv4i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i8 -7) ret %vc } @@ -201,9 +195,7 @@ define @vrem_vi_nxv8i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i8 -7) ret %vc } @@ -255,9 +247,7 @@ define @vrem_vi_nxv16i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i8 -7) ret %vc } @@ -309,9 +299,7 @@ define @vrem_vi_nxv32i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i8 -7) ret %vc } @@ -363,9 +351,7 @@ define @vrem_vi_nxv64i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i8 -7) ret %vc } @@ -404,9 +390,7 @@ define @vrem_vi_nxv1i16_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = 
insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i16 -7) ret %vc } @@ -458,9 +442,7 @@ define @vrem_vi_nxv2i16_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i16 -7) ret %vc } @@ -512,9 +494,7 @@ define @vrem_vi_nxv4i16_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i16 -7) ret %vc } @@ -566,9 +546,7 @@ define @vrem_vi_nxv8i16_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i16 -7) ret %vc } @@ -620,9 +598,7 @@ define @vrem_vi_nxv16i16_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i16 -7) ret %vc } @@ -674,9 +650,7 @@ define @vrem_vi_nxv32i16_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i16 -7) ret %vc } @@ -730,9 +704,7 @@ define @vrem_vi_nxv1i32_0( %va) { ; RV64-NEXT: li a0, -7 ; RV64-NEXT: vnmsac.vx v8, a0, v9 ; RV64-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i32 -7) ret %vc } @@ -786,9 +758,7 @@ define @vrem_vi_nxv2i32_0( %va) { ; RV64-NEXT: li a0, -7 ; RV64-NEXT: vnmsac.vx v8, a0, v9 ; RV64-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i32 -7) ret %vc } @@ -842,9 +812,7 @@ define @vrem_vi_nxv4i32_0( %va) { ; RV64-NEXT: li a0, -7 ; RV64-NEXT: vnmsac.vx v8, a0, v10 ; RV64-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i32 -7) ret %vc } @@ -898,9 +866,7 @@ define @vrem_vi_nxv8i32_0( %va) { ; RV64-NEXT: li a0, -7 ; RV64-NEXT: vnmsac.vx v8, a0, v12 ; RV64-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i32 -7) ret %vc } @@ -954,9 +920,7 @@ define @vrem_vi_nxv16i32_0( %va) { ; RV64-NEXT: li a0, -7 ; RV64-NEXT: vnmsac.vx v8, a0, v16 ; RV64-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i32 -7) ret %vc } @@ -1039,9 +1003,7 @@ define @vrem_vi_nxv1i64_0( %va) { ; RV64-V-NEXT: li a0, -7 ; RV64-V-NEXT: vnmsac.vx v8, a0, v9 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i64 -7) ret %vc } @@ -1124,9 +1086,7 @@ define @vrem_vi_nxv2i64_0( %va) { ; RV64-V-NEXT: li a0, -7 ; 
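; vrem-sdnode.ll exercises the plain (non-VP) srem node, so here the splat is
; the divisor itself. The CHECK tail kept in these hunks (li a0, -7 followed
; by vnmsac.vx) is the final multiply-subtract of a multiply-based division
; by the constant: remainder = %va - (-7 * quotient). Sketch:

define <vscale x 1 x i8> @sketch_vrem_vi(<vscale x 1 x i8> %va) {
  %vc = srem <vscale x 1 x i8> %va, splat (i8 -7)
  ret <vscale x 1 x i8> %vc
}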
RV64-V-NEXT: vnmsac.vx v8, a0, v10 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i64 -7) ret %vc } @@ -1209,9 +1169,7 @@ define @vrem_vi_nxv4i64_0( %va) { ; RV64-V-NEXT: li a0, -7 ; RV64-V-NEXT: vnmsac.vx v8, a0, v12 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i64 -7) ret %vc } @@ -1294,8 +1252,6 @@ define @vrem_vi_nxv8i64_0( %va) { ; RV64-V-NEXT: li a0, -7 ; RV64-V-NEXT: vnmsac.vx v8, a0, v16 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = srem %va, %splat + %vc = srem %va, splat (i64 -7) ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll index 74a8fce1fcd7f..cf85fd827b51f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll @@ -39,9 +39,7 @@ define @vrem_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -65,9 +63,7 @@ define @vrem_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -89,9 +85,7 @@ define @vrem_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -115,9 +109,7 @@ define @vrem_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -151,9 +143,7 @@ define @vrem_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -177,9 +167,7 @@ define @vrem_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -201,9 +189,7 @@ define @vrem_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -227,9 +213,7 @@ define @vrem_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector 
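; Note the asymmetry in the vrem-vp _unmasked hunks around here: only the
; constant all-ones mask collapses to splat (i1 true), while the runtime
; broadcast of the scalar %b keeps its insertelement/shufflevector pair,
; because splat is syntax for constants only. Sketch, hypothetical name:

declare <vscale x 1 x i8> @llvm.vp.srem.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @sketch_vrem_vx_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
  ; Runtime splat of %b still needs the two-instruction idiom.
  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
  ; The constant all-ones mask is now spelled directly.
  %v = call <vscale x 1 x i8> @llvm.vp.srem.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i8> %v
}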
%elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -251,9 +235,7 @@ define @vrem_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -277,9 +259,7 @@ define @vrem_vx_nxv16i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -301,9 +281,7 @@ define @vrem_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -327,9 +305,7 @@ define @vrem_vx_nxv32i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -351,9 +327,7 @@ define @vrem_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -377,9 +351,7 @@ define @vrem_vx_nxv64i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -401,9 +373,7 @@ define @vrem_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -427,9 +397,7 @@ define @vrem_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -451,9 +419,7 @@ define @vrem_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -477,9 +443,7 @@ define @vrem_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 
- %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -501,9 +465,7 @@ define @vrem_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -527,9 +489,7 @@ define @vrem_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -551,9 +511,7 @@ define @vrem_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -577,9 +535,7 @@ define @vrem_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -601,9 +557,7 @@ define @vrem_vv_nxv16i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vrem.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -627,9 +581,7 @@ define @vrem_vx_nxv16i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -651,9 +603,7 @@ define @vrem_vv_nxv32i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vrem.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -677,9 +627,7 @@ define @vrem_vx_nxv32i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -701,9 +649,7 @@ define @vrem_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -727,9 +673,7 @@ define 
@vrem_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -751,9 +695,7 @@ define @vrem_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -777,9 +719,7 @@ define @vrem_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -801,9 +741,7 @@ define @vrem_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -827,9 +765,7 @@ define @vrem_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -851,9 +787,7 @@ define @vrem_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -877,9 +811,7 @@ define @vrem_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -901,9 +833,7 @@ define @vrem_vv_nxv16i32_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vrem.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -927,9 +857,7 @@ define @vrem_vx_nxv16i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -951,9 +879,7 @@ define @vrem_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv1i64( %va, %b, splat (i1 
true), i32 %evl) ret %v } @@ -1005,9 +931,7 @@ define @vrem_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1029,9 +953,7 @@ define @vrem_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1083,9 +1005,7 @@ define @vrem_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1107,9 +1027,7 @@ define @vrem_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1161,9 +1079,7 @@ define @vrem_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1185,9 +1101,7 @@ define @vrem_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1239,8 +1153,6 @@ define @vrem_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.srem.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.srem.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll index 428d071cac399..ed40f5af4fa4c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll @@ -36,9 +36,7 @@ define @vremu_vi_nxv1i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i8 -7) ret %vc } @@ -74,9 +72,7 @@ define @vremu_vi_nxv2i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i8 -7) ret %vc } @@ -112,9 +108,7 @@ define @vremu_vi_nxv4i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 
0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i8 -7) ret %vc } @@ -150,9 +144,7 @@ define @vremu_vi_nxv8i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i8 -7) ret %vc } @@ -188,9 +180,7 @@ define @vremu_vi_nxv16i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i8 -7) ret %vc } @@ -226,9 +216,7 @@ define @vremu_vi_nxv32i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i8 -7) ret %vc } @@ -264,9 +252,7 @@ define @vremu_vi_nxv64i8_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i8 -7) ret %vc } @@ -303,9 +289,7 @@ define @vremu_vi_nxv1i16_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i16 -7) ret %vc } @@ -342,9 +326,7 @@ define @vremu_vi_nxv2i16_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i16 -7) ret %vc } @@ -381,9 +363,7 @@ define @vremu_vi_nxv4i16_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i16 -7) ret %vc } @@ -420,9 +400,7 @@ define @vremu_vi_nxv8i16_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i16 -7) ret %vc } @@ -459,9 +437,7 @@ define @vremu_vi_nxv16i16_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i16 -7) ret %vc } @@ -498,9 +474,7 @@ define @vremu_vi_nxv32i16_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i16 -7) ret %vc } @@ -537,9 +511,7 @@ define @vremu_vi_nxv1i32_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i32 -7) ret %vc } @@ -576,9 +548,7 @@ define @vremu_vi_nxv2i32_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, 
a0, v9 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i32 -7) ret %vc } @@ -615,9 +585,7 @@ define @vremu_vi_nxv4i32_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v10 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i32 -7) ret %vc } @@ -654,9 +622,7 @@ define @vremu_vi_nxv8i32_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i32 -7) ret %vc } @@ -693,9 +659,7 @@ define @vremu_vi_nxv16i32_0( %va) { ; CHECK-NEXT: li a0, -7 ; CHECK-NEXT: vnmsac.vx v8, a0, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i32 -7) ret %vc } @@ -773,9 +737,7 @@ define @vremu_vi_nxv1i64_0( %va) { ; RV64-V-NEXT: li a0, -7 ; RV64-V-NEXT: vnmsac.vx v8, a0, v9 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i64 -7) ret %vc } @@ -786,9 +748,7 @@ define @vremu_vi_nxv1i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i64 16) ret %vc } @@ -803,9 +763,7 @@ define @vremu_vi_nxv1i64_2( %va, poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %splat, %vb + %vc = shl splat (i64 16), %vb %vd = urem %va, %vc ret %vd } @@ -884,9 +842,7 @@ define @vremu_vi_nxv2i64_0( %va) { ; RV64-V-NEXT: li a0, -7 ; RV64-V-NEXT: vnmsac.vx v8, a0, v10 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i64 -7) ret %vc } @@ -897,9 +853,7 @@ define @vremu_vi_nxv2i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i64 16) ret %vc } @@ -914,9 +868,7 @@ define @vremu_vi_nxv2i64_2( %va, poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %splat, %vb + %vc = shl splat (i64 16), %vb %vd = urem %va, %vc ret %vd } @@ -995,9 +947,7 @@ define @vremu_vi_nxv4i64_0( %va) { ; RV64-V-NEXT: li a0, -7 ; RV64-V-NEXT: vnmsac.vx v8, a0, v12 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i64 -7) ret %vc } @@ -1008,9 +958,7 @@ define @vremu_vi_nxv4i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i64 16) ret %vc } @@ -1025,9 +973,7 @@ define @vremu_vi_nxv4i64_2( %va, poison, i64 16, i32 0 - %splat = shufflevector %head, poison, 
zeroinitializer - %vc = shl %splat, %vb + %vc = shl splat (i64 16), %vb %vd = urem %va, %vc ret %vd } @@ -1106,9 +1052,7 @@ define @vremu_vi_nxv8i64_0( %va) { ; RV64-V-NEXT: li a0, -7 ; RV64-V-NEXT: vnmsac.vx v8, a0, v16 ; RV64-V-NEXT: ret - %head = insertelement poison, i64 -7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i64 -7) ret %vc } @@ -1119,9 +1063,7 @@ define @vremu_vi_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 15 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = urem %va, %splat + %vc = urem %va, splat (i64 16) ret %vc } @@ -1136,9 +1078,7 @@ define @vremu_vi_nxv8i64_2( %va, poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %splat, %vb + %vc = shl splat (i64 16), %vb %vd = urem %va, %vc ret %vd } diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll index 1be66bd74f8e1..61bdd5b8d3c8a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll @@ -41,9 +41,7 @@ define @vremu_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -67,9 +65,7 @@ define @vremu_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -91,9 +87,7 @@ define @vremu_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -117,9 +111,7 @@ define @vremu_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -153,9 +145,7 @@ define @vremu_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -179,9 +169,7 @@ define @vremu_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -203,9 +191,7 @@ define @vremu_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -229,9 +215,7 @@ define 
@vremu_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -253,9 +237,7 @@ define @vremu_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -279,9 +261,7 @@ define @vremu_vx_nxv16i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -303,9 +283,7 @@ define @vremu_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -329,9 +307,7 @@ define @vremu_vx_nxv32i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -353,9 +329,7 @@ define @vremu_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -379,9 +353,7 @@ define @vremu_vx_nxv64i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -403,9 +375,7 @@ define @vremu_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -429,9 +399,7 @@ define @vremu_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -453,9 +421,7 @@ define @vremu_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -479,9 +445,7 @@ define @vremu_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = 
insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -503,9 +467,7 @@ define @vremu_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -529,9 +491,7 @@ define @vremu_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -553,9 +513,7 @@ define @vremu_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -579,9 +537,7 @@ define @vremu_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -603,9 +559,7 @@ define @vremu_vv_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vremu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -629,9 +583,7 @@ define @vremu_vx_nxv16i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -653,9 +605,7 @@ define @vremu_vv_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vremu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -679,9 +629,7 @@ define @vremu_vx_nxv32i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -703,9 +651,7 @@ define @vremu_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv1i32( %va, %b, %m, 
i32 %evl) + %v = call @llvm.vp.urem.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -729,9 +675,7 @@ define @vremu_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -753,9 +697,7 @@ define @vremu_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -779,9 +721,7 @@ define @vremu_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -803,9 +743,7 @@ define @vremu_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -829,9 +767,7 @@ define @vremu_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -853,9 +789,7 @@ define @vremu_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -879,9 +813,7 @@ define @vremu_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -903,9 +835,7 @@ define @vremu_vv_nxv16i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vremu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -929,9 +859,7 @@ define @vremu_vx_nxv16i32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -953,9 +881,7 @@ define @vremu_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, 
zeroinitializer - %v = call @llvm.vp.urem.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1007,9 +933,7 @@ define @vremu_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1031,9 +955,7 @@ define @vremu_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1085,9 +1007,7 @@ define @vremu_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1109,9 +1029,7 @@ define @vremu_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1163,9 +1081,7 @@ define @vremu_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1187,9 +1103,7 @@ define @vremu_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1241,8 +1155,6 @@ define @vremu_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.urem.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.urem.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll index 127d3ffd79315..e97b1f41ad3d3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll @@ -20,9 +20,7 @@ define @vrsub_vi_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i8 -4), %va ret %vc } @@ -44,9 +42,7 @@ define @vrsub_vi_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i8 -4), %va ret 
%vc } @@ -68,9 +64,7 @@ define @vrsub_vi_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i8 -4), %va ret %vc } @@ -92,9 +86,7 @@ define @vrsub_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i8 -4), %va ret %vc } @@ -116,9 +108,7 @@ define @vrsub_vi_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i8 -4), %va ret %vc } @@ -140,9 +130,7 @@ define @vrsub_vi_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i8 -4), %va ret %vc } @@ -164,9 +152,7 @@ define @vrsub_vi_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i8 -4), %va ret %vc } @@ -188,9 +174,7 @@ define @vrsub_vi_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i16 -4), %va ret %vc } @@ -212,9 +196,7 @@ define @vrsub_vi_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i16 -4), %va ret %vc } @@ -236,9 +218,7 @@ define @vrsub_vi_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i16 -4), %va ret %vc } @@ -260,9 +240,7 @@ define @vrsub_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i16 -4), %va ret %vc } @@ -284,9 +262,7 @@ define @vrsub_vi_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i16 -4), %va ret %vc } @@ -308,9 +284,7 @@ define @vrsub_vi_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i16 -4), %va ret %vc } @@ -332,9 
+306,7 @@ define @vrsub_vi_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i32 -4), %va ret %vc } @@ -356,9 +328,7 @@ define @vrsub_vi_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i32 -4), %va ret %vc } @@ -380,9 +350,7 @@ define @vrsub_vi_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i32 -4), %va ret %vc } @@ -404,9 +372,7 @@ define @vrsub_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i32 -4), %va ret %vc } @@ -428,9 +394,7 @@ define @vrsub_vi_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i32 -4), %va ret %vc } @@ -465,9 +429,7 @@ define @vrsub_vi_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i64 -4), %va ret %vc } @@ -502,9 +464,7 @@ define @vrsub_vi_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i64 -4), %va ret %vc } @@ -539,9 +499,7 @@ define @vrsub_vi_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i64 -4), %va ret %vc } @@ -576,8 +534,6 @@ define @vrsub_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, -4 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -4, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %splat, %va + %vc = sub splat (i64 -4), %va ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll index a6eb4de03e1cc..be372c9aa54d5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll @@ -26,9 +26,7 @@ define @vrsub_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i8( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ 
-38,9 +36,7 @@ define @vrsub_vi_nxv1i8( %va, poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i8( splat (i8 2), %va, %m, i32 %evl) ret %v } @@ -50,11 +46,7 @@ define @vrsub_vi_nxv1i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i8( splat (i8 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -80,9 +72,7 @@ define @vrsub_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i8( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -92,9 +82,7 @@ define @vrsub_vi_nxv2i8( %va, poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i8( splat (i8 2), %va, %m, i32 %evl) ret %v } @@ -104,11 +92,7 @@ define @vrsub_vi_nxv2i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i8( splat (i8 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -134,9 +118,7 @@ define @vrsub_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i8( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -146,9 +128,7 @@ define @vrsub_vi_nxv4i8( %va, poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i8( splat (i8 2), %va, %m, i32 %evl) ret %v } @@ -158,11 +138,7 @@ define @vrsub_vi_nxv4i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i8( splat (i8 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -188,9 +164,7 @@ define @vrsub_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i8( %vb, %va, splat (i1 true), 
i32 %evl) ret %v } @@ -200,9 +174,7 @@ define @vrsub_vi_nxv8i8( %va, poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i8( splat (i8 2), %va, %m, i32 %evl) ret %v } @@ -212,11 +184,7 @@ define @vrsub_vi_nxv8i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i8( splat (i8 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -242,9 +210,7 @@ define @vrsub_vx_nxv16i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i8( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -254,9 +220,7 @@ define @vrsub_vi_nxv16i8( %va, poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i8( splat (i8 2), %va, %m, i32 %evl) ret %v } @@ -266,11 +230,7 @@ define @vrsub_vi_nxv16i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i8( splat (i8 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -296,9 +256,7 @@ define @vrsub_vx_nxv32i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv32i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv32i8( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -308,9 +266,7 @@ define @vrsub_vi_nxv32i8( %va, poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv32i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv32i8( splat (i8 2), %va, %m, i32 %evl) ret %v } @@ -320,11 +276,7 @@ define @vrsub_vi_nxv32i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv32i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv32i8( splat (i8 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -350,9 +302,7 @@ define @vrsub_vx_nxv64i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv64i8( %vb, %va, %m, i32 %evl) + %v = call 
@llvm.vp.sub.nxv64i8( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -362,9 +312,7 @@ define @vrsub_vi_nxv64i8( %va, poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv64i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv64i8( splat (i8 2), %va, %m, i32 %evl) ret %v } @@ -374,11 +322,7 @@ define @vrsub_vi_nxv64i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv64i8( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv64i8( splat (i8 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -404,9 +348,7 @@ define @vrsub_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -416,9 +358,7 @@ define @vrsub_vi_nxv1i16( %va, poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i16( splat (i16 2), %va, %m, i32 %evl) ret %v } @@ -428,11 +368,7 @@ define @vrsub_vi_nxv1i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i16( splat (i16 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -458,9 +394,7 @@ define @vrsub_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -470,9 +404,7 @@ define @vrsub_vi_nxv2i16( %va, poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i16( splat (i16 2), %va, %m, i32 %evl) ret %v } @@ -482,11 +414,7 @@ define @vrsub_vi_nxv2i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i16( splat (i16 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -512,9 +440,7 @@ define @vrsub_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.sub.nxv4i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -524,9 +450,7 @@ define @vrsub_vi_nxv4i16( %va, poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i16( splat (i16 2), %va, %m, i32 %evl) ret %v } @@ -536,11 +460,7 @@ define @vrsub_vi_nxv4i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i16( splat (i16 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -566,9 +486,7 @@ define @vrsub_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -578,9 +496,7 @@ define @vrsub_vi_nxv8i16( %va, poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i16( splat (i16 2), %va, %m, i32 %evl) ret %v } @@ -590,11 +506,7 @@ define @vrsub_vi_nxv8i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i16( splat (i16 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -620,9 +532,7 @@ define @vrsub_vx_nxv16i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -632,9 +542,7 @@ define @vrsub_vi_nxv16i16( %va, poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i16( splat (i16 2), %va, %m, i32 %evl) ret %v } @@ -644,11 +552,7 @@ define @vrsub_vi_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i16( splat (i16 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -674,9 +578,7 @@ define @vrsub_vx_nxv32i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = 
shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv32i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv32i16( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -686,9 +588,7 @@ define @vrsub_vi_nxv32i16( %va, poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv32i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv32i16( splat (i16 2), %va, %m, i32 %evl) ret %v } @@ -698,11 +598,7 @@ define @vrsub_vi_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv32i16( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv32i16( splat (i16 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -728,9 +624,7 @@ define @vrsub_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -740,9 +634,7 @@ define @vrsub_vi_nxv1i32( %va, poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i32( splat (i32 2), %va, %m, i32 %evl) ret %v } @@ -752,11 +644,7 @@ define @vrsub_vi_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i32( splat (i32 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -782,9 +670,7 @@ define @vrsub_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -794,9 +680,7 @@ define @vrsub_vi_nxv2i32( %va, poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i32( splat (i32 2), %va, %m, i32 %evl) ret %v } @@ -806,11 +690,7 @@ define @vrsub_vi_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i32( splat (i32 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -836,9 +716,7 @@ define @vrsub_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, 
zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -848,9 +726,7 @@ define @vrsub_vi_nxv4i32( %va, poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i32( splat (i32 2), %va, %m, i32 %evl) ret %v } @@ -860,11 +736,7 @@ define @vrsub_vi_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i32( splat (i32 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -890,9 +762,7 @@ define @vrsub_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -902,9 +772,7 @@ define @vrsub_vi_nxv8i32( %va, poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i32( splat (i32 2), %va, %m, i32 %evl) ret %v } @@ -914,11 +782,7 @@ define @vrsub_vi_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i32( splat (i32 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -944,9 +808,7 @@ define @vrsub_vx_nxv16i32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i32( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -956,9 +818,7 @@ define @vrsub_vi_nxv16i32( %va, poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i32( splat (i32 2), %va, %m, i32 %evl) ret %v } @@ -968,11 +828,7 @@ define @vrsub_vi_nxv16i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i32( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i32( splat (i32 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -1026,9 +882,7 @@ define @vrsub_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement 
poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i64( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -1038,9 +892,7 @@ define @vrsub_vi_nxv1i64( %va, poison, i64 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i64( splat (i64 2), %va, %m, i32 %evl) ret %v } @@ -1050,11 +902,7 @@ define @vrsub_vi_nxv1i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i64( splat (i64 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -1108,9 +956,7 @@ define @vrsub_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i64( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -1120,9 +966,7 @@ define @vrsub_vi_nxv2i64( %va, poison, i64 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i64( splat (i64 2), %va, %m, i32 %evl) ret %v } @@ -1132,11 +976,7 @@ define @vrsub_vi_nxv2i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i64( splat (i64 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -1190,9 +1030,7 @@ define @vrsub_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i64( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -1202,9 +1040,7 @@ define @vrsub_vi_nxv4i64( %va, poison, i64 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i64( splat (i64 2), %va, %m, i32 %evl) ret %v } @@ -1214,11 +1050,7 @@ define @vrsub_vi_nxv4i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i64( splat (i64 2), %va, splat (i1 true), i32 %evl) ret %v } @@ -1272,9 +1104,7 @@ define 
@vrsub_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i64( %vb, %va, splat (i1 true), i32 %evl) ret %v } @@ -1284,9 +1114,7 @@ define @vrsub_vi_nxv8i64( %va, poison, i64 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i64( splat (i64 2), %va, %m, i32 %evl) ret %v } @@ -1296,10 +1124,6 @@ define @vrsub_vi_nxv8i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrsub.vi v8, v8, 2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i64( %vb, %va, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i64( splat (i64 2), %va, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll index 38edd19096f84..6a8b801254057 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll @@ -34,9 +34,7 @@ define @sadd_nxv1i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv1i8( %va, %vb) + %v = call @llvm.sadd.sat.nxv1i8( %va, splat (i8 5)) ret %v } @@ -70,9 +68,7 @@ define @sadd_nxv2i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv2i8( %va, %vb) + %v = call @llvm.sadd.sat.nxv2i8( %va, splat (i8 5)) ret %v } @@ -106,9 +102,7 @@ define @sadd_nxv4i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv4i8( %va, %vb) + %v = call @llvm.sadd.sat.nxv4i8( %va, splat (i8 5)) ret %v } @@ -142,9 +136,7 @@ define @sadd_nxv8i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv8i8( %va, %vb) + %v = call @llvm.sadd.sat.nxv8i8( %va, splat (i8 5)) ret %v } @@ -178,9 +170,7 @@ define @sadd_nxv16i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv16i8( %va, %vb) + %v = call @llvm.sadd.sat.nxv16i8( %va, splat (i8 5)) ret %v } @@ -214,9 +204,7 @@ define @sadd_nxv32i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv32i8( %va, %vb) + %v = call 
@llvm.sadd.sat.nxv32i8( %va, splat (i8 5)) ret %v } @@ -250,9 +238,7 @@ define @sadd_nxv64i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv64i8( %va, %vb) + %v = call @llvm.sadd.sat.nxv64i8( %va, splat (i8 5)) ret %v } @@ -286,9 +272,7 @@ define @sadd_nxv1i16_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv1i16( %va, %vb) + %v = call @llvm.sadd.sat.nxv1i16( %va, splat (i16 5)) ret %v } @@ -322,9 +306,7 @@ define @sadd_nxv2i16_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv2i16( %va, %vb) + %v = call @llvm.sadd.sat.nxv2i16( %va, splat (i16 5)) ret %v } @@ -358,9 +340,7 @@ define @sadd_nxv4i16_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv4i16( %va, %vb) + %v = call @llvm.sadd.sat.nxv4i16( %va, splat (i16 5)) ret %v } @@ -394,9 +374,7 @@ define @sadd_nxv8i16_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv8i16( %va, %vb) + %v = call @llvm.sadd.sat.nxv8i16( %va, splat (i16 5)) ret %v } @@ -430,9 +408,7 @@ define @sadd_nxv16i16_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv16i16( %va, %vb) + %v = call @llvm.sadd.sat.nxv16i16( %va, splat (i16 5)) ret %v } @@ -466,9 +442,7 @@ define @sadd_nxv32i16_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv32i16( %va, %vb) + %v = call @llvm.sadd.sat.nxv32i16( %va, splat (i16 5)) ret %v } @@ -502,9 +476,7 @@ define @sadd_nxv1i32_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv1i32( %va, %vb) + %v = call @llvm.sadd.sat.nxv1i32( %va, splat (i32 5)) ret %v } @@ -538,9 +510,7 @@ define @sadd_nxv2i32_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv2i32( %va, %vb) + %v = call @llvm.sadd.sat.nxv2i32( %va, splat (i32 5)) ret %v } @@ -574,9 +544,7 @@ define @sadd_nxv4i32_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement 
poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv4i32( %va, %vb) + %v = call @llvm.sadd.sat.nxv4i32( %va, splat (i32 5)) ret %v } @@ -610,9 +578,7 @@ define @sadd_nxv8i32_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv8i32( %va, %vb) + %v = call @llvm.sadd.sat.nxv8i32( %va, splat (i32 5)) ret %v } @@ -646,9 +612,7 @@ define @sadd_nxv16i32_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv16i32( %va, %vb) + %v = call @llvm.sadd.sat.nxv16i32( %va, splat (i32 5)) ret %v } @@ -695,9 +659,7 @@ define @sadd_nxv1i64_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv1i64( %va, %vb) + %v = call @llvm.sadd.sat.nxv1i64( %va, splat (i64 5)) ret %v } @@ -744,9 +706,7 @@ define @sadd_nxv2i64_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv2i64( %va, %vb) + %v = call @llvm.sadd.sat.nxv2i64( %va, splat (i64 5)) ret %v } @@ -793,9 +753,7 @@ define @sadd_nxv4i64_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv4i64( %va, %vb) + %v = call @llvm.sadd.sat.nxv4i64( %va, splat (i64 5)) ret %v } @@ -842,8 +800,6 @@ define @sadd_nxv8i64_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.sadd.sat.nxv8i64( %va, %vb) + %v = call @llvm.sadd.sat.nxv8i64( %va, splat (i64 5)) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll index caaeae55ed78e..f9ea5143cfcb7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll @@ -43,9 +43,7 @@ define @vsadd_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -81,9 +79,7 @@ define @vsadd_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -93,9 +89,7 @@ define @vsadd_vi_nxv1i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call 
@llvm.vp.sadd.sat.nxv1i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -105,11 +99,7 @@ define @vsadd_vi_nxv1i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -131,9 +121,7 @@ define @vsadd_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -157,9 +145,7 @@ define @vsadd_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -169,9 +155,7 @@ define @vsadd_vi_nxv2i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -181,11 +165,7 @@ define @vsadd_vi_nxv2i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -207,9 +187,7 @@ define @vsadd_vv_nxv3i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv3i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv3i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -233,9 +211,7 @@ define @vsadd_vx_nxv3i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv3i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -245,9 +221,7 @@ define @vsadd_vi_nxv3i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv3i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -257,11 +231,7 @@ define @vsadd_vi_nxv3i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call 
@llvm.vp.sadd.sat.nxv3i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -283,9 +253,7 @@ define @vsadd_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -309,9 +277,7 @@ define @vsadd_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -321,9 +287,7 @@ define @vsadd_vi_nxv4i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -333,11 +297,7 @@ define @vsadd_vi_nxv4i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -359,9 +319,7 @@ define @vsadd_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -385,9 +343,7 @@ define @vsadd_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -397,9 +353,7 @@ define @vsadd_vi_nxv8i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -409,11 +363,7 @@ define @vsadd_vi_nxv8i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -435,9 +385,7 @@ define @vsadd_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -461,9 +409,7 @@ define @vsadd_vx_nxv16i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, 
zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -473,9 +419,7 @@ define @vsadd_vi_nxv16i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv16i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -485,11 +429,7 @@ define @vsadd_vi_nxv16i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv16i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -511,9 +451,7 @@ define @vsadd_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -537,9 +475,7 @@ define @vsadd_vx_nxv32i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -549,9 +485,7 @@ define @vsadd_vi_nxv32i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv32i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -561,11 +495,7 @@ define @vsadd_vi_nxv32i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv32i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -587,9 +517,7 @@ define @vsadd_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -613,9 +541,7 @@ define @vsadd_vx_nxv64i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -625,9 +551,7 @@ define @vsadd_vi_nxv64i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv64i8( %va, splat (i8 -1), %m, i32 %evl) 
ret %v } @@ -637,11 +561,7 @@ define @vsadd_vi_nxv64i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv64i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -671,9 +591,7 @@ define @vsadd_vi_nxv128i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -695,11 +613,7 @@ define @vsadd_vi_nxv128i8_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv128i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -721,9 +635,7 @@ define @vsadd_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -747,9 +659,7 @@ define @vsadd_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -759,9 +669,7 @@ define @vsadd_vi_nxv1i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -771,11 +679,7 @@ define @vsadd_vi_nxv1i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -797,9 +701,7 @@ define @vsadd_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -823,9 +725,7 @@ define @vsadd_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i16( %va, %vb, splat (i1 true), 
i32 %evl) ret %v } @@ -835,9 +735,7 @@ define @vsadd_vi_nxv2i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -847,11 +745,7 @@ define @vsadd_vi_nxv2i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -873,9 +767,7 @@ define @vsadd_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -899,9 +791,7 @@ define @vsadd_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -911,9 +801,7 @@ define @vsadd_vi_nxv4i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -923,11 +811,7 @@ define @vsadd_vi_nxv4i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -949,9 +833,7 @@ define @vsadd_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -975,9 +857,7 @@ define @vsadd_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -987,9 +867,7 @@ define @vsadd_vi_nxv8i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -999,11 +877,7 @@ define @vsadd_vi_nxv8i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb 
= shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1025,9 +899,7 @@ define @vsadd_vv_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1051,9 +923,7 @@ define @vsadd_vx_nxv16i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1063,9 +933,7 @@ define @vsadd_vi_nxv16i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv16i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1075,11 +943,7 @@ define @vsadd_vi_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv16i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1101,9 +965,7 @@ define @vsadd_vv_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1127,9 +989,7 @@ define @vsadd_vx_nxv32i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1139,9 +999,7 @@ define @vsadd_vi_nxv32i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv32i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1151,11 +1009,7 @@ define @vsadd_vi_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call 
@llvm.vp.sadd.sat.nxv32i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1177,9 +1031,7 @@ define @vsadd_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1203,9 +1055,7 @@ define @vsadd_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1215,9 +1065,7 @@ define @vsadd_vi_nxv1i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1227,11 +1075,7 @@ define @vsadd_vi_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1253,9 +1097,7 @@ define @vsadd_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1279,9 +1121,7 @@ define @vsadd_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1291,9 +1131,7 @@ define @vsadd_vi_nxv2i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1303,11 +1141,7 @@ define @vsadd_vi_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1329,9 +1163,7 @@ define @vsadd_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1355,9 +1187,7 @@ define @vsadd_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 
%vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1367,9 +1197,7 @@ define @vsadd_vi_nxv4i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1379,11 +1207,7 @@ define @vsadd_vi_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1405,9 +1229,7 @@ define @vsadd_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1431,9 +1253,7 @@ define @vsadd_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1443,9 +1263,7 @@ define @vsadd_vi_nxv8i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1455,11 +1273,7 @@ define @vsadd_vi_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1481,9 +1295,7 @@ define @vsadd_vv_nxv16i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsadd.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1507,9 +1319,7 @@ define @vsadd_vx_nxv16i32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1519,9 +1329,7 @@ define @vsadd_vi_nxv16i32( %va, poison, i32 -1, 
i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv16i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1531,11 +1339,7 @@ define @vsadd_vi_nxv16i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv16i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1566,9 +1370,7 @@ define @vsadd_vi_nxv32i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1590,11 +1392,7 @@ define @vsadd_vi_nxv32i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv32i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1616,9 +1414,7 @@ define @vsadd_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1670,9 +1466,7 @@ define @vsadd_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1682,9 +1476,7 @@ define @vsadd_vi_nxv1i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1694,11 +1486,7 @@ define @vsadd_vi_nxv1i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv1i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1720,9 +1508,7 @@ define @vsadd_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1774,9 +1560,7 @@ define @vsadd_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - 
%head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1786,9 +1570,7 @@ define @vsadd_vi_nxv2i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1798,11 +1580,7 @@ define @vsadd_vi_nxv2i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv2i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1824,9 +1602,7 @@ define @vsadd_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1878,9 +1654,7 @@ define @vsadd_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1890,9 +1664,7 @@ define @vsadd_vi_nxv4i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1902,11 +1674,7 @@ define @vsadd_vi_nxv4i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv4i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1928,9 +1696,7 @@ define @vsadd_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1982,9 +1748,7 @@ define @vsadd_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1994,9 +1758,7 @@ define @vsadd_vi_nxv8i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i64( %va, splat (i64 -1), 
%m, i32 %evl) ret %v } @@ -2006,10 +1768,6 @@ define @vsadd_vi_nxv8i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsadd.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sadd.sat.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sadd.sat.nxv8i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll index ac520f8911208..4fe765c34ba6c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll @@ -34,9 +34,7 @@ define @uadd_nxv1i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv1i8( %va, %vb) + %v = call @llvm.uadd.sat.nxv1i8( %va, splat (i8 8)) ret %v } @@ -70,9 +68,7 @@ define @uadd_nxv2i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv2i8( %va, %vb) + %v = call @llvm.uadd.sat.nxv2i8( %va, splat (i8 8)) ret %v } @@ -106,9 +102,7 @@ define @uadd_nxv4i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv4i8( %va, %vb) + %v = call @llvm.uadd.sat.nxv4i8( %va, splat (i8 8)) ret %v } @@ -142,9 +136,7 @@ define @uadd_nxv8i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv8i8( %va, %vb) + %v = call @llvm.uadd.sat.nxv8i8( %va, splat (i8 8)) ret %v } @@ -178,9 +170,7 @@ define @uadd_nxv16i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv16i8( %va, %vb) + %v = call @llvm.uadd.sat.nxv16i8( %va, splat (i8 8)) ret %v } @@ -214,9 +204,7 @@ define @uadd_nxv32i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv32i8( %va, %vb) + %v = call @llvm.uadd.sat.nxv32i8( %va, splat (i8 8)) ret %v } @@ -250,9 +238,7 @@ define @uadd_nxv64i8_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv64i8( %va, %vb) + %v = call @llvm.uadd.sat.nxv64i8( %va, splat (i8 8)) ret %v } @@ -286,9 +272,7 @@ define @uadd_nxv1i16_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 8, i32 0 
- %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv1i16( %va, %vb) + %v = call @llvm.uadd.sat.nxv1i16( %va, splat (i16 8)) ret %v } @@ -322,9 +306,7 @@ define @uadd_nxv2i16_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv2i16( %va, %vb) + %v = call @llvm.uadd.sat.nxv2i16( %va, splat (i16 8)) ret %v } @@ -358,9 +340,7 @@ define @uadd_nxv4i16_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv4i16( %va, %vb) + %v = call @llvm.uadd.sat.nxv4i16( %va, splat (i16 8)) ret %v } @@ -394,9 +374,7 @@ define @uadd_nxv8i16_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv8i16( %va, %vb) + %v = call @llvm.uadd.sat.nxv8i16( %va, splat (i16 8)) ret %v } @@ -430,9 +408,7 @@ define @uadd_nxv16i16_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv16i16( %va, %vb) + %v = call @llvm.uadd.sat.nxv16i16( %va, splat (i16 8)) ret %v } @@ -466,9 +442,7 @@ define @uadd_nxv32i16_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv32i16( %va, %vb) + %v = call @llvm.uadd.sat.nxv32i16( %va, splat (i16 8)) ret %v } @@ -502,9 +476,7 @@ define @uadd_nxv1i32_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv1i32( %va, %vb) + %v = call @llvm.uadd.sat.nxv1i32( %va, splat (i32 8)) ret %v } @@ -538,9 +510,7 @@ define @uadd_nxv2i32_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv2i32( %va, %vb) + %v = call @llvm.uadd.sat.nxv2i32( %va, splat (i32 8)) ret %v } @@ -574,9 +544,7 @@ define @uadd_nxv4i32_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv4i32( %va, %vb) + %v = call @llvm.uadd.sat.nxv4i32( %va, splat (i32 8)) ret %v } @@ -610,9 +578,7 @@ define @uadd_nxv8i32_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv8i32( %va, %vb) + %v = call @llvm.uadd.sat.nxv8i32( %va, splat (i32 8)) ret %v } @@ -646,9 +612,7 @@ define @uadd_nxv16i32_vi( %va) { ; 
CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv16i32( %va, %vb) + %v = call @llvm.uadd.sat.nxv16i32( %va, splat (i32 8)) ret %v } @@ -695,9 +659,7 @@ define @uadd_nxv1i64_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv1i64( %va, %vb) + %v = call @llvm.uadd.sat.nxv1i64( %va, splat (i64 8)) ret %v } @@ -744,9 +706,7 @@ define @uadd_nxv2i64_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv2i64( %va, %vb) + %v = call @llvm.uadd.sat.nxv2i64( %va, splat (i64 8)) ret %v } @@ -793,9 +753,7 @@ define @uadd_nxv4i64_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv4i64( %va, %vb) + %v = call @llvm.uadd.sat.nxv4i64( %va, splat (i64 8)) ret %v } @@ -842,8 +800,6 @@ define @uadd_nxv8i64_vi( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, 8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 8, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.uadd.sat.nxv8i64( %va, %vb) + %v = call @llvm.uadd.sat.nxv8i64( %va, splat (i64 8)) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll index c0779e508c0a9..745b93b257085 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll @@ -42,9 +42,7 @@ define @vsaddu_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -80,9 +78,7 @@ define @vsaddu_vx_nxv1i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -92,9 +88,7 @@ define @vsaddu_vi_nxv1i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -104,11 +98,7 @@ define @vsaddu_vi_nxv1i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -130,9 +120,7 @@ 
define @vsaddu_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -156,9 +144,7 @@ define @vsaddu_vx_nxv2i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -168,9 +154,7 @@ define @vsaddu_vi_nxv2i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -180,11 +164,7 @@ define @vsaddu_vi_nxv2i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -206,9 +186,7 @@ define @vsaddu_vv_nxv3i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv3i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv3i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -232,9 +210,7 @@ define @vsaddu_vx_nxv3i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv3i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -244,9 +220,7 @@ define @vsaddu_vi_nxv3i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv3i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -256,11 +230,7 @@ define @vsaddu_vi_nxv3i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv3i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -282,9 +252,7 @@ define @vsaddu_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -308,9 +276,7 @@ define @vsaddu_vx_nxv4i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, 
zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -320,9 +286,7 @@ define @vsaddu_vi_nxv4i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -332,11 +296,7 @@ define @vsaddu_vi_nxv4i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -358,9 +318,7 @@ define @vsaddu_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -384,9 +342,7 @@ define @vsaddu_vx_nxv8i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -396,9 +352,7 @@ define @vsaddu_vi_nxv8i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -408,11 +362,7 @@ define @vsaddu_vi_nxv8i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -434,9 +384,7 @@ define @vsaddu_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -460,9 +408,7 @@ define @vsaddu_vx_nxv16i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -472,9 +418,7 @@ define @vsaddu_vi_nxv16i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv16i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -484,11 +428,7 @@ define @vsaddu_vi_nxv16i8_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, 
a0, e8, m2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv16i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -510,9 +450,7 @@ define @vsaddu_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -536,9 +474,7 @@ define @vsaddu_vx_nxv32i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -548,9 +484,7 @@ define @vsaddu_vi_nxv32i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv32i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -560,11 +494,7 @@ define @vsaddu_vi_nxv32i8_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv32i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -586,9 +516,7 @@ define @vsaddu_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -612,9 +540,7 @@ define @vsaddu_vx_nxv64i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -624,9 +550,7 @@ define @vsaddu_vi_nxv64i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv64i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -636,11 +560,7 @@ define @vsaddu_vi_nxv64i8_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv64i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -670,9 +590,7 @@ define @vsaddu_vi_nxv128i8( %va, poison, i8 -1, i32 0 - %vb = 
shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -694,11 +612,7 @@ define @vsaddu_vi_nxv128i8_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv128i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -720,9 +634,7 @@ define @vsaddu_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -746,9 +658,7 @@ define @vsaddu_vx_nxv1i16_unmasked( %va, i1 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -758,9 +668,7 @@ define @vsaddu_vi_nxv1i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -770,11 +678,7 @@ define @vsaddu_vi_nxv1i16_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -796,9 +700,7 @@ define @vsaddu_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -822,9 +724,7 @@ define @vsaddu_vx_nxv2i16_unmasked( %va, i1 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -834,9 +734,7 @@ define @vsaddu_vi_nxv2i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -846,11 +744,7 @@ define @vsaddu_vi_nxv2i16_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, 
i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -872,9 +766,7 @@ define @vsaddu_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -898,9 +790,7 @@ define @vsaddu_vx_nxv4i16_unmasked( %va, i1 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -910,9 +800,7 @@ define @vsaddu_vi_nxv4i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -922,11 +810,7 @@ define @vsaddu_vi_nxv4i16_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -948,9 +832,7 @@ define @vsaddu_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -974,9 +856,7 @@ define @vsaddu_vx_nxv8i16_unmasked( %va, i1 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -986,9 +866,7 @@ define @vsaddu_vi_nxv8i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -998,11 +876,7 @@ define @vsaddu_vi_nxv8i16_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1024,9 +898,7 @@ define @vsaddu_vv_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vsaddu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.uadd.sat.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1050,9 +922,7 @@ define @vsaddu_vx_nxv16i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1062,9 +932,7 @@ define @vsaddu_vi_nxv16i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv16i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1074,11 +942,7 @@ define @vsaddu_vi_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv16i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1100,9 +964,7 @@ define @vsaddu_vv_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsaddu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1126,9 +988,7 @@ define @vsaddu_vx_nxv32i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1138,9 +998,7 @@ define @vsaddu_vi_nxv32i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv32i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1150,11 +1008,7 @@ define @vsaddu_vi_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv32i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1176,9 +1030,7 @@ define @vsaddu_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1202,9 +1054,7 @@ define @vsaddu_vx_nxv1i32_unmasked( %va, i3 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, 
i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1214,9 +1064,7 @@ define @vsaddu_vi_nxv1i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1226,11 +1074,7 @@ define @vsaddu_vi_nxv1i32_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1252,9 +1096,7 @@ define @vsaddu_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1278,9 +1120,7 @@ define @vsaddu_vx_nxv2i32_unmasked( %va, i3 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1290,9 +1130,7 @@ define @vsaddu_vi_nxv2i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1302,11 +1140,7 @@ define @vsaddu_vi_nxv2i32_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1328,9 +1162,7 @@ define @vsaddu_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1354,9 +1186,7 @@ define @vsaddu_vx_nxv4i32_unmasked( %va, i3 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1366,9 +1196,7 @@ define @vsaddu_vi_nxv4i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ 
-1378,11 +1206,7 @@ define @vsaddu_vi_nxv4i32_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1404,9 +1228,7 @@ define @vsaddu_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1430,9 +1252,7 @@ define @vsaddu_vx_nxv8i32_unmasked( %va, i3 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1442,9 +1262,7 @@ define @vsaddu_vi_nxv8i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1454,11 +1272,7 @@ define @vsaddu_vi_nxv8i32_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1480,9 +1294,7 @@ define @vsaddu_vv_nxv16i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsaddu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1506,9 +1318,7 @@ define @vsaddu_vx_nxv16i32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1518,9 +1328,7 @@ define @vsaddu_vi_nxv16i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv16i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1530,11 +1338,7 @@ define @vsaddu_vi_nxv16i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, 
zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv16i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1565,9 +1369,7 @@ define @vsaddu_vi_nxv32i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1589,11 +1391,7 @@ define @vsaddu_vi_nxv32i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv32i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1615,9 +1413,7 @@ define @vsaddu_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1669,9 +1465,7 @@ define @vsaddu_vx_nxv1i64_unmasked( %va, i6 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1681,9 +1475,7 @@ define @vsaddu_vi_nxv1i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1693,11 +1485,7 @@ define @vsaddu_vi_nxv1i64_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv1i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1719,9 +1507,7 @@ define @vsaddu_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1773,9 +1559,7 @@ define @vsaddu_vx_nxv2i64_unmasked( %va, i6 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1785,9 +1569,7 @@ define @vsaddu_vi_nxv2i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1797,11 +1579,7 @@ define 
@vsaddu_vi_nxv2i64_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv2i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1823,9 +1601,7 @@ define @vsaddu_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1877,9 +1653,7 @@ define @vsaddu_vx_nxv4i64_unmasked( %va, i6 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1889,9 +1663,7 @@ define @vsaddu_vi_nxv4i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1901,11 +1673,7 @@ define @vsaddu_vi_nxv4i64_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv4i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1927,9 +1695,7 @@ define @vsaddu_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1981,9 +1747,7 @@ define @vsaddu_vx_nxv8i64_unmasked( %va, i6 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1993,9 +1757,7 @@ define @vsaddu_vi_nxv8i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -2005,10 +1767,6 @@ define @vsaddu_vi_nxv8i64_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v8, v8, -1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.uadd.sat.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.uadd.sat.nxv8i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) 
ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll index 17c362fc0a1ae..4457c1002acc7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll @@ -138,9 +138,7 @@ define @vfmerge_zv_nxv8f16( %va, poison, half zeroinitializer, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (half zeroinitializer), %va ret %vc } @@ -148,9 +146,7 @@ define @vmerge_truelhs_nxv8f16_0( %va, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - %vc = select %mtrue, %va, %vb + %vc = select splat (i1 1), %va, %vb ret %vc } @@ -322,9 +318,7 @@ define @vfmerge_zv_nxv8f32( %va, poison, float zeroinitializer, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (float zeroinitializer), %va ret %vc } @@ -444,9 +438,7 @@ define @vfmerge_zv_nxv8f64( %va, poison, double zeroinitializer, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (double zeroinitializer), %va ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll index 19c7d599cb068..2715ec78bd794 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-int.ll @@ -32,9 +32,7 @@ define @vmerge_iv_nxv1i8( %va, poison, i8 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i8 3), %va ret %vc } @@ -66,9 +64,7 @@ define @vmerge_iv_nxv2i8( %va, poison, i8 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i8 3), %va ret %vc } @@ -100,9 +96,7 @@ define @vmerge_iv_nxv3i8( %va, poison, i8 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i8 3), %va ret %vc } @@ -134,9 +128,7 @@ define @vmerge_iv_nxv4i8( %va, poison, i8 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i8 3), %va ret %vc } @@ -168,9 +160,7 @@ define @vmerge_iv_nxv8i8( %va, poison, i8 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i8 3), %va ret %vc } @@ -202,9 +192,7 @@ define @vmerge_iv_nxv16i8( %va, poison, i8 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i8 3), %va ret %vc } @@ -236,9 +224,7 @@ define @vmerge_iv_nxv32i8( %va, poison, i8 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i8 3), %va ret %vc } @@ -270,9 +256,7 @@ define @vmerge_iv_nxv64i8( %va, poison, i8 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i8 3), %va ret %vc } @@ -304,9 +288,7 @@ define @vmerge_iv_nxv1i16( %va, poison, i16 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i16 3), %va ret %vc } @@ -338,9 +320,7 @@ define @vmerge_iv_nxv2i16( %va, poison, i16 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i16 3), %va ret 
%vc } @@ -372,9 +352,7 @@ define @vmerge_iv_nxv4i16( %va, poison, i16 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i16 3), %va ret %vc } @@ -406,9 +384,7 @@ define @vmerge_iv_nxv8i16( %va, poison, i16 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i16 3), %va ret %vc } @@ -440,9 +416,7 @@ define @vmerge_iv_nxv16i16( %va, poison, i16 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i16 3), %va ret %vc } @@ -474,9 +448,7 @@ define @vmerge_iv_nxv32i16( %va, poison, i16 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i16 3), %va ret %vc } @@ -508,9 +480,7 @@ define @vmerge_iv_nxv1i32( %va, poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i32 3), %va ret %vc } @@ -542,9 +512,7 @@ define @vmerge_iv_nxv2i32( %va, poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i32 3), %va ret %vc } @@ -576,9 +544,7 @@ define @vmerge_iv_nxv4i32( %va, poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i32 3), %va ret %vc } @@ -610,9 +576,7 @@ define @vmerge_iv_nxv8i32( %va, poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i32 3), %va ret %vc } @@ -644,9 +608,7 @@ define @vmerge_iv_nxv16i32( %va, poison, i32 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i32 3), %va ret %vc } @@ -690,9 +652,7 @@ define @vmerge_iv_nxv1i64( %va, poison, i64 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i64 3), %va ret %vc } @@ -736,9 +696,7 @@ define @vmerge_iv_nxv2i64( %va, poison, i64 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i64 3), %va ret %vc } @@ -782,9 +740,7 @@ define @vmerge_iv_nxv4i64( %va, poison, i64 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i64 3), %va ret %vc } @@ -828,9 +784,7 @@ define @vmerge_iv_nxv8i64( %va, poison, i64 3, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va + %vc = select %cond, splat (i64 3), %va ret %vc } @@ -838,9 +792,7 @@ define @vmerge_truelhs_nxv8i64_0( %va, poison, i1 1, i32 0 - %mtrue = shufflevector %mhead, poison, zeroinitializer - %vc = select %mtrue, %va, %vb + %vc = select splat (i1 1), %va, %vb ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll index 93f43f4578ddb..5b101cf673541 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll @@ -20,9 +20,7 @@ define @vshl_vx_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i8 6) ret 
%vc } @@ -44,9 +42,7 @@ define @vshl_vx_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i8 6) ret %vc } @@ -68,9 +64,7 @@ define @vshl_vx_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i8 6) ret %vc } @@ -92,9 +86,7 @@ define @vshl_vx_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i8 6) ret %vc } @@ -116,9 +108,7 @@ define @vshl_vx_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i8 6) ret %vc } @@ -140,9 +130,7 @@ define @vshl_vx_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i8 6) ret %vc } @@ -164,9 +152,7 @@ define @vshl_vx_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i8 6) ret %vc } @@ -188,9 +174,7 @@ define @vshl_vx_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i16 6) ret %vc } @@ -212,9 +196,7 @@ define @vshl_vx_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i16 6) ret %vc } @@ -236,9 +218,7 @@ define @vshl_vx_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i16 6) ret %vc } @@ -260,9 +240,7 @@ define @vshl_vx_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i16 6) ret %vc } @@ -284,9 +262,7 @@ define @vshl_vx_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i16 6) ret %vc } @@ -308,9 +284,7 @@ define @vshl_vx_nxv32i16_0( %va) { ; CHECK-NEXT: 
vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i16 6) ret %vc } @@ -332,9 +306,7 @@ define @vshl_vx_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i32 31) ret %vc } @@ -356,9 +328,7 @@ define @vshl_vx_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i32 31) ret %vc } @@ -380,9 +350,7 @@ define @vshl_vx_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i32 31) ret %vc } @@ -404,9 +372,7 @@ define @vshl_vx_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i32 31) ret %vc } @@ -428,9 +394,7 @@ define @vshl_vx_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i32 31) ret %vc } @@ -452,9 +416,7 @@ define @vshl_vx_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i64 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i64 31) ret %vc } @@ -465,9 +427,7 @@ define @vshl_vx_nxv1i64_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vsll.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i64 32) ret %vc } @@ -477,9 +437,7 @@ define @vshl_vx_nxv1i64_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i64 1) ret %vc } @@ -501,9 +459,7 @@ define @vshl_vx_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i64 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i64 31) ret %vc } @@ -514,9 +470,7 @@ define @vshl_vx_nxv2i64_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vsll.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i64 32) ret %vc } @@ -526,9 +480,7 @@ define @vshl_vx_nxv2i64_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, 
ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i64 1) ret %vc } @@ -550,9 +502,7 @@ define @vshl_vx_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i64 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i64 31) ret %vc } @@ -563,9 +513,7 @@ define @vshl_vx_nxv4i64_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vsll.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i64 32) ret %vc } @@ -575,9 +523,7 @@ define @vshl_vx_nxv4i64_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i64 1) ret %vc } @@ -599,9 +545,7 @@ define @vshl_vx_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i64 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i64 31) ret %vc } @@ -612,9 +556,7 @@ define @vshl_vx_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vsll.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i64 32) ret %vc } @@ -624,9 +566,7 @@ define @vshl_vx_nxv8i64_2( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = shl %va, %splat + %vc = shl %va, splat (i64 1) ret %vc } @@ -660,9 +600,7 @@ define @vshl_vi_mask_nxv8i32( %va, poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %splat, zeroinitializer + %vs = select %mask, splat (i32 31), zeroinitializer %vc = shl %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll index 9ab5218e897e7..c04d5ea2da3c1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll @@ -40,9 +40,7 @@ define @vsll_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -66,9 +64,7 @@ define @vsll_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -78,9 +74,7 @@ define @vsll_vi_nxv1i8( %va, poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i8( %va, splat (i8 3), %m, 
i32 %evl) ret %v } @@ -90,11 +84,7 @@ define @vsll_vi_nxv1i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i8( %va, splat (i8 3), splat (i1 true), i32 %evl) ret %v } @@ -116,9 +106,7 @@ define @vsll_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -142,9 +130,7 @@ define @vsll_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -154,9 +140,7 @@ define @vsll_vi_nxv2i8( %va, poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i8( %va, splat (i8 3), %m, i32 %evl) ret %v } @@ -166,11 +150,7 @@ define @vsll_vi_nxv2i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i8( %va, splat (i8 3), splat (i1 true), i32 %evl) ret %v } @@ -192,9 +172,7 @@ define @vsll_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -218,9 +196,7 @@ define @vsll_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -230,9 +206,7 @@ define @vsll_vi_nxv4i8( %va, poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i8( %va, splat (i8 3), %m, i32 %evl) ret %v } @@ -242,11 +216,7 @@ define @vsll_vi_nxv4i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i8( %va, splat (i8 3), splat (i1 true), i32 %evl) ret %v } @@ -280,9 +250,7 @@ define @vsll_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector 
%head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -306,9 +274,7 @@ define @vsll_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -318,9 +284,7 @@ define @vsll_vi_nxv8i8( %va, poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i8( %va, splat (i8 3), %m, i32 %evl) ret %v } @@ -330,11 +294,7 @@ define @vsll_vi_nxv8i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i8( %va, splat (i8 3), splat (i1 true), i32 %evl) ret %v } @@ -356,9 +316,7 @@ define @vsll_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -382,9 +340,7 @@ define @vsll_vx_nxv16i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -394,9 +350,7 @@ define @vsll_vi_nxv16i8( %va, poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv16i8( %va, splat (i8 3), %m, i32 %evl) ret %v } @@ -406,11 +360,7 @@ define @vsll_vi_nxv16i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv16i8( %va, splat (i8 3), splat (i1 true), i32 %evl) ret %v } @@ -432,9 +382,7 @@ define @vsll_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -458,9 +406,7 @@ define @vsll_vx_nxv32i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -470,9 +416,7 @@ define 
@vsll_vi_nxv32i8( %va, poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv32i8( %va, splat (i8 3), %m, i32 %evl) ret %v } @@ -482,11 +426,7 @@ define @vsll_vi_nxv32i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv32i8( %va, splat (i8 3), splat (i1 true), i32 %evl) ret %v } @@ -508,9 +448,7 @@ define @vsll_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -534,9 +472,7 @@ define @vsll_vx_nxv64i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -546,9 +482,7 @@ define @vsll_vi_nxv64i8( %va, poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv64i8( %va, splat (i8 3), %m, i32 %evl) ret %v } @@ -558,11 +492,7 @@ define @vsll_vi_nxv64i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv64i8( %va, splat (i8 3), splat (i1 true), i32 %evl) ret %v } @@ -584,9 +514,7 @@ define @vsll_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -610,9 +538,7 @@ define @vsll_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -622,9 +548,7 @@ define @vsll_vi_nxv1i16( %va, poison, i16 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i16( %va, splat (i16 3), %m, i32 %evl) ret %v } @@ -634,11 +558,7 @@ define @vsll_vi_nxv1i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.shl.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i16( %va, splat (i16 3), splat (i1 true), i32 %evl) ret %v } @@ -660,9 +580,7 @@ define @vsll_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -686,9 +604,7 @@ define @vsll_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -698,9 +614,7 @@ define @vsll_vi_nxv2i16( %va, poison, i16 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i16( %va, splat (i16 3), %m, i32 %evl) ret %v } @@ -710,11 +624,7 @@ define @vsll_vi_nxv2i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i16( %va, splat (i16 3), splat (i1 true), i32 %evl) ret %v } @@ -736,9 +646,7 @@ define @vsll_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -762,9 +670,7 @@ define @vsll_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -774,9 +680,7 @@ define @vsll_vi_nxv4i16( %va, poison, i16 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i16( %va, splat (i16 3), %m, i32 %evl) ret %v } @@ -786,11 +690,7 @@ define @vsll_vi_nxv4i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i16( %va, splat (i16 3), splat (i1 true), i32 %evl) ret %v } @@ -812,9 +712,7 @@ define @vsll_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -838,9 +736,7 @@ define @vsll_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement 
poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -850,9 +746,7 @@ define @vsll_vi_nxv8i16( %va, poison, i16 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i16( %va, splat (i16 3), %m, i32 %evl) ret %v } @@ -862,11 +756,7 @@ define @vsll_vi_nxv8i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i16( %va, splat (i16 3), splat (i1 true), i32 %evl) ret %v } @@ -888,9 +778,7 @@ define @vsll_vv_nxv16i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vsll.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -914,9 +802,7 @@ define @vsll_vx_nxv16i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -926,9 +812,7 @@ define @vsll_vi_nxv16i16( %va, poison, i16 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv16i16( %va, splat (i16 3), %m, i32 %evl) ret %v } @@ -938,11 +822,7 @@ define @vsll_vi_nxv16i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv16i16( %va, splat (i16 3), splat (i1 true), i32 %evl) ret %v } @@ -964,9 +844,7 @@ define @vsll_vv_nxv32i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsll.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -990,9 +868,7 @@ define @vsll_vx_nxv32i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1002,9 +878,7 @@ define @vsll_vi_nxv32i16( %va, poison, i16 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = 
call @llvm.vp.shl.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv32i16( %va, splat (i16 3), %m, i32 %evl) ret %v } @@ -1014,11 +888,7 @@ define @vsll_vi_nxv32i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv32i16( %va, splat (i16 3), splat (i1 true), i32 %evl) ret %v } @@ -1040,9 +910,7 @@ define @vsll_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1066,9 +934,7 @@ define @vsll_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1078,9 +944,7 @@ define @vsll_vi_nxv1i32( %va, poison, i32 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i32( %va, splat (i32 3), %m, i32 %evl) ret %v } @@ -1090,11 +954,7 @@ define @vsll_vi_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i32( %va, splat (i32 3), splat (i1 true), i32 %evl) ret %v } @@ -1116,9 +976,7 @@ define @vsll_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1142,9 +1000,7 @@ define @vsll_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1154,9 +1010,7 @@ define @vsll_vi_nxv2i32( %va, poison, i32 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i32( %va, splat (i32 3), %m, i32 %evl) ret %v } @@ -1166,11 +1020,7 @@ define @vsll_vi_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i32( %va, splat 
(i32 3), splat (i1 true), i32 %evl) ret %v } @@ -1192,9 +1042,7 @@ define @vsll_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1218,9 +1066,7 @@ define @vsll_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1230,9 +1076,7 @@ define @vsll_vi_nxv4i32( %va, poison, i32 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i32( %va, splat (i32 3), %m, i32 %evl) ret %v } @@ -1242,11 +1086,7 @@ define @vsll_vi_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i32( %va, splat (i32 3), splat (i1 true), i32 %evl) ret %v } @@ -1268,9 +1108,7 @@ define @vsll_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1294,9 +1132,7 @@ define @vsll_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1306,9 +1142,7 @@ define @vsll_vi_nxv8i32( %va, poison, i32 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i32( %va, splat (i32 3), %m, i32 %evl) ret %v } @@ -1318,11 +1152,7 @@ define @vsll_vi_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i32( %va, splat (i32 3), splat (i1 true), i32 %evl) ret %v } @@ -1344,9 +1174,7 @@ define @vsll_vv_nxv16i32_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsll.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1370,9 +1198,7 @@ define @vsll_vx_nxv16i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector 
%elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1382,9 +1208,7 @@ define @vsll_vi_nxv16i32( %va, poison, i32 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv16i32( %va, splat (i32 3), %m, i32 %evl) ret %v } @@ -1394,11 +1218,7 @@ define @vsll_vi_nxv16i32_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv16i32( %va, splat (i32 3), splat (i1 true), i32 %evl) ret %v } @@ -1420,9 +1240,7 @@ define @vsll_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1458,9 +1276,7 @@ define @vsll_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1470,9 +1286,7 @@ define @vsll_vi_nxv1i64( %va, poison, i64 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i64( %va, splat (i64 3), %m, i32 %evl) ret %v } @@ -1482,11 +1296,7 @@ define @vsll_vi_nxv1i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv1i64( %va, splat (i64 3), splat (i1 true), i32 %evl) ret %v } @@ -1508,9 +1318,7 @@ define @vsll_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1546,9 +1354,7 @@ define @vsll_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1558,9 +1364,7 @@ define @vsll_vi_nxv2i64( %va, poison, i64 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i64( %va, splat (i64 3), %m, i32 %evl) ret %v } @@ -1570,11 +1374,7 @@ define @vsll_vi_nxv2i64_unmasked( 
%va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv2i64( %va, splat (i64 3), splat (i1 true), i32 %evl) ret %v } @@ -1596,9 +1396,7 @@ define @vsll_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1634,9 +1432,7 @@ define @vsll_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1646,9 +1442,7 @@ define @vsll_vi_nxv4i64( %va, poison, i64 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i64( %va, splat (i64 3), %m, i32 %evl) ret %v } @@ -1658,11 +1452,7 @@ define @vsll_vi_nxv4i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv4i64( %va, splat (i64 3), splat (i1 true), i32 %evl) ret %v } @@ -1684,9 +1474,7 @@ define @vsll_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1722,9 +1510,7 @@ define @vsll_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1734,9 +1520,7 @@ define @vsll_vi_nxv8i64( %va, poison, i64 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i64( %va, splat (i64 3), %m, i32 %evl) ret %v } @@ -1746,10 +1530,6 @@ define @vsll_vi_nxv8i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsll.vi v8, v8, 3 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 3, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.shl.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.shl.nxv8i64( %va, splat (i64 3), splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll index bb0f26046bdb3..707ef8a94432d 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
@@ -25,9 +25,7 @@ define <vscale x 8 x half> @vsplat_zero_nxv8f16() {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x half> poison, half zeroinitializer, i32 0
-  %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x half> %splat
+  ret <vscale x 8 x half> splat (half zeroinitializer)
 }
 
 define <vscale x 8 x float> @vsplat_nxv8f32(float %f) {
@@ -47,9 +45,7 @@ define <vscale x 8 x float> @vsplat_zero_nxv8f32() {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x float> poison, float zeroinitializer, i32 0
-  %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x float> %splat
+  ret <vscale x 8 x float> splat (float zeroinitializer)
 }
 
 define <vscale x 8 x double> @vsplat_nxv8f64(double %f) {
@@ -69,9 +65,7 @@ define <vscale x 8 x double> @vsplat_zero_nxv8f64() {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x double> poison, double zeroinitializer, i32 0
-  %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x double> %splat
+  ret <vscale x 8 x double> splat (double zeroinitializer)
 }
 
 ; Test that we fold this to a vlse with 0 stride.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll
index f462f242f0685..09a713a6826cb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll
@@ -8,9 +8,7 @@ define <vscale x 1 x i1> @vsplat_nxv1i1_0() {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmclr.m v0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i1> poison, i1 0, i32 0
-  %splat = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  ret <vscale x 1 x i1> %splat
+  ret <vscale x 1 x i1> splat (i1 0)
 }
 
 define <vscale x 1 x i1> @vsplat_nxv1i1_1() {
@@ -19,9 +17,7 @@ define <vscale x 1 x i1> @vsplat_nxv1i1_1() {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %splat = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  ret <vscale x 1 x i1> %splat
+  ret <vscale x 1 x i1> splat (i1 -1)
 }
 
 define <vscale x 1 x i1> @vsplat_nxv1i1_2(i1 %x) {
@@ -58,9 +54,7 @@ define <vscale x 2 x i1> @vsplat_nxv2i1_0() {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmclr.m v0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i1> poison, i1 0, i32 0
-  %splat = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  ret <vscale x 2 x i1> %splat
+  ret <vscale x 2 x i1> splat (i1 0)
 }
 
 define <vscale x 2 x i1> @vsplat_nxv2i1_1() {
@@ -69,9 +63,7 @@ define <vscale x 2 x i1> @vsplat_nxv2i1_1() {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
-  %splat = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  ret <vscale x 2 x i1> %splat
+  ret <vscale x 2 x i1> splat (i1 -1)
 }
 
 define <vscale x 2 x i1> @vsplat_nxv2i1_2(i1 %x) {
@@ -93,9 +85,7 @@ define <vscale x 4 x i1> @vsplat_nxv4i1_0() {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmclr.m v0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i1> poison, i1 0, i32 0
-  %splat = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  ret <vscale x 4 x i1> %splat
+  ret <vscale x 4 x i1> splat (i1 0)
 }
 
 define <vscale x 4 x i1> @vsplat_nxv4i1_1() {
@@ -104,9 +94,7 @@ define <vscale x 4 x i1> @vsplat_nxv4i1_1() {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
-  %splat = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  ret <vscale x 4 x i1> %splat
+  ret <vscale x 4 x i1> splat (i1 -1)
 }
 
 define <vscale x 4 x i1> @vsplat_nxv4i1_2(i1 %x) {
@@ -128,9 +116,7 @@ define <vscale x 8 x i1> @vsplat_nxv8i1_0() {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmclr.m v0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i1> poison, i1 0, i32 0
-  %splat = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x i1> %splat
+  ret <vscale x 8 x i1> splat (i1 0)
 }
 
 define <vscale x 8 x i1> @vsplat_nxv8i1_1() {
@@ -139,9 +125,7 @@ define <vscale x 8 x i1> @vsplat_nxv8i1_1() {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
-  %splat = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x i1> %splat
+  ret <vscale x 8 x i1> splat (i1 -1)
 }
 
 define <vscale x 8 x i1> @vsplat_nxv8i1_2(i1 %x) {
@@ -163,9 +147,7 @@ define <vscale x 16 x i1> @vsplat_nxv16i1_0() {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vmclr.m v0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 16 x i1> poison, i1 0, i32 0
-  %splat = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
-  ret <vscale x 16 x i1> %splat
+  ret <vscale x 16 x i1> splat (i1 0)
 }
 
 define <vscale x 16 x i1> @vsplat_nxv16i1_1() {
@@ -174,9 +156,7 @@ define <vscale x 16 x i1> @vsplat_nxv16i1_1() {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vmset.m v0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
-  %splat = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
-  ret <vscale x 16 x i1> %splat
+  ret <vscale x 16 x i1> splat (i1 -1)
 }
 
 define <vscale x 16 x i1> @vsplat_nxv16i1_2(i1 %x) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll
index d428fceb69116..721d6ef26d61e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll
@@ -10,9 +10,7 @@ define <vscale x 8 x i64> @vsplat_nxv8i64_1() {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, -1
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
-  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x i64> %splat
+  ret <vscale x 8 x i64> splat (i64 -1)
 }
 
 define <vscale x 8 x i64> @vsplat_nxv8i64_2() {
@@ -21,9 +19,7 @@ define <vscale x 8 x i64> @vsplat_nxv8i64_2() {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 4
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i64> poison, i64 4, i32 0
-  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x i64> %splat
+  ret <vscale x 8 x i64> splat (i64 4)
 }
 
 define <vscale x 8 x i64> @vsplat_nxv8i64_3() {
@@ -33,9 +29,7 @@ define <vscale x 8 x i64> @vsplat_nxv8i64_3() {
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.x v8, a0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i64> poison, i64 255, i32 0
-  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x i64> %splat
+  ret <vscale x 8 x i64> splat (i64 255)
 }
 
 define <vscale x 8 x i64> @vsplat_nxv8i64_4() {
@@ -61,9 +55,7 @@ define <vscale x 8 x i64> @vsplat_nxv8i64_4() {
 ; RV64V-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; RV64V-NEXT:    vmv.v.x v8, a0
 ; RV64V-NEXT:    ret
-  %head = insertelement <vscale x 8 x i64> poison, i64 4211079935, i32 0
-  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x i64> %splat
+  ret <vscale x 8 x i64> splat (i64 4211079935)
 }
 
 define <vscale x 8 x i64> @vsplat_nxv8i64_5(i64 %a) {
@@ -95,9 +87,7 @@ define <vscale x 8 x i64> @vadd_vx_nxv8i64_6(<vscale x 8 x i64> %v) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vadd.vi v8, v8, 2
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i64> poison, i64 2, i32 0
-  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-  %vret = add <vscale x 8 x i64> %v, %splat
+  %vret = add <vscale x 8 x i64> %v, splat (i64 2)
   ret <vscale x 8 x i64> %vret
 }
 
@@ -107,9 +97,7 @@ define <vscale x 8 x i64> @vadd_vx_nxv8i64_7(<vscale x 8 x i64> %v) {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vadd.vi v8, v8, -1
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i64> poison, i64 -1, i32 0
-  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-  %vret = add <vscale x 8 x i64> %v, %splat
+  %vret = add <vscale x 8 x i64> %v, splat (i64 -1)
   ret <vscale x 8 x i64> %vret
 }
 
@@ -120,9 +108,7 @@ define <vscale x 8 x i64> @vadd_vx_nxv8i64_8(<vscale x 8 x i64> %v) {
 ; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vadd.vx v8, v8, a0
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i64> poison, i64 255, i32 0
-  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-  %vret = add <vscale x 8 x i64> %v, %splat
+  %vret = add <vscale x 8 x i64> %v, splat (i64 255)
   ret <vscale x 8 x i64> %vret
 }
 
@@ -142,9 +128,7 @@ define <vscale x 8 x i64> @vadd_vx_nxv8i64_9(<vscale x 8 x i64> %v) {
 ; RV64V-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; RV64V-NEXT:    vadd.vx v8, v8, a0
 ; RV64V-NEXT:    ret
-  %head = insertelement <vscale x 8 x i64> poison, i64 2063596287, i32 0
-  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-  %vret = add <vscale x 8 x i64> %v, %splat
+  %vret = add <vscale x 8 x i64> %v, splat (i64 2063596287)
   ret <vscale x 8 x i64> %vret
 }
 
@@ -172,9 +156,7 @@ define <vscale x 8 x i64> @vadd_vx_nxv8i64_10(<vscale x 8 x i64> %v) {
 ; RV64V-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; RV64V-NEXT:    vadd.vx v8, v8, a0
 ; RV64V-NEXT:    ret
-  %head = insertelement <vscale x 8 x i64> poison, i64 4211079935, i32 0
-  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-  %vret = add <vscale x 8 x i64> %v, %splat
+  %vret = add <vscale x 8 x i64> %v, splat (i64 4211079935)
   ret <vscale x 8 x i64> %vret
 }
 
@@ -203,9 +185,7 @@ define <vscale x 8 x i64> @vadd_vx_nxv8i64_11(<vscale x 8 x i64> %v) {
 ; RV64V-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
 ; RV64V-NEXT:    vadd.vx v8, v8, a0
 ; RV64V-NEXT:    ret
-  %head = insertelement <vscale x 8 x i64> poison, i64 8506047231, i32 0
-  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-  %vret = add <vscale x 8 x i64> %v, %splat
+  %vret = add <vscale x 8 x i64> %v, splat (i64 8506047231)
   ret <vscale x 8 x i64> %vret
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-zfa.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-zfa.ll
index 59be018efb857..1047860ec8db6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsplats-zfa.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-zfa.ll
@@ -11,9 +11,7 @@ define <vscale x 8 x half> @vsplat_f16_0p625() {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v8, fa5
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x half> poison, half 0.625, i32 0
-  %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x half> %splat
+  ret <vscale x 8 x half> splat (half 0.625)
 }
 
 define <vscale x 8 x float> @vsplat_f32_0p75() {
@@ -23,9 +21,7 @@ define <vscale x 8 x float> @vsplat_f32_0p75() {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v8, fa5
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x float> poison, float 0.75, i32 0
-  %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x float> %splat
+  ret <vscale x 8 x float> splat (float 0.75)
 }
 
 define <vscale x 8 x double> @vsplat_f64_neg1() {
@@ -35,7 +31,5 @@ define <vscale x 8 x double> @vsplat_f64_neg1() {
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.v.f v8, fa5
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x double> poison, double -1.0, i32 0
-  %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
-  ret <vscale x 8 x double> %splat
+  ret <vscale x 8 x double> splat (double -1.0)
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
index 738e9cf805b46..7bae84142d8ae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
@@ -45,9 +45,7 @@ define <vscale x 1 x i8> @vsra_vi_nxv1i8_0(<vscale x 1 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
 ; CHECK-NEXT:    vsra.vi v8, v8, 6
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i8> poison, i8 6, i32 0
-  %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
-  %vc = ashr <vscale x 1 x i8> %va, %splat
+  %vc = ashr <vscale x 1 x i8> %va, splat (i8 6)
   ret <vscale x 1 x i8> %vc
 }
 
@@ -94,9 +92,7 @@ define <vscale x 2 x i8> @vsra_vi_nxv2i8_0(<vscale x 2 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
 ; CHECK-NEXT:    vsra.vi v8, v8, 6
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i8> poison, i8 6, i32 0
-  %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
-  %vc = ashr <vscale x 2 x i8> %va, %splat
+  %vc = ashr <vscale x 2 x i8> %va, splat (i8 6)
   ret <vscale x 2 x i8> %vc
 }
 
@@ -143,9 +139,7 @@ define <vscale x 4 x i8> @vsra_vi_nxv4i8_0(<vscale x 4 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
 ; CHECK-NEXT:    vsra.vi v8, v8, 6
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i8> poison, i8 6, i32 0
-  %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
-  %vc = ashr <vscale x 4 x i8> %va, %splat
+  %vc = ashr <vscale x 4 x i8> %va, splat (i8 6)
   ret <vscale x 4 x i8> %vc
 }
 
@@ -192,9 +186,7 @@ define <vscale x 8 x i8> @vsra_vi_nxv8i8_0(<vscale x 8 x i8> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vsra.vi v8, v8, 6
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 8 x i8> poison, i8 6, i32 0
-  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
-  %vc = ashr <vscale x 8 x i8> %va, %splat
+  %vc = ashr <vscale x 8 x i8> %va, splat (i8 6)
   ret <vscale x 8 x i8> %vc
 }
 
@@ -241,9 +233,7 @@ define <vscale x 16 x i8> @vsra_vi_nxv16i8_0(<vscale x 16 x i8> %va) {
 ; CHECK-NEXT:    vsetvli
a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i8 6) ret %vc } @@ -275,9 +265,7 @@ define @vsra_vi_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i8 6) ret %vc } @@ -309,9 +297,7 @@ define @vsra_vi_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i8 6) ret %vc } @@ -358,9 +344,7 @@ define @vsra_vi_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i16 6) ret %vc } @@ -407,9 +391,7 @@ define @vsra_vi_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i16 6) ret %vc } @@ -456,9 +438,7 @@ define @vsra_vi_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i16 6) ret %vc } @@ -505,9 +485,7 @@ define @vsra_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i16 6) ret %vc } @@ -554,9 +532,7 @@ define @vsra_vi_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i16 6) ret %vc } @@ -588,9 +564,7 @@ define @vsra_vi_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i16 6) ret %vc } @@ -622,9 +596,7 @@ define @vsra_vi_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i32 31) ret %vc } @@ -656,9 +628,7 @@ define @vsra_vi_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i32 31) ret %vc } @@ -690,9 +660,7 @@ define @vsra_vi_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; 
CHECK-NEXT: vsra.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i32 31) ret %vc } @@ -724,9 +692,7 @@ define @vsra_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i32 31) ret %vc } @@ -758,9 +724,7 @@ define @vsra_vi_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i32 31) ret %vc } @@ -792,9 +756,7 @@ define @vsra_vi_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i64 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i64 31) ret %vc } @@ -805,9 +767,7 @@ define @vsra_vi_nxv1i64_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vsra.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i64 32) ret %vc } @@ -839,9 +799,7 @@ define @vsra_vi_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i64 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i64 31) ret %vc } @@ -852,9 +810,7 @@ define @vsra_vi_nxv2i64_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vsra.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i64 32) ret %vc } @@ -886,9 +842,7 @@ define @vsra_vi_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i64 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i64 31) ret %vc } @@ -899,9 +853,7 @@ define @vsra_vi_nxv4i64_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vsra.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i64 32) ret %vc } @@ -933,9 +885,7 @@ define @vsra_vi_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i64 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i64 31) ret %vc } @@ -946,9 +896,7 @@ define @vsra_vi_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vsra.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = ashr %va, %splat + %vc = ashr %va, splat (i64 32) ret %vc } @@ -982,9 +930,7 @@ define @vsra_vi_mask_nxv8i32( %va, poison, i32 31, i32 0 - %splat = shufflevector 
%head, poison, zeroinitializer - %vs = select %mask, %splat, zeroinitializer + %vs = select %mask, splat (i32 31), zeroinitializer %vc = ashr %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll index 35100d781f059..632c4db5c5bb5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll @@ -42,9 +42,7 @@ define @vsra_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -68,9 +66,7 @@ define @vsra_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -80,9 +76,7 @@ define @vsra_vi_nxv1i8( %va, poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv1i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -92,11 +86,7 @@ define @vsra_vi_nxv1i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv1i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -118,9 +108,7 @@ define @vsra_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -144,9 +132,7 @@ define @vsra_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -156,9 +142,7 @@ define @vsra_vi_nxv2i8( %va, poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv2i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -168,11 +152,7 @@ define @vsra_vi_nxv2i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv2i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -194,9 +174,7 @@ define @vsra_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i8( %va, %b, splat (i1 
true), i32 %evl) ret %v } @@ -220,9 +198,7 @@ define @vsra_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -232,9 +208,7 @@ define @vsra_vi_nxv4i8( %va, poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -244,11 +218,7 @@ define @vsra_vi_nxv4i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -270,9 +240,7 @@ define @vsra_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -296,9 +264,7 @@ define @vsra_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -308,9 +274,7 @@ define @vsra_vi_nxv8i8( %va, poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -320,11 +284,7 @@ define @vsra_vi_nxv8i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -346,9 +306,7 @@ define @vsra_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -372,9 +330,7 @@ define @vsra_vx_nxv16i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -384,9 +340,7 @@ define @vsra_vi_nxv16i8( %va, poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv16i8( %va, 
%vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv16i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -396,11 +350,7 @@ define @vsra_vi_nxv16i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv16i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -422,9 +372,7 @@ define @vsra_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -448,9 +396,7 @@ define @vsra_vx_nxv32i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -460,9 +406,7 @@ define @vsra_vi_nxv32i8( %va, poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv32i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -472,11 +416,7 @@ define @vsra_vi_nxv32i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv32i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } @@ -498,9 +438,7 @@ define @vsra_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -524,9 +462,7 @@ define @vsra_vx_nxv64i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -536,9 +472,7 @@ define @vsra_vi_nxv64i8( %va, poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv64i8( %va, splat (i8 5), %m, i32 %evl) ret %v } @@ -548,11 +482,7 @@ define @vsra_vi_nxv64i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv64i8( %va, splat (i8 5), splat (i1 true), i32 %evl) ret %v } 
@@ -574,9 +504,7 @@ define <vscale x 1 x i16> @vsra_vv_nxv1i16_unmasked(<vscale x 1 x i16> %va, <vs
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vsra.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x i16> %v
 }
 
@@ -600,9 +528,7 @@ define <vscale x 1 x i16> @vsra_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
-  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x i16> %v
 }
 
@@ -612,9 +538,7 @@ define <vscale x 1 x i16> @vsra_vi_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vsra.vi v8, v8, 5, v0.t
 ; CHECK-NEXT:    ret
-  %elt.head = insertelement <vscale x 1 x i16> poison, i16 5, i32 0
-  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> splat (i16 5), <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i16> %v
 }
 
@@ -624,11 +548,7 @@ define <vscale x 1 x i16> @vsra_vi_nxv1i16_unmasked(<vscale x 1 x i16> %va, i32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
 ; CHECK-NEXT:    vsra.vi v8, v8, 5
 ; CHECK-NEXT:    ret
-  %elt.head = insertelement <vscale x 1 x i16> poison, i16 5, i32 0
-  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
-  %head = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 %evl)
+  %v = call <vscale x 1 x i16> @llvm.vp.ashr.nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> splat (i16 5), <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 1 x i16> %v
 }
 
@@ -650,9 +570,7 @@ define <vscale x 2 x i16> @vsra_vv_nxv2i16_unmasked(<vscale x 2 x i16> %va, <vs
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vsra.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> %m, i32 %evl)
+  %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %b, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i16> %v
 }
 
@@ -676,9 +594,7 @@ define <vscale x 2 x i16> @vsra_vx_nxv2i16_unmasked(<vscale x 2 x i16> %va, i16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
-  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i16> %v
 }
 
@@ -688,9 +604,7 @@ define <vscale x 2 x i16> @vsra_vi_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vsra.vi v8, v8, 5, v0.t
 ; CHECK-NEXT:    ret
-  %elt.head = insertelement <vscale x 2 x i16> poison, i16 5, i32 0
-  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> splat (i16 5), <vscale x 2 x i1> %m, i32 %evl)
   ret <vscale x 2 x i16> %v
 }
 
@@ -700,11 +614,7 @@ define <vscale x 2 x i16> @vsra_vi_nxv2i16_unmasked(<vscale x 2 x i16> %va, i32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
 ; CHECK-NEXT:    vsra.vi v8, v8, 5
 ; CHECK-NEXT:    ret
-  %elt.head = insertelement <vscale x 2 x i16> poison, i16 5, i32 0
-  %vb = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
-  %head = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 %evl)
+  %v = call <vscale x 2 x i16> @llvm.vp.ashr.nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> splat (i16 5), <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 2 x i16> %v
 }
 
@@ -726,9 +636,7 @@ define <vscale x 4 x i16> @vsra_vv_nxv4i16_unmasked(<vscale x 4 x i16> %va, <vs
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
 ; CHECK-NEXT:    vsra.vv v8, v8, v9
 ; CHECK-NEXT:    ret
-  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x i16> @llvm.vp.ashr.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> %m, i32 %evl)
+  %v = call <vscale x 4 x i16> @llvm.vp.ashr.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret <vscale x 4 x i16> %v
 }
 
@@ -752,9 +660,7 @@ define <vscale x 4 x i16> @vsra_vx_nxv4i16_unmasked(<vscale x 4 x i16> %va, i16
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
   %vb = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
-  %head = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
-  %m = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  %v = call <vscale x 4 x i16> @llvm.vp.ashr.nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> 
%vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -764,9 +670,7 @@ define @vsra_vi_nxv4i16( %va, poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -776,11 +680,7 @@ define @vsra_vi_nxv4i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i16( %va, splat (i16 5), splat (i1 true), i32 %evl) ret %v } @@ -802,9 +702,7 @@ define @vsra_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -828,9 +726,7 @@ define @vsra_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -840,9 +736,7 @@ define @vsra_vi_nxv8i16( %va, poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -852,11 +746,7 @@ define @vsra_vi_nxv8i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i16( %va, splat (i16 5), splat (i1 true), i32 %evl) ret %v } @@ -878,9 +768,7 @@ define @vsra_vv_nxv16i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -904,9 +792,7 @@ define @vsra_vx_nxv16i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -916,9 +802,7 @@ define @vsra_vi_nxv16i16( %va, poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv16i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -928,11 +812,7 @@ define @vsra_vi_nxv16i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, 
m4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv16i16( %va, splat (i16 5), splat (i1 true), i32 %evl) ret %v } @@ -954,9 +834,7 @@ define @vsra_vv_nxv32i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -980,9 +858,7 @@ define @vsra_vx_nxv32i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -992,9 +868,7 @@ define @vsra_vi_nxv32i16( %va, poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv32i16( %va, splat (i16 5), %m, i32 %evl) ret %v } @@ -1004,11 +878,7 @@ define @vsra_vi_nxv32i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv32i16( %va, splat (i16 5), splat (i1 true), i32 %evl) ret %v } @@ -1030,9 +900,7 @@ define @vsra_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1056,9 +924,7 @@ define @vsra_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1068,9 +934,7 @@ define @vsra_vi_nxv1i32( %va, poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv1i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1080,11 +944,7 @@ define @vsra_vi_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv1i32( %va, splat (i32 5), splat (i1 true), i32 %evl) ret %v } @@ -1106,9 +966,7 @@ define 
@vsra_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1132,9 +990,7 @@ define @vsra_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1144,9 +1000,7 @@ define @vsra_vi_nxv2i32( %va, poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv2i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1156,11 +1010,7 @@ define @vsra_vi_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv2i32( %va, splat (i32 5), splat (i1 true), i32 %evl) ret %v } @@ -1182,9 +1032,7 @@ define @vsra_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1208,9 +1056,7 @@ define @vsra_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1220,9 +1066,7 @@ define @vsra_vi_nxv4i32( %va, poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1232,11 +1076,7 @@ define @vsra_vi_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i32( %va, splat (i32 5), splat (i1 true), i32 %evl) ret %v } @@ -1258,9 +1098,7 @@ define @vsra_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1284,9 +1122,7 @@ define @vsra_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i32( %va, %vb, %m, i32 
%evl) + %v = call @llvm.vp.ashr.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1296,9 +1132,7 @@ define @vsra_vi_nxv8i32( %va, poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1308,11 +1142,7 @@ define @vsra_vi_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i32( %va, splat (i32 5), splat (i1 true), i32 %evl) ret %v } @@ -1334,9 +1164,7 @@ define @vsra_vv_nxv16i32_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsra.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1360,9 +1188,7 @@ define @vsra_vx_nxv16i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1372,9 +1198,7 @@ define @vsra_vi_nxv16i32( %va, poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv16i32( %va, splat (i32 5), %m, i32 %evl) ret %v } @@ -1384,11 +1208,7 @@ define @vsra_vi_nxv16i32_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv16i32( %va, splat (i32 5), splat (i1 true), i32 %evl) ret %v } @@ -1410,9 +1230,7 @@ define @vsra_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1448,9 +1266,7 @@ define @vsra_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1460,9 +1276,7 @@ define @vsra_vi_nxv1i64( %va, poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv1i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -1472,11 +1286,7 @@ define @vsra_vi_nxv1i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, 
e64, m1, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv1i64( %va, splat (i64 5), splat (i1 true), i32 %evl) ret %v } @@ -1498,9 +1308,7 @@ define @vsra_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1536,9 +1344,7 @@ define @vsra_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1548,9 +1354,7 @@ define @vsra_vi_nxv2i64( %va, poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv2i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -1560,11 +1364,7 @@ define @vsra_vi_nxv2i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv2i64( %va, splat (i64 5), splat (i1 true), i32 %evl) ret %v } @@ -1586,9 +1386,7 @@ define @vsra_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1624,9 +1422,7 @@ define @vsra_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1636,9 +1432,7 @@ define @vsra_vi_nxv4i64( %va, poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -1648,11 +1442,7 @@ define @vsra_vi_nxv4i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv4i64( %va, splat (i64 5), splat (i1 true), i32 %evl) ret %v } @@ -1686,9 +1476,7 @@ define @vsra_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.ashr.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1724,9 +1512,7 @@ define @vsra_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1736,9 +1522,7 @@ define @vsra_vi_nxv8i64( %va, poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i64( %va, splat (i64 5), %m, i32 %evl) ret %v } @@ -1748,10 +1532,6 @@ define @vsra_vi_nxv8i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsra.vi v8, v8, 5 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 5, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ashr.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ashr.nxv8i64( %va, splat (i64 5), splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll index be70b20181b14..6f4538926bef2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll @@ -20,9 +20,7 @@ define @vsrl_vx_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i8 6) ret %vc } @@ -44,9 +42,7 @@ define @vsrl_vx_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i8 6) ret %vc } @@ -68,9 +64,7 @@ define @vsrl_vx_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i8 6) ret %vc } @@ -92,9 +86,7 @@ define @vsrl_vx_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i8 6) ret %vc } @@ -116,9 +108,7 @@ define @vsrl_vx_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i8 6) ret %vc } @@ -140,9 +130,7 @@ define @vsrl_vx_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i8 6) ret %vc } @@ -164,9 +152,7 @@ define @vsrl_vx_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a0, 
zero, e8, m8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i8 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i8 6) ret %vc } @@ -188,9 +174,7 @@ define @vsrl_vx_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i16 6) ret %vc } @@ -212,9 +196,7 @@ define @vsrl_vx_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i16 6) ret %vc } @@ -236,9 +218,7 @@ define @vsrl_vx_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i16 6) ret %vc } @@ -260,9 +240,7 @@ define @vsrl_vx_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i16 6) ret %vc } @@ -284,9 +262,7 @@ define @vsrl_vx_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i16 6) ret %vc } @@ -308,9 +284,7 @@ define @vsrl_vx_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 6 ; CHECK-NEXT: ret - %head = insertelement poison, i16 6, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i16 6) ret %vc } @@ -332,9 +306,7 @@ define @vsrl_vx_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i32 31) ret %vc } @@ -356,9 +328,7 @@ define @vsrl_vx_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i32 31) ret %vc } @@ -380,9 +350,7 @@ define @vsrl_vx_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i32 31) ret %vc } @@ -404,9 +372,7 @@ define @vsrl_vx_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i32 31) ret %vc } @@ -428,9 +394,7 @@ define @vsrl_vx_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, 
ma ; CHECK-NEXT: vsrl.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i32 31) ret %vc } @@ -452,9 +416,7 @@ define @vsrl_vx_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i64 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i64 31) ret %vc } @@ -465,9 +427,7 @@ define @vsrl_vx_nxv1i64_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i64 32) ret %vc } @@ -489,9 +449,7 @@ define @vsrl_vx_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i64 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i64 31) ret %vc } @@ -502,9 +460,7 @@ define @vsrl_vx_nxv2i64_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i64 32) ret %vc } @@ -526,9 +482,7 @@ define @vsrl_vx_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i64 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i64 31) ret %vc } @@ -539,9 +493,7 @@ define @vsrl_vx_nxv4i64_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i64 32) ret %vc } @@ -563,9 +515,7 @@ define @vsrl_vx_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 31 ; CHECK-NEXT: ret - %head = insertelement poison, i64 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i64 31) ret %vc } @@ -576,9 +526,7 @@ define @vsrl_vx_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 32, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = lshr %va, %splat + %vc = lshr %va, splat (i64 32) ret %vc } @@ -612,9 +560,7 @@ define @vsrl_vi_mask_nxv8i32( %va, poison, i32 31, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %splat, zeroinitializer + %vs = select %mask, splat (i32 31), zeroinitializer %vc = lshr %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll index 01d1f14da2527..ec5b7f3faf7ca 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll @@ -41,9 +41,7 @@ define @vsrl_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i8( %va, %b, splat (i1 true), i32 %evl) 
ret %v } @@ -67,9 +65,7 @@ define @vsrl_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -79,9 +75,7 @@ define @vsrl_vi_nxv1i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -91,11 +85,7 @@ define @vsrl_vi_nxv1i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -117,9 +107,7 @@ define @vsrl_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -143,9 +131,7 @@ define @vsrl_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -155,9 +141,7 @@ define @vsrl_vi_nxv2i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -167,11 +151,7 @@ define @vsrl_vi_nxv2i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -193,9 +173,7 @@ define @vsrl_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -219,9 +197,7 @@ define @vsrl_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -231,9 +207,7 @@ define @vsrl_vi_nxv4i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call 
@llvm.vp.lshr.nxv4i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -243,11 +217,7 @@ define @vsrl_vi_nxv4i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -269,9 +239,7 @@ define @vsrl_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -295,9 +263,7 @@ define @vsrl_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -307,9 +273,7 @@ define @vsrl_vi_nxv8i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -319,11 +283,7 @@ define @vsrl_vi_nxv8i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -345,9 +305,7 @@ define @vsrl_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -371,9 +329,7 @@ define @vsrl_vx_nxv16i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -383,9 +339,7 @@ define @vsrl_vi_nxv16i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv16i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -395,11 +349,7 @@ define @vsrl_vi_nxv16i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv16i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -421,9 +371,7 @@ define 
@vsrl_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -447,9 +395,7 @@ define @vsrl_vx_nxv32i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -459,9 +405,7 @@ define @vsrl_vi_nxv32i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv32i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -471,11 +415,7 @@ define @vsrl_vi_nxv32i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv32i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -497,9 +437,7 @@ define @vsrl_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -523,9 +461,7 @@ define @vsrl_vx_nxv64i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -535,9 +471,7 @@ define @vsrl_vi_nxv64i8( %va, poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv64i8( %va, splat (i8 4), %m, i32 %evl) ret %v } @@ -547,11 +481,7 @@ define @vsrl_vi_nxv64i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv64i8( %va, splat (i8 4), splat (i1 true), i32 %evl) ret %v } @@ -573,9 +503,7 @@ define @vsrl_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -599,9 +527,7 @@ define @vsrl_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call 
@llvm.vp.lshr.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -611,9 +537,7 @@ define @vsrl_vi_nxv1i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -623,11 +547,7 @@ define @vsrl_vi_nxv1i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -649,9 +569,7 @@ define @vsrl_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -675,9 +593,7 @@ define @vsrl_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -687,9 +603,7 @@ define @vsrl_vi_nxv2i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -699,11 +613,7 @@ define @vsrl_vi_nxv2i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -725,9 +635,7 @@ define @vsrl_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -751,9 +659,7 @@ define @vsrl_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -763,9 +669,7 @@ define @vsrl_vi_nxv4i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -775,11 +679,7 @@ define @vsrl_vi_nxv4i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, 
zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -801,9 +701,7 @@ define @vsrl_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -827,9 +725,7 @@ define @vsrl_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -839,9 +735,7 @@ define @vsrl_vi_nxv8i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -851,11 +745,7 @@ define @vsrl_vi_nxv8i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -877,9 +767,7 @@ define @vsrl_vv_nxv16i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -903,9 +791,7 @@ define @vsrl_vx_nxv16i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -915,9 +801,7 @@ define @vsrl_vi_nxv16i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv16i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -927,11 +811,7 @@ define @vsrl_vi_nxv16i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv16i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -953,9 +833,7 @@ define @vsrl_vv_nxv32i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 
true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -979,9 +857,7 @@ define @vsrl_vx_nxv32i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -991,9 +867,7 @@ define @vsrl_vi_nxv32i16( %va, poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv32i16( %va, splat (i16 4), %m, i32 %evl) ret %v } @@ -1003,11 +877,7 @@ define @vsrl_vi_nxv32i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv32i16( %va, splat (i16 4), splat (i1 true), i32 %evl) ret %v } @@ -1029,9 +899,7 @@ define @vsrl_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1055,9 +923,7 @@ define @vsrl_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1067,9 +933,7 @@ define @vsrl_vi_nxv1i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i32( %va, splat (i32 4), %m, i32 %evl) ret %v } @@ -1079,11 +943,7 @@ define @vsrl_vi_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i32( %va, splat (i32 4), splat (i1 true), i32 %evl) ret %v } @@ -1105,9 +965,7 @@ define @vsrl_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1131,9 +989,7 @@ define @vsrl_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i32( %va, 
%vb, splat (i1 true), i32 %evl) ret %v } @@ -1143,9 +999,7 @@ define @vsrl_vi_nxv2i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i32( %va, splat (i32 4), %m, i32 %evl) ret %v } @@ -1155,11 +1009,7 @@ define @vsrl_vi_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i32( %va, splat (i32 4), splat (i1 true), i32 %evl) ret %v } @@ -1181,9 +1031,7 @@ define @vsrl_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1207,9 +1055,7 @@ define @vsrl_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1219,9 +1065,7 @@ define @vsrl_vi_nxv4i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i32( %va, splat (i32 4), %m, i32 %evl) ret %v } @@ -1231,11 +1075,7 @@ define @vsrl_vi_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i32( %va, splat (i32 4), splat (i1 true), i32 %evl) ret %v } @@ -1257,9 +1097,7 @@ define @vsrl_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1283,9 +1121,7 @@ define @vsrl_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1295,9 +1131,7 @@ define @vsrl_vi_nxv8i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i32( %va, splat (i32 4), %m, i32 %evl) ret %v } @@ -1307,11 +1141,7 @@ define @vsrl_vi_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head 
= insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i32( %va, splat (i32 4), splat (i1 true), i32 %evl) ret %v } @@ -1333,9 +1163,7 @@ define @vsrl_vv_nxv16i32_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsrl.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1359,9 +1187,7 @@ define @vsrl_vx_nxv16i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1371,9 +1197,7 @@ define @vsrl_vi_nxv16i32( %va, poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv16i32( %va, splat (i32 4), %m, i32 %evl) ret %v } @@ -1383,11 +1207,7 @@ define @vsrl_vi_nxv16i32_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv16i32( %va, splat (i32 4), splat (i1 true), i32 %evl) ret %v } @@ -1409,9 +1229,7 @@ define @vsrl_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1447,9 +1265,7 @@ define @vsrl_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1459,9 +1275,7 @@ define @vsrl_vi_nxv1i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i64( %va, splat (i64 4), %m, i32 %evl) ret %v } @@ -1471,11 +1285,7 @@ define @vsrl_vi_nxv1i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv1i64( %va, splat (i64 4), splat (i1 true), i32 %evl) ret %v } @@ -1497,9 +1307,7 @@ define @vsrl_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i64( %va, %b, %m, i32 %evl) + %v = call 
@llvm.vp.lshr.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1535,9 +1343,7 @@ define @vsrl_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1547,9 +1353,7 @@ define @vsrl_vi_nxv2i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i64( %va, splat (i64 4), %m, i32 %evl) ret %v } @@ -1559,11 +1363,7 @@ define @vsrl_vi_nxv2i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv2i64( %va, splat (i64 4), splat (i1 true), i32 %evl) ret %v } @@ -1585,9 +1385,7 @@ define @vsrl_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1623,9 +1421,7 @@ define @vsrl_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1635,9 +1431,7 @@ define @vsrl_vi_nxv4i64( %va, poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i64( %va, splat (i64 4), %m, i32 %evl) ret %v } @@ -1647,11 +1441,7 @@ define @vsrl_vi_nxv4i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv4i64( %va, splat (i64 4), splat (i1 true), i32 %evl) ret %v } @@ -1685,9 +1475,7 @@ define @vsrl_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1723,9 +1511,7 @@ define @vsrl_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1735,9 +1521,7 @@ define @vsrl_vi_nxv8i64( %va, poison, i64 4, i32 0 - %vb = shufflevector 
%elt.head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i64( %va, splat (i64 4), %m, i32 %evl) ret %v } @@ -1747,10 +1531,6 @@ define @vsrl_vi_nxv8i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsrl.vi v8, v8, 4 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 4, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.lshr.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.lshr.nxv8i64( %va, splat (i64 4), splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll index d3289ba6e068a..c043858c02947 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll @@ -35,9 +35,7 @@ define @ssub_nxv1i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv1i8( %va, %vb) + %v = call @llvm.ssub.sat.nxv1i8( %va, splat (i8 1)) ret %v } @@ -72,9 +70,7 @@ define @ssub_nxv2i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv2i8( %va, %vb) + %v = call @llvm.ssub.sat.nxv2i8( %va, splat (i8 1)) ret %v } @@ -109,9 +105,7 @@ define @ssub_nxv4i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv4i8( %va, %vb) + %v = call @llvm.ssub.sat.nxv4i8( %va, splat (i8 1)) ret %v } @@ -146,9 +140,7 @@ define @ssub_nxv8i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv8i8( %va, %vb) + %v = call @llvm.ssub.sat.nxv8i8( %va, splat (i8 1)) ret %v } @@ -183,9 +175,7 @@ define @ssub_nxv16i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv16i8( %va, %vb) + %v = call @llvm.ssub.sat.nxv16i8( %va, splat (i8 1)) ret %v } @@ -220,9 +210,7 @@ define @ssub_nxv32i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv32i8( %va, %vb) + %v = call @llvm.ssub.sat.nxv32i8( %va, splat (i8 1)) ret %v } @@ -257,9 +245,7 @@ define @ssub_nxv64i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv64i8( %va, %vb) + %v = call @llvm.ssub.sat.nxv64i8( %va, splat (i8 1)) ret %v } @@ -294,9 +280,7 @@ define @ssub_nxv1i16_vi( %va) { ; CHECK-NEXT: 
vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv1i16( %va, %vb) + %v = call @llvm.ssub.sat.nxv1i16( %va, splat (i16 1)) ret %v } @@ -331,9 +315,7 @@ define @ssub_nxv2i16_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv2i16( %va, %vb) + %v = call @llvm.ssub.sat.nxv2i16( %va, splat (i16 1)) ret %v } @@ -368,9 +350,7 @@ define @ssub_nxv4i16_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv4i16( %va, %vb) + %v = call @llvm.ssub.sat.nxv4i16( %va, splat (i16 1)) ret %v } @@ -405,9 +385,7 @@ define @ssub_nxv8i16_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv8i16( %va, %vb) + %v = call @llvm.ssub.sat.nxv8i16( %va, splat (i16 1)) ret %v } @@ -442,9 +420,7 @@ define @ssub_nxv16i16_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv16i16( %va, %vb) + %v = call @llvm.ssub.sat.nxv16i16( %va, splat (i16 1)) ret %v } @@ -479,9 +455,7 @@ define @ssub_nxv32i16_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv32i16( %va, %vb) + %v = call @llvm.ssub.sat.nxv32i16( %va, splat (i16 1)) ret %v } @@ -516,9 +490,7 @@ define @ssub_nxv1i32_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv1i32( %va, %vb) + %v = call @llvm.ssub.sat.nxv1i32( %va, splat (i32 1)) ret %v } @@ -553,9 +525,7 @@ define @ssub_nxv2i32_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv2i32( %va, %vb) + %v = call @llvm.ssub.sat.nxv2i32( %va, splat (i32 1)) ret %v } @@ -590,9 +560,7 @@ define @ssub_nxv4i32_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv4i32( %va, %vb) + %v = call @llvm.ssub.sat.nxv4i32( %va, splat (i32 1)) ret %v } @@ -627,9 +595,7 @@ define @ssub_nxv8i32_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call 
@llvm.ssub.sat.nxv8i32( %va, %vb) + %v = call @llvm.ssub.sat.nxv8i32( %va, splat (i32 1)) ret %v } @@ -664,9 +630,7 @@ define @ssub_nxv16i32_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv16i32( %va, %vb) + %v = call @llvm.ssub.sat.nxv16i32( %va, splat (i32 1)) ret %v } @@ -714,9 +678,7 @@ define @ssub_nxv1i64_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv1i64( %va, %vb) + %v = call @llvm.ssub.sat.nxv1i64( %va, splat (i64 1)) ret %v } @@ -764,9 +726,7 @@ define @ssub_nxv2i64_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv2i64( %va, %vb) + %v = call @llvm.ssub.sat.nxv2i64( %va, splat (i64 1)) ret %v } @@ -814,9 +774,7 @@ define @ssub_nxv4i64_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv4i64( %va, %vb) + %v = call @llvm.ssub.sat.nxv4i64( %va, splat (i64 1)) ret %v } @@ -864,8 +822,6 @@ define @ssub_nxv8i64_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.ssub.sat.nxv8i64( %va, %vb) + %v = call @llvm.ssub.sat.nxv8i64( %va, splat (i64 1)) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll index 2d51a2ee44f65..b56a0f40176cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll @@ -43,9 +43,7 @@ define @vssub_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -83,9 +81,7 @@ define @vssub_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -96,9 +92,7 @@ define @vssub_vi_nxv1i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -109,11 +103,7 @@ define @vssub_vi_nxv1i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.ssub.sat.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -135,9 +125,7 @@ define @vssub_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -161,9 +149,7 @@ define @vssub_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -174,9 +160,7 @@ define @vssub_vi_nxv2i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -187,11 +171,7 @@ define @vssub_vi_nxv2i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -213,9 +193,7 @@ define @vssub_vv_nxv3i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv3i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv3i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -239,9 +217,7 @@ define @vssub_vx_nxv3i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv3i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -252,9 +228,7 @@ define @vssub_vi_nxv3i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv3i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -265,11 +239,7 @@ define @vssub_vi_nxv3i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv3i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -291,9 +261,7 @@ define @vssub_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -317,9 +285,7 @@ define @vssub_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement 
poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -330,9 +296,7 @@ define @vssub_vi_nxv4i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -343,11 +307,7 @@ define @vssub_vi_nxv4i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -369,9 +329,7 @@ define @vssub_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -395,9 +353,7 @@ define @vssub_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -408,9 +364,7 @@ define @vssub_vi_nxv8i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -421,11 +375,7 @@ define @vssub_vi_nxv8i8_unmasked( %va, i32 ze ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -447,9 +397,7 @@ define @vssub_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -473,9 +421,7 @@ define @vssub_vx_nxv16i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -486,9 +432,7 @@ define @vssub_vi_nxv16i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call 
@llvm.vp.ssub.sat.nxv16i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -499,11 +443,7 @@ define @vssub_vi_nxv16i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv16i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -525,9 +465,7 @@ define @vssub_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -551,9 +489,7 @@ define @vssub_vx_nxv32i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -564,9 +500,7 @@ define @vssub_vi_nxv32i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv32i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -577,11 +511,7 @@ define @vssub_vi_nxv32i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv32i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -603,9 +533,7 @@ define @vssub_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -629,9 +557,7 @@ define @vssub_vx_nxv64i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -642,9 +568,7 @@ define @vssub_vi_nxv64i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv64i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -655,11 +579,7 @@ define @vssub_vi_nxv64i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call 
@llvm.vp.ssub.sat.nxv64i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -690,9 +610,7 @@ define @vssub_vi_nxv128i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -715,11 +633,7 @@ define @vssub_vi_nxv128i8_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv128i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -741,9 +655,7 @@ define @vssub_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -767,9 +679,7 @@ define @vssub_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -780,9 +690,7 @@ define @vssub_vi_nxv1i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -793,11 +701,7 @@ define @vssub_vi_nxv1i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -819,9 +723,7 @@ define @vssub_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -845,9 +747,7 @@ define @vssub_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -858,9 +758,7 @@ define @vssub_vi_nxv2i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -871,11 +769,7 @@ define @vssub_vi_nxv2i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; 
CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -897,9 +791,7 @@ define @vssub_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -923,9 +815,7 @@ define @vssub_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -936,9 +826,7 @@ define @vssub_vi_nxv4i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -949,11 +837,7 @@ define @vssub_vi_nxv4i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -975,9 +859,7 @@ define @vssub_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1001,9 +883,7 @@ define @vssub_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1014,9 +894,7 @@ define @vssub_vi_nxv8i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1027,11 +905,7 @@ define @vssub_vi_nxv8i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1053,9 +927,7 @@ define @vssub_vv_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vssub.vv 
v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1079,9 +951,7 @@ define @vssub_vx_nxv16i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1092,9 +962,7 @@ define @vssub_vi_nxv16i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv16i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1105,11 +973,7 @@ define @vssub_vi_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv16i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1131,9 +995,7 @@ define @vssub_vv_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vssub.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1157,9 +1019,7 @@ define @vssub_vx_nxv32i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1170,9 +1030,7 @@ define @vssub_vi_nxv32i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv32i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1183,11 +1041,7 @@ define @vssub_vi_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv32i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1209,9 +1063,7 @@ define @vssub_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1235,9 +1087,7 @@ define @vssub_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = 
insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1248,9 +1098,7 @@ define @vssub_vi_nxv1i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1261,11 +1109,7 @@ define @vssub_vi_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1287,9 +1131,7 @@ define @vssub_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1313,9 +1155,7 @@ define @vssub_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1326,9 +1166,7 @@ define @vssub_vi_nxv2i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1339,11 +1177,7 @@ define @vssub_vi_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1365,9 +1199,7 @@ define @vssub_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1391,9 +1223,7 @@ define @vssub_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1404,9 +1234,7 @@ define @vssub_vi_nxv4i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call 
@llvm.vp.ssub.sat.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1417,11 +1245,7 @@ define @vssub_vi_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1443,9 +1267,7 @@ define @vssub_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1469,9 +1291,7 @@ define @vssub_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1482,9 +1302,7 @@ define @vssub_vi_nxv8i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1495,11 +1313,7 @@ define @vssub_vi_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1521,9 +1335,7 @@ define @vssub_vv_nxv16i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vssub.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1547,9 +1359,7 @@ define @vssub_vx_nxv16i32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1560,9 +1370,7 @@ define @vssub_vi_nxv16i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv16i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1573,11 +1381,7 @@ define @vssub_vi_nxv16i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector 
%elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv16i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1609,9 +1413,7 @@ define @vssub_vi_nxv32i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1634,11 +1436,7 @@ define @vssub_vi_nxv32i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv32i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1660,9 +1458,7 @@ define @vssub_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1714,9 +1510,7 @@ define @vssub_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1727,9 +1521,7 @@ define @vssub_vi_nxv1i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1740,11 +1532,7 @@ define @vssub_vi_nxv1i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv1i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1766,9 +1554,7 @@ define @vssub_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1820,9 +1606,7 @@ define @vssub_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1833,9 +1617,7 @@ define @vssub_vi_nxv2i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i64( %va, %vb, %m, i32 %evl) + %v = 
call @llvm.vp.ssub.sat.nxv2i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1846,11 +1628,7 @@ define @vssub_vi_nxv2i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv2i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1872,9 +1650,7 @@ define @vssub_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1926,9 +1702,7 @@ define @vssub_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1939,9 +1713,7 @@ define @vssub_vi_nxv4i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1952,11 +1724,7 @@ define @vssub_vi_nxv4i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv4i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1978,9 +1746,7 @@ define @vssub_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -2032,9 +1798,7 @@ define @vssub_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -2045,9 +1809,7 @@ define @vssub_vi_nxv8i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -2058,10 +1820,6 @@ define @vssub_vi_nxv8i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vssub.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.ssub.sat.nxv8i64( %va, %vb, %m, 
i32 %evl) + %v = call @llvm.vp.ssub.sat.nxv8i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll index a5b481917caef..5349548a213bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll @@ -35,9 +35,7 @@ define @usub_nxv1i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv1i8( %va, %vb) + %v = call @llvm.usub.sat.nxv1i8( %va, splat (i8 2)) ret %v } @@ -72,9 +70,7 @@ define @usub_nxv2i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv2i8( %va, %vb) + %v = call @llvm.usub.sat.nxv2i8( %va, splat (i8 2)) ret %v } @@ -109,9 +105,7 @@ define @usub_nxv4i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv4i8( %va, %vb) + %v = call @llvm.usub.sat.nxv4i8( %va, splat (i8 2)) ret %v } @@ -146,9 +140,7 @@ define @usub_nxv8i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv8i8( %va, %vb) + %v = call @llvm.usub.sat.nxv8i8( %va, splat (i8 2)) ret %v } @@ -183,9 +175,7 @@ define @usub_nxv16i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv16i8( %va, %vb) + %v = call @llvm.usub.sat.nxv16i8( %va, splat (i8 2)) ret %v } @@ -220,9 +210,7 @@ define @usub_nxv32i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv32i8( %va, %vb) + %v = call @llvm.usub.sat.nxv32i8( %va, splat (i8 2)) ret %v } @@ -257,9 +245,7 @@ define @usub_nxv64i8_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv64i8( %va, %vb) + %v = call @llvm.usub.sat.nxv64i8( %va, splat (i8 2)) ret %v } @@ -294,9 +280,7 @@ define @usub_nxv1i16_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv1i16( %va, %vb) + %v = call @llvm.usub.sat.nxv1i16( %va, splat (i16 2)) ret %v } @@ -331,9 +315,7 @@ define @usub_nxv2i16_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call 
@llvm.usub.sat.nxv2i16( %va, %vb) + %v = call @llvm.usub.sat.nxv2i16( %va, splat (i16 2)) ret %v } @@ -368,9 +350,7 @@ define @usub_nxv4i16_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv4i16( %va, %vb) + %v = call @llvm.usub.sat.nxv4i16( %va, splat (i16 2)) ret %v } @@ -405,9 +385,7 @@ define @usub_nxv8i16_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv8i16( %va, %vb) + %v = call @llvm.usub.sat.nxv8i16( %va, splat (i16 2)) ret %v } @@ -442,9 +420,7 @@ define @usub_nxv16i16_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv16i16( %va, %vb) + %v = call @llvm.usub.sat.nxv16i16( %va, splat (i16 2)) ret %v } @@ -479,9 +455,7 @@ define @usub_nxv32i16_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv32i16( %va, %vb) + %v = call @llvm.usub.sat.nxv32i16( %va, splat (i16 2)) ret %v } @@ -516,9 +490,7 @@ define @usub_nxv1i32_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv1i32( %va, %vb) + %v = call @llvm.usub.sat.nxv1i32( %va, splat (i32 2)) ret %v } @@ -553,9 +525,7 @@ define @usub_nxv2i32_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv2i32( %va, %vb) + %v = call @llvm.usub.sat.nxv2i32( %va, splat (i32 2)) ret %v } @@ -590,9 +560,7 @@ define @usub_nxv4i32_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv4i32( %va, %vb) + %v = call @llvm.usub.sat.nxv4i32( %va, splat (i32 2)) ret %v } @@ -627,9 +595,7 @@ define @usub_nxv8i32_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv8i32( %va, %vb) + %v = call @llvm.usub.sat.nxv8i32( %va, splat (i32 2)) ret %v } @@ -664,9 +630,7 @@ define @usub_nxv16i32_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv16i32( %va, %vb) + %v = call @llvm.usub.sat.nxv16i32( %va, splat (i32 2)) ret %v } @@ -714,9 +678,7 @@ define @usub_nxv1i64_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: 
vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv1i64( %va, %vb) + %v = call @llvm.usub.sat.nxv1i64( %va, splat (i64 2)) ret %v } @@ -764,9 +726,7 @@ define @usub_nxv2i64_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv2i64( %va, %vb) + %v = call @llvm.usub.sat.nxv2i64( %va, splat (i64 2)) ret %v } @@ -814,9 +774,7 @@ define @usub_nxv4i64_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv4i64( %va, %vb) + %v = call @llvm.usub.sat.nxv4i64( %va, splat (i64 2)) ret %v } @@ -864,8 +822,6 @@ define @usub_nxv8i64_vi( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a0 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 2, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.usub.sat.nxv8i64( %va, %vb) + %v = call @llvm.usub.sat.nxv8i64( %va, splat (i64 2)) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll index e5589ce1a9bc6..8275c3081c7c1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll @@ -41,9 +41,7 @@ define @vssubu_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -81,9 +79,7 @@ define @vssubu_vx_nxv1i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -94,9 +90,7 @@ define @vssubu_vi_nxv1i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -107,11 +101,7 @@ define @vssubu_vi_nxv1i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -133,9 +123,7 @@ define @vssubu_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -159,9 +147,7 @@ define @vssubu_vx_nxv2i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, 
zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -172,9 +158,7 @@ define @vssubu_vi_nxv2i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -185,11 +169,7 @@ define @vssubu_vi_nxv2i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -211,9 +191,7 @@ define @vssubu_vv_nxv3i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv3i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv3i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -237,9 +215,7 @@ define @vssubu_vx_nxv3i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv3i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -250,9 +226,7 @@ define @vssubu_vi_nxv3i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv3i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -263,11 +237,7 @@ define @vssubu_vi_nxv3i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv3i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv3i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -289,9 +259,7 @@ define @vssubu_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -315,9 +283,7 @@ define @vssubu_vx_nxv4i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -328,9 +294,7 @@ define @vssubu_vi_nxv4i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } 
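; All of the "_unmasked" tests in this file follow the same mechanical
; rewrite: the all-true mask that used to be materialized with an
; insertelement/shufflevector pair is now spelled directly as the vector
; constant splat (i1 true). A minimal self-contained sketch of the new
; form, with the types written out (the function name is hypothetical):

declare <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @splat_mask_sketch(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, i32 %evl) {
  ; splat (i1 true) denotes the all-ones scalable mask directly; no
  ; insertelement/shufflevector preamble is needed.
  %v = call <vscale x 1 x i8> @llvm.vp.usub.sat.nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  ret <vscale x 1 x i8> %v
}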
@@ -341,11 +305,7 @@ define @vssubu_vi_nxv4i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -367,9 +327,7 @@ define @vssubu_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -393,9 +351,7 @@ define @vssubu_vx_nxv8i8_unmasked( %va, i8 %b ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -406,9 +362,7 @@ define @vssubu_vi_nxv8i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -419,11 +373,7 @@ define @vssubu_vi_nxv8i8_unmasked( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -445,9 +395,7 @@ define @vssubu_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -471,9 +419,7 @@ define @vssubu_vx_nxv16i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -484,9 +430,7 @@ define @vssubu_vi_nxv16i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv16i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -497,11 +441,7 @@ define @vssubu_vi_nxv16i8_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv16i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } 
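; Constant vector operands follow the same pattern: a splat of an
; immediate such as i8 -1, which previously took two IR instructions, is
; now the single constant splat (i8 -1). A self-contained before/after
; sketch of the idiom (function names hypothetical):

declare <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i8> @splat_imm_old(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl) {
  ; Old spelling: build the splat with insertelement + shufflevector.
  %elt.head = insertelement <vscale x 4 x i8> poison, i8 -1, i32 0
  %vb = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
  %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %v
}

define <vscale x 4 x i8> @splat_imm_new(<vscale x 4 x i8> %va, <vscale x 4 x i1> %m, i32 %evl) {
  ; Semantically identical; the operand is now a plain vector constant.
  %v = call <vscale x 4 x i8> @llvm.vp.usub.sat.nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> splat (i8 -1), <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %v
}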
@@ -523,9 +463,7 @@ define @vssubu_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -549,9 +487,7 @@ define @vssubu_vx_nxv32i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -562,9 +498,7 @@ define @vssubu_vi_nxv32i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv32i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -575,11 +509,7 @@ define @vssubu_vi_nxv32i8_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv32i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -601,9 +531,7 @@ define @vssubu_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -627,9 +555,7 @@ define @vssubu_vx_nxv64i8_unmasked( %va, i8 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -640,9 +566,7 @@ define @vssubu_vi_nxv64i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv64i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -653,11 +577,7 @@ define @vssubu_vi_nxv64i8_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv64i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -688,9 +608,7 @@ define @vssubu_vi_nxv128i8( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -713,11 +631,7 @@ define @vssubu_vi_nxv128i8_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, 
poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv128i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv128i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -739,9 +653,7 @@ define @vssubu_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -765,9 +677,7 @@ define @vssubu_vx_nxv1i16_unmasked( %va, i1 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -778,9 +688,7 @@ define @vssubu_vi_nxv1i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -791,11 +699,7 @@ define @vssubu_vi_nxv1i16_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -817,9 +721,7 @@ define @vssubu_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -843,9 +745,7 @@ define @vssubu_vx_nxv2i16_unmasked( %va, i1 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -856,9 +756,7 @@ define @vssubu_vi_nxv2i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -869,11 +767,7 @@ define @vssubu_vi_nxv2i16_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -895,9 +789,7 @@ define @vssubu_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i16( %va, %b, %m, i32 %evl) + %v = call 
@llvm.vp.usub.sat.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -921,9 +813,7 @@ define @vssubu_vx_nxv4i16_unmasked( %va, i1 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -934,9 +824,7 @@ define @vssubu_vi_nxv4i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -947,11 +835,7 @@ define @vssubu_vi_nxv4i16_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -973,9 +857,7 @@ define @vssubu_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -999,9 +881,7 @@ define @vssubu_vx_nxv8i16_unmasked( %va, i1 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1012,9 +892,7 @@ define @vssubu_vi_nxv8i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1025,11 +903,7 @@ define @vssubu_vi_nxv8i16_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1051,9 +925,7 @@ define @vssubu_vv_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vssubu.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1077,9 +949,7 @@ define @vssubu_vx_nxv16i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.usub.sat.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1090,9 +960,7 @@ define @vssubu_vi_nxv16i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv16i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1103,11 +971,7 @@ define @vssubu_vi_nxv16i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv16i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1129,9 +993,7 @@ define @vssubu_vv_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vssubu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1155,9 +1017,7 @@ define @vssubu_vx_nxv32i16_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1168,9 +1028,7 @@ define @vssubu_vi_nxv32i16( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv32i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1181,11 +1039,7 @@ define @vssubu_vi_nxv32i16_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv32i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1207,9 +1061,7 @@ define @vssubu_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1233,9 +1085,7 @@ define @vssubu_vx_nxv1i32_unmasked( %va, i3 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1246,9 +1096,7 @@ define @vssubu_vi_nxv1i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call 
@llvm.vp.usub.sat.nxv1i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1259,11 +1107,7 @@ define @vssubu_vi_nxv1i32_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1285,9 +1129,7 @@ define @vssubu_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1311,9 +1153,7 @@ define @vssubu_vx_nxv2i32_unmasked( %va, i3 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1324,9 +1164,7 @@ define @vssubu_vi_nxv2i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1337,11 +1175,7 @@ define @vssubu_vi_nxv2i32_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1363,9 +1197,7 @@ define @vssubu_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1389,9 +1221,7 @@ define @vssubu_vx_nxv4i32_unmasked( %va, i3 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1402,9 +1232,7 @@ define @vssubu_vi_nxv4i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1415,11 +1243,7 @@ define @vssubu_vi_nxv4i32_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i32( %va, %vb, 
%m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1441,9 +1265,7 @@ define @vssubu_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1467,9 +1289,7 @@ define @vssubu_vx_nxv8i32_unmasked( %va, i3 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1480,9 +1300,7 @@ define @vssubu_vi_nxv8i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1493,11 +1311,7 @@ define @vssubu_vi_nxv8i32_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1519,9 +1333,7 @@ define @vssubu_vv_nxv16i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vssubu.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1545,9 +1357,7 @@ define @vssubu_vx_nxv16i32_unmasked( %va, ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1558,9 +1368,7 @@ define @vssubu_vi_nxv16i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv16i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1571,11 +1379,7 @@ define @vssubu_vi_nxv16i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv16i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1607,9 +1411,7 @@ define @vssubu_vi_nxv32i32( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv32i32( %va, splat 
(i32 -1), %m, i32 %evl) ret %v } @@ -1632,11 +1434,7 @@ define @vssubu_vi_nxv32i32_unmasked( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a2 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv32i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv32i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1658,9 +1456,7 @@ define @vssubu_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1712,9 +1508,7 @@ define @vssubu_vx_nxv1i64_unmasked( %va, i6 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1725,9 +1519,7 @@ define @vssubu_vi_nxv1i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1738,11 +1530,7 @@ define @vssubu_vi_nxv1i64_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv1i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1764,9 +1552,7 @@ define @vssubu_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1818,9 +1604,7 @@ define @vssubu_vx_nxv2i64_unmasked( %va, i6 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1831,9 +1615,7 @@ define @vssubu_vi_nxv2i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv2i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1844,11 +1626,7 @@ define @vssubu_vi_nxv2i64_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call 
@llvm.vp.usub.sat.nxv2i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1870,9 +1648,7 @@ define @vssubu_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1924,9 +1700,7 @@ define @vssubu_vx_nxv4i64_unmasked( %va, i6 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1937,9 +1711,7 @@ define @vssubu_vi_nxv4i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -1950,11 +1722,7 @@ define @vssubu_vi_nxv4i64_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv4i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -1976,9 +1744,7 @@ define @vssubu_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -2030,9 +1796,7 @@ define @vssubu_vx_nxv8i64_unmasked( %va, i6 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -2043,9 +1807,7 @@ define @vssubu_vi_nxv8i64( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -2056,10 +1818,6 @@ define @vssubu_vi_nxv8i64_unmasked( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vssubu.vx v8, v8, a1 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.usub.sat.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.usub.sat.nxv8i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll index 5ab028ffc3dec..b7f404c8e5ac9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll @@ -31,9 +31,7 @@ define @vsub_vx_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = 
insertelement poison, i8 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i8 1) ret %vc } @@ -44,11 +42,7 @@ define @vsub_ii_nxv1i8_1() { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v8, -1 ; CHECK-NEXT: ret - %heada = insertelement poison, i8 2, i32 0 - %splata = shufflevector %heada, poison, zeroinitializer - %headb = insertelement poison, i8 3, i32 0 - %splatb = shufflevector %headb, poison, zeroinitializer - %vc = sub %splata, %splatb + %vc = sub splat (i8 2), splat (i8 3) ret %vc } @@ -81,9 +75,7 @@ define @vsub_vx_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i8 1) ret %vc } @@ -116,9 +108,7 @@ define @vsub_vx_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i8 1) ret %vc } @@ -151,9 +141,7 @@ define @vsub_vx_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i8 1) ret %vc } @@ -186,9 +174,7 @@ define @vsub_vx_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i8 1) ret %vc } @@ -221,9 +207,7 @@ define @vsub_vx_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i8 1) ret %vc } @@ -256,9 +240,7 @@ define @vsub_vx_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i8 1) ret %vc } @@ -291,9 +273,7 @@ define @vsub_vx_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i16 1) ret %vc } @@ -326,9 +306,7 @@ define @vsub_vx_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i16 1) ret %vc } @@ -361,9 +339,7 @@ define @vsub_vx_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i16 1) ret %vc } @@ -396,9 +372,7 @@ define @vsub_vx_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: 
vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i16 1) ret %vc } @@ -431,9 +405,7 @@ define @vsub_vx_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i16 1) ret %vc } @@ -466,9 +438,7 @@ define @vsub_vx_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i16 1) ret %vc } @@ -501,9 +471,7 @@ define @vsub_vx_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i32 1) ret %vc } @@ -536,9 +504,7 @@ define @vsub_vx_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i32 1) ret %vc } @@ -571,9 +537,7 @@ define @vsub_vx_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i32 1) ret %vc } @@ -606,9 +570,7 @@ define @vsub_vx_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i32 1) ret %vc } @@ -641,9 +603,7 @@ define @vsub_vx_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i32 1) ret %vc } @@ -689,9 +649,7 @@ define @vsub_vx_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i64 1) ret %vc } @@ -737,9 +695,7 @@ define @vsub_vx_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i64 1) ret %vc } @@ -785,9 +741,7 @@ define @vsub_vx_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i64 1) ret %vc } @@ -833,9 +787,7 @@ define @vsub_vx_nxv8i64_0( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vsub.vx v8, v8, a0 ; CHECK-NEXT: ret - 
%head = insertelement poison, i64 1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = sub %va, %splat + %vc = sub %va, splat (i64 1) ret %vc } @@ -902,9 +854,7 @@ define @vsub_vi_mask_nxv8i32( %va, poison, i32 7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %splat, zeroinitializer + %vs = select %mask, splat (i32 7), zeroinitializer %vc = sub %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll index f424c6c3aed94..a2b9285fedeaf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll @@ -36,9 +36,7 @@ define @vsub_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -62,9 +60,7 @@ define @vsub_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -86,9 +82,7 @@ define @vsub_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -112,9 +106,7 @@ define @vsub_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -136,9 +128,7 @@ define @vsub_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -162,9 +152,7 @@ define @vsub_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -186,9 +174,7 @@ define @vsub_vv_nxv5i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv5i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv5i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -212,9 +198,7 @@ define @vsub_vx_nxv5i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv5i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv5i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -236,9 +220,7 @@ define @vsub_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call 
@llvm.vp.sub.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -262,9 +244,7 @@ define @vsub_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -286,9 +266,7 @@ define @vsub_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -312,9 +290,7 @@ define @vsub_vx_nxv16i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -336,9 +312,7 @@ define @vsub_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -362,9 +336,7 @@ define @vsub_vx_nxv32i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -386,9 +358,7 @@ define @vsub_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -412,9 +382,7 @@ define @vsub_vx_nxv64i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -436,9 +404,7 @@ define @vsub_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -462,9 +428,7 @@ define @vsub_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -486,9 +450,7 @@ define @vsub_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i16( %va, %b, splat (i1 
true), i32 %evl) ret %v } @@ -512,9 +474,7 @@ define @vsub_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -536,9 +496,7 @@ define @vsub_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -562,9 +520,7 @@ define @vsub_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -586,9 +542,7 @@ define @vsub_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -612,9 +566,7 @@ define @vsub_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -636,9 +588,7 @@ define @vsub_vv_nxv16i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -662,9 +612,7 @@ define @vsub_vx_nxv16i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -686,9 +634,7 @@ define @vsub_vv_nxv32i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -712,9 +658,7 @@ define @vsub_vx_nxv32i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -736,9 +680,7 @@ define @vsub_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - 
%m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -762,9 +704,7 @@ define @vsub_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -786,9 +726,7 @@ define @vsub_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -812,9 +750,7 @@ define @vsub_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -836,9 +772,7 @@ define @vsub_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -862,9 +796,7 @@ define @vsub_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -886,9 +818,7 @@ define @vsub_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -912,9 +842,7 @@ define @vsub_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -936,9 +864,7 @@ define @vsub_vv_nxv16i32_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -962,9 +888,7 @@ define @vsub_vx_nxv16i32_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -986,9 +910,7 @@ define @vsub_vv_nxv1i64_unmasked( 
%va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1040,9 +962,7 @@ define @vsub_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1064,9 +984,7 @@ define @vsub_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1118,9 +1036,7 @@ define @vsub_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1142,9 +1058,7 @@ define @vsub_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1196,9 +1110,7 @@ define @vsub_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv4i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1220,9 +1132,7 @@ define @vsub_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1274,8 +1184,6 @@ define @vsub_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.sub.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.sub.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll index 66e6883dd1d3e..21ddf1a6e114d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll @@ -1406,7 +1406,7 @@ define @vwaddu_vv_disjoint_or_add( %x.i8, %x.i8 to - %x.shl = shl %x.i16, shufflevector( insertelement( poison, i16 8, i32 0), poison, zeroinitializer) + %x.shl = shl %x.i16, splat (i16 8) %x.i32 = zext %x.shl to %y.i32 = zext %y.i8 to %add = add %x.i32, %y.i32 @@ -1466,3 +1466,159 @@ define @vwadd_wv_disjoint_or( %x.i32, %x.i32, %y.i32 ret %or } + +define @vwadd_vx_splat_zext( %va, i32 %b) { +; RV32-LABEL: vwadd_vx_splat_zext: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: sw a0, 
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vwaddu.wv v16, v16, v8
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwadd_vx_splat_zext:
+; RV64:       # %bb.0:
+; RV64-NEXT:    andi a0, a0, -1
+; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; RV64-NEXT:    vwaddu.vx v16, v8, a0
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %zb = zext i32 %b to i64
+  %head = insertelement <vscale x 8 x i64> poison, i64 %zb, i32 0
+  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = zext <vscale x 8 x i32> %va to <vscale x 8 x i64>
+  %ve = add <vscale x 8 x i64> %vc, %splat
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i32> @vwadd_vx_splat_zext_i1(<vscale x 8 x i1> %va, i16 %b) {
+; RV32-LABEL: vwadd_vx_splat_zext_i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a0, a0, 16
+; RV32-NEXT:    srli a0, a0, 16
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vmv.v.x v8, a0
+; RV32-NEXT:    vadd.vi v8, v8, 1, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwadd_vx_splat_zext_i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a0, a0, 48
+; RV64-NEXT:    srli a0, a0, 48
+; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT:    vmv.v.x v8, a0
+; RV64-NEXT:    vadd.vi v8, v8, 1, v0.t
+; RV64-NEXT:    ret
+  %zb = zext i16 %b to i32
+  %head = insertelement <vscale x 8 x i32> poison, i32 %zb, i32 0
+  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = zext <vscale x 8 x i1> %va to <vscale x 8 x i32>
+  %ve = add <vscale x 8 x i32> %vc, %splat
+  ret <vscale x 8 x i32> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_wx_splat_zext(<vscale x 8 x i64> %va, i32 %b) {
+; RV32-LABEL: vwadd_wx_splat_zext:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw zero, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwadd_wx_splat_zext:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a0, a0, 32
+; RV64-NEXT:    srli a0, a0, 32
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT:    vadd.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %zb = zext i32 %b to i64
+  %head = insertelement <vscale x 8 x i64> poison, i64 %zb, i32 0
+  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %ve = add <vscale x 8 x i64> %va, %splat
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_vx_splat_sext(<vscale x 8 x i32> %va, i32 %b) {
+; RV32-LABEL: vwadd_vx_splat_sext:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a0
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
+; RV32-NEXT:    vwadd.wv v16, v16, v8
+; RV32-NEXT:    vmv8r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwadd_vx_splat_sext:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; RV64-NEXT:    vwadd.vx v16, v8, a0
+; RV64-NEXT:    vmv8r.v v8, v16
+; RV64-NEXT:    ret
+  %sb = sext i32 %b to i64
+  %head = insertelement <vscale x 8 x i64> poison, i64 %sb, i32 0
+  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i32> %va to <vscale x 8 x i64>
+  %ve = add <vscale x 8 x i64> %vc, %splat
+  ret <vscale x 8 x i64> %ve
+}
+
+define <vscale x 8 x i32> @vwadd_vx_splat_sext_i1(<vscale x 8 x i1> %va, i16 %b) {
+; RV32-LABEL: vwadd_vx_splat_sext_i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    slli a0, a0, 16
+; RV32-NEXT:    srai a0, a0, 16
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vmv.v.x v8, a0
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v8, v8, a0, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwadd_vx_splat_sext_i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    slli a0, a0, 48
+; RV64-NEXT:    srai a0, a0, 48
+; RV64-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV64-NEXT:    vmv.v.x v8, a0
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v8, v8, a0, v0.t
+; RV64-NEXT:    ret
+  %sb = sext i16 %b to i32
+  %head = insertelement <vscale x 8 x i32> poison, i32 %sb, i32 0
+  %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %vc = sext <vscale x 8 x i1> %va to <vscale x 8 x i32>
+  %ve = add <vscale x 8 x i32> %vc, %splat
+  ret <vscale x 8 x i32> %ve
+}
+
+define <vscale x 8 x i64> @vwadd_wx_splat_sext(<vscale x 8 x i64> %va, i32 %b) {
+; RV32-LABEL: vwadd_wx_splat_sext:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT:    vadd.vx v8, v8, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vwadd_wx_splat_sext:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; RV64-NEXT:    vadd.vx v8, v8, a0
+; RV64-NEXT:    ret
+  %sb = sext i32 %b to i64
+  %head = insertelement <vscale x 8 x i64> poison, i64 %sb, i32 0
+  %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %ve = add <vscale x 8 x i64> %va, %splat
+  ret <vscale x 8 x i64> %ve
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-vp.ll
index a0b7726d3cb5e..433f5d2717e48 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-vp.ll
@@ -41,3 +41,61 @@ declare <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i8(<vscale x 2 x i8>, <vsc
 declare <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
 declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
 declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
+
+define <vscale x 2 x i32> @vwadd_vv_vpnxv2i32_vpnxv2i16_vpnxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 signext %evl) {
+; CHECK-LABEL: vwadd_vv_vpnxv2i32_vpnxv2i16_vpnxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vwadd.vv v10, v8, v9, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %x.sext = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i1> %m, i32 %evl)
+  %y.sext = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i16(<vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 %evl)
+  %add = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x.sext, <vscale x 2 x i32> %y.sext, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @vwadd_vv_vpnxv2i32_vpnxv2i16_nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 signext %evl) {
+; CHECK-LABEL: vwadd_vv_vpnxv2i32_vpnxv2i16_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vwadd.vv v10, v8, v9, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %x.sext = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i1> %m, i32 %evl)
+  %y.sext = sext <vscale x 2 x i16> %y to <vscale x 2 x i32>
+  %add = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x.sext, <vscale x 2 x i32> %y.sext, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @vwadd_vv_vpnxv2i32_nxv2i16_nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 signext %evl) {
+; CHECK-LABEL: vwadd_vv_vpnxv2i32_nxv2i16_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    vwadd.vv v10, v8, v9, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %x.sext = sext <vscale x 2 x i16> %x to <vscale x 2 x i32>
+  %y.sext = sext <vscale x 2 x i16> %y to <vscale x 2 x i32>
+  %add = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x.sext, <vscale x 2 x i32> %y.sext, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %add
+}
+
+define <vscale x 2 x i32> @vwadd_vv_nxv2i32_vpnxv2i16_vpnxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 signext %evl) {
+; CHECK-LABEL: vwadd_vv_nxv2i32_vpnxv2i16_vpnxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vwadd.vv v10, v8, v9
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %x.sext = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i16(<vscale x 2 x i16> %x, <vscale x 2 x i1> %m, i32 %evl)
+  %y.sext = call <vscale x 2 x i32> @llvm.vp.sext.nxv2i32.nxv2i16(<vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 %evl)
+  %add = add <vscale x 2 x i32> %x.sext, %y.sext
+  ret <vscale x 2 x i32> %add
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vwmacc-vp.ll
index f5cf4acd592c0..02bc8d2731153 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmacc-vp.ll
@@ -17,12 +17,10 @@ define <vscale x 1 x i32> @vwmacc_vv_nxv1i32_unmasked_tu(<vscale x 1 x i16> %a,
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
     <vscale x 1 x i16> %b, <vscale x 1 x i32> %c, i32 zeroext %evl) {
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %aext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i1> %allones, i32 %evl)
-  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
-  %abmul = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %aext, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
-  %cadd = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %abmul, <vscale x 1 x i32> %c, <vscale x 1 x i1> %allones, i32 %evl)
-  %ret = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %allones, <vscale x 1 x i32> %cadd, <vscale x 1 x i32> %c, i32 %evl)
+  %aext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %abmul = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %aext, <vscale x 1 x i32> %bext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %cadd = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %abmul, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %ret = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i32> %cadd, <vscale x 1 x i32> %c, i32 %evl)
   ret <vscale x 1 x i32> %ret
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-vp.ll
index 72ef25ee9c318..486a5b09b677c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-vp.ll
@@ -18,13 +18,11 @@ define <vscale x 1 x i32> @vwmacc_vv_nxv1i32_unmasked_tu(<vscale x 1 x i16> %a,
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
     <vscale x 1 x i16> %b, <vscale x 1 x i32> %c, i32 zeroext %evl) {
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %aext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i1> %allones, i32 %evl)
-  %bext = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
-  %abmul = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %aext, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
-  %cadd = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %abmul, <vscale x 1 x i32> %c, <vscale x 1 x i1> %allones, i32 %evl)
-  %ret = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %allones, <vscale x 1 x i32> %cadd, <vscale x 1 x i32> %c, i32 %evl)
+  %aext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %bext = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %abmul = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %aext, <vscale x 1 x i32> %bext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %cadd = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %abmul, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %ret = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i32> %cadd, <vscale x 1 x i32> %c, i32 %evl)
   ret <vscale x 1 x i32> %ret
 }

@@ -36,12 +34,10 @@ define <vscale x 1 x i32> @vwmacc_vv_nxv1i32_commute_unmasked_tu(<vscale x 1 x
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
     <vscale x 1 x i16> %b, <vscale x 1 x i32> %c, i32 zeroext %evl) {
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %aext = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i1> %allones, i32 %evl)
-  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
-  %abmul = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %aext, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
-  %cadd = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %abmul, <vscale x 1 x i32> %c, <vscale x 1 x i1> %allones, i32 %evl)
-  %ret = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %allones, <vscale x 1 x i32> %cadd, <vscale x 1 x i32> %c, i32 %evl)
+  %aext = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %bext = call <vscale x 1 x i32> @llvm.vp.sext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %abmul = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %aext, <vscale x 1 x i32> %bext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %cadd = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %abmul, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %ret = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i32> %cadd, <vscale x 1 x i32> %c, i32 %evl)
   ret <vscale x 1 x i32> %ret
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-vp.ll
index 74dcb92b7cd65..125270be4fc85 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmaccu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-vp.ll
@@ -17,12 +17,10 @@ define <vscale x 1 x i32> @vwmacc_vv_nxv1i32_unmasked_tu(<vscale x 1 x i16> %a,
 ; CHECK-NEXT:    vmv1r.v v8, v10
 ; CHECK-NEXT:    ret
     <vscale x 1 x i16> %b, <vscale x 1 x i32> %c, i32 zeroext %evl) {
-  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
-  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %aext = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i1> %allones, i32 %evl)
-  %bext = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> %allones, i32 %evl)
-  %abmul = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %aext, <vscale x 1 x i32> %bext, <vscale x 1 x i1> %allones, i32 %evl)
-  %cadd = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %abmul, <vscale x 1 x i32> %c, <vscale x 1 x i1> %allones, i32 %evl)
-  %ret = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %allones, <vscale x 1 x i32> %cadd, <vscale x 1 x i32> %c, i32 %evl)
+  %aext = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %bext = call <vscale x 1 x i32> @llvm.vp.zext.nxv1i32.nxv1i16(<vscale x 1 x i16> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %abmul = call <vscale x 1 x i32> @llvm.vp.mul.nxv1i32(<vscale x 1 x i32> %aext, <vscale x 1 x i32> %bext, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %cadd = call <vscale x 1 x i32> @llvm.vp.add.nxv1i32(<vscale x 1 x i32> %abmul, <vscale x 1 x i32> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
+  %ret = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x i32> %cadd, <vscale x 1 x i32> %c, i32 %evl)
   ret <vscale x 1 x i32> %ret
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
index 082de2e7bf77b..41ec2fc443d02 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,RV32ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,RV64ZVBB

 ; ==============================================================================
 ; i32 -> i64
@@ -665,10 +665,10 @@ define <vscale x 2 x i64> @vwsll_vv_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a, <v
   %x = zext <vscale x 2 x i8> %a to <vscale x 2 x i64>
   %y = zext <vscale x 2 x i8> %b to <vscale x 2 x i64>
@@ -739,11 +739,8 @@ define <vscale x 2 x i64> @vwsll_vx_i32_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a
 ; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
 ; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf2 v12, v9
-; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> poison, i32 %b, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
@@ -794,12 +791,9 @@ define <vscale x 2 x i64> @vwsll_vx_i16_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf4 v12, v9
-; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
+; CHECK-ZVBB-NEXT:    vwsll.vx v8, v10, a0
 ; CHECK-ZVBB-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -850,12 +844,9 @@ define <vscale x 2 x i64> @vwsll_vx_i8_nxv2i64_nxv2i8_zext(<vscale x 2 x i8> %a,
 ;
 ; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_zext:
 ; CHECK-ZVBB:       # %bb.0:
-; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-ZVBB-NEXT:    vmv.v.x v9, a0
-; CHECK-ZVBB-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-ZVBB-NEXT:    vzext.vf8 v10, v8
-; CHECK-ZVBB-NEXT:    vzext.vf8 v12, v9
-; CHECK-ZVBB-NEXT:    vsll.vv v8, v10, v12
+; CHECK-ZVBB-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-ZVBB-NEXT:    vzext.vf4 v10, v8
+;
CHECK-ZVBB-NEXT: vwsll.vx v8, v10, a0 ; CHECK-ZVBB-NEXT: ret %head = insertelement poison, i8 %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -873,12 +864,19 @@ define @vwsll_vi_nxv2i64_nxv2i8( %a) { ; CHECK-NEXT: vsll.vi v8, v10, 2 ; CHECK-NEXT: ret ; -; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64_nxv2i8: -; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 -; CHECK-ZVBB-NEXT: ret +; RV32ZVBB-LABEL: vwsll_vi_nxv2i64_nxv2i8: +; RV32ZVBB: # %bb.0: +; RV32ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32ZVBB-NEXT: vzext.vf8 v10, v8 +; RV32ZVBB-NEXT: vsll.vi v8, v10, 2 +; RV32ZVBB-NEXT: ret +; +; RV64ZVBB-LABEL: vwsll_vi_nxv2i64_nxv2i8: +; RV64ZVBB: # %bb.0: +; RV64ZVBB-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; RV64ZVBB-NEXT: vzext.vf4 v10, v8 +; RV64ZVBB-NEXT: vwsll.vi v8, v10, 2 +; RV64ZVBB-NEXT: ret %x = zext %a to %z = shl %x, splat (i64 2) ret %z diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll index e8c4f1fed6efb..3f10b10675ca7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll @@ -30,9 +30,7 @@ define @vxor_vi_nxv1i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 -1) ret %vc } @@ -42,9 +40,7 @@ define @vxor_vi_nxv1i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 8) ret %vc } @@ -55,9 +51,7 @@ define @vxor_vi_nxv1i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 16) ret %vc } @@ -89,9 +83,7 @@ define @vxor_vi_nxv2i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 -1) ret %vc } @@ -101,9 +93,7 @@ define @vxor_vi_nxv2i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 8) ret %vc } @@ -114,9 +104,7 @@ define @vxor_vi_nxv2i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 16) ret %vc } @@ -148,9 +136,7 @@ define @vxor_vi_nxv4i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 -1) ret %vc } @@ -160,9 +146,7 @@ define @vxor_vi_nxv4i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; 
CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 8) ret %vc } @@ -173,9 +157,7 @@ define @vxor_vi_nxv4i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 16) ret %vc } @@ -207,9 +189,7 @@ define @vxor_vi_nxv8i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 -1) ret %vc } @@ -219,9 +199,7 @@ define @vxor_vi_nxv8i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 8) ret %vc } @@ -232,9 +210,7 @@ define @vxor_vi_nxv8i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 16) ret %vc } @@ -266,9 +242,7 @@ define @vxor_vi_nxv16i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 -1) ret %vc } @@ -278,9 +252,7 @@ define @vxor_vi_nxv16i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 8) ret %vc } @@ -291,9 +263,7 @@ define @vxor_vi_nxv16i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 16) ret %vc } @@ -325,9 +295,7 @@ define @vxor_vi_nxv32i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 -1) ret %vc } @@ -337,9 +305,7 @@ define @vxor_vi_nxv32i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 8) ret %vc } @@ -350,9 +316,7 @@ define @vxor_vi_nxv32i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 16) ret %vc } @@ -384,9 +348,7 @@ define @vxor_vi_nxv64i8_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 -1, i32 0 - %splat = shufflevector 
%head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 -1) ret %vc } @@ -396,9 +358,7 @@ define @vxor_vi_nxv64i8_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i8 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 8) ret %vc } @@ -409,9 +369,7 @@ define @vxor_vi_nxv64i8_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i8 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i8 16) ret %vc } @@ -443,9 +401,7 @@ define @vxor_vi_nxv1i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 -1) ret %vc } @@ -455,9 +411,7 @@ define @vxor_vi_nxv1i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 8) ret %vc } @@ -468,9 +422,7 @@ define @vxor_vi_nxv1i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 16) ret %vc } @@ -502,9 +454,7 @@ define @vxor_vi_nxv2i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 -1) ret %vc } @@ -514,9 +464,7 @@ define @vxor_vi_nxv2i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 8) ret %vc } @@ -527,9 +475,7 @@ define @vxor_vi_nxv2i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 16) ret %vc } @@ -561,9 +507,7 @@ define @vxor_vi_nxv4i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 -1) ret %vc } @@ -573,9 +517,7 @@ define @vxor_vi_nxv4i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 8) ret %vc } @@ -586,9 +528,7 @@ define @vxor_vi_nxv4i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, 
%splat + %vc = xor %va, splat (i16 16) ret %vc } @@ -620,9 +560,7 @@ define @vxor_vi_nxv8i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 -1) ret %vc } @@ -632,9 +570,7 @@ define @vxor_vi_nxv8i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 8) ret %vc } @@ -645,9 +581,7 @@ define @vxor_vi_nxv8i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 16) ret %vc } @@ -679,9 +613,7 @@ define @vxor_vi_nxv16i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 -1) ret %vc } @@ -691,9 +623,7 @@ define @vxor_vi_nxv16i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 8) ret %vc } @@ -704,9 +634,7 @@ define @vxor_vi_nxv16i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 16) ret %vc } @@ -738,9 +666,7 @@ define @vxor_vi_nxv32i16_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 -1) ret %vc } @@ -750,9 +676,7 @@ define @vxor_vi_nxv32i16_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i16 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 8) ret %vc } @@ -763,9 +687,7 @@ define @vxor_vi_nxv32i16_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i16 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i16 16) ret %vc } @@ -797,9 +719,7 @@ define @vxor_vi_nxv1i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 -1) ret %vc } @@ -809,9 +729,7 @@ define @vxor_vi_nxv1i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 8) ret %vc 
} @@ -822,9 +740,7 @@ define @vxor_vi_nxv1i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 16) ret %vc } @@ -856,9 +772,7 @@ define @vxor_vi_nxv2i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 -1) ret %vc } @@ -868,9 +782,7 @@ define @vxor_vi_nxv2i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 8) ret %vc } @@ -881,9 +793,7 @@ define @vxor_vi_nxv2i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 16) ret %vc } @@ -915,9 +825,7 @@ define @vxor_vi_nxv4i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 -1) ret %vc } @@ -927,9 +835,7 @@ define @vxor_vi_nxv4i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 8) ret %vc } @@ -940,9 +846,7 @@ define @vxor_vi_nxv4i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 16) ret %vc } @@ -974,9 +878,7 @@ define @vxor_vi_nxv8i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 -1) ret %vc } @@ -986,9 +888,7 @@ define @vxor_vi_nxv8i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 8) ret %vc } @@ -999,9 +899,7 @@ define @vxor_vi_nxv8i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 16) ret %vc } @@ -1033,9 +931,7 @@ define @vxor_vi_nxv16i32_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 -1) ret %vc } @@ -1045,9 +941,7 @@ define 
@vxor_vi_nxv16i32_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i32 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 8) ret %vc } @@ -1058,9 +952,7 @@ define @vxor_vi_nxv16i32_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i32 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i32 16) ret %vc } @@ -1105,9 +997,7 @@ define @vxor_vi_nxv1i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i64 -1) ret %vc } @@ -1117,9 +1007,7 @@ define @vxor_vi_nxv1i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i64 8) ret %vc } @@ -1130,9 +1018,7 @@ define @vxor_vi_nxv1i64_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i64 16) ret %vc } @@ -1177,9 +1063,7 @@ define @vxor_vi_nxv2i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i64 -1) ret %vc } @@ -1189,9 +1073,7 @@ define @vxor_vi_nxv2i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i64 8) ret %vc } @@ -1202,9 +1084,7 @@ define @vxor_vi_nxv2i64_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i64 16) ret %vc } @@ -1249,9 +1129,7 @@ define @vxor_vi_nxv4i64_0( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i64 -1) ret %vc } @@ -1261,9 +1139,7 @@ define @vxor_vi_nxv4i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i64 8) ret %vc } @@ -1274,9 +1150,7 @@ define @vxor_vi_nxv4i64_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i64 16) ret %vc } @@ -1321,9 +1195,7 @@ define @vxor_vi_nxv8i64_0( %va) { ; 
CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 -1, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i64 -1) ret %vc } @@ -1333,9 +1205,7 @@ define @vxor_vi_nxv8i64_1( %va) { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 8 ; CHECK-NEXT: ret - %head = insertelement poison, i64 8, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i64 8) ret %vc } @@ -1346,9 +1216,7 @@ define @vxor_vi_nxv8i64_2( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vxor.vx v8, v8, a0 ; CHECK-NEXT: ret - %head = insertelement poison, i64 16, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = xor %va, %splat + %vc = xor %va, splat (i64 16) ret %vc } @@ -1413,9 +1281,7 @@ define @vxor_vi_mask_nxv8i32( %va, poison, i32 7, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vs = select %mask, %splat, zeroinitializer + %vs = select %mask, splat (i32 7), zeroinitializer %vc = xor %va, %vs ret %vc } diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll index 96b86ff6179d4..f2235b4fdc94b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll @@ -36,9 +36,7 @@ define @vxor_vv_nxv1i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -62,9 +60,7 @@ define @vxor_vx_nxv1i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -74,9 +70,7 @@ define @vxor_vi_nxv1i8( %va, poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i8( %va, splat (i8 7), %m, i32 %evl) ret %v } @@ -86,11 +80,7 @@ define @vxor_vi_nxv1i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i8( %va, splat (i8 7), splat (i1 true), i32 %evl) ret %v } @@ -100,9 +90,7 @@ define @vxor_vi_nxv1i8_1( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -112,11 +100,7 @@ define @vxor_vi_nxv1i8_unmasked_1( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i8( %va, %vb, %m, i32 %evl) + %v = call 
@llvm.vp.xor.nxv1i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -138,9 +122,7 @@ define @vxor_vv_nxv2i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -164,9 +146,7 @@ define @vxor_vx_nxv2i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -176,9 +156,7 @@ define @vxor_vi_nxv2i8( %va, poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i8( %va, splat (i8 7), %m, i32 %evl) ret %v } @@ -188,11 +166,7 @@ define @vxor_vi_nxv2i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i8( %va, splat (i8 7), splat (i1 true), i32 %evl) ret %v } @@ -202,9 +176,7 @@ define @vxor_vi_nxv2i8_1( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -214,11 +186,7 @@ define @vxor_vi_nxv2i8_unmasked_1( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -240,9 +208,7 @@ define @vxor_vv_nxv4i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -266,9 +232,7 @@ define @vxor_vx_nxv4i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -278,9 +242,7 @@ define @vxor_vi_nxv4i8( %va, poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i8( %va, splat (i8 7), %m, i32 %evl) ret %v } @@ -290,11 +252,7 @@ define @vxor_vi_nxv4i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement 
poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i8( %va, splat (i8 7), splat (i1 true), i32 %evl) ret %v } @@ -304,9 +262,7 @@ define @vxor_vi_nxv4i8_1( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -316,11 +272,7 @@ define @vxor_vi_nxv4i8_unmasked_1( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -342,9 +294,7 @@ define @vxor_vv_nxv8i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -368,9 +318,7 @@ define @vxor_vx_nxv8i8_unmasked( %va, i8 %b, ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -380,9 +328,7 @@ define @vxor_vi_nxv8i8( %va, poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i8( %va, splat (i8 7), %m, i32 %evl) ret %v } @@ -392,11 +338,7 @@ define @vxor_vi_nxv8i8_unmasked( %va, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i8( %va, splat (i8 7), splat (i1 true), i32 %evl) ret %v } @@ -406,9 +348,7 @@ define @vxor_vi_nxv8i8_1( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -418,11 +358,7 @@ define @vxor_vi_nxv8i8_unmasked_1( %va, i32 z ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -444,9 +380,7 @@ define @vxor_vv_nxv15i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv15i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv15i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -470,9 +404,7 @@ define @vxor_vx_nxv15i8_unmasked( %va, 
i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv15i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv15i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -482,9 +414,7 @@ define @vxor_vi_nxv15i8( %va, poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv15i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv15i8( %va, splat (i8 7), %m, i32 %evl) ret %v } @@ -494,11 +424,7 @@ define @vxor_vi_nxv15i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv15i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv15i8( %va, splat (i8 7), splat (i1 true), i32 %evl) ret %v } @@ -508,9 +434,7 @@ define @vxor_vi_nxv15i8_1( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv15i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv15i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -520,11 +444,7 @@ define @vxor_vi_nxv15i8_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv15i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv15i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -546,9 +466,7 @@ define @vxor_vv_nxv16i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -572,9 +490,7 @@ define @vxor_vx_nxv16i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -584,9 +500,7 @@ define @vxor_vi_nxv16i8( %va, poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i8( %va, splat (i8 7), %m, i32 %evl) ret %v } @@ -596,11 +510,7 @@ define @vxor_vi_nxv16i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i8( %va, splat (i8 7), splat (i1 true), i32 %evl) ret %v } @@ -610,9 +520,7 @@ define @vxor_vi_nxv16i8_1( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i8( %va, %vb, %m, 
i32 %evl) + %v = call @llvm.vp.xor.nxv16i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -622,11 +530,7 @@ define @vxor_vi_nxv16i8_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -648,9 +552,7 @@ define @vxor_vv_nxv32i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv32i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv32i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -674,9 +576,7 @@ define @vxor_vx_nxv32i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv32i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -686,9 +586,7 @@ define @vxor_vi_nxv32i8( %va, poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv32i8( %va, splat (i8 7), %m, i32 %evl) ret %v } @@ -698,11 +596,7 @@ define @vxor_vi_nxv32i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv32i8( %va, splat (i8 7), splat (i1 true), i32 %evl) ret %v } @@ -712,9 +606,7 @@ define @vxor_vi_nxv32i8_1( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv32i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -724,11 +616,7 @@ define @vxor_vi_nxv32i8_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv32i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv32i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -750,9 +638,7 @@ define @vxor_vv_nxv64i8_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv64i8( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv64i8( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -776,9 +662,7 @@ define @vxor_vx_nxv64i8_unmasked( %va, i8 % ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv64i8( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -788,9 +672,7 @@ 
define @vxor_vi_nxv64i8( %va, poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv64i8( %va, splat (i8 7), %m, i32 %evl) ret %v } @@ -800,11 +682,7 @@ define @vxor_vi_nxv64i8_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv64i8( %va, splat (i8 7), splat (i1 true), i32 %evl) ret %v } @@ -814,9 +692,7 @@ define @vxor_vi_nxv64i8_1( %va, poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv64i8( %va, splat (i8 -1), %m, i32 %evl) ret %v } @@ -826,11 +702,7 @@ define @vxor_vi_nxv64i8_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i8 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv64i8( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv64i8( %va, splat (i8 -1), splat (i1 true), i32 %evl) ret %v } @@ -852,9 +724,7 @@ define @vxor_vv_nxv1i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -890,9 +760,7 @@ define @vxor_vx_nxv1i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -902,9 +770,7 @@ define @vxor_vi_nxv1i16( %va, poison, i16 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i16( %va, splat (i16 7), %m, i32 %evl) ret %v } @@ -914,11 +780,7 @@ define @vxor_vi_nxv1i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i16( %va, splat (i16 7), splat (i1 true), i32 %evl) ret %v } @@ -928,9 +790,7 @@ define @vxor_vi_nxv1i16_1( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -940,11 +800,7 @@ define @vxor_vi_nxv1i16_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = 
insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -966,9 +822,7 @@ define @vxor_vv_nxv2i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -992,9 +846,7 @@ define @vxor_vx_nxv2i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1004,9 +856,7 @@ define @vxor_vi_nxv2i16( %va, poison, i16 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i16( %va, splat (i16 7), %m, i32 %evl) ret %v } @@ -1016,11 +866,7 @@ define @vxor_vi_nxv2i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i16( %va, splat (i16 7), splat (i1 true), i32 %evl) ret %v } @@ -1030,9 +876,7 @@ define @vxor_vi_nxv2i16_1( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1042,11 +886,7 @@ define @vxor_vi_nxv2i16_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1068,9 +908,7 @@ define @vxor_vv_nxv4i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1094,9 +932,7 @@ define @vxor_vx_nxv4i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1106,9 +942,7 @@ define @vxor_vi_nxv4i16( %va, poison, i16 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i16( %va, splat (i16 7), %m, i32 %evl) ret %v } @@ -1118,11 +952,7 @@ define @vxor_vi_nxv4i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, 
m1, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i16( %va, splat (i16 7), splat (i1 true), i32 %evl) ret %v } @@ -1132,9 +962,7 @@ define @vxor_vi_nxv4i16_1( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1144,11 +972,7 @@ define @vxor_vi_nxv4i16_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1170,9 +994,7 @@ define @vxor_vv_nxv8i16_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1196,9 +1018,7 @@ define @vxor_vx_nxv8i16_unmasked( %va, i16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1208,9 +1028,7 @@ define @vxor_vi_nxv8i16( %va, poison, i16 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i16( %va, splat (i16 7), %m, i32 %evl) ret %v } @@ -1220,11 +1038,7 @@ define @vxor_vi_nxv8i16_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i16( %va, splat (i16 7), splat (i1 true), i32 %evl) ret %v } @@ -1234,9 +1048,7 @@ define @vxor_vi_nxv8i16_1( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1246,11 +1058,7 @@ define @vxor_vi_nxv8i16_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1272,9 +1080,7 @@ define @vxor_vv_nxv16i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, 
e16, m4, ta, ma ; CHECK-NEXT: vxor.vv v8, v8, v12 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1298,9 +1104,7 @@ define @vxor_vx_nxv16i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1310,9 +1114,7 @@ define @vxor_vi_nxv16i16( %va, poison, i16 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i16( %va, splat (i16 7), %m, i32 %evl) ret %v } @@ -1322,11 +1124,7 @@ define @vxor_vi_nxv16i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i16( %va, splat (i16 7), splat (i1 true), i32 %evl) ret %v } @@ -1336,9 +1134,7 @@ define @vxor_vi_nxv16i16_1( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1348,11 +1144,7 @@ define @vxor_vi_nxv16i16_unmasked_1( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1374,9 +1166,7 @@ define @vxor_vv_nxv32i16_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vxor.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv32i16( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv32i16( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1400,9 +1190,7 @@ define @vxor_vx_nxv32i16_unmasked( %va, i ; CHECK-NEXT: ret %elt.head = insertelement poison, i16 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv32i16( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1412,9 +1200,7 @@ define @vxor_vi_nxv32i16( %va, poison, i16 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv32i16( %va, splat (i16 7), %m, i32 %evl) ret %v } @@ -1424,11 +1210,7 @@ define @vxor_vi_nxv32i16_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: 
ret - %elt.head = insertelement poison, i16 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv32i16( %va, splat (i16 7), splat (i1 true), i32 %evl) ret %v } @@ -1438,9 +1220,7 @@ define @vxor_vi_nxv32i16_1( %va, poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv32i16( %va, splat (i16 -1), %m, i32 %evl) ret %v } @@ -1450,11 +1230,7 @@ define @vxor_vi_nxv32i16_unmasked_1( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i16 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv32i16( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv32i16( %va, splat (i16 -1), splat (i1 true), i32 %evl) ret %v } @@ -1476,9 +1252,7 @@ define @vxor_vv_nxv1i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1502,9 +1276,7 @@ define @vxor_vx_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1514,9 +1286,7 @@ define @vxor_vi_nxv1i32( %va, poison, i32 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i32( %va, splat (i32 7), %m, i32 %evl) ret %v } @@ -1526,11 +1296,7 @@ define @vxor_vi_nxv1i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i32( %va, splat (i32 7), splat (i1 true), i32 %evl) ret %v } @@ -1540,9 +1306,7 @@ define @vxor_vi_nxv1i32_1( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1552,11 +1316,7 @@ define @vxor_vi_nxv1i32_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1578,9 +1338,7 @@ define @vxor_vv_nxv2i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = 
call @llvm.vp.xor.nxv2i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1604,9 +1362,7 @@ define @vxor_vx_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1616,9 +1372,7 @@ define @vxor_vi_nxv2i32( %va, poison, i32 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i32( %va, splat (i32 7), %m, i32 %evl) ret %v } @@ -1628,11 +1382,7 @@ define @vxor_vi_nxv2i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i32( %va, splat (i32 7), splat (i1 true), i32 %evl) ret %v } @@ -1642,9 +1392,7 @@ define @vxor_vi_nxv2i32_1( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1654,11 +1402,7 @@ define @vxor_vi_nxv2i32_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1680,9 +1424,7 @@ define @vxor_vv_nxv4i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1706,9 +1448,7 @@ define @vxor_vx_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1718,9 +1458,7 @@ define @vxor_vi_nxv4i32( %va, poison, i32 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i32( %va, splat (i32 7), %m, i32 %evl) ret %v } @@ -1730,11 +1468,7 @@ define @vxor_vi_nxv4i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i32( %va, splat 
(i32 7), splat (i1 true), i32 %evl) ret %v } @@ -1744,9 +1478,7 @@ define @vxor_vi_nxv4i32_1( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1756,11 +1488,7 @@ define @vxor_vi_nxv4i32_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1782,9 +1510,7 @@ define @vxor_vv_nxv8i32_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1808,9 +1534,7 @@ define @vxor_vx_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1820,9 +1544,7 @@ define @vxor_vi_nxv8i32( %va, poison, i32 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i32( %va, splat (i32 7), %m, i32 %evl) ret %v } @@ -1832,11 +1554,7 @@ define @vxor_vi_nxv8i32_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i32( %va, splat (i32 7), splat (i1 true), i32 %evl) ret %v } @@ -1846,9 +1564,7 @@ define @vxor_vi_nxv8i32_1( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1858,11 +1574,7 @@ define @vxor_vi_nxv8i32_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1884,9 +1596,7 @@ define @vxor_vv_nxv16i32_unmasked( %va, < ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vxor.vv v8, v8, v16 ; CHECK-NEXT: ret - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i32( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i32( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -1910,9 +1620,7 @@ define @vxor_vx_nxv16i32_unmasked( %va, 
i ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i32( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -1922,9 +1630,7 @@ define @vxor_vi_nxv16i32( %va, poison, i32 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i32( %va, splat (i32 7), %m, i32 %evl) ret %v } @@ -1934,11 +1640,7 @@ define @vxor_vi_nxv16i32_unmasked( %va, i ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i32( %va, splat (i32 7), splat (i1 true), i32 %evl) ret %v } @@ -1948,9 +1650,7 @@ define @vxor_vi_nxv16i32_1( %va, poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i32( %va, splat (i32 -1), %m, i32 %evl) ret %v } @@ -1960,11 +1660,7 @@ define @vxor_vi_nxv16i32_unmasked_1( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i32 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv16i32( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv16i32( %va, splat (i32 -1), splat (i1 true), i32 %evl) ret %v } @@ -1986,9 +1682,7 @@ define @vxor_vv_nxv1i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -2040,9 +1734,7 @@ define @vxor_vx_nxv1i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -2052,9 +1744,7 @@ define @vxor_vi_nxv1i64( %va, poison, i64 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i64( %va, splat (i64 7), %m, i32 %evl) ret %v } @@ -2064,11 +1754,7 @@ define @vxor_vi_nxv1i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i64( %va, splat (i64 7), splat (i1 true), i32 %evl) ret %v } @@ -2078,9 +1764,7 @@ define @vxor_vi_nxv1i64_1( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = 
call @llvm.vp.xor.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -2090,11 +1774,7 @@ define @vxor_vi_nxv1i64_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv1i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv1i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -2116,9 +1796,7 @@ define @vxor_vv_nxv2i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -2170,9 +1848,7 @@ define @vxor_vx_nxv2i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -2182,9 +1858,7 @@ define @vxor_vi_nxv2i64( %va, poison, i64 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i64( %va, splat (i64 7), %m, i32 %evl) ret %v } @@ -2194,11 +1868,7 @@ define @vxor_vi_nxv2i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i64( %va, splat (i64 7), splat (i1 true), i32 %evl) ret %v } @@ -2208,9 +1878,7 @@ define @vxor_vi_nxv2i64_1( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -2220,11 +1888,7 @@ define @vxor_vi_nxv2i64_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv2i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv2i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -2246,9 +1910,7 @@ define @vxor_vv_nxv4i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -2300,9 +1962,7 @@ define @vxor_vx_nxv4i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i64( 
%va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -2312,9 +1972,7 @@ define @vxor_vi_nxv4i64( %va, poison, i64 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i64( %va, splat (i64 7), %m, i32 %evl) ret %v } @@ -2324,11 +1982,7 @@ define @vxor_vi_nxv4i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i64( %va, splat (i64 7), splat (i1 true), i32 %evl) ret %v } @@ -2338,9 +1992,7 @@ define @vxor_vi_nxv4i64_1( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -2350,11 +2002,7 @@ define @vxor_vi_nxv4i64_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv4i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv4i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } @@ -2376,9 +2024,7 @@ define @vxor_vv_nxv8i64_unmasked( %va, poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i64( %va, %b, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i64( %va, %b, splat (i1 true), i32 %evl) ret %v } @@ -2430,9 +2076,7 @@ define @vxor_vx_nxv8i64_unmasked( %va, i64 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i64( %va, %vb, splat (i1 true), i32 %evl) ret %v } @@ -2442,9 +2086,7 @@ define @vxor_vi_nxv8i64( %va, poison, i64 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i64( %va, splat (i64 7), %m, i32 %evl) ret %v } @@ -2454,11 +2096,7 @@ define @vxor_vi_nxv8i64_unmasked( %va, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vxor.vi v8, v8, 7 ; CHECK-NEXT: ret - %elt.head = insertelement poison, i64 7, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i64( %va, splat (i64 7), splat (i1 true), i32 %evl) ret %v } @@ -2468,9 +2106,7 @@ define @vxor_vi_nxv8i64_1( %va, poison, i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i64( %va, splat (i64 -1), %m, i32 %evl) ret %v } @@ -2480,10 +2116,6 @@ define @vxor_vi_nxv8i64_unmasked_1( %va, i3 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: ret - %elt.head = insertelement poison, 
i64 -1, i32 0 - %vb = shufflevector %elt.head, poison, zeroinitializer - %head = insertelement poison, i1 true, i32 0 - %m = shufflevector %head, poison, zeroinitializer - %v = call @llvm.vp.xor.nxv8i64( %va, %vb, %m, i32 %evl) + %v = call @llvm.vp.xor.nxv8i64( %va, splat (i64 -1), splat (i1 true), i32 %evl) ret %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir index 85bb54471ed3c..a44a93449332f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir @@ -7,30 +7,24 @@ name: copy_zvlsseg_N2 body: | bb.0: ; CHECK-LABEL: name: copy_zvlsseg_N2 - ; CHECK: $v2 = VMV1R_V $v4 - ; CHECK-NEXT: $v3 = VMV1R_V $v5 + ; CHECK: $v2m2 = VMV2R_V $v4m2 ; CHECK-NEXT: $v3 = VMV1R_V $v4 ; CHECK-NEXT: $v4 = VMV1R_V $v5 ; CHECK-NEXT: $v6 = VMV1R_V $v5 ; CHECK-NEXT: $v5 = VMV1R_V $v4 - ; CHECK-NEXT: $v6 = VMV1R_V $v4 - ; CHECK-NEXT: $v7 = VMV1R_V $v5 - ; CHECK-NEXT: $v0m2 = VMV2R_V $v4m2 - ; CHECK-NEXT: $v2m2 = VMV2R_V $v6m2 + ; CHECK-NEXT: $v6m2 = VMV2R_V $v4m2 + ; CHECK-NEXT: $v0m4 = VMV4R_V $v4m4 ; CHECK-NEXT: $v2m2 = VMV2R_V $v4m2 ; CHECK-NEXT: $v4m2 = VMV2R_V $v6m2 ; CHECK-NEXT: $v8m2 = VMV2R_V $v6m2 ; CHECK-NEXT: $v6m2 = VMV2R_V $v4m2 - ; CHECK-NEXT: $v8m2 = VMV2R_V $v4m2 - ; CHECK-NEXT: $v10m2 = VMV2R_V $v6m2 - ; CHECK-NEXT: $v0m4 = VMV4R_V $v8m4 - ; CHECK-NEXT: $v4m4 = VMV4R_V $v12m4 + ; CHECK-NEXT: $v8m4 = VMV4R_V $v4m4 + ; CHECK-NEXT: $v0m8 = VMV8R_V $v8m8 ; CHECK-NEXT: $v4m4 = VMV4R_V $v8m4 ; CHECK-NEXT: $v8m4 = VMV4R_V $v12m4 ; CHECK-NEXT: $v16m4 = VMV4R_V $v12m4 ; CHECK-NEXT: $v12m4 = VMV4R_V $v8m4 - ; CHECK-NEXT: $v16m4 = VMV4R_V $v8m4 - ; CHECK-NEXT: $v20m4 = VMV4R_V $v12m4 + ; CHECK-NEXT: $v16m8 = VMV8R_V $v8m8 $v2_v3 = COPY $v4_v5 $v3_v4 = COPY $v4_v5 $v5_v6 = COPY $v4_v5 @@ -55,25 +49,20 @@ body: | ; CHECK-NEXT: $v3 = VMV1R_V $v6 ; CHECK-NEXT: $v4 = VMV1R_V $v7 ; CHECK-NEXT: $v3 = VMV1R_V $v5 - ; CHECK-NEXT: $v4 = VMV1R_V $v6 - ; CHECK-NEXT: $v5 = VMV1R_V $v7 + ; CHECK-NEXT: $v4m2 = VMV2R_V $v6m2 ; CHECK-NEXT: $v4 = VMV1R_V $v5 ; CHECK-NEXT: $v5 = VMV1R_V $v6 ; CHECK-NEXT: $v6 = VMV1R_V $v7 - ; CHECK-NEXT: $v9 = VMV1R_V $v7 - ; CHECK-NEXT: $v8 = VMV1R_V $v6 + ; CHECK-NEXT: $v8m2 = VMV2R_V $v6m2 ; CHECK-NEXT: $v7 = VMV1R_V $v5 ; CHECK-NEXT: $v9 = VMV1R_V $v5 - ; CHECK-NEXT: $v10 = VMV1R_V $v6 - ; CHECK-NEXT: $v11 = VMV1R_V $v7 + ; CHECK-NEXT: $v10m2 = VMV2R_V $v6m2 ; CHECK-NEXT: $v0m2 = VMV2R_V $v6m2 ; CHECK-NEXT: $v2m2 = VMV2R_V $v8m2 ; CHECK-NEXT: $v4m2 = VMV2R_V $v10m2 ; CHECK-NEXT: $v2m2 = VMV2R_V $v6m2 - ; CHECK-NEXT: $v4m2 = VMV2R_V $v8m2 - ; CHECK-NEXT: $v6m2 = VMV2R_V $v10m2 - ; CHECK-NEXT: $v14m2 = VMV2R_V $v10m2 - ; CHECK-NEXT: $v12m2 = VMV2R_V $v8m2 + ; CHECK-NEXT: $v4m4 = VMV4R_V $v8m4 + ; CHECK-NEXT: $v12m4 = VMV4R_V $v8m4 ; CHECK-NEXT: $v10m2 = VMV2R_V $v6m2 ; CHECK-NEXT: $v12m2 = VMV2R_V $v6m2 ; CHECK-NEXT: $v14m2 = VMV2R_V $v8m2 @@ -94,10 +83,8 @@ name: copy_zvlsseg_N4 body: | bb.0: ; CHECK-LABEL: name: copy_zvlsseg_N4 - ; CHECK: $v6 = VMV1R_V $v10 - ; CHECK-NEXT: $v7 = VMV1R_V $v11 - ; CHECK-NEXT: $v8 = VMV1R_V $v12 - ; CHECK-NEXT: $v9 = VMV1R_V $v13 + ; CHECK: $v6m2 = VMV2R_V $v10m2 + ; CHECK-NEXT: $v8m2 = VMV2R_V $v12m2 ; CHECK-NEXT: $v7 = VMV1R_V $v10 ; CHECK-NEXT: $v8 = VMV1R_V $v11 ; CHECK-NEXT: $v9 = VMV1R_V $v12 @@ -106,13 +93,10 @@ body: | ; CHECK-NEXT: $v15 = VMV1R_V $v12 ; CHECK-NEXT: $v14 = VMV1R_V $v11 ; CHECK-NEXT: $v13 = VMV1R_V $v10 - ; CHECK-NEXT: $v14 = VMV1R_V $v10 - ; CHECK-NEXT: $v15 = VMV1R_V $v11 - ; CHECK-NEXT: $v16 = VMV1R_V $v12 - ; CHECK-NEXT: 
$v17 = VMV1R_V $v13 + ; CHECK-NEXT: $v14m2 = VMV2R_V $v10m2 + ; CHECK-NEXT: $v16m2 = VMV2R_V $v12m2 ; CHECK-NEXT: $v2m2 = VMV2R_V $v10m2 - ; CHECK-NEXT: $v4m2 = VMV2R_V $v12m2 - ; CHECK-NEXT: $v6m2 = VMV2R_V $v14m2 + ; CHECK-NEXT: $v4m4 = VMV4R_V $v12m4 ; CHECK-NEXT: $v8m2 = VMV2R_V $v16m2 ; CHECK-NEXT: $v4m2 = VMV2R_V $v10m2 ; CHECK-NEXT: $v6m2 = VMV2R_V $v12m2 @@ -123,8 +107,7 @@ body: | ; CHECK-NEXT: $v18m2 = VMV2R_V $v12m2 ; CHECK-NEXT: $v16m2 = VMV2R_V $v10m2 ; CHECK-NEXT: $v18m2 = VMV2R_V $v10m2 - ; CHECK-NEXT: $v20m2 = VMV2R_V $v12m2 - ; CHECK-NEXT: $v22m2 = VMV2R_V $v14m2 + ; CHECK-NEXT: $v20m4 = VMV4R_V $v12m4 ; CHECK-NEXT: $v24m2 = VMV2R_V $v16m2 $v6_v7_v8_v9 = COPY $v10_v11_v12_v13 $v7_v8_v9_v10 = COPY $v10_v11_v12_v13 @@ -146,57 +129,59 @@ body: | ; CHECK-NEXT: $v7 = VMV1R_V $v12 ; CHECK-NEXT: $v8 = VMV1R_V $v13 ; CHECK-NEXT: $v9 = VMV1R_V $v14 - ; CHECK-NEXT: $v6 = VMV1R_V $v10 - ; CHECK-NEXT: $v7 = VMV1R_V $v11 - ; CHECK-NEXT: $v8 = VMV1R_V $v12 - ; CHECK-NEXT: $v9 = VMV1R_V $v13 + ; CHECK-NEXT: $v6m2 = VMV2R_V $v10m2 + ; CHECK-NEXT: $v8m2 = VMV2R_V $v12m2 ; CHECK-NEXT: $v10 = VMV1R_V $v14 ; CHECK-NEXT: $v18 = VMV1R_V $v14 - ; CHECK-NEXT: $v17 = VMV1R_V $v13 - ; CHECK-NEXT: $v16 = VMV1R_V $v12 - ; CHECK-NEXT: $v15 = VMV1R_V $v11 - ; CHECK-NEXT: $v14 = VMV1R_V $v10 + ; CHECK-NEXT: $v16m2 = VMV2R_V $v12m2 + ; CHECK-NEXT: $v14m2 = VMV2R_V $v10m2 ; CHECK-NEXT: $v15 = VMV1R_V $v10 ; CHECK-NEXT: $v16 = VMV1R_V $v11 ; CHECK-NEXT: $v17 = VMV1R_V $v12 ; CHECK-NEXT: $v18 = VMV1R_V $v13 ; CHECK-NEXT: $v19 = VMV1R_V $v14 + ; CHECK-NEXT: $v7 = VMV1R_V $v11 + ; CHECK-NEXT: $v8m4 = VMV4R_V $v12m4 + ; CHECK-NEXT: $v16m4 = VMV4R_V $v12m4 + ; CHECK-NEXT: $v15 = VMV1R_V $v11 $v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14 $v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14 $v14_v15_v16_v17_v18 = COPY $v10_v11_v12_v13_v14 $v15_v16_v17_v18_v19 = COPY $v10_v11_v12_v13_v14 + $v7_v8_v9_v10_v11 = COPY $v11_v12_v13_v14_v15 + $v15_v16_v17_v18_v19 = COPY $v11_v12_v13_v14_v15 ... 
--- name: copy_zvlsseg_N6 body: | bb.0: ; CHECK-LABEL: name: copy_zvlsseg_N6 - ; CHECK: $v4 = VMV1R_V $v10 - ; CHECK-NEXT: $v5 = VMV1R_V $v11 - ; CHECK-NEXT: $v6 = VMV1R_V $v12 - ; CHECK-NEXT: $v7 = VMV1R_V $v13 - ; CHECK-NEXT: $v8 = VMV1R_V $v14 - ; CHECK-NEXT: $v9 = VMV1R_V $v15 + ; CHECK: $v4m2 = VMV2R_V $v10m2 + ; CHECK-NEXT: $v6m2 = VMV2R_V $v12m2 + ; CHECK-NEXT: $v8m2 = VMV2R_V $v14m2 ; CHECK-NEXT: $v5 = VMV1R_V $v10 ; CHECK-NEXT: $v6 = VMV1R_V $v11 ; CHECK-NEXT: $v7 = VMV1R_V $v12 ; CHECK-NEXT: $v8 = VMV1R_V $v13 ; CHECK-NEXT: $v9 = VMV1R_V $v14 ; CHECK-NEXT: $v10 = VMV1R_V $v15 + ; CHECK-NEXT: $v6m2 = VMV2R_V $v10m2 + ; CHECK-NEXT: $v8m4 = VMV4R_V $v12m4 + ; CHECK-NEXT: $v16m4 = VMV4R_V $v12m4 + ; CHECK-NEXT: $v14m2 = VMV2R_V $v10m2 ; CHECK-NEXT: $v20 = VMV1R_V $v15 ; CHECK-NEXT: $v19 = VMV1R_V $v14 ; CHECK-NEXT: $v18 = VMV1R_V $v13 ; CHECK-NEXT: $v17 = VMV1R_V $v12 ; CHECK-NEXT: $v16 = VMV1R_V $v11 ; CHECK-NEXT: $v15 = VMV1R_V $v10 - ; CHECK-NEXT: $v16 = VMV1R_V $v10 - ; CHECK-NEXT: $v17 = VMV1R_V $v11 - ; CHECK-NEXT: $v18 = VMV1R_V $v12 - ; CHECK-NEXT: $v19 = VMV1R_V $v13 - ; CHECK-NEXT: $v20 = VMV1R_V $v14 - ; CHECK-NEXT: $v21 = VMV1R_V $v15 + ; CHECK-NEXT: $v16m2 = VMV2R_V $v10m2 + ; CHECK-NEXT: $v18m2 = VMV2R_V $v12m2 + ; CHECK-NEXT: $v20m2 = VMV2R_V $v14m2 $v4_v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14_v15 $v5_v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14_v15 + $v6_v7_v8_v9_v10_v11 = COPY $v10_v11_v12_v13_v14_v15 + $v14_v15_v16_v17_v18_v19 = COPY $v10_v11_v12_v13_v14_v15 $v15_v16_v17_v18_v19_v20 = COPY $v10_v11_v12_v13_v14_v15 $v16_v17_v18_v19_v20_v21 = COPY $v10_v11_v12_v13_v14_v15 ... @@ -212,20 +197,17 @@ body: | ; CHECK-NEXT: $v7 = VMV1R_V $v14 ; CHECK-NEXT: $v8 = VMV1R_V $v15 ; CHECK-NEXT: $v9 = VMV1R_V $v16 - ; CHECK-NEXT: $v4 = VMV1R_V $v10 - ; CHECK-NEXT: $v5 = VMV1R_V $v11 - ; CHECK-NEXT: $v6 = VMV1R_V $v12 - ; CHECK-NEXT: $v7 = VMV1R_V $v13 - ; CHECK-NEXT: $v8 = VMV1R_V $v14 - ; CHECK-NEXT: $v9 = VMV1R_V $v15 + ; CHECK-NEXT: $v4m2 = VMV2R_V $v10m2 + ; CHECK-NEXT: $v6m2 = VMV2R_V $v12m2 + ; CHECK-NEXT: $v8m2 = VMV2R_V $v14m2 ; CHECK-NEXT: $v10 = VMV1R_V $v16 + ; CHECK-NEXT: $v20 = VMV1R_V $v16 + ; CHECK-NEXT: $v16m4 = VMV4R_V $v12m4 + ; CHECK-NEXT: $v14m2 = VMV2R_V $v10m2 ; CHECK-NEXT: $v22 = VMV1R_V $v16 - ; CHECK-NEXT: $v21 = VMV1R_V $v15 - ; CHECK-NEXT: $v20 = VMV1R_V $v14 - ; CHECK-NEXT: $v19 = VMV1R_V $v13 - ; CHECK-NEXT: $v18 = VMV1R_V $v12 - ; CHECK-NEXT: $v17 = VMV1R_V $v11 - ; CHECK-NEXT: $v16 = VMV1R_V $v10 + ; CHECK-NEXT: $v20m2 = VMV2R_V $v14m2 + ; CHECK-NEXT: $v18m2 = VMV2R_V $v12m2 + ; CHECK-NEXT: $v16m2 = VMV2R_V $v10m2 ; CHECK-NEXT: $v17 = VMV1R_V $v10 ; CHECK-NEXT: $v18 = VMV1R_V $v11 ; CHECK-NEXT: $v19 = VMV1R_V $v12 @@ -233,24 +215,28 @@ body: | ; CHECK-NEXT: $v21 = VMV1R_V $v14 ; CHECK-NEXT: $v22 = VMV1R_V $v15 ; CHECK-NEXT: $v23 = VMV1R_V $v16 + ; CHECK-NEXT: $v22 = VMV1R_V $v21 + ; CHECK-NEXT: $v21 = VMV1R_V $v20 + ; CHECK-NEXT: $v20 = VMV1R_V $v19 + ; CHECK-NEXT: $v19 = VMV1R_V $v18 + ; CHECK-NEXT: $v18 = VMV1R_V $v17 + ; CHECK-NEXT: $v17 = VMV1R_V $v16 + ; CHECK-NEXT: $v16 = VMV1R_V $v15 $v3_v4_v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14_v15_v16 $v4_v5_v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14_v15_v16 + $v14_v15_v16_v17_v18_v19_v20 = COPY $v10_v11_v12_v13_v14_v15_v16 $v16_v17_v18_v19_v20_v21_v22 = COPY $v10_v11_v12_v13_v14_v15_v16 $v17_v18_v19_v20_v21_v22_v23 = COPY $v10_v11_v12_v13_v14_v15_v16 + $v16_v17_v18_v19_v20_v21_v22 = COPY $v15_v16_v17_v18_v19_v20_v21 ... 
--- name: copy_zvlsseg_N8 body: | bb.0: ; CHECK-LABEL: name: copy_zvlsseg_N8 - ; CHECK: $v2 = VMV1R_V $v10 - ; CHECK-NEXT: $v3 = VMV1R_V $v11 - ; CHECK-NEXT: $v4 = VMV1R_V $v12 - ; CHECK-NEXT: $v5 = VMV1R_V $v13 - ; CHECK-NEXT: $v6 = VMV1R_V $v14 - ; CHECK-NEXT: $v7 = VMV1R_V $v15 - ; CHECK-NEXT: $v8 = VMV1R_V $v16 - ; CHECK-NEXT: $v9 = VMV1R_V $v17 + ; CHECK: $v2m2 = VMV2R_V $v10m2 + ; CHECK-NEXT: $v4m4 = VMV4R_V $v12m4 + ; CHECK-NEXT: $v8m2 = VMV2R_V $v16m2 ; CHECK-NEXT: $v3 = VMV1R_V $v10 ; CHECK-NEXT: $v4 = VMV1R_V $v11 ; CHECK-NEXT: $v5 = VMV1R_V $v12 @@ -267,16 +253,15 @@ body: | ; CHECK-NEXT: $v19 = VMV1R_V $v12 ; CHECK-NEXT: $v18 = VMV1R_V $v11 ; CHECK-NEXT: $v17 = VMV1R_V $v10 - ; CHECK-NEXT: $v18 = VMV1R_V $v10 - ; CHECK-NEXT: $v19 = VMV1R_V $v11 - ; CHECK-NEXT: $v20 = VMV1R_V $v12 - ; CHECK-NEXT: $v21 = VMV1R_V $v13 - ; CHECK-NEXT: $v22 = VMV1R_V $v14 - ; CHECK-NEXT: $v23 = VMV1R_V $v15 - ; CHECK-NEXT: $v24 = VMV1R_V $v16 - ; CHECK-NEXT: $v25 = VMV1R_V $v17 + ; CHECK-NEXT: $v18m2 = VMV2R_V $v10m2 + ; CHECK-NEXT: $v20m4 = VMV4R_V $v12m4 + ; CHECK-NEXT: $v24m2 = VMV2R_V $v16m2 + ; CHECK-NEXT: $v8m8 = VMV8R_V $v0m8 + ; CHECK-NEXT: $v0m8 = VMV8R_V $v8m8 $v2_v3_v4_v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14_v15_v16_v17 $v3_v4_v5_v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14_v15_v16_v17 $v17_v18_v19_v20_v21_v22_v23_v24 = COPY $v10_v11_v12_v13_v14_v15_v16_v17 $v18_v19_v20_v21_v22_v23_v24_v25 = COPY $v10_v11_v12_v13_v14_v15_v16_v17 + $v8_v9_v10_v11_v12_v13_v14_v15 = COPY $v0_v1_v2_v3_v4_v5_v6_v7 + $v0_v1_v2_v3_v4_v5_v6_v7 = COPY $v8_v9_v10_v11_v12_v13_v14_v15 ... diff --git a/llvm/test/CodeGen/RISCV/stack-offset.ll b/llvm/test/CodeGen/RISCV/stack-offset.ll new file mode 100644 index 0000000000000..cc81fd62eba9d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/stack-offset.ll @@ -0,0 +1,376 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32,RV32I +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+zba < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32,RV32ZBA +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64,RV64I +; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+zba < %s \ +; RUN: | FileCheck %s -check-prefixes=RV64,RV64ZBA + +declare void @inspect(...) 
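+ +; Note (added for clarity, inferred from the CHECK lines below): the frames in these tests are 4112 to 5200 bytes (see the .cfi_def_cfa_offset values), beyond the reach of a single 12-bit addi immediate, so sp adjustments are split across several addi instructions and sp-relative addresses are materialized with lui/addi arithmetic or, when Zba is available, with li plus sh2add/sh3add against sp.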
+ +define void @test() { +; RV32I-LABEL: test: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -2032 +; RV32I-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi sp, sp, -2048 +; RV32I-NEXT: addi sp, sp, -1120 +; RV32I-NEXT: .cfi_def_cfa_offset 5200 +; RV32I-NEXT: addi a0, sp, 12 +; RV32I-NEXT: addi a1, sp, 2047 +; RV32I-NEXT: addi a1, a1, 13 +; RV32I-NEXT: lui a2, 1 +; RV32I-NEXT: addi a2, a2, 12 +; RV32I-NEXT: add a2, sp, a2 +; RV32I-NEXT: lui a3, 1 +; RV32I-NEXT: addi a3, a3, 1036 +; RV32I-NEXT: add a3, sp, a3 +; RV32I-NEXT: call inspect +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: addi sp, sp, 1136 +; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: ret +; +; RV32ZBA-LABEL: test: +; RV32ZBA: # %bb.0: +; RV32ZBA-NEXT: addi sp, sp, -2032 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 2032 +; RV32ZBA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset ra, -4 +; RV32ZBA-NEXT: addi sp, sp, -2048 +; RV32ZBA-NEXT: addi sp, sp, -1120 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 5200 +; RV32ZBA-NEXT: addi a0, sp, 12 +; RV32ZBA-NEXT: addi a1, sp, 2047 +; RV32ZBA-NEXT: addi a1, a1, 13 +; RV32ZBA-NEXT: li a2, 1027 +; RV32ZBA-NEXT: sh2add a2, a2, sp +; RV32ZBA-NEXT: li a3, 1283 +; RV32ZBA-NEXT: sh2add a3, a3, sp +; RV32ZBA-NEXT: call inspect +; RV32ZBA-NEXT: addi sp, sp, 2032 +; RV32ZBA-NEXT: addi sp, sp, 1136 +; RV32ZBA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 2032 +; RV32ZBA-NEXT: ret +; +; RV64I-LABEL: test: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -2032 +; RV64I-NEXT: .cfi_def_cfa_offset 2032 +; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: addi sp, sp, -2048 +; RV64I-NEXT: addi sp, sp, -1120 +; RV64I-NEXT: .cfi_def_cfa_offset 5200 +; RV64I-NEXT: addi a0, sp, 8 +; RV64I-NEXT: addi a1, sp, 2047 +; RV64I-NEXT: addi a1, a1, 9 +; RV64I-NEXT: lui a2, 1 +; RV64I-NEXT: addiw a2, a2, 8 +; RV64I-NEXT: add a2, sp, a2 +; RV64I-NEXT: lui a3, 1 +; RV64I-NEXT: addiw a3, a3, 1032 +; RV64I-NEXT: add a3, sp, a3 +; RV64I-NEXT: call inspect +; RV64I-NEXT: addi sp, sp, 2032 +; RV64I-NEXT: addi sp, sp, 1136 +; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 2032 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: test: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: addi sp, sp, -2032 +; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032 +; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64ZBA-NEXT: .cfi_offset ra, -8 +; RV64ZBA-NEXT: addi sp, sp, -2048 +; RV64ZBA-NEXT: addi sp, sp, -1120 +; RV64ZBA-NEXT: .cfi_def_cfa_offset 5200 +; RV64ZBA-NEXT: addi a0, sp, 8 +; RV64ZBA-NEXT: addi a1, sp, 2047 +; RV64ZBA-NEXT: addi a1, a1, 9 +; RV64ZBA-NEXT: li a2, 513 +; RV64ZBA-NEXT: sh3add a2, a2, sp +; RV64ZBA-NEXT: li a3, 641 +; RV64ZBA-NEXT: sh3add a3, a3, sp +; RV64ZBA-NEXT: call inspect +; RV64ZBA-NEXT: addi sp, sp, 2032 +; RV64ZBA-NEXT: addi sp, sp, 1136 +; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64ZBA-NEXT: addi sp, sp, 2032 +; RV64ZBA-NEXT: ret + %p4 = alloca [64 x i8], align 1 + %p3 = alloca [1024 x i8], align 1 + %p2 = alloca [2048 x i8], align 1 + %p1 = alloca [2048 x i8], align 1 + call void (...) 
@inspect(ptr %p1, ptr %p2, ptr %p3, ptr %p4) + ret void +} + +define void @align_8() { +; RV32I-LABEL: align_8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -2032 +; RV32I-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi sp, sp, -2048 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 4112 +; RV32I-NEXT: addi a0, sp, 7 +; RV32I-NEXT: lui a1, 1 +; RV32I-NEXT: addi a1, a1, 8 +; RV32I-NEXT: add a1, sp, a1 +; RV32I-NEXT: call inspect +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: ret +; +; RV32ZBA-LABEL: align_8: +; RV32ZBA: # %bb.0: +; RV32ZBA-NEXT: addi sp, sp, -2032 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 2032 +; RV32ZBA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset ra, -4 +; RV32ZBA-NEXT: addi sp, sp, -2048 +; RV32ZBA-NEXT: addi sp, sp, -32 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 4112 +; RV32ZBA-NEXT: addi a0, sp, 7 +; RV32ZBA-NEXT: li a1, 513 +; RV32ZBA-NEXT: sh3add a1, a1, sp +; RV32ZBA-NEXT: call inspect +; RV32ZBA-NEXT: addi sp, sp, 2032 +; RV32ZBA-NEXT: addi sp, sp, 48 +; RV32ZBA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 2032 +; RV32ZBA-NEXT: ret +; +; RV64I-LABEL: align_8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -2032 +; RV64I-NEXT: .cfi_def_cfa_offset 2032 +; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: addi sp, sp, -2048 +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 4128 +; RV64I-NEXT: addi a0, sp, 15 +; RV64I-NEXT: lui a1, 1 +; RV64I-NEXT: addiw a1, a1, 16 +; RV64I-NEXT: add a1, sp, a1 +; RV64I-NEXT: call inspect +; RV64I-NEXT: addi sp, sp, 2032 +; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 2032 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: align_8: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: addi sp, sp, -2032 +; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032 +; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64ZBA-NEXT: .cfi_offset ra, -8 +; RV64ZBA-NEXT: addi sp, sp, -2048 +; RV64ZBA-NEXT: addi sp, sp, -48 +; RV64ZBA-NEXT: .cfi_def_cfa_offset 4128 +; RV64ZBA-NEXT: addi a0, sp, 15 +; RV64ZBA-NEXT: li a1, 514 +; RV64ZBA-NEXT: sh3add a1, a1, sp +; RV64ZBA-NEXT: call inspect +; RV64ZBA-NEXT: addi sp, sp, 2032 +; RV64ZBA-NEXT: addi sp, sp, 64 +; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64ZBA-NEXT: addi sp, sp, 2032 +; RV64ZBA-NEXT: ret + %p2 = alloca i8, align 8 + %p1 = alloca [4097 x i8], align 1 + call void (...) 
@inspect(ptr %p1, ptr %p2) + ret void +} + +define void @align_4() { +; RV32I-LABEL: align_4: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -2032 +; RV32I-NEXT: .cfi_def_cfa_offset 2032 +; RV32I-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: addi sp, sp, -2048 +; RV32I-NEXT: addi sp, sp, -32 +; RV32I-NEXT: .cfi_def_cfa_offset 4112 +; RV32I-NEXT: addi a0, sp, 7 +; RV32I-NEXT: lui a1, 1 +; RV32I-NEXT: addi a1, a1, 8 +; RV32I-NEXT: add a1, sp, a1 +; RV32I-NEXT: call inspect +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: addi sp, sp, 48 +; RV32I-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 2032 +; RV32I-NEXT: ret +; +; RV32ZBA-LABEL: align_4: +; RV32ZBA: # %bb.0: +; RV32ZBA-NEXT: addi sp, sp, -2032 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 2032 +; RV32ZBA-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset ra, -4 +; RV32ZBA-NEXT: addi sp, sp, -2048 +; RV32ZBA-NEXT: addi sp, sp, -32 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 4112 +; RV32ZBA-NEXT: addi a0, sp, 7 +; RV32ZBA-NEXT: li a1, 513 +; RV32ZBA-NEXT: sh3add a1, a1, sp +; RV32ZBA-NEXT: call inspect +; RV32ZBA-NEXT: addi sp, sp, 2032 +; RV32ZBA-NEXT: addi sp, sp, 48 +; RV32ZBA-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 2032 +; RV32ZBA-NEXT: ret +; +; RV64I-LABEL: align_4: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -2032 +; RV64I-NEXT: .cfi_def_cfa_offset 2032 +; RV64I-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: addi sp, sp, -2048 +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: .cfi_def_cfa_offset 4128 +; RV64I-NEXT: addi a0, sp, 19 +; RV64I-NEXT: lui a1, 1 +; RV64I-NEXT: addiw a1, a1, 20 +; RV64I-NEXT: add a1, sp, a1 +; RV64I-NEXT: call inspect +; RV64I-NEXT: addi sp, sp, 2032 +; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 2032 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: align_4: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: addi sp, sp, -2032 +; RV64ZBA-NEXT: .cfi_def_cfa_offset 2032 +; RV64ZBA-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64ZBA-NEXT: .cfi_offset ra, -8 +; RV64ZBA-NEXT: addi sp, sp, -2048 +; RV64ZBA-NEXT: addi sp, sp, -48 +; RV64ZBA-NEXT: .cfi_def_cfa_offset 4128 +; RV64ZBA-NEXT: addi a0, sp, 19 +; RV64ZBA-NEXT: li a1, 1029 +; RV64ZBA-NEXT: sh2add a1, a1, sp +; RV64ZBA-NEXT: call inspect +; RV64ZBA-NEXT: addi sp, sp, 2032 +; RV64ZBA-NEXT: addi sp, sp, 64 +; RV64ZBA-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64ZBA-NEXT: addi sp, sp, 2032 +; RV64ZBA-NEXT: ret + %p2 = alloca i8, align 4 + %p1 = alloca [4097 x i8], align 1 + call void (...) 
@inspect(ptr %p1, ptr %p2) + ret void +} + +define void @align_2() { +; RV32-LABEL: align_2: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -2032 +; RV32-NEXT: .cfi_def_cfa_offset 2032 +; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: addi sp, sp, -2048 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 4112 +; RV32-NEXT: addi a0, sp, 9 +; RV32-NEXT: lui a1, 1 +; RV32-NEXT: addi a1, a1, 10 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: call inspect +; RV32-NEXT: addi sp, sp, 2032 +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 2032 +; RV32-NEXT: ret +; +; RV64-LABEL: align_2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -2032 +; RV64-NEXT: .cfi_def_cfa_offset 2032 +; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: addi sp, sp, -2048 +; RV64-NEXT: addi sp, sp, -48 +; RV64-NEXT: .cfi_def_cfa_offset 4128 +; RV64-NEXT: addi a0, sp, 21 +; RV64-NEXT: lui a1, 1 +; RV64-NEXT: addiw a1, a1, 22 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: call inspect +; RV64-NEXT: addi sp, sp, 2032 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 2032 +; RV64-NEXT: ret + %p2 = alloca i8, align 2 + %p1 = alloca [4097 x i8], align 1 + call void (...) @inspect(ptr %p1, ptr %p2) + ret void +} + + +define void @align_1() { +; RV32-LABEL: align_1: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -2032 +; RV32-NEXT: .cfi_def_cfa_offset 2032 +; RV32-NEXT: sw ra, 2028(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: addi sp, sp, -2048 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 4112 +; RV32-NEXT: addi a0, sp, 10 +; RV32-NEXT: lui a1, 1 +; RV32-NEXT: addi a1, a1, 11 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: call inspect +; RV32-NEXT: addi sp, sp, 2032 +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: lw ra, 2028(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 2032 +; RV32-NEXT: ret +; +; RV64-LABEL: align_1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -2032 +; RV64-NEXT: .cfi_def_cfa_offset 2032 +; RV64-NEXT: sd ra, 2024(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: addi sp, sp, -2048 +; RV64-NEXT: addi sp, sp, -48 +; RV64-NEXT: .cfi_def_cfa_offset 4128 +; RV64-NEXT: addi a0, sp, 22 +; RV64-NEXT: lui a1, 1 +; RV64-NEXT: addiw a1, a1, 23 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: call inspect +; RV64-NEXT: addi sp, sp, 2032 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ld ra, 2024(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 2032 +; RV64-NEXT: ret + %p2 = alloca i8, align 1 + %p1 = alloca [4097 x i8], align 1 + call void (...) 
@inspect(ptr %p1, ptr %p2) + ret void +} diff --git a/llvm/test/CodeGen/SPARC/inlineasm-bad.ll b/llvm/test/CodeGen/SPARC/inlineasm-bad.ll index 5bf2adbeb75c9..07eb67df6e5f7 100644 --- a/llvm/test/CodeGen/SPARC/inlineasm-bad.ll +++ b/llvm/test/CodeGen/SPARC/inlineasm-bad.ll @@ -11,3 +11,12 @@ entry: tail call void asm sideeffect "faddq $0,$1,$2", "{f38},{f0},{f0}"(fp128 0xL0, fp128 0xL0, fp128 0xL0) ret void } + +; CHECK-LABEL: test_twinword_error + +; CHECK: error: Hi part of pair should point to an even-numbered register +; CHECK: error: (note that in some cases it might be necessary to manually bind the input/output registers instead of relying on automatic allocation) + +define i64 @test_twinword_error() { + %1 = tail call i64 asm sideeffect "rd %asr5, ${0:L} \0A\09 srlx ${0:L}, 32, ${0:H}", "={i1}"() + ret i64 %1 +} diff --git a/llvm/test/CodeGen/SPARC/inlineasm.ll b/llvm/test/CodeGen/SPARC/inlineasm.ll index ec27598e5e83b..9817d7c6971f5 100644 --- a/llvm/test/CodeGen/SPARC/inlineasm.ll +++ b/llvm/test/CodeGen/SPARC/inlineasm.ll @@ -143,3 +143,12 @@ entry: %1 = call double asm sideeffect "faddd $1, $2, $0", "=f,f,e"(i64 0, i64 0) ret void } + +; CHECK-LABEL: test_twinword +; CHECK: rd %asr5, %i1 +; CHECK: srlx %i1, 32, %i0 + +define i64 @test_twinword() { + %1 = tail call i64 asm sideeffect "rd %asr5, ${0:L} \0A\09 srlx ${0:L}, 32, ${0:H}", "={i0}"() + ret i64 %1 +} diff --git a/llvm/test/CodeGen/SPIRV/branching/OpSwitchUnreachable.ll b/llvm/test/CodeGen/SPIRV/branching/OpSwitchUnreachable.ll index e73efbeade70d..6eb36e5756ecf 100644 --- a/llvm/test/CodeGen/SPIRV/branching/OpSwitchUnreachable.ll +++ b/llvm/test/CodeGen/SPIRV/branching/OpSwitchUnreachable.ll @@ -1,8 +1,9 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} define void @test_switch_with_unreachable_block(i1 %a) { %value = zext i1 %a to i32 -; CHECK-SPIRV: OpSwitch %[[#]] %[[#REACHABLE:]] +; CHECK-SPIRV: OpSwitch %[[#]] %[[#UNREACHABLE:]] 0 %[[#REACHABLE:]] 1 %[[#REACHABLE:]] switch i32 %value, label %unreachable [ i32 0, label %reachable i32 1, label %reachable @@ -13,7 +14,7 @@ reachable: ; CHECK-SPIRV-NEXT: OpReturn ret void -; CHECK-SPIRV: %[[#]] = OpLabel +; CHECK-SPIRV: %[[#UNREACHABLE]] = OpLabel ; CHECK-SPIRV-NEXT: OpUnreachable unreachable: unreachable diff --git a/llvm/test/CodeGen/SPIRV/branching/switch-range-check.ll b/llvm/test/CodeGen/SPIRV/branching/switch-range-check.ll new file mode 100644 index 0000000000000..85a4d4db089cb --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/branching/switch-range-check.ll @@ -0,0 +1,73 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#Var:]] = OpPhi +; CHECK: OpSwitch %[[#Var]] %[[#]] [[#]] %[[#]] [[#]] %[[#]] [[#]] %[[#]] [[#]] %[[#]] [[#]] %[[#]] [[#]] %[[#]] [[#]] %[[#]] [[#]] %[[#]] [[#]] %[[#]] [[#]] %[[#]] [[#]] %[[#]] [[#]] %[[#]] +; CHECK-COUNT-11: OpBranch +; CHECK-NOT: OpBranch + +define spir_func void @foo(i64 noundef %addr, i64 noundef %as) { +entry: + %src = inttoptr i64 %as to ptr addrspace(4) + %val = load i8, ptr addrspace(4) %src + %cmp = icmp sgt i8 %val, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %add.ptr = getelementptr inbounds i8, ptr addrspace(4) %src, i64 1 + %cond = load i8, ptr addrspace(4) %add.ptr + br label %if.end + +if.end: + %swval = phi i8 [ 
%cond, %if.then ], [ %val, %entry ]
+ switch i8 %swval, label %sw.default [
+ i8 -127, label %sw.epilog
+ i8 -126, label %sw.bb3
+ i8 -125, label %sw.bb4
+ i8 -111, label %sw.bb5
+ i8 -110, label %sw.bb6
+ i8 -109, label %sw.bb7
+ i8 -15, label %sw.bb8
+ i8 -14, label %sw.bb8
+ i8 -13, label %sw.bb8
+ i8 -124, label %sw.bb9
+ i8 -95, label %sw.bb10
+ i8 -123, label %sw.bb11
+ ]
+
+sw.bb3:
+ br label %sw.epilog
+
+sw.bb4:
+ br label %sw.epilog
+
+sw.bb5:
+ br label %sw.epilog
+
+sw.bb6:
+ br label %sw.epilog
+
+sw.bb7:
+ br label %sw.epilog
+
+sw.bb8:
+ br label %sw.epilog
+
+sw.bb9:
+ br label %sw.epilog
+
+sw.bb10:
+ br label %sw.epilog
+
+sw.bb11:
+ br label %sw.epilog
+
+sw.default:
+ br label %sw.epilog
+
+sw.epilog:
+ br label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmax.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmax.ll
index 48e916581f9ff..159d4ac19c8cc 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmax.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmax.ll
@@ -1,25 +1,24 @@
 ; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
 ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-; TODO: This need to be NMax: See https://github.com/llvm/llvm-project/issues/87072
 ; CHECK: OpExtInstImport "GLSL.std.450"
 
 define noundef half @test_fmax_half(half noundef %a, half noundef %b) {
 entry:
-; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FMax %[[#]] %[[#]]
+; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] NMax %[[#]] %[[#]]
 %0 = call half @llvm.maxnum.f16(half %a, half %b)
 ret half %0
 }
 
 define noundef float @test_fmax_float(float noundef %a, float noundef %b) {
 entry:
-; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FMax %[[#]] %[[#]]
+; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] NMax %[[#]] %[[#]]
 %0 = call float @llvm.maxnum.f32(float %a, float %b)
 ret float %0
 }
 
 define noundef double @test_fmax_double(double noundef %a, double noundef %b) {
 entry:
-; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FMax %[[#]] %[[#]]
+; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] NMax %[[#]] %[[#]]
 %0 = call double @llvm.maxnum.f64(double %a, double %b)
 ret double %0
 }
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmin.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmin.ll
index 5bfd69c972a3f..15946b5038eec 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmin.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmin.ll
@@ -1,27 +1,26 @@
 ; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
 ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
 
-; TODO: This need to be NMin: See https://github.com/llvm/llvm-project/issues/87072
 ; CHECK: OpExtInstImport "GLSL.std.450"
 ; CHECK: OpMemoryModel Logical GLSL450
 
 define noundef half @test_fmax_half(half noundef %a, half noundef %b) {
 entry:
-; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FMin %[[#]] %[[#]]
+; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] NMin %[[#]] %[[#]]
 %0 = call half @llvm.minnum.f16(half %a, half %b)
 ret half %0
 }
 
 define noundef float @test_fmax_float(float noundef %a, float noundef %b) {
 entry:
-; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FMin %[[#]] %[[#]]
+; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] NMin %[[#]] %[[#]]
 %0 = call float @llvm.minnum.f32(float %a, float %b)
 ret float %0
 }
 
 define noundef double @test_fmax_double(double noundef %a, double noundef %b) {
 entry:
-; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] FMin %[[#]] %[[#]]
+; CHECK: %[[#]] = OpExtInst %[[#]] %[[#]] NMin %[[#]] %[[#]]
 %0 = call double @llvm.minnum.f64(double %a, double %b)
 ret double %0
 }
diff --git a/llvm/test/CodeGen/Thumb/pr35836.ll b/llvm/test/CodeGen/Thumb/pr35836.ll
index 96a6fe5d14202..ba33a8184bcc7 100644
--- a/llvm/test/CodeGen/Thumb/pr35836.ll
+++ b/llvm/test/CodeGen/Thumb/pr35836.ll
@@ -35,18 +35,18 @@ while.body:
 br label %while.body
 }
 ; CHECK: adds r3, r0, r1
-; CHECK: push {r5}
-; CHECK: pop {r1}
+; CHECK: mov r12, r5
+; CHECK: mov r1, r12
 ; CHECK: adcs r1, r5
 ; CHECK: ldr r0, [sp, #12] @ 4-byte Reload
 ; CHECK: ldr r2, [sp, #8] @ 4-byte Reload
 ; CHECK: adds r2, r0, r2
-; CHECK: push {r5}
-; CHECK: pop {r4}
+; CHECK: mov r12, r5
+; CHECK: mov r4, r12
 ; CHECK: adcs r4, r5
 ; CHECK: adds r0, r2, r5
-; CHECK: push {r3}
-; CHECK: pop {r0}
+; CHECK: mov r12, r3
+; CHECK: mov r0, r12
 ; CHECK: adcs r0, r4
 ; CHECK: ldr r6, [sp, #4] @ 4-byte Reload
 ; CHECK: str r0, [r6]
diff --git a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
index aa5deb6542b2b..61a741445b81c 100644
--- a/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/Thumb/urem-seteq-illegal-types.ll
@@ -122,8 +122,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 ; CHECK-NEXT: movs r3, #1
 ; CHECK-NEXT: movs r4, #0
 ; CHECK-NEXT: cmp r0, #170
-; CHECK-NEXT: push {r3}
-; CHECK-NEXT: pop {r0}
+; CHECK-NEXT: mov r12, r3
+; CHECK-NEXT: mov r0, r12
 ; CHECK-NEXT: bhi .LBB4_2
 ; CHECK-NEXT: @ %bb.1:
 ; CHECK-NEXT: movs r0, r4
@@ -134,8 +134,8 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
 ; CHECK-NEXT: movs r1, #73
 ; CHECK-NEXT: lsls r1, r1, #23
 ; CHECK-NEXT: cmp r5, r1
-; CHECK-NEXT: push {r3}
-; CHECK-NEXT: pop {r1}
+; CHECK-NEXT: mov r12, r3
+; CHECK-NEXT: mov r1, r12
 ; CHECK-NEXT: bhi .LBB4_4
 ; CHECK-NEXT: @ %bb.3:
 ; CHECK-NEXT: movs r1, r4
diff --git a/llvm/test/CodeGen/WebAssembly/multi-return.ll b/llvm/test/CodeGen/WebAssembly/multi-return.ll
index 3429cd512a46e..293a1b35c39c6 100644
--- a/llvm/test/CodeGen/WebAssembly/multi-return.ll
+++ b/llvm/test/CodeGen/WebAssembly/multi-return.ll
@@ -78,18 +78,16 @@ define i64 @test4() {
 define { i64, i128 } @test5() {
 ; CHECK-LABEL: test5:
 ; CHECK: call return_multi_multi
-; CHECK: i32.const $push8=, 8
-; CHECK: i32.add $push9=, $[[SP:[0-9]+]], $pop8
-; CHECK: i32.const $push0=, 16
-; CHECK: i32.add $push1=, $pop9, $pop0
+; CHECK: i32.const $push0=, 24
+; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0
 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1)
 ; CHECK: i64.load $[[L2:[0-9]+]]=, 8($[[SP]])
 ; CHECK: i64.load $push2=, 16($[[SP]])
 ; CHECK: i64.store 8($0), $pop2
+; CHECK: i64.store 16($0), $[[L1]]
 ; CHECK: i64.store 0($0), $[[L2]]
-; CHECK: i32.const $push12=, 16
-; CHECK: i32.add $push3=, $0, $pop12
-; CHECK: i64.store 0($pop3), $[[L1]]
+; CHECK: i32.const $push5=, 80
+; CHECK: i32.add $push6=, $3, $pop5
 %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi()
 %r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0
 %r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1
@@ -101,20 +99,20 @@ define { i64, i128 } @test5() {
 define { i128, i128 } @test6() {
 ; CHECK-LABEL: test6:
 ; CHECK: call return_multi_multi
-; CHECK: i32.const $push0=, 64
+; CHECK: i32.const $push0=, 24
 ; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0
 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1)
-; CHECK: i32.const $push2=, 24
+; CHECK: i32.const $push2=, 64
 ; CHECK: i32.add $push3=, $[[SP]], $pop2
 ; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3)
 ; CHECK: i64.load $[[L3:[0-9]+]]=, 16($[[SP]])
 ; CHECK: i64.load $push4=, 56($[[SP]])
 ; CHECK: i64.store 16($0), $pop4
+; CHECK: i64.store 24($0), $[[L2]]
 ; CHECK: i64.store 0($0), $[[L3]]
-; CHECK: i64.store 8($0), $[[L2]]
-; CHECK: i32.const $push5=, 24
-; CHECK: i32.add $push6=, $0, $pop5
-; CHECK: i64.store 0($pop6), $[[L1]]
+; CHECK: i64.store 8($0), $[[L1]]
+; CHECK: i32.const $push7=, 80
+; CHECK: i32.add $push8=, $4, $pop7
 %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi()
 %r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1
 %r3 = extractvalue { i64, i128, i192, i128, i64 } %t0, 3
@@ -129,19 +127,17 @@ define { i64, i192 } @test7() {
 ; CHECK: i32.const $push0=, 40
 ; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0
 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1)
+; CHECK: i64.load $[[L2:[0-9]+]]=, 8($[[SP]])
+; CHECK: i64.load $[[L3:[0-9]+]]=, 32($[[SP]])
 ; CHECK: i32.const $push2=, 48
 ; CHECK: i32.add $push3=, $[[SP]], $pop2
-; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3)
-; CHECK: i64.load $[[L3:[0-9]+]]=, 8($[[SP]])
-; CHECK: i64.load $push4=, 32($[[SP]])
-; CHECK: i64.store 8($0), $pop4
-; CHECK: i64.store 0($0), $[[L3]]
-; CHECK: i32.const $push5=, 24
-; CHECK: i32.add $push6=, $0, $pop5
-; CHECK: i64.store 0($pop6), $[[L2]]
-; CHECK: i32.const $push7=, 16
-; CHECK: i32.add $push8=, $0, $pop7
-; CHECK: i64.store 0($pop8), $[[L1]]
+; CHECK: i64.load $push4=, 0($pop3)
+; CHECK: i64.store 24($0), $pop4
+; CHECK: i64.store 8($0), $[[L3]]
+; CHECK: i64.store 16($0), $[[L1]]
+; CHECK: i64.store 0($0), $[[L2]]
+; CHECK: i32.const $push7=, 80
+; CHECK: i32.add $push8=, $4, $pop7
 %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi()
 %r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0
 %r2 = extractvalue { i64, i128, i192, i128, i64 } %t0, 2
@@ -153,18 +149,16 @@ define { i128, i192, i128, i64 } @test8() {
 ; CHECK-LABEL: test8:
 ; CHECK: call return_multi_multi
-; CHECK: i32.const $push18=, 8
-; CHECK: i32.add $push19=, $[[SP:[0-9]+]], $pop18
-; CHECK: i32.const $push0=, 32
-; CHECK: i32.add $push1=, $pop19, $pop0
+; CHECK: i32.const $push0=, 64
+; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0
 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1)
-; CHECK: i32.const $push2=, 48
+; CHECK: i32.const $push2=, 40
 ; CHECK: i32.add $push3=, $[[SP]], $pop2
 ; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3)
-; CHECK: i32.const $push4=, 24
+; CHECK: i32.const $push4=, 48
 ; CHECK: i32.add $push5=, $[[SP]], $pop4
 ; CHECK: i64.load $[[L3:[0-9]+]]=, 0($pop5)
-; CHECK: i32.const $push6=, 64
+; CHECK: i32.const $push6=, 24
 ; CHECK: i32.add $push7=, $[[SP]], $pop6
 ; CHECK: i64.load $[[L4:[0-9]+]]=, 0($pop7)
 ; CHECK: i64.load $[[L5:[0-9]+]]=, 8($[[SP]])
@@ -172,19 +166,15 @@ define { i128, i192, i128, i64 } @test8() {
 ; CHECK: i64.load $[[L7:[0-9]+]]=, 32($[[SP]])
 ; CHECK: i64.load $push8=, 16($[[SP]])
 ; CHECK: i64.store 40($0), $pop8
+; CHECK: i64.store 48($0), $[[L4]]
+; CHECK: i64.store 32($0), $[[L3]]
 ; CHECK: i64.store 16($0), $[[L7]]
+; CHECK: i64.store 24($0), $[[L2]]
 ; CHECK: i64.store 0($0), $[[L6]]
-; CHECK: i64.store 8($0), $[[L4]]
+; CHECK: i64.store 8($0), $[[L1]]
 ; CHECK: i64.store 56($0), $[[L5]]
-; CHECK: i32.const $push9=, 48
-; CHECK: i32.add $push10=, $0, $pop9
-; CHECK: i64.store 0($pop10), $[[L3]]
-; CHECK: i32.const $push22=, 32
-; CHECK: i32.add $push11=, $0, $pop22
-; CHECK: i64.store 0($pop11), $[[L2]]
-; CHECK: i32.const $push12=, 24
-; CHECK: i32.add $push13=, $0, $pop12
-; CHECK: i64.store 0($pop13), $[[L1]]
+; CHECK: i32.const $push11=, 80
+; CHECK: i32.add $push12=, $8, $pop11
 %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi()
 %r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0
 %r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index 3a806b955f853..761a75418a00f 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -31,60 +31,38 @@ define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; NO-SIMD128-LABEL: add_v16i8:
 ; NO-SIMD128: .functype add_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.add $push0=, $9, $25
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop0
-; NO-SIMD128-NEXT: i32.add $push1=, $5, $21
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop1
-; NO-SIMD128-NEXT: i32.add $push2=, $3, $19
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-NEXT: i32.add $push3=, $2, $18
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop3
-; NO-SIMD128-NEXT: i32.add $push4=, $1, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 15
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.add $push5=, $16, $32
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 14
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.add $push8=, $15, $31
-; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 13
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.add $push11=, $14, $30
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.add $push14=, $13, $29
-; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push18=, 11
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.add $push17=, $12, $28
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17
-; NO-SIMD128-NEXT: i32.const $push21=, 10
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.add $push20=, $11, $27
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push24=, 9
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.add $push23=, $10, $26
-; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23
-; NO-SIMD128-NEXT: i32.const $push27=, 7
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.add $push26=, $8, $24
-; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.add $push29=, $7, $23
-; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29
-; NO-SIMD128-NEXT: i32.const $push33=, 5
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.add $push32=, $6, $22
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push36=, 3
-; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-NEXT: i32.add $push35=, $4, $20
-; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35
+; NO-SIMD128-NEXT: i32.add $push0=, $16, $32
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop0
+; NO-SIMD128-NEXT: i32.add $push1=, $15, $31
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop1
+; NO-SIMD128-NEXT: i32.add $push2=, $14, $30
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop2
+; NO-SIMD128-NEXT: i32.add $push3=, $13, $29
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop3
+; NO-SIMD128-NEXT: i32.add $push4=, $12, $28
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop4
+; NO-SIMD128-NEXT: i32.add $push5=, $11, $27
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop5
+; NO-SIMD128-NEXT: i32.add $push6=, $10, $26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop6
+; NO-SIMD128-NEXT: i32.add $push7=, $9, $25
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop7
+; NO-SIMD128-NEXT: i32.add $push8=, $8, $24
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-NEXT: i32.add $push9=, $7, $23
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop9
+; NO-SIMD128-NEXT: i32.add $push10=, $6, $22
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop10
+; NO-SIMD128-NEXT: i32.add $push11=, $5, $21
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop11
+; NO-SIMD128-NEXT: i32.add $push12=, $4, $20
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-NEXT: i32.add $push13=, $3, $19
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-NEXT: i32.add $push14=, $2, $18
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop14
+; NO-SIMD128-NEXT: i32.add $push15=, $1, $17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: add_v16i8:
@@ -96,54 +74,32 @@ define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1
 ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $19
 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $20
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.add $push6=, $5, $21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $6, $22
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $7, $23
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $8, $24
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15
-; NO-SIMD128-FAST-NEXT: i32.add $push16=, $9, $25
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.add $push19=, $10, $26
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $11, $27
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.add $push25=, $12, $28
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $13, $29
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-FAST-NEXT: i32.add $push31=, $14, $30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $15, $31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.add $push37=, $16, $32
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37
+; NO-SIMD128-FAST-NEXT: i32.add $push3=, $4, $20
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.add $push4=, $5, $21
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.add $push5=, $6, $22
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.add $push6=, $7, $23
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.add $push7=, $8, $24
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.add $push8=, $9, $25
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.add $push9=, $10, $26
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.add $push10=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.add $push11=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.add $push12=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.add $push13=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.add $push14=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.add $push15=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15
 ; NO-SIMD128-FAST-NEXT: return
 %a = add <16 x i8> %x, %y
 ret <16 x i8> %a
@@ -165,60 +121,38 @@ define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; NO-SIMD128-LABEL: sub_v16i8:
 ; NO-SIMD128: .functype sub_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.sub $push0=, $9, $25
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop0
-; NO-SIMD128-NEXT: i32.sub $push1=, $5, $21
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop1
-; NO-SIMD128-NEXT: i32.sub $push2=, $3, $19
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-NEXT: i32.sub $push3=, $2, $18
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop3
-; NO-SIMD128-NEXT: i32.sub $push4=, $1, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 15
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.sub $push5=, $16, $32
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 14
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.sub $push8=, $15, $31
-; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 13
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.sub $push11=, $14, $30
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.sub $push14=, $13, $29
-; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push18=, 11
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.sub $push17=, $12, $28
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17
-; NO-SIMD128-NEXT: i32.const $push21=, 10
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.sub $push20=, $11, $27
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push24=, 9
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.sub $push23=, $10, $26
-; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23
-; NO-SIMD128-NEXT: i32.const $push27=, 7
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.sub $push26=, $8, $24
-; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.sub $push29=, $7, $23
-; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29
-; NO-SIMD128-NEXT: i32.const $push33=, 5
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.sub $push32=, $6, $22
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push36=, 3
-; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-NEXT: i32.sub $push35=, $4, $20
-; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35
+; NO-SIMD128-NEXT: i32.sub $push0=, $16, $32
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop0
+; NO-SIMD128-NEXT: i32.sub $push1=, $15, $31
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop1
+; NO-SIMD128-NEXT: i32.sub $push2=, $14, $30
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop2
+; NO-SIMD128-NEXT: i32.sub $push3=, $13, $29
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop3
+; NO-SIMD128-NEXT: i32.sub $push4=, $12, $28
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop4
+; NO-SIMD128-NEXT: i32.sub $push5=, $11, $27
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop5
+; NO-SIMD128-NEXT: i32.sub $push6=, $10, $26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop6
+; NO-SIMD128-NEXT: i32.sub $push7=, $9, $25
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop7
+; NO-SIMD128-NEXT: i32.sub $push8=, $8, $24
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-NEXT: i32.sub $push9=, $7, $23
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop9
+; NO-SIMD128-NEXT: i32.sub $push10=, $6, $22
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop10
+; NO-SIMD128-NEXT: i32.sub $push11=, $5, $21
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop11
+; NO-SIMD128-NEXT: i32.sub $push12=, $4, $20
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-NEXT: i32.sub $push13=, $3, $19
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-NEXT: i32.sub $push14=, $2, $18
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop14
+; NO-SIMD128-NEXT: i32.sub $push15=, $1, $17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: sub_v16i8:
@@ -230,54 +164,32 @@ define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1
 ; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $19
 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $20
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $5, $21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $6, $22
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $7, $23
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $8, $24
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15
-; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $9, $25
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.sub $push19=, $10, $26
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-FAST-NEXT: i32.sub $push22=, $11, $27
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $12, $28
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.sub $push28=, $13, $29
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-FAST-NEXT: i32.sub $push31=, $14, $30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.sub $push34=, $15, $31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.sub $push37=, $16, $32
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37
+; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $4, $20
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $5, $21
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $6, $22
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $7, $23
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $8, $24
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $9, $25
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $10, $26
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.sub $push11=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.sub $push14=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15
 ; NO-SIMD128-FAST-NEXT: return
 %a = sub <16 x i8> %x, %y
 ret <16 x i8> %a
@@ -425,60 +337,38 @@ define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; NO-SIMD128-LABEL: mul_v16i8:
 ; NO-SIMD128: .functype mul_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.mul $push0=, $9, $25
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop0
-; NO-SIMD128-NEXT: i32.mul $push1=, $5, $21
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop1
-; NO-SIMD128-NEXT: i32.mul $push2=, $3, $19
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-NEXT: i32.mul $push3=, $2, $18
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop3
-; NO-SIMD128-NEXT: i32.mul $push4=, $1, $17
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 15
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.mul $push5=, $16, $32
-; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 14
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.mul $push8=, $15, $31
-; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 13
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.mul $push11=, $14, $30
-; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.mul $push14=, $13, $29
-; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push18=, 11
-; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-NEXT: i32.mul $push17=, $12, $28
-; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17
-; NO-SIMD128-NEXT: i32.const $push21=, 10
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.mul $push20=, $11, $27
-; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push24=, 9
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.mul $push23=, $10, $26
-; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23
-; NO-SIMD128-NEXT: i32.const $push27=, 7
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.mul $push26=, $8, $24
-; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.mul $push29=, $7, $23
-; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29
-; NO-SIMD128-NEXT: i32.const $push33=, 5
-; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-NEXT: i32.mul $push32=, $6, $22
-; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-NEXT: i32.const $push36=, 3
-; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-NEXT: i32.mul $push35=, $4, $20
-; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35
+; NO-SIMD128-NEXT: i32.mul $push0=, $16, $32
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop0
+; NO-SIMD128-NEXT: i32.mul $push1=, $15, $31
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop1
+; NO-SIMD128-NEXT: i32.mul $push2=, $14, $30
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop2
+; NO-SIMD128-NEXT: i32.mul $push3=, $13, $29
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop3
+; NO-SIMD128-NEXT: i32.mul $push4=, $12, $28
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop4
+; NO-SIMD128-NEXT: i32.mul $push5=, $11, $27
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop5
+; NO-SIMD128-NEXT: i32.mul $push6=, $10, $26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop6
+; NO-SIMD128-NEXT: i32.mul $push7=, $9, $25
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop7
+; NO-SIMD128-NEXT: i32.mul $push8=, $8, $24
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop8
+; NO-SIMD128-NEXT: i32.mul $push9=, $7, $23
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop9
+; NO-SIMD128-NEXT: i32.mul $push10=, $6, $22
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop10
+; NO-SIMD128-NEXT: i32.mul $push11=, $5, $21
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop11
+; NO-SIMD128-NEXT: i32.mul $push12=, $4, $20
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop12
+; NO-SIMD128-NEXT: i32.mul $push13=, $3, $19
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop13
+; NO-SIMD128-NEXT: i32.mul $push14=, $2, $18
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop14
+; NO-SIMD128-NEXT: i32.mul $push15=, $1, $17
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop15
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: mul_v16i8:
@@ -490,54 +380,32 @@ define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1
 ; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $19
 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $20
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5
-; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $5, $21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $6, $22
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $7, $23
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $8, $24
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15
-; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $9, $25
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.mul $push19=, $10, $26
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-FAST-NEXT: i32.mul $push22=, $11, $27
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $12, $28
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.mul $push28=, $13, $29
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $14, $30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.mul $push34=, $15, $31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-FAST-NEXT: i32.mul $push37=, $16, $32
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37
+; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $4, $20
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3
+; NO-SIMD128-FAST-NEXT: i32.mul $push4=, $5, $21
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $6, $22
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $7, $23
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.mul $push7=, $8, $24
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $9, $25
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $10, $26
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9
+; NO-SIMD128-FAST-NEXT: i32.mul $push10=, $11, $27
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $12, $28
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $13, $29
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $14, $30
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.mul $push14=, $15, $31
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $16, $32
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15
 ; NO-SIMD128-FAST-NEXT: return
 %a = mul <16 x i8> %x, %y
 ret <16 x i8> %a
@@ -559,108 +427,86 @@ define <16 x i8> @min_s_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; NO-SIMD128-LABEL: min_s_v16i8:
 ; NO-SIMD128: .functype min_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push4=, 15
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
 ; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16
 ; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32
 ; NO-SIMD128-NEXT: i32.lt_s $push2=, $pop1, $pop0
 ; NO-SIMD128-NEXT: i32.select $push3=, $16, $32, $pop2
-; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3
-; NO-SIMD128-NEXT: i32.const $push10=, 14
-; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-NEXT: i32.extend8_s $push7=, $15
-; NO-SIMD128-NEXT: i32.extend8_s $push6=, $31
-; NO-SIMD128-NEXT: i32.lt_s $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.select $push9=, $15, $31, $pop8
-; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9
-; NO-SIMD128-NEXT: i32.const $push16=, 13
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.extend8_s $push13=, $14
-; NO-SIMD128-NEXT: i32.extend8_s $push12=, $30
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop3
+; NO-SIMD128-NEXT: i32.extend8_s $push5=, $15
+; NO-SIMD128-NEXT: i32.extend8_s $push4=, $31
+; NO-SIMD128-NEXT: i32.lt_s $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.select $push7=, $15, $31, $pop6
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop7
+; NO-SIMD128-NEXT: i32.extend8_s $push9=, $14
+; NO-SIMD128-NEXT: i32.extend8_s $push8=, $30
+; NO-SIMD128-NEXT: i32.lt_s $push10=, $pop9, $pop8
+; NO-SIMD128-NEXT: i32.select $push11=, $14, $30, $pop10
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop11
+; NO-SIMD128-NEXT: i32.extend8_s $push13=, $13
+; NO-SIMD128-NEXT: i32.extend8_s $push12=, $29
 ; NO-SIMD128-NEXT: i32.lt_s $push14=, $pop13, $pop12
-; NO-SIMD128-NEXT: i32.select $push15=, $14, $30, $pop14
-; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push22=, 12
-; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-NEXT: i32.extend8_s $push19=, $13
-; NO-SIMD128-NEXT: i32.extend8_s $push18=, $29
-; NO-SIMD128-NEXT: i32.lt_s $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.select $push21=, $13, $29, $pop20
-; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21
-; NO-SIMD128-NEXT: i32.const $push28=, 11
-; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28
-; NO-SIMD128-NEXT: i32.extend8_s $push25=, $12
-; NO-SIMD128-NEXT: i32.extend8_s $push24=, $28
+; NO-SIMD128-NEXT: i32.select $push15=, $13, $29, $pop14
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop15
+; NO-SIMD128-NEXT: i32.extend8_s $push17=, $12
+; NO-SIMD128-NEXT: i32.extend8_s $push16=, $28
+; NO-SIMD128-NEXT: i32.lt_s $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.select $push19=, $12, $28, $pop18
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop19
+; NO-SIMD128-NEXT: i32.extend8_s $push21=, $11
+; NO-SIMD128-NEXT: i32.extend8_s $push20=, $27
+; NO-SIMD128-NEXT: i32.lt_s $push22=, $pop21, $pop20
+; NO-SIMD128-NEXT: i32.select $push23=, $11, $27, $pop22
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop23
+; NO-SIMD128-NEXT: i32.extend8_s $push25=, $10
+; NO-SIMD128-NEXT: i32.extend8_s $push24=, $26
 ; NO-SIMD128-NEXT: i32.lt_s $push26=, $pop25, $pop24
-; NO-SIMD128-NEXT: i32.select $push27=, $12, $28, $pop26
-; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27
-; NO-SIMD128-NEXT: i32.const $push34=, 10
-; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34
-; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11
-; NO-SIMD128-NEXT: i32.extend8_s $push30=, $27
-; NO-SIMD128-NEXT: i32.lt_s $push32=, $pop31, $pop30
-; NO-SIMD128-NEXT: i32.select $push33=, $11, $27, $pop32
-; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33
-; NO-SIMD128-NEXT: i32.const $push40=, 9
-; NO-SIMD128-NEXT: i32.add $push41=, $0, $pop40
-; NO-SIMD128-NEXT: i32.extend8_s $push37=, $10
-; NO-SIMD128-NEXT: i32.extend8_s $push36=, $26
+; NO-SIMD128-NEXT: i32.select $push27=, $10, $26, $pop26
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop27
+; NO-SIMD128-NEXT: i32.extend8_s $push29=, $9
+; NO-SIMD128-NEXT: i32.extend8_s $push28=, $25
+; NO-SIMD128-NEXT: i32.lt_s $push30=, $pop29, $pop28
+; NO-SIMD128-NEXT: i32.select $push31=, $9, $25, $pop30
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop31
+; NO-SIMD128-NEXT: i32.extend8_s $push33=, $8
+; NO-SIMD128-NEXT: i32.extend8_s $push32=, $24
+; NO-SIMD128-NEXT: i32.lt_s $push34=, $pop33, $pop32
+; NO-SIMD128-NEXT: i32.select $push35=, $8, $24, $pop34
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop35
+; NO-SIMD128-NEXT: i32.extend8_s $push37=, $7
+; NO-SIMD128-NEXT: i32.extend8_s $push36=, $23
 ; NO-SIMD128-NEXT: i32.lt_s $push38=, $pop37, $pop36
-; NO-SIMD128-NEXT: i32.select $push39=, $10, $26, $pop38
-; NO-SIMD128-NEXT: i32.store8 0($pop41), $pop39
-; NO-SIMD128-NEXT: i32.extend8_s $push43=, $9
-; NO-SIMD128-NEXT: i32.extend8_s $push42=, $25
-; NO-SIMD128-NEXT: i32.lt_s $push44=, $pop43, $pop42
-; NO-SIMD128-NEXT: i32.select $push45=, $9, $25, $pop44
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop45
-; NO-SIMD128-NEXT: i32.const $push50=, 7
-; NO-SIMD128-NEXT: i32.add $push51=, $0, $pop50
-; NO-SIMD128-NEXT: i32.extend8_s $push47=, $8
-; NO-SIMD128-NEXT: i32.extend8_s $push46=, $24
-; NO-SIMD128-NEXT: i32.lt_s $push48=, $pop47, $pop46
-; NO-SIMD128-NEXT: i32.select $push49=, $8, $24, $pop48
-; NO-SIMD128-NEXT: i32.store8 0($pop51), $pop49
-; NO-SIMD128-NEXT: i32.const $push56=, 6
-; NO-SIMD128-NEXT: i32.add $push57=, $0, $pop56
-; NO-SIMD128-NEXT: i32.extend8_s $push53=, $7
-; NO-SIMD128-NEXT: i32.extend8_s $push52=, $23
+; NO-SIMD128-NEXT: i32.select $push39=, $7, $23, $pop38
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop39
+; NO-SIMD128-NEXT: i32.extend8_s $push41=, $6
+; NO-SIMD128-NEXT: i32.extend8_s $push40=, $22
+; NO-SIMD128-NEXT: i32.lt_s $push42=, $pop41, $pop40
+; NO-SIMD128-NEXT: i32.select $push43=, $6, $22, $pop42
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop43
+; NO-SIMD128-NEXT: i32.extend8_s $push45=, $5
+; NO-SIMD128-NEXT: i32.extend8_s $push44=, $21
+; NO-SIMD128-NEXT: i32.lt_s $push46=, $pop45, $pop44
+; NO-SIMD128-NEXT: i32.select $push47=, $5, $21, $pop46
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop47
+; NO-SIMD128-NEXT: i32.extend8_s $push49=, $4
+; NO-SIMD128-NEXT: i32.extend8_s $push48=, $20
+; NO-SIMD128-NEXT: i32.lt_s $push50=, $pop49, $pop48
+; NO-SIMD128-NEXT: i32.select $push51=, $4, $20, $pop50
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop51
+; NO-SIMD128-NEXT: i32.extend8_s $push53=, $3
+; NO-SIMD128-NEXT: i32.extend8_s $push52=, $19
 ; NO-SIMD128-NEXT: i32.lt_s $push54=, $pop53, $pop52
-; NO-SIMD128-NEXT: i32.select $push55=, $7, $23, $pop54
-; NO-SIMD128-NEXT: i32.store8 0($pop57), $pop55
-; NO-SIMD128-NEXT: i32.const $push62=, 5
-; NO-SIMD128-NEXT: i32.add $push63=, $0, $pop62
-; NO-SIMD128-NEXT: i32.extend8_s $push59=, $6
-; NO-SIMD128-NEXT: i32.extend8_s $push58=, $22
-; NO-SIMD128-NEXT: i32.lt_s $push60=, $pop59, $pop58
-; NO-SIMD128-NEXT: i32.select $push61=, $6, $22, $pop60
-; NO-SIMD128-NEXT: i32.store8 0($pop63), $pop61
-; NO-SIMD128-NEXT: i32.extend8_s $push65=, $5
-; NO-SIMD128-NEXT: i32.extend8_s $push64=, $21
-; NO-SIMD128-NEXT: i32.lt_s $push66=, $pop65, $pop64
-; NO-SIMD128-NEXT: i32.select $push67=, $5, $21, $pop66
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop67
-; NO-SIMD128-NEXT: i32.const $push72=, 3
-; NO-SIMD128-NEXT: i32.add $push73=, $0, $pop72
-; NO-SIMD128-NEXT: i32.extend8_s $push69=, $4
-; NO-SIMD128-NEXT: i32.extend8_s $push68=, $20
-; NO-SIMD128-NEXT: i32.lt_s $push70=, $pop69, $pop68
-; NO-SIMD128-NEXT: i32.select $push71=, $4, $20, $pop70
-; NO-SIMD128-NEXT: i32.store8 0($pop73), $pop71
-; NO-SIMD128-NEXT: i32.extend8_s $push75=, $3
-; NO-SIMD128-NEXT: i32.extend8_s $push74=, $19
-; NO-SIMD128-NEXT: i32.lt_s $push76=, $pop75, $pop74
-; NO-SIMD128-NEXT: i32.select $push77=, $3, $19, $pop76
-; NO-SIMD128-NEXT: i32.store8 2($0), $pop77
-; NO-SIMD128-NEXT: i32.extend8_s $push79=, $2
-; NO-SIMD128-NEXT: i32.extend8_s $push78=, $18
-; NO-SIMD128-NEXT: i32.lt_s $push80=, $pop79, $pop78
-; NO-SIMD128-NEXT: i32.select $push81=, $2, $18, $pop80
-; NO-SIMD128-NEXT: i32.store8 1($0), $pop81
-; NO-SIMD128-NEXT: i32.extend8_s $push83=, $1
-; NO-SIMD128-NEXT: i32.extend8_s $push82=, $17
-; NO-SIMD128-NEXT: i32.lt_s $push84=, $pop83, $pop82
-; NO-SIMD128-NEXT: i32.select $push85=, $1, $17, $pop84
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop85
+; NO-SIMD128-NEXT: i32.select $push55=, $3, $19, $pop54
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop55
+; NO-SIMD128-NEXT: i32.extend8_s $push57=, $2
+; NO-SIMD128-NEXT: i32.extend8_s $push56=, $18
+; NO-SIMD128-NEXT: i32.lt_s $push58=, $pop57, $pop56
+; NO-SIMD128-NEXT: i32.select $push59=, $2, $18, $pop58
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop59
+; NO-SIMD128-NEXT: i32.extend8_s $push61=, $1
+; NO-SIMD128-NEXT: i32.extend8_s $push60=, $17
+; NO-SIMD128-NEXT: i32.lt_s $push62=, $pop61, $pop60
+; NO-SIMD128-NEXT: i32.select $push63=, $1, $17, $pop62
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop63
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: min_s_v16i8:
@@ -681,93 +527,71 @@ define <16 x i8> @min_s_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push10=, $pop9, $pop8
 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $19, $pop10
 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push16=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16
 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4
 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $20
 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push14=, $pop13, $pop12
 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $20, $pop14
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop17), $pop15
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $5
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $21
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push20=, $pop19, $pop18
-; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $21, $pop20
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $6
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $22
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push24=, $pop23, $pop22
-; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $22, $pop24
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push32=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $7
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $23
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $5
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $21
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $21, $pop18
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop19
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $22
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push22=, $pop21, $pop20
+; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $22, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop23
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push26=, $pop25, $pop24
+; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $23, $pop26
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop27
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $8
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $24
 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push30=, $pop29, $pop28
-; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $23, $pop30
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop31
-; NO-SIMD128-FAST-NEXT: i32.const $push38=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $8
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $24
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push36=, $pop35, $pop34
-; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $24, $pop36
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $9
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $25
+; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $24, $pop30
+; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop31
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $9
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $25
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push34=, $pop33, $pop32
+; NO-SIMD128-FAST-NEXT: i32.select $push35=, $9, $25, $pop34
+; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $10
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push36=, $26
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push38=, $pop37, $pop36
+; NO-SIMD128-FAST-NEXT: i32.select $push39=, $10, $26, $pop38
+; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop39
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $11
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $27
 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push42=, $pop41, $pop40
-; NO-SIMD128-FAST-NEXT: i32.select $push43=, $9, $25, $pop42
-; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop43
-; NO-SIMD128-FAST-NEXT: i32.const $push48=, 9
-; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $10
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $26
+; NO-SIMD128-FAST-NEXT: i32.select $push43=, $11, $27, $pop42
+; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop43
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $12
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $28
 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push46=, $pop45, $pop44
-; NO-SIMD128-FAST-NEXT: i32.select $push47=, $10, $26, $pop46
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47
-; NO-SIMD128-FAST-NEXT: i32.const $push54=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push51=, $11
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $27
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push52=, $pop51, $pop50
-; NO-SIMD128-FAST-NEXT: i32.select $push53=, $11, $27, $pop52
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53
-; NO-SIMD128-FAST-NEXT: i32.const $push60=, 11
-; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $12
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $28
+; NO-SIMD128-FAST-NEXT: i32.select $push47=, $12, $28, $pop46
+; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop47
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $13
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push48=, $29
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push50=, $pop49, $pop48
+; NO-SIMD128-FAST-NEXT: i32.select $push51=, $13, $29, $pop50
+; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop51
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $14
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push52=, $30
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push54=, $pop53, $pop52
+; NO-SIMD128-FAST-NEXT: i32.select $push55=, $14, $30, $pop54
+; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop55
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $15
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $31
 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push58=, $pop57, $pop56
-; NO-SIMD128-FAST-NEXT: i32.select $push59=, $12, $28, $pop58
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop59
-; NO-SIMD128-FAST-NEXT: i32.const $push66=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push63=, $13
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push62=, $29
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push64=, $pop63, $pop62
-; NO-SIMD128-FAST-NEXT: i32.select $push65=, $13, $29, $pop64
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop65
-; NO-SIMD128-FAST-NEXT: i32.const $push72=, 13
-; NO-SIMD128-FAST-NEXT: i32.add $push73=, $0, $pop72
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $14
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push68=, $30
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push70=, $pop69, $pop68
-; NO-SIMD128-FAST-NEXT: i32.select $push71=, $14, $30, $pop70
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop73), $pop71
-; NO-SIMD128-FAST-NEXT: i32.const $push78=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push79=, $0, $pop78
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push75=, $15
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push74=, $31
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push76=, $pop75, $pop74
-; NO-SIMD128-FAST-NEXT: i32.select $push77=, $15, $31, $pop76
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop79), $pop77
-; NO-SIMD128-FAST-NEXT: i32.const $push84=, 15
-; NO-SIMD128-FAST-NEXT: i32.add $push85=, $0, $pop84
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push81=, $16
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push80=, $32
-; NO-SIMD128-FAST-NEXT: i32.lt_s $push82=, $pop81, $pop80
-; NO-SIMD128-FAST-NEXT: i32.select $push83=, $16, $32, $pop82
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop85), $pop83
+; NO-SIMD128-FAST-NEXT: i32.select $push59=, $15, $31, $pop58
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop59
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push61=, $16
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push60=, $32
+; NO-SIMD128-FAST-NEXT: i32.lt_s $push62=, $pop61, $pop60
+; NO-SIMD128-FAST-NEXT: i32.select $push63=, $16, $32, $pop62
+; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop63
 ; NO-SIMD128-FAST-NEXT: return
 %c = icmp slt <16 x i8> %x, %y
 %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y
@@ -790,140 +614,118 @@ define <16 x i8> @min_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; NO-SIMD128-LABEL: min_u_v16i8:
 ; NO-SIMD128: .functype min_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push5=, 15
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
 ; NO-SIMD128-NEXT: i32.const $push0=, 255
 ; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0
-; NO-SIMD128-NEXT: i32.const $push117=, 255
-; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop117
+; NO-SIMD128-NEXT: i32.const $push95=, 255
+; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop95
 ; NO-SIMD128-NEXT: i32.lt_u $push3=, $pop2, $pop1
 ; NO-SIMD128-NEXT: i32.select $push4=, $16, $32, $pop3
-; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push11=, 14
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.const $push116=, 255
-; NO-SIMD128-NEXT: i32.and $push8=, $15, $pop116
-; NO-SIMD128-NEXT: i32.const $push115=, 255
-; NO-SIMD128-NEXT: i32.and $push7=, $31, $pop115
-; NO-SIMD128-NEXT: i32.lt_u $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.select $push10=, $15, $31, $pop9
-; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push17=, 13
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.const $push114=, 255
-; NO-SIMD128-NEXT: i32.and $push14=, $14, $pop114
-; NO-SIMD128-NEXT: i32.const $push113=, 255
-; NO-SIMD128-NEXT: i32.and $push13=, $30, $pop113
-; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.select $push16=, $14, $30, $pop15
-; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.const $push23=, 12
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.const $push112=, 255
-; NO-SIMD128-NEXT: i32.and $push20=, $13, $pop112
-; NO-SIMD128-NEXT: i32.const $push111=, 255
-; NO-SIMD128-NEXT: i32.and $push19=, $29, $pop111
-; NO-SIMD128-NEXT: i32.lt_u $push21=, $pop20, $pop19
-; NO-SIMD128-NEXT: i32.select $push22=, $13, $29, $pop21
-; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22
-; NO-SIMD128-NEXT: i32.const $push29=, 11
-; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29
-; NO-SIMD128-NEXT: i32.const $push110=, 255
-; NO-SIMD128-NEXT: i32.and $push26=, $12, $pop110
-; NO-SIMD128-NEXT: i32.const $push109=, 255
-; NO-SIMD128-NEXT: i32.and $push25=, $28, $pop109
-; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25
-; NO-SIMD128-NEXT: i32.select $push28=, $12, $28, $pop27
-; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28
-; NO-SIMD128-NEXT: i32.const $push35=, 10
-; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35
-; NO-SIMD128-NEXT: i32.const $push108=, 255
-; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop108
-; NO-SIMD128-NEXT: i32.const $push107=, 255
-; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop107
-; NO-SIMD128-NEXT: i32.lt_u $push33=, $pop32, $pop31
-; NO-SIMD128-NEXT: i32.select $push34=, $11, $27, $pop33
-; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34
-; NO-SIMD128-NEXT: i32.const $push41=, 9
-; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41
-; NO-SIMD128-NEXT: i32.const $push106=, 255
-; NO-SIMD128-NEXT: i32.and $push38=, $10, $pop106
-; NO-SIMD128-NEXT: i32.const $push105=, 255
-; NO-SIMD128-NEXT: i32.and $push37=, $26, $pop105
-; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37
-; NO-SIMD128-NEXT: i32.select $push40=, $10, $26, $pop39
-; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40
-; NO-SIMD128-NEXT: i32.const $push104=, 255
-; NO-SIMD128-NEXT: i32.and $push44=, $9, $pop104
-; NO-SIMD128-NEXT: i32.const $push103=, 255
-; NO-SIMD128-NEXT: i32.and $push43=, $25, $pop103
-; NO-SIMD128-NEXT: i32.lt_u $push45=, $pop44, $pop43
-; NO-SIMD128-NEXT: i32.select $push46=, $9, $25, $pop45
-; NO-SIMD128-NEXT: i32.store8 8($0), $pop46
-; NO-SIMD128-NEXT: i32.const $push51=, 7
-; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51
-; NO-SIMD128-NEXT: i32.const $push102=, 255
-; NO-SIMD128-NEXT: i32.and $push48=, $8, $pop102
-; NO-SIMD128-NEXT: i32.const $push101=, 255
-; NO-SIMD128-NEXT: i32.and $push47=, $24, $pop101
-; NO-SIMD128-NEXT: i32.lt_u $push49=, $pop48, $pop47
-; NO-SIMD128-NEXT: i32.select $push50=, $8, $24, $pop49
-; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50
-; NO-SIMD128-NEXT: i32.const $push57=, 6
-; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57
-; NO-SIMD128-NEXT: i32.const $push100=, 255
-; NO-SIMD128-NEXT: i32.and $push54=, $7, $pop100
-; NO-SIMD128-NEXT: i32.const $push99=, 255
-; NO-SIMD128-NEXT: i32.and $push53=, $23, $pop99
-; NO-SIMD128-NEXT: i32.lt_u $push55=, $pop54, $pop53
-; NO-SIMD128-NEXT: i32.select $push56=, $7, $23, $pop55
-; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56
-; NO-SIMD128-NEXT: i32.const $push63=, 5
-; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63
-; NO-SIMD128-NEXT: i32.const $push98=, 255
-; NO-SIMD128-NEXT: i32.and $push60=, $6, $pop98
-; NO-SIMD128-NEXT: i32.const $push97=, 255
-; NO-SIMD128-NEXT: i32.and $push59=, $22, $pop97
-; NO-SIMD128-NEXT: i32.lt_u $push61=, $pop60, $pop59
-; NO-SIMD128-NEXT: i32.select $push62=, $6, $22, $pop61
-; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62
-; NO-SIMD128-NEXT: i32.const $push96=, 255
-; NO-SIMD128-NEXT: i32.and $push66=, $5, $pop96
-; NO-SIMD128-NEXT: i32.const $push95=, 255
-; NO-SIMD128-NEXT: i32.and $push65=, $21, $pop95
-; NO-SIMD128-NEXT: i32.lt_u $push67=, $pop66, $pop65
-; NO-SIMD128-NEXT: i32.select $push68=, $5, $21, $pop67
-; NO-SIMD128-NEXT: i32.store8 4($0), $pop68
-; NO-SIMD128-NEXT: i32.const $push73=, 3
-; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop4
 ; NO-SIMD128-NEXT: i32.const $push94=, 255
-; NO-SIMD128-NEXT: i32.and $push70=, $4, $pop94
+; NO-SIMD128-NEXT: i32.and $push6=, $15, $pop94
 ; NO-SIMD128-NEXT: i32.const $push93=, 255
-; NO-SIMD128-NEXT: i32.and $push69=, $20, $pop93
-; NO-SIMD128-NEXT: i32.lt_u $push71=, $pop70, $pop69
-; NO-SIMD128-NEXT: i32.select $push72=, $4, $20, $pop71
-; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72
+; NO-SIMD128-NEXT: i32.and $push5=, $31, $pop93
+; NO-SIMD128-NEXT: i32.lt_u $push7=, $pop6, $pop5
+; NO-SIMD128-NEXT: i32.select $push8=, $15, $31, $pop7
+; NO-SIMD128-NEXT: i32.store8 14($0), $pop8
 ; NO-SIMD128-NEXT: i32.const $push92=, 255
-; NO-SIMD128-NEXT: i32.and $push76=, $3, $pop92
+; NO-SIMD128-NEXT: i32.and $push10=, $14, $pop92
 ; NO-SIMD128-NEXT: i32.const $push91=, 255
-; NO-SIMD128-NEXT: i32.and $push75=, $19, $pop91
+; NO-SIMD128-NEXT: i32.and $push9=, $30, $pop91
+; NO-SIMD128-NEXT: i32.lt_u $push11=, $pop10, $pop9
+; NO-SIMD128-NEXT: i32.select $push12=, $14, $30, $pop11
+; NO-SIMD128-NEXT: i32.store8 13($0), $pop12
 ; NO-SIMD128-NEXT: i32.const $push90=, 255
-; NO-SIMD128-NEXT: i32.and $push80=, $2, $pop90
+; NO-SIMD128-NEXT: i32.and $push14=, $13, $pop90
 ; NO-SIMD128-NEXT: i32.const $push89=, 255
-; NO-SIMD128-NEXT: i32.and $push79=, $18, $pop89
+; NO-SIMD128-NEXT: i32.and $push13=, $29, $pop89
+; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13
+; NO-SIMD128-NEXT: i32.select $push16=, $13, $29, $pop15
+; NO-SIMD128-NEXT: i32.store8 12($0), $pop16
 ; NO-SIMD128-NEXT: i32.const $push88=, 255
-; NO-SIMD128-NEXT: i32.and $push84=, $1, $pop88
+; NO-SIMD128-NEXT: i32.and $push18=, $12, $pop88
 ; NO-SIMD128-NEXT: i32.const $push87=, 255
-; NO-SIMD128-NEXT: i32.and $push83=, $17, $pop87
-; NO-SIMD128-NEXT: i32.lt_u $push85=, $pop84, $pop83
-; NO-SIMD128-NEXT: i32.select $push86=, $1, $17, $pop85
-; NO-SIMD128-NEXT: i32.store8 0($0), $pop86
+; NO-SIMD128-NEXT: i32.and $push17=, $28, $pop87
+; NO-SIMD128-NEXT: i32.lt_u $push19=, $pop18, $pop17
+; NO-SIMD128-NEXT: i32.select $push20=, $12, $28, $pop19
+; NO-SIMD128-NEXT: i32.store8 11($0), $pop20
+; NO-SIMD128-NEXT: i32.const $push86=, 255
+; NO-SIMD128-NEXT: i32.and $push22=, $11, $pop86
+; NO-SIMD128-NEXT: i32.const $push85=, 255
+; NO-SIMD128-NEXT: i32.and $push21=, $27, $pop85
+; NO-SIMD128-NEXT: i32.lt_u $push23=, $pop22, $pop21
+; NO-SIMD128-NEXT: i32.select $push24=, $11, $27, $pop23
+; NO-SIMD128-NEXT: i32.store8 10($0), $pop24
+; NO-SIMD128-NEXT: i32.const $push84=, 255
+; NO-SIMD128-NEXT: i32.and $push26=, $10, $pop84
+; NO-SIMD128-NEXT: i32.const $push83=, 255
+; NO-SIMD128-NEXT: i32.and $push25=, $26, $pop83
+; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25
+; NO-SIMD128-NEXT: i32.select $push28=, $10, $26, $pop27
+; NO-SIMD128-NEXT: i32.store8 9($0), $pop28
+; NO-SIMD128-NEXT: i32.const $push82=, 255
+; NO-SIMD128-NEXT: i32.and $push30=, $9, $pop82
+; NO-SIMD128-NEXT: i32.const $push81=, 255
+; NO-SIMD128-NEXT: i32.and $push29=, $25, $pop81
+; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29
+; NO-SIMD128-NEXT: i32.select $push32=, $9, $25, $pop31
+; NO-SIMD128-NEXT: i32.store8 8($0), $pop32
+; NO-SIMD128-NEXT: i32.const $push80=, 255
+; NO-SIMD128-NEXT: i32.and $push34=, $8, $pop80
+; NO-SIMD128-NEXT: i32.const $push79=, 255
+; NO-SIMD128-NEXT: i32.and $push33=, $24, $pop79
+; NO-SIMD128-NEXT: i32.lt_u $push35=, $pop34, $pop33
+; NO-SIMD128-NEXT: i32.select $push36=, $8, $24, $pop35
+; NO-SIMD128-NEXT: i32.store8 7($0), $pop36
+; NO-SIMD128-NEXT: i32.const $push78=, 255
+; NO-SIMD128-NEXT: i32.and $push38=, $7, $pop78
+; NO-SIMD128-NEXT: i32.const $push77=, 255
+; NO-SIMD128-NEXT: i32.and $push37=, $23, $pop77
+; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37
+; NO-SIMD128-NEXT: i32.select $push40=, $7, $23, $pop39
+; NO-SIMD128-NEXT: i32.store8 6($0), $pop40
+; NO-SIMD128-NEXT: i32.const $push76=, 255
+; NO-SIMD128-NEXT: i32.and $push42=, $6, $pop76
+; NO-SIMD128-NEXT: i32.const $push75=, 255
+; NO-SIMD128-NEXT: i32.and $push41=, $22, $pop75
+; NO-SIMD128-NEXT: i32.lt_u $push43=, $pop42, $pop41
+; NO-SIMD128-NEXT: i32.select $push44=, $6, $22, $pop43
+; NO-SIMD128-NEXT: i32.store8 5($0), $pop44
+; NO-SIMD128-NEXT: i32.const $push74=, 255
+; NO-SIMD128-NEXT: i32.and $push46=, $5, $pop74
+; NO-SIMD128-NEXT: i32.const $push73=, 255
+; NO-SIMD128-NEXT: i32.and $push45=, $21, $pop73
+; NO-SIMD128-NEXT: i32.lt_u $push47=, $pop46, $pop45
+; NO-SIMD128-NEXT: i32.select $push48=, $5, $21, $pop47
+; NO-SIMD128-NEXT: i32.store8 4($0), $pop48
+; NO-SIMD128-NEXT: i32.const $push72=, 255
+; NO-SIMD128-NEXT: i32.and $push50=, $4, $pop72
+; NO-SIMD128-NEXT: i32.const $push71=, 255
+; NO-SIMD128-NEXT: i32.and $push49=, $20, $pop71
+; NO-SIMD128-NEXT: i32.lt_u $push51=, $pop50, $pop49
+; NO-SIMD128-NEXT: i32.select $push52=, $4, $20, $pop51
+; NO-SIMD128-NEXT: i32.store8 3($0), $pop52
+; NO-SIMD128-NEXT: i32.const $push70=, 255
+; NO-SIMD128-NEXT: i32.and $push54=, $3, $pop70
+; NO-SIMD128-NEXT: i32.const $push69=, 255
+; NO-SIMD128-NEXT: i32.and $push53=, $19, $pop69
+; NO-SIMD128-NEXT: i32.lt_u $push55=, $pop54, $pop53
+; NO-SIMD128-NEXT: i32.select $push56=, $3, $19, $pop55
+; NO-SIMD128-NEXT: i32.store8 2($0), $pop56
+; NO-SIMD128-NEXT: i32.const $push68=, 255
+; NO-SIMD128-NEXT: i32.and $push58=, $2, $pop68
+; NO-SIMD128-NEXT: i32.const $push67=, 255
+; NO-SIMD128-NEXT: i32.and $push57=, $18, $pop67
+; NO-SIMD128-NEXT: i32.lt_u $push59=, $pop58, $pop57
+; NO-SIMD128-NEXT: i32.select $push60=, $2, $18, $pop59
+; NO-SIMD128-NEXT: i32.store8 1($0), $pop60
+; NO-SIMD128-NEXT: i32.const $push66=, 255
+; NO-SIMD128-NEXT: i32.and $push62=, $1, $pop66
+; NO-SIMD128-NEXT: i32.const $push65=, 255
+; NO-SIMD128-NEXT: i32.and $push61=, $17, $pop65
+; NO-SIMD128-NEXT: i32.lt_u $push63=, $pop62, $pop61
+; NO-SIMD128-NEXT: i32.select $push64=, $1, $17, $pop63
+; NO-SIMD128-NEXT: i32.store8 0($0), $pop64
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: min_u_v16i8:
@@ -931,138 +733,116 @@ define <16 x i8> @min_u_v16i8(<16 x i8> %x, <16 x i8> %y) {
 ; NO-SIMD128-FAST-NEXT: # %bb.0:
 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push117=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop117
+; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop95
 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push3=, $pop2, $pop1
 ; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $17, $pop3
 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push116=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop116
-; NO-SIMD128-FAST-NEXT: i32.const $push115=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop115
+; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop94
+; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop93
 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push7=, $pop6, $pop5
 ; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $18, $pop7
 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push114=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop114
-; NO-SIMD128-FAST-NEXT: i32.const $push113=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop113
+; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop92
+; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop91
 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push11=, $pop10, $pop9
 ; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $19, $pop11
 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop112
-; NO-SIMD128-FAST-NEXT: i32.const $push111=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop111
+; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop90
+; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop89
 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push15=, $pop14, $pop13
 ; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $20, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push110=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop110
-; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop109
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push21=, $pop20, $pop19
-; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $21, $pop21
-; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.const $push108=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop108
-; NO-SIMD128-FAST-NEXT: i32.const $push107=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop107
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $22, $pop25
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.const $push106=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop106
-; NO-SIMD128-FAST-NEXT: i32.const $push105=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $23, $pop105
+; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop88
+; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $21, $pop87
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push19=, $pop18, $pop17
+; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $21, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop86
+; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push21=, $22, $pop85
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push23=, $pop22, $pop21
+; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $22, $pop23
+; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop84
+; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop83
+; NO-SIMD128-FAST-NEXT: i32.lt_u $push27=, $pop26, $pop25
+; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $23, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop82
+; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push29=, $24, $pop81
 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push31=, $pop30, $pop29
-; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $23, $pop31
-; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.const $push104=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop104
-; NO-SIMD128-FAST-NEXT: i32.const $push103=, 255
-;
NO-SIMD128-FAST-NEXT: i32.and $push35=, $24, $pop103 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $24, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push102=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push42=, $9, $pop102 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $25, $pop101 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $24, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop79 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push35=, $pop34, $pop33 +; NO-SIMD128-FAST-NEXT: i32.select $push36=, $9, $25, $pop35 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $10, $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $26, $pop77 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.select $push40=, $10, $26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop75 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push43=, $pop42, $pop41 -; NO-SIMD128-FAST-NEXT: i32.select $push44=, $9, $25, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push46=, $10, $pop100 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push45=, $26, $pop99 +; NO-SIMD128-FAST-NEXT: i32.select $push44=, $11, $27, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $12, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $28, $pop73 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push47=, $pop46, $pop45 -; NO-SIMD128-FAST-NEXT: i32.select $push48=, $10, $26, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push52=, $11, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $27, $pop97 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push53=, $pop52, $pop51 -; NO-SIMD128-FAST-NEXT: i32.select $push54=, $11, $27, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push58=, $12, $pop96 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push57=, $28, $pop95 +; NO-SIMD128-FAST-NEXT: i32.select $push48=, $12, $28, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push50=, $13, $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 
255 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $29, $pop71 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push51=, $pop50, $pop49 +; NO-SIMD128-FAST-NEXT: i32.select $push52=, $13, $29, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push54=, $14, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $30, $pop69 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push55=, $pop54, $pop53 +; NO-SIMD128-FAST-NEXT: i32.select $push56=, $14, $30, $pop55 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push58=, $15, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $31, $pop67 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push59=, $pop58, $pop57 -; NO-SIMD128-FAST-NEXT: i32.select $push60=, $12, $28, $pop59 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push64=, $13, $pop94 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $29, $pop93 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push65=, $pop64, $pop63 -; NO-SIMD128-FAST-NEXT: i32.select $push66=, $13, $29, $pop65 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push70=, $14, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push69=, $30, $pop91 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push71=, $pop70, $pop69 -; NO-SIMD128-FAST-NEXT: i32.select $push72=, $14, $30, $pop71 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push76=, $15, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push75=, $31, $pop89 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push77=, $pop76, $pop75 -; NO-SIMD128-FAST-NEXT: i32.select $push78=, $15, $31, $pop77 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push82=, $16, $pop88 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push81=, $32, $pop87 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push83=, $pop82, $pop81 -; NO-SIMD128-FAST-NEXT: i32.select $push84=, $16, $32, $pop83 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: i32.select $push60=, $15, $31, $pop59 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push62=, $16, $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push61=, $32, $pop65 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push63=, $pop62, $pop61 +; NO-SIMD128-FAST-NEXT: i32.select $push64=, $16, $32, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64 ; NO-SIMD128-FAST-NEXT: return %c = icmp ult <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y @@ 
-1085,108 +865,86 @@ define <16 x i8> @max_s_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: max_s_v16i8: ; NO-SIMD128: .functype max_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 15 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16 ; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32 ; NO-SIMD128-NEXT: i32.gt_s $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $16, $32, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.extend8_s $push7=, $15 -; NO-SIMD128-NEXT: i32.extend8_s $push6=, $31 -; NO-SIMD128-NEXT: i32.gt_s $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $15, $31, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push16=, 13 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend8_s $push13=, $14 -; NO-SIMD128-NEXT: i32.extend8_s $push12=, $30 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $15 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $31 +; NO-SIMD128-NEXT: i32.gt_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $15, $31, $pop6 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop7 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $14 +; NO-SIMD128-NEXT: i32.extend8_s $push8=, $30 +; NO-SIMD128-NEXT: i32.gt_s $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $14, $30, $pop10 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop11 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $13 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $29 ; NO-SIMD128-NEXT: i32.gt_s $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.select $push15=, $14, $30, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push22=, 12 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.extend8_s $push19=, $13 -; NO-SIMD128-NEXT: i32.extend8_s $push18=, $29 -; NO-SIMD128-NEXT: i32.gt_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.select $push21=, $13, $29, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push28=, 11 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.extend8_s $push25=, $12 -; NO-SIMD128-NEXT: i32.extend8_s $push24=, $28 +; NO-SIMD128-NEXT: i32.select $push15=, $13, $29, $pop14 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop15 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $12 +; NO-SIMD128-NEXT: i32.extend8_s $push16=, $28 +; NO-SIMD128-NEXT: i32.gt_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.select $push19=, $12, $28, $pop18 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop19 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $11 +; NO-SIMD128-NEXT: i32.extend8_s $push20=, $27 +; NO-SIMD128-NEXT: i32.gt_s $push22=, $pop21, $pop20 +; NO-SIMD128-NEXT: i32.select $push23=, $11, $27, $pop22 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop23 +; NO-SIMD128-NEXT: i32.extend8_s $push25=, $10 +; NO-SIMD128-NEXT: i32.extend8_s $push24=, $26 ; NO-SIMD128-NEXT: i32.gt_s $push26=, $pop25, $pop24 -; NO-SIMD128-NEXT: i32.select $push27=, $12, $28, $pop26 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push34=, 10 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11 
-; NO-SIMD128-NEXT: i32.extend8_s $push30=, $27 -; NO-SIMD128-NEXT: i32.gt_s $push32=, $pop31, $pop30 -; NO-SIMD128-NEXT: i32.select $push33=, $11, $27, $pop32 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push40=, 9 -; NO-SIMD128-NEXT: i32.add $push41=, $0, $pop40 -; NO-SIMD128-NEXT: i32.extend8_s $push37=, $10 -; NO-SIMD128-NEXT: i32.extend8_s $push36=, $26 +; NO-SIMD128-NEXT: i32.select $push27=, $10, $26, $pop26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop27 +; NO-SIMD128-NEXT: i32.extend8_s $push29=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push28=, $25 +; NO-SIMD128-NEXT: i32.gt_s $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.select $push31=, $9, $25, $pop30 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop31 +; NO-SIMD128-NEXT: i32.extend8_s $push33=, $8 +; NO-SIMD128-NEXT: i32.extend8_s $push32=, $24 +; NO-SIMD128-NEXT: i32.gt_s $push34=, $pop33, $pop32 +; NO-SIMD128-NEXT: i32.select $push35=, $8, $24, $pop34 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop35 +; NO-SIMD128-NEXT: i32.extend8_s $push37=, $7 +; NO-SIMD128-NEXT: i32.extend8_s $push36=, $23 ; NO-SIMD128-NEXT: i32.gt_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.select $push39=, $10, $26, $pop38 -; NO-SIMD128-NEXT: i32.store8 0($pop41), $pop39 -; NO-SIMD128-NEXT: i32.extend8_s $push43=, $9 -; NO-SIMD128-NEXT: i32.extend8_s $push42=, $25 -; NO-SIMD128-NEXT: i32.gt_s $push44=, $pop43, $pop42 -; NO-SIMD128-NEXT: i32.select $push45=, $9, $25, $pop44 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop45 -; NO-SIMD128-NEXT: i32.const $push50=, 7 -; NO-SIMD128-NEXT: i32.add $push51=, $0, $pop50 -; NO-SIMD128-NEXT: i32.extend8_s $push47=, $8 -; NO-SIMD128-NEXT: i32.extend8_s $push46=, $24 -; NO-SIMD128-NEXT: i32.gt_s $push48=, $pop47, $pop46 -; NO-SIMD128-NEXT: i32.select $push49=, $8, $24, $pop48 -; NO-SIMD128-NEXT: i32.store8 0($pop51), $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, 6 -; NO-SIMD128-NEXT: i32.add $push57=, $0, $pop56 -; NO-SIMD128-NEXT: i32.extend8_s $push53=, $7 -; NO-SIMD128-NEXT: i32.extend8_s $push52=, $23 +; NO-SIMD128-NEXT: i32.select $push39=, $7, $23, $pop38 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push41=, $6 +; NO-SIMD128-NEXT: i32.extend8_s $push40=, $22 +; NO-SIMD128-NEXT: i32.gt_s $push42=, $pop41, $pop40 +; NO-SIMD128-NEXT: i32.select $push43=, $6, $22, $pop42 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop43 +; NO-SIMD128-NEXT: i32.extend8_s $push45=, $5 +; NO-SIMD128-NEXT: i32.extend8_s $push44=, $21 +; NO-SIMD128-NEXT: i32.gt_s $push46=, $pop45, $pop44 +; NO-SIMD128-NEXT: i32.select $push47=, $5, $21, $pop46 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop47 +; NO-SIMD128-NEXT: i32.extend8_s $push49=, $4 +; NO-SIMD128-NEXT: i32.extend8_s $push48=, $20 +; NO-SIMD128-NEXT: i32.gt_s $push50=, $pop49, $pop48 +; NO-SIMD128-NEXT: i32.select $push51=, $4, $20, $pop50 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop51 +; NO-SIMD128-NEXT: i32.extend8_s $push53=, $3 +; NO-SIMD128-NEXT: i32.extend8_s $push52=, $19 ; NO-SIMD128-NEXT: i32.gt_s $push54=, $pop53, $pop52 -; NO-SIMD128-NEXT: i32.select $push55=, $7, $23, $pop54 -; NO-SIMD128-NEXT: i32.store8 0($pop57), $pop55 -; NO-SIMD128-NEXT: i32.const $push62=, 5 -; NO-SIMD128-NEXT: i32.add $push63=, $0, $pop62 -; NO-SIMD128-NEXT: i32.extend8_s $push59=, $6 -; NO-SIMD128-NEXT: i32.extend8_s $push58=, $22 -; NO-SIMD128-NEXT: i32.gt_s $push60=, $pop59, $pop58 -; NO-SIMD128-NEXT: i32.select $push61=, $6, $22, $pop60 -; NO-SIMD128-NEXT: i32.store8 0($pop63), $pop61 -; NO-SIMD128-NEXT: i32.extend8_s $push65=, $5 -; 
NO-SIMD128-NEXT: i32.extend8_s $push64=, $21 -; NO-SIMD128-NEXT: i32.gt_s $push66=, $pop65, $pop64 -; NO-SIMD128-NEXT: i32.select $push67=, $5, $21, $pop66 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop67 -; NO-SIMD128-NEXT: i32.const $push72=, 3 -; NO-SIMD128-NEXT: i32.add $push73=, $0, $pop72 -; NO-SIMD128-NEXT: i32.extend8_s $push69=, $4 -; NO-SIMD128-NEXT: i32.extend8_s $push68=, $20 -; NO-SIMD128-NEXT: i32.gt_s $push70=, $pop69, $pop68 -; NO-SIMD128-NEXT: i32.select $push71=, $4, $20, $pop70 -; NO-SIMD128-NEXT: i32.store8 0($pop73), $pop71 -; NO-SIMD128-NEXT: i32.extend8_s $push75=, $3 -; NO-SIMD128-NEXT: i32.extend8_s $push74=, $19 -; NO-SIMD128-NEXT: i32.gt_s $push76=, $pop75, $pop74 -; NO-SIMD128-NEXT: i32.select $push77=, $3, $19, $pop76 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop77 -; NO-SIMD128-NEXT: i32.extend8_s $push79=, $2 -; NO-SIMD128-NEXT: i32.extend8_s $push78=, $18 -; NO-SIMD128-NEXT: i32.gt_s $push80=, $pop79, $pop78 -; NO-SIMD128-NEXT: i32.select $push81=, $2, $18, $pop80 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop81 -; NO-SIMD128-NEXT: i32.extend8_s $push83=, $1 -; NO-SIMD128-NEXT: i32.extend8_s $push82=, $17 -; NO-SIMD128-NEXT: i32.gt_s $push84=, $pop83, $pop82 -; NO-SIMD128-NEXT: i32.select $push85=, $1, $17, $pop84 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop85 +; NO-SIMD128-NEXT: i32.select $push55=, $3, $19, $pop54 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop55 +; NO-SIMD128-NEXT: i32.extend8_s $push57=, $2 +; NO-SIMD128-NEXT: i32.extend8_s $push56=, $18 +; NO-SIMD128-NEXT: i32.gt_s $push58=, $pop57, $pop56 +; NO-SIMD128-NEXT: i32.select $push59=, $2, $18, $pop58 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop59 +; NO-SIMD128-NEXT: i32.extend8_s $push61=, $1 +; NO-SIMD128-NEXT: i32.extend8_s $push60=, $17 +; NO-SIMD128-NEXT: i32.gt_s $push62=, $pop61, $pop60 +; NO-SIMD128-NEXT: i32.select $push63=, $1, $17, $pop62 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop63 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_s_v16i8: @@ -1207,93 +965,71 @@ define <16 x i8> @max_s_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.gt_s $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $19, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $20 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $20, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $21 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $21, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $22 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push24=, $pop23, $pop22 -; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $22, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $7 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s 
$push17=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $21 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $21, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop19 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $22 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $22, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push26=, $pop25, $pop24 +; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $23, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $24 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $23, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $24 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push36=, $pop35, $pop34 -; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $24, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $9 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $25 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $24, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop31 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $9 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $25 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push34=, $pop33, $pop32 +; NO-SIMD128-FAST-NEXT: i32.select $push35=, $9, $25, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $10 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push36=, $26 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push38=, $pop37, $pop36 +; NO-SIMD128-FAST-NEXT: i32.select $push39=, $10, $26, $pop38 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $11 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $27 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push42=, $pop41, $pop40 -; NO-SIMD128-FAST-NEXT: i32.select $push43=, $9, $25, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $10 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $26 +; NO-SIMD128-FAST-NEXT: i32.select $push43=, $11, $27, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop43 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $28 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push46=, $pop45, $pop44 -; NO-SIMD128-FAST-NEXT: i32.select $push47=, $10, $26, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push51=, $11 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $27 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push52=, $pop51, $pop50 -; NO-SIMD128-FAST-NEXT: i32.select $push53=, $11, $27, $pop52 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 11 -; NO-SIMD128-FAST-NEXT: 
i32.add $push61=, $0, $pop60 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $12 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $28 +; NO-SIMD128-FAST-NEXT: i32.select $push47=, $12, $28, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop47 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push48=, $29 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push50=, $pop49, $pop48 +; NO-SIMD128-FAST-NEXT: i32.select $push51=, $13, $29, $pop50 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop51 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push52=, $30 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push54=, $pop53, $pop52 +; NO-SIMD128-FAST-NEXT: i32.select $push55=, $14, $30, $pop54 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop55 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $31 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push58=, $pop57, $pop56 -; NO-SIMD128-FAST-NEXT: i32.select $push59=, $12, $28, $pop58 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push63=, $13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push62=, $29 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push64=, $pop63, $pop62 -; NO-SIMD128-FAST-NEXT: i32.select $push65=, $13, $29, $pop64 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop65 -; NO-SIMD128-FAST-NEXT: i32.const $push72=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push73=, $0, $pop72 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $14 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push68=, $30 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push70=, $pop69, $pop68 -; NO-SIMD128-FAST-NEXT: i32.select $push71=, $14, $30, $pop70 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop73), $pop71 -; NO-SIMD128-FAST-NEXT: i32.const $push78=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push79=, $0, $pop78 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push75=, $15 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push74=, $31 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push76=, $pop75, $pop74 -; NO-SIMD128-FAST-NEXT: i32.select $push77=, $15, $31, $pop76 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop79), $pop77 -; NO-SIMD128-FAST-NEXT: i32.const $push84=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push85=, $0, $pop84 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push81=, $16 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push80=, $32 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push82=, $pop81, $pop80 -; NO-SIMD128-FAST-NEXT: i32.select $push83=, $16, $32, $pop82 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop85), $pop83 +; NO-SIMD128-FAST-NEXT: i32.select $push59=, $15, $31, $pop58 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop59 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push61=, $16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push60=, $32 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push62=, $pop61, $pop60 +; NO-SIMD128-FAST-NEXT: i32.select $push63=, $16, $32, $pop62 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop63 ; NO-SIMD128-FAST-NEXT: return %c = icmp sgt <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y @@ -1316,140 +1052,118 @@ define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: max_u_v16i8: ; NO-SIMD128: .functype max_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 15 -; NO-SIMD128-NEXT: 
i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push0=, 255 ; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0 -; NO-SIMD128-NEXT: i32.const $push117=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop117 +; NO-SIMD128-NEXT: i32.const $push95=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop95 ; NO-SIMD128-NEXT: i32.gt_u $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.select $push4=, $16, $32, $pop3 -; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push116=, 255 -; NO-SIMD128-NEXT: i32.and $push8=, $15, $pop116 -; NO-SIMD128-NEXT: i32.const $push115=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $31, $pop115 -; NO-SIMD128-NEXT: i32.gt_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.select $push10=, $15, $31, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 13 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push114=, 255 -; NO-SIMD128-NEXT: i32.and $push14=, $14, $pop114 -; NO-SIMD128-NEXT: i32.const $push113=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $30, $pop113 -; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.select $push16=, $14, $30, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push23=, 12 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.const $push112=, 255 -; NO-SIMD128-NEXT: i32.and $push20=, $13, $pop112 -; NO-SIMD128-NEXT: i32.const $push111=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $29, $pop111 -; NO-SIMD128-NEXT: i32.gt_u $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.select $push22=, $13, $29, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push110=, 255 -; NO-SIMD128-NEXT: i32.and $push26=, $12, $pop110 -; NO-SIMD128-NEXT: i32.const $push109=, 255 -; NO-SIMD128-NEXT: i32.and $push25=, $28, $pop109 -; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25 -; NO-SIMD128-NEXT: i32.select $push28=, $12, $28, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push35=, 10 -; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-NEXT: i32.const $push108=, 255 -; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop108 -; NO-SIMD128-NEXT: i32.const $push107=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop107 -; NO-SIMD128-NEXT: i32.gt_u $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.select $push34=, $11, $27, $pop33 -; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 -; NO-SIMD128-NEXT: i32.const $push41=, 9 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const $push106=, 255 -; NO-SIMD128-NEXT: i32.and $push38=, $10, $pop106 -; NO-SIMD128-NEXT: i32.const $push105=, 255 -; NO-SIMD128-NEXT: i32.and $push37=, $26, $pop105 -; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.select $push40=, $10, $26, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push104=, 255 -; NO-SIMD128-NEXT: i32.and $push44=, $9, $pop104 -; NO-SIMD128-NEXT: i32.const $push103=, 255 -; NO-SIMD128-NEXT: i32.and $push43=, $25, $pop103 -; NO-SIMD128-NEXT: i32.gt_u $push45=, $pop44, $pop43 -; NO-SIMD128-NEXT: i32.select $push46=, $9, $25, $pop45 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 -; NO-SIMD128-NEXT: i32.const $push51=, 7 -; NO-SIMD128-NEXT: i32.add $push52=, $0, 
$pop51 -; NO-SIMD128-NEXT: i32.const $push102=, 255 -; NO-SIMD128-NEXT: i32.and $push48=, $8, $pop102 -; NO-SIMD128-NEXT: i32.const $push101=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $24, $pop101 -; NO-SIMD128-NEXT: i32.gt_u $push49=, $pop48, $pop47 -; NO-SIMD128-NEXT: i32.select $push50=, $8, $24, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.const $push57=, 6 -; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 -; NO-SIMD128-NEXT: i32.const $push100=, 255 -; NO-SIMD128-NEXT: i32.and $push54=, $7, $pop100 -; NO-SIMD128-NEXT: i32.const $push99=, 255 -; NO-SIMD128-NEXT: i32.and $push53=, $23, $pop99 -; NO-SIMD128-NEXT: i32.gt_u $push55=, $pop54, $pop53 -; NO-SIMD128-NEXT: i32.select $push56=, $7, $23, $pop55 -; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 -; NO-SIMD128-NEXT: i32.const $push63=, 5 -; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-NEXT: i32.const $push98=, 255 -; NO-SIMD128-NEXT: i32.and $push60=, $6, $pop98 -; NO-SIMD128-NEXT: i32.const $push97=, 255 -; NO-SIMD128-NEXT: i32.and $push59=, $22, $pop97 -; NO-SIMD128-NEXT: i32.gt_u $push61=, $pop60, $pop59 -; NO-SIMD128-NEXT: i32.select $push62=, $6, $22, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-NEXT: i32.const $push96=, 255 -; NO-SIMD128-NEXT: i32.and $push66=, $5, $pop96 -; NO-SIMD128-NEXT: i32.const $push95=, 255 -; NO-SIMD128-NEXT: i32.and $push65=, $21, $pop95 -; NO-SIMD128-NEXT: i32.gt_u $push67=, $pop66, $pop65 -; NO-SIMD128-NEXT: i32.select $push68=, $5, $21, $pop67 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 -; NO-SIMD128-NEXT: i32.const $push73=, 3 -; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push94=, 255 -; NO-SIMD128-NEXT: i32.and $push70=, $4, $pop94 +; NO-SIMD128-NEXT: i32.and $push6=, $15, $pop94 ; NO-SIMD128-NEXT: i32.const $push93=, 255 -; NO-SIMD128-NEXT: i32.and $push69=, $20, $pop93 -; NO-SIMD128-NEXT: i32.gt_u $push71=, $pop70, $pop69 -; NO-SIMD128-NEXT: i32.select $push72=, $4, $20, $pop71 -; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-NEXT: i32.and $push5=, $31, $pop93 +; NO-SIMD128-NEXT: i32.gt_u $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.select $push8=, $15, $31, $pop7 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop8 ; NO-SIMD128-NEXT: i32.const $push92=, 255 -; NO-SIMD128-NEXT: i32.and $push76=, $3, $pop92 +; NO-SIMD128-NEXT: i32.and $push10=, $14, $pop92 ; NO-SIMD128-NEXT: i32.const $push91=, 255 -; NO-SIMD128-NEXT: i32.and $push75=, $19, $pop91 -; NO-SIMD128-NEXT: i32.gt_u $push77=, $pop76, $pop75 -; NO-SIMD128-NEXT: i32.select $push78=, $3, $19, $pop77 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 +; NO-SIMD128-NEXT: i32.and $push9=, $30, $pop91 +; NO-SIMD128-NEXT: i32.gt_u $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.select $push12=, $14, $30, $pop11 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push90=, 255 -; NO-SIMD128-NEXT: i32.and $push80=, $2, $pop90 +; NO-SIMD128-NEXT: i32.and $push14=, $13, $pop90 ; NO-SIMD128-NEXT: i32.const $push89=, 255 -; NO-SIMD128-NEXT: i32.and $push79=, $18, $pop89 -; NO-SIMD128-NEXT: i32.gt_u $push81=, $pop80, $pop79 -; NO-SIMD128-NEXT: i32.select $push82=, $2, $18, $pop81 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 +; NO-SIMD128-NEXT: i32.and $push13=, $29, $pop89 +; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $13, $29, $pop15 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop16 ; NO-SIMD128-NEXT: i32.const $push88=, 255 -; 
NO-SIMD128-NEXT: i32.and $push84=, $1, $pop88 +; NO-SIMD128-NEXT: i32.and $push18=, $12, $pop88 ; NO-SIMD128-NEXT: i32.const $push87=, 255 -; NO-SIMD128-NEXT: i32.and $push83=, $17, $pop87 -; NO-SIMD128-NEXT: i32.gt_u $push85=, $pop84, $pop83 -; NO-SIMD128-NEXT: i32.select $push86=, $1, $17, $pop85 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: i32.and $push17=, $28, $pop87 +; NO-SIMD128-NEXT: i32.gt_u $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.select $push20=, $12, $28, $pop19 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push86=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $11, $pop86 +; NO-SIMD128-NEXT: i32.const $push85=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $27, $pop85 +; NO-SIMD128-NEXT: i32.gt_u $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.select $push24=, $11, $27, $pop23 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push84=, 255 +; NO-SIMD128-NEXT: i32.and $push26=, $10, $pop84 +; NO-SIMD128-NEXT: i32.const $push83=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $26, $pop83 +; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.select $push28=, $10, $26, $pop27 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push82=, 255 +; NO-SIMD128-NEXT: i32.and $push30=, $9, $pop82 +; NO-SIMD128-NEXT: i32.const $push81=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $25, $pop81 +; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.select $push32=, $9, $25, $pop31 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop32 +; NO-SIMD128-NEXT: i32.const $push80=, 255 +; NO-SIMD128-NEXT: i32.and $push34=, $8, $pop80 +; NO-SIMD128-NEXT: i32.const $push79=, 255 +; NO-SIMD128-NEXT: i32.and $push33=, $24, $pop79 +; NO-SIMD128-NEXT: i32.gt_u $push35=, $pop34, $pop33 +; NO-SIMD128-NEXT: i32.select $push36=, $8, $24, $pop35 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop36 +; NO-SIMD128-NEXT: i32.const $push78=, 255 +; NO-SIMD128-NEXT: i32.and $push38=, $7, $pop78 +; NO-SIMD128-NEXT: i32.const $push77=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $23, $pop77 +; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.select $push40=, $7, $23, $pop39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop40 +; NO-SIMD128-NEXT: i32.const $push76=, 255 +; NO-SIMD128-NEXT: i32.and $push42=, $6, $pop76 +; NO-SIMD128-NEXT: i32.const $push75=, 255 +; NO-SIMD128-NEXT: i32.and $push41=, $22, $pop75 +; NO-SIMD128-NEXT: i32.gt_u $push43=, $pop42, $pop41 +; NO-SIMD128-NEXT: i32.select $push44=, $6, $22, $pop43 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop44 +; NO-SIMD128-NEXT: i32.const $push74=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $5, $pop74 +; NO-SIMD128-NEXT: i32.const $push73=, 255 +; NO-SIMD128-NEXT: i32.and $push45=, $21, $pop73 +; NO-SIMD128-NEXT: i32.gt_u $push47=, $pop46, $pop45 +; NO-SIMD128-NEXT: i32.select $push48=, $5, $21, $pop47 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop48 +; NO-SIMD128-NEXT: i32.const $push72=, 255 +; NO-SIMD128-NEXT: i32.and $push50=, $4, $pop72 +; NO-SIMD128-NEXT: i32.const $push71=, 255 +; NO-SIMD128-NEXT: i32.and $push49=, $20, $pop71 +; NO-SIMD128-NEXT: i32.gt_u $push51=, $pop50, $pop49 +; NO-SIMD128-NEXT: i32.select $push52=, $4, $20, $pop51 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop52 +; NO-SIMD128-NEXT: i32.const $push70=, 255 +; NO-SIMD128-NEXT: i32.and $push54=, $3, $pop70 +; NO-SIMD128-NEXT: i32.const $push69=, 255 +; NO-SIMD128-NEXT: i32.and $push53=, $19, $pop69 +; NO-SIMD128-NEXT: i32.gt_u $push55=, $pop54, $pop53 +; 
NO-SIMD128-NEXT: i32.select $push56=, $3, $19, $pop55 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop56 +; NO-SIMD128-NEXT: i32.const $push68=, 255 +; NO-SIMD128-NEXT: i32.and $push58=, $2, $pop68 +; NO-SIMD128-NEXT: i32.const $push67=, 255 +; NO-SIMD128-NEXT: i32.and $push57=, $18, $pop67 +; NO-SIMD128-NEXT: i32.gt_u $push59=, $pop58, $pop57 +; NO-SIMD128-NEXT: i32.select $push60=, $2, $18, $pop59 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop60 +; NO-SIMD128-NEXT: i32.const $push66=, 255 +; NO-SIMD128-NEXT: i32.and $push62=, $1, $pop66 +; NO-SIMD128-NEXT: i32.const $push65=, 255 +; NO-SIMD128-NEXT: i32.and $push61=, $17, $pop65 +; NO-SIMD128-NEXT: i32.gt_u $push63=, $pop62, $pop61 +; NO-SIMD128-NEXT: i32.select $push64=, $1, $17, $pop63 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop64 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_u_v16i8: @@ -1457,138 +1171,116 @@ define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push117=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop117 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop95 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $17, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push116=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop116 -; NO-SIMD128-FAST-NEXT: i32.const $push115=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop115 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop94 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop93 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $18, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push114=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop114 -; NO-SIMD128-FAST-NEXT: i32.const $push113=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop113 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop91 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $19, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop112 -; NO-SIMD128-FAST-NEXT: i32.const $push111=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop111 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop90 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop89 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $20, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push110=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop110 -; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop109 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push21=, $pop20, 
$pop19 -; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $21, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push108=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop108 -; NO-SIMD128-FAST-NEXT: i32.const $push107=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop107 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $22, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push106=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop106 -; NO-SIMD128-FAST-NEXT: i32.const $push105=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $23, $pop105 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $21, $pop87 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $21, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop86 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $22, $pop85 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $22, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop84 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop83 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $23, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop82 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $24, $pop81 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $23, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push104=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop104 -; NO-SIMD128-FAST-NEXT: i32.const $push103=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $24, $pop103 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $24, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push102=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push42=, $9, $pop102 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $25, $pop101 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $24, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop79 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push35=, $pop34, 
$pop33 +; NO-SIMD128-FAST-NEXT: i32.select $push36=, $9, $25, $pop35 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $10, $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $26, $pop77 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.select $push40=, $10, $26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop75 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push43=, $pop42, $pop41 -; NO-SIMD128-FAST-NEXT: i32.select $push44=, $9, $25, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push46=, $10, $pop100 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push45=, $26, $pop99 +; NO-SIMD128-FAST-NEXT: i32.select $push44=, $11, $27, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $12, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $28, $pop73 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push47=, $pop46, $pop45 -; NO-SIMD128-FAST-NEXT: i32.select $push48=, $10, $26, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push52=, $11, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $27, $pop97 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push53=, $pop52, $pop51 -; NO-SIMD128-FAST-NEXT: i32.select $push54=, $11, $27, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push58=, $12, $pop96 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push57=, $28, $pop95 +; NO-SIMD128-FAST-NEXT: i32.select $push48=, $12, $28, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push50=, $13, $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $29, $pop71 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push51=, $pop50, $pop49 +; NO-SIMD128-FAST-NEXT: i32.select $push52=, $13, $29, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push54=, $14, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $30, $pop69 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push55=, $pop54, $pop53 +; NO-SIMD128-FAST-NEXT: i32.select $push56=, $14, $30, $pop55 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push58=, $15, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, 
$31, $pop67 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push59=, $pop58, $pop57 -; NO-SIMD128-FAST-NEXT: i32.select $push60=, $12, $28, $pop59 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push64=, $13, $pop94 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $29, $pop93 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push65=, $pop64, $pop63 -; NO-SIMD128-FAST-NEXT: i32.select $push66=, $13, $29, $pop65 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push70=, $14, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push69=, $30, $pop91 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push71=, $pop70, $pop69 -; NO-SIMD128-FAST-NEXT: i32.select $push72=, $14, $30, $pop71 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push76=, $15, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push75=, $31, $pop89 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push77=, $pop76, $pop75 -; NO-SIMD128-FAST-NEXT: i32.select $push78=, $15, $31, $pop77 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push82=, $16, $pop88 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push81=, $32, $pop87 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push83=, $pop82, $pop81 -; NO-SIMD128-FAST-NEXT: i32.select $push84=, $16, $32, $pop83 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: i32.select $push60=, $15, $31, $pop59 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push62=, $16, $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push61=, $32, $pop65 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push63=, $pop62, $pop61 +; NO-SIMD128-FAST-NEXT: i32.select $push64=, $16, $32, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64 ; NO-SIMD128-FAST-NEXT: return %c = icmp ugt <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y @@ -1611,156 +1303,134 @@ define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: avgr_u_v16i8: ; NO-SIMD128: .functype avgr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.add $push2=, $16, $32 -; NO-SIMD128-NEXT: i32.const $push3=, 1 -; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 254 -; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 -; NO-SIMD128-NEXT: i32.const $push133=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop133 -; NO-SIMD128-NEXT: i32.store8 
0($pop1), $pop7 -; NO-SIMD128-NEXT: i32.const $push8=, 14 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push10=, $15, $31 -; NO-SIMD128-NEXT: i32.const $push132=, 1 -; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop132 -; NO-SIMD128-NEXT: i32.const $push131=, 254 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop131 -; NO-SIMD128-NEXT: i32.const $push130=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop130 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop13 -; NO-SIMD128-NEXT: i32.const $push14=, 13 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push16=, $14, $30 -; NO-SIMD128-NEXT: i32.const $push129=, 1 -; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop129 -; NO-SIMD128-NEXT: i32.const $push128=, 254 -; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop128 -; NO-SIMD128-NEXT: i32.const $push127=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop127 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop19 -; NO-SIMD128-NEXT: i32.const $push20=, 12 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.add $push22=, $13, $29 -; NO-SIMD128-NEXT: i32.const $push126=, 1 -; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop126 -; NO-SIMD128-NEXT: i32.const $push125=, 254 -; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop125 -; NO-SIMD128-NEXT: i32.const $push124=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop124 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $pop25 -; NO-SIMD128-NEXT: i32.const $push26=, 11 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.add $push28=, $12, $28 -; NO-SIMD128-NEXT: i32.const $push123=, 1 -; NO-SIMD128-NEXT: i32.add $push29=, $pop28, $pop123 -; NO-SIMD128-NEXT: i32.const $push122=, 254 -; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $pop122 -; NO-SIMD128-NEXT: i32.const $push121=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push31=, $pop30, $pop121 -; NO-SIMD128-NEXT: i32.store8 0($pop27), $pop31 -; NO-SIMD128-NEXT: i32.const $push32=, 10 -; NO-SIMD128-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-NEXT: i32.add $push34=, $11, $27 -; NO-SIMD128-NEXT: i32.const $push120=, 1 -; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop120 -; NO-SIMD128-NEXT: i32.const $push119=, 254 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop119 -; NO-SIMD128-NEXT: i32.const $push118=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop118 -; NO-SIMD128-NEXT: i32.store8 0($pop33), $pop37 -; NO-SIMD128-NEXT: i32.const $push38=, 9 -; NO-SIMD128-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-NEXT: i32.add $push40=, $10, $26 -; NO-SIMD128-NEXT: i32.const $push117=, 1 -; NO-SIMD128-NEXT: i32.add $push41=, $pop40, $pop117 -; NO-SIMD128-NEXT: i32.const $push116=, 254 -; NO-SIMD128-NEXT: i32.and $push42=, $pop41, $pop116 -; NO-SIMD128-NEXT: i32.const $push115=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop115 -; NO-SIMD128-NEXT: i32.store8 0($pop39), $pop43 -; NO-SIMD128-NEXT: i32.add $push44=, $9, $25 -; NO-SIMD128-NEXT: i32.const $push114=, 1 -; NO-SIMD128-NEXT: i32.add $push45=, $pop44, $pop114 -; NO-SIMD128-NEXT: i32.const $push113=, 254 -; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $pop113 -; NO-SIMD128-NEXT: i32.const $push112=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push47=, $pop46, $pop112 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop47 -; NO-SIMD128-NEXT: i32.const $push48=, 7 -; NO-SIMD128-NEXT: i32.add $push49=, $0, $pop48 -; NO-SIMD128-NEXT: i32.add $push50=, $8, $24 +; NO-SIMD128-NEXT: i32.add $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.const $push1=, 1 +; NO-SIMD128-NEXT: i32.add 
$push2=, $pop0, $pop1 +; NO-SIMD128-NEXT: i32.const $push3=, 254 +; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3 ; NO-SIMD128-NEXT: i32.const $push111=, 1 -; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop111 -; NO-SIMD128-NEXT: i32.const $push110=, 254 -; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop110 -; NO-SIMD128-NEXT: i32.const $push109=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop109 -; NO-SIMD128-NEXT: i32.store8 0($pop49), $pop53 -; NO-SIMD128-NEXT: i32.const $push54=, 6 -; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-NEXT: i32.add $push56=, $7, $23 +; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop111 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $15, $31 +; NO-SIMD128-NEXT: i32.const $push110=, 1 +; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop110 +; NO-SIMD128-NEXT: i32.const $push109=, 254 +; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop109 ; NO-SIMD128-NEXT: i32.const $push108=, 1 -; NO-SIMD128-NEXT: i32.add $push57=, $pop56, $pop108 -; NO-SIMD128-NEXT: i32.const $push107=, 254 -; NO-SIMD128-NEXT: i32.and $push58=, $pop57, $pop107 -; NO-SIMD128-NEXT: i32.const $push106=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push59=, $pop58, $pop106 -; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop59 -; NO-SIMD128-NEXT: i32.const $push60=, 5 -; NO-SIMD128-NEXT: i32.add $push61=, $0, $pop60 -; NO-SIMD128-NEXT: i32.add $push62=, $6, $22 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop108 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop9 +; NO-SIMD128-NEXT: i32.add $push10=, $14, $30 +; NO-SIMD128-NEXT: i32.const $push107=, 1 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop107 +; NO-SIMD128-NEXT: i32.const $push106=, 254 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop106 ; NO-SIMD128-NEXT: i32.const $push105=, 1 -; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop105 -; NO-SIMD128-NEXT: i32.const $push104=, 254 -; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop104 -; NO-SIMD128-NEXT: i32.const $push103=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop103 -; NO-SIMD128-NEXT: i32.store8 0($pop61), $pop65 -; NO-SIMD128-NEXT: i32.add $push66=, $5, $21 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop105 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-NEXT: i32.add $push14=, $13, $29 +; NO-SIMD128-NEXT: i32.const $push104=, 1 +; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop104 +; NO-SIMD128-NEXT: i32.const $push103=, 254 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop103 ; NO-SIMD128-NEXT: i32.const $push102=, 1 -; NO-SIMD128-NEXT: i32.add $push67=, $pop66, $pop102 -; NO-SIMD128-NEXT: i32.const $push101=, 254 -; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $pop101 -; NO-SIMD128-NEXT: i32.const $push100=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push69=, $pop68, $pop100 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop69 -; NO-SIMD128-NEXT: i32.const $push70=, 3 -; NO-SIMD128-NEXT: i32.add $push71=, $0, $pop70 -; NO-SIMD128-NEXT: i32.add $push72=, $4, $20 +; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop102 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop17 +; NO-SIMD128-NEXT: i32.add $push18=, $12, $28 +; NO-SIMD128-NEXT: i32.const $push101=, 1 +; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop101 +; NO-SIMD128-NEXT: i32.const $push100=, 254 +; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop100 ; NO-SIMD128-NEXT: i32.const $push99=, 1 -; NO-SIMD128-NEXT: i32.add $push73=, $pop72, $pop99 -; NO-SIMD128-NEXT: i32.const $push98=, 254 -; NO-SIMD128-NEXT: i32.and $push74=, $pop73, $pop98 -; NO-SIMD128-NEXT: i32.const $push97=, 1 -; 
NO-SIMD128-NEXT: i32.shr_u $push75=, $pop74, $pop97 -; NO-SIMD128-NEXT: i32.store8 0($pop71), $pop75 -; NO-SIMD128-NEXT: i32.add $push76=, $3, $19 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop99 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop21 +; NO-SIMD128-NEXT: i32.add $push22=, $11, $27 +; NO-SIMD128-NEXT: i32.const $push98=, 1 +; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop98 +; NO-SIMD128-NEXT: i32.const $push97=, 254 +; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop97 ; NO-SIMD128-NEXT: i32.const $push96=, 1 -; NO-SIMD128-NEXT: i32.add $push77=, $pop76, $pop96 -; NO-SIMD128-NEXT: i32.const $push95=, 254 -; NO-SIMD128-NEXT: i32.and $push78=, $pop77, $pop95 -; NO-SIMD128-NEXT: i32.const $push94=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push79=, $pop78, $pop94 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop79 -; NO-SIMD128-NEXT: i32.add $push80=, $2, $18 +; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop96 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop25 +; NO-SIMD128-NEXT: i32.add $push26=, $10, $26 +; NO-SIMD128-NEXT: i32.const $push95=, 1 +; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop95 +; NO-SIMD128-NEXT: i32.const $push94=, 254 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop94 ; NO-SIMD128-NEXT: i32.const $push93=, 1 -; NO-SIMD128-NEXT: i32.add $push81=, $pop80, $pop93 -; NO-SIMD128-NEXT: i32.const $push92=, 254 -; NO-SIMD128-NEXT: i32.and $push82=, $pop81, $pop92 -; NO-SIMD128-NEXT: i32.const $push91=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push83=, $pop82, $pop91 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop83 -; NO-SIMD128-NEXT: i32.add $push84=, $1, $17 +; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop93 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop29 +; NO-SIMD128-NEXT: i32.add $push30=, $9, $25 +; NO-SIMD128-NEXT: i32.const $push92=, 1 +; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop92 +; NO-SIMD128-NEXT: i32.const $push91=, 254 +; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop91 ; NO-SIMD128-NEXT: i32.const $push90=, 1 -; NO-SIMD128-NEXT: i32.add $push85=, $pop84, $pop90 -; NO-SIMD128-NEXT: i32.const $push89=, 254 -; NO-SIMD128-NEXT: i32.and $push86=, $pop85, $pop89 -; NO-SIMD128-NEXT: i32.const $push88=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push87=, $pop86, $pop88 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop87 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop90 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop33 +; NO-SIMD128-NEXT: i32.add $push34=, $8, $24 +; NO-SIMD128-NEXT: i32.const $push89=, 1 +; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop89 +; NO-SIMD128-NEXT: i32.const $push88=, 254 +; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop88 +; NO-SIMD128-NEXT: i32.const $push87=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop87 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop37 +; NO-SIMD128-NEXT: i32.add $push38=, $7, $23 +; NO-SIMD128-NEXT: i32.const $push86=, 1 +; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop86 +; NO-SIMD128-NEXT: i32.const $push85=, 254 +; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop85 +; NO-SIMD128-NEXT: i32.const $push84=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop84 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop41 +; NO-SIMD128-NEXT: i32.add $push42=, $6, $22 +; NO-SIMD128-NEXT: i32.const $push83=, 1 +; NO-SIMD128-NEXT: i32.add $push43=, $pop42, $pop83 +; NO-SIMD128-NEXT: i32.const $push82=, 254 +; NO-SIMD128-NEXT: i32.and $push44=, $pop43, $pop82 +; NO-SIMD128-NEXT: i32.const $push81=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push45=, $pop44, $pop81 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop45 +; NO-SIMD128-NEXT: i32.add $push46=, $5, $21 +; 
NO-SIMD128-NEXT: i32.const $push80=, 1 +; NO-SIMD128-NEXT: i32.add $push47=, $pop46, $pop80 +; NO-SIMD128-NEXT: i32.const $push79=, 254 +; NO-SIMD128-NEXT: i32.and $push48=, $pop47, $pop79 +; NO-SIMD128-NEXT: i32.const $push78=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push49=, $pop48, $pop78 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop49 +; NO-SIMD128-NEXT: i32.add $push50=, $4, $20 +; NO-SIMD128-NEXT: i32.const $push77=, 1 +; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop77 +; NO-SIMD128-NEXT: i32.const $push76=, 254 +; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop76 +; NO-SIMD128-NEXT: i32.const $push75=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop75 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop53 +; NO-SIMD128-NEXT: i32.add $push54=, $3, $19 +; NO-SIMD128-NEXT: i32.const $push74=, 1 +; NO-SIMD128-NEXT: i32.add $push55=, $pop54, $pop74 +; NO-SIMD128-NEXT: i32.const $push73=, 254 +; NO-SIMD128-NEXT: i32.and $push56=, $pop55, $pop73 +; NO-SIMD128-NEXT: i32.const $push72=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push57=, $pop56, $pop72 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop57 +; NO-SIMD128-NEXT: i32.add $push58=, $2, $18 +; NO-SIMD128-NEXT: i32.const $push71=, 1 +; NO-SIMD128-NEXT: i32.add $push59=, $pop58, $pop71 +; NO-SIMD128-NEXT: i32.const $push70=, 254 +; NO-SIMD128-NEXT: i32.and $push60=, $pop59, $pop70 +; NO-SIMD128-NEXT: i32.const $push69=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push61=, $pop60, $pop69 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop61 +; NO-SIMD128-NEXT: i32.add $push62=, $1, $17 +; NO-SIMD128-NEXT: i32.const $push68=, 1 +; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop68 +; NO-SIMD128-NEXT: i32.const $push67=, 254 +; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop67 +; NO-SIMD128-NEXT: i32.const $push66=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop66 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop65 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: avgr_u_v16i8: @@ -1771,151 +1441,129 @@ define <16 x i8> @avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 ; NO-SIMD128-FAST-NEXT: i32.const $push3=, 254 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push133=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop133 +; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop111 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop5 ; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $18 -; NO-SIMD128-FAST-NEXT: i32.const $push132=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop132 -; NO-SIMD128-FAST-NEXT: i32.const $push131=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop131 -; NO-SIMD128-FAST-NEXT: i32.const $push130=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop130 +; NO-SIMD128-FAST-NEXT: i32.const $push110=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop110 +; NO-SIMD128-FAST-NEXT: i32.const $push109=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop109 +; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop108 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $19 -; NO-SIMD128-FAST-NEXT: i32.const $push129=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop129 -; NO-SIMD128-FAST-NEXT: i32.const $push128=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop128 -; NO-SIMD128-FAST-NEXT: i32.const $push127=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop127 -; 
NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.const $push126=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop126 -; NO-SIMD128-FAST-NEXT: i32.const $push125=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop125 -; NO-SIMD128-FAST-NEXT: i32.const $push124=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop124 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop19 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.const $push123=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop123 -; NO-SIMD128-FAST-NEXT: i32.const $push122=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop122 -; NO-SIMD128-FAST-NEXT: i32.const $push121=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop121 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.const $push120=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop120 -; NO-SIMD128-FAST-NEXT: i32.const $push119=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop119 -; NO-SIMD128-FAST-NEXT: i32.const $push118=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop118 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.const $push117=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop117 -; NO-SIMD128-FAST-NEXT: i32.const $push116=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop116 -; NO-SIMD128-FAST-NEXT: i32.const $push115=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop115 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.const $push114=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop114 -; NO-SIMD128-FAST-NEXT: i32.const $push113=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop113 -; NO-SIMD128-FAST-NEXT: i32.const $push112=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop112 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop41 -; NO-SIMD128-FAST-NEXT: i32.add $push42=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop111 -; NO-SIMD128-FAST-NEXT: i32.const $push110=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop110 -; NO-SIMD128-FAST-NEXT: i32.const $push109=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop109 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop45 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46 -; NO-SIMD128-FAST-NEXT: i32.add $push48=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push49=, $pop48, $pop108 -; NO-SIMD128-FAST-NEXT: i32.const $push107=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push50=, $pop49, $pop107 -; NO-SIMD128-FAST-NEXT: i32.const $push106=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push51=, $pop50, $pop106 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop51 
-; NO-SIMD128-FAST-NEXT: i32.const $push52=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push53=, $0, $pop52 -; NO-SIMD128-FAST-NEXT: i32.add $push54=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push107=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop107 +; NO-SIMD128-FAST-NEXT: i32.const $push106=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop106 ; NO-SIMD128-FAST-NEXT: i32.const $push105=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop105 -; NO-SIMD128-FAST-NEXT: i32.const $push104=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop104 -; NO-SIMD128-FAST-NEXT: i32.const $push103=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop103 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop53), $pop57 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push59=, $0, $pop58 -; NO-SIMD128-FAST-NEXT: i32.add $push60=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop105 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.const $push104=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop104 +; NO-SIMD128-FAST-NEXT: i32.const $push103=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop103 ; NO-SIMD128-FAST-NEXT: i32.const $push102=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push61=, $pop60, $pop102 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push62=, $pop61, $pop101 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop100 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop59), $pop63 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64 -; NO-SIMD128-FAST-NEXT: i32.add $push66=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop102 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop100 ; NO-SIMD128-FAST-NEXT: i32.const $push99=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push67=, $pop66, $pop99 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push69=, $pop68, $pop97 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop69 -; NO-SIMD128-FAST-NEXT: i32.const $push70=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push71=, $0, $pop70 -; NO-SIMD128-FAST-NEXT: i32.add $push72=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop99 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop98 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop97 ; NO-SIMD128-FAST-NEXT: i32.const $push96=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push73=, $pop72, $pop96 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push74=, $pop73, $pop95 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push75=, $pop74, $pop94 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop71), $pop75 -; NO-SIMD128-FAST-NEXT: i32.const $push76=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push77=, 
$0, $pop76 -; NO-SIMD128-FAST-NEXT: i32.add $push78=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop96 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop25 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop95 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop94 ; NO-SIMD128-FAST-NEXT: i32.const $push93=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push79=, $pop78, $pop93 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push80=, $pop79, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push81=, $pop80, $pop91 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop77), $pop81 -; NO-SIMD128-FAST-NEXT: i32.const $push82=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push83=, $0, $pop82 -; NO-SIMD128-FAST-NEXT: i32.add $push84=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop93 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop29 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop91 ; NO-SIMD128-FAST-NEXT: i32.const $push90=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push85=, $pop84, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push86=, $pop85, $pop89 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push87=, $pop86, $pop88 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop83), $pop87 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop90 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop33 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push35=, $pop34, $pop89 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push36=, $pop35, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push37=, $pop36, $pop87 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop37 +; NO-SIMD128-FAST-NEXT: i32.add $push38=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop86 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop85 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop84 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop41 +; NO-SIMD128-FAST-NEXT: i32.add $push42=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop83 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop82 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop81 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.add $push46=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push47=, $pop46, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $pop79 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push49=, $pop48, $pop78 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop49 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $13, $29 
+; NO-SIMD128-FAST-NEXT: i32.const $push77=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push51=, $pop50, $pop77 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push52=, $pop51, $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop75 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop53 +; NO-SIMD128-FAST-NEXT: i32.add $push54=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop73 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop72 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop57 +; NO-SIMD128-FAST-NEXT: i32.add $push58=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push59=, $pop58, $pop71 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push60=, $pop59, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push61=, $pop60, $pop69 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop61 +; NO-SIMD128-FAST-NEXT: i32.add $push62=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push63=, $pop62, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push64=, $pop63, $pop67 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push65=, $pop64, $pop66 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop65 ; NO-SIMD128-FAST-NEXT: return %a = add nuw <16 x i8> %x, %y %b = add nuw <16 x i8> %a, @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: avgr_u_v16i8_wrap: ; NO-SIMD128: .functype avgr_u_v16i8_wrap (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.add $push2=, $16, $32 -; NO-SIMD128-NEXT: i32.const $push3=, 1 -; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 254 -; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 -; NO-SIMD128-NEXT: i32.const $push133=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop133 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $pop7 -; NO-SIMD128-NEXT: i32.const $push8=, 14 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push10=, $15, $31 -; NO-SIMD128-NEXT: i32.const $push132=, 1 -; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop132 -; NO-SIMD128-NEXT: i32.const $push131=, 254 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop131 -; NO-SIMD128-NEXT: i32.const $push130=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop130 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop13 -; NO-SIMD128-NEXT: i32.const $push14=, 13 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push16=, $14, $30 -; NO-SIMD128-NEXT: i32.const $push129=, 1 -; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop129 -; NO-SIMD128-NEXT: i32.const $push128=, 254 -; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop128 -; NO-SIMD128-NEXT: i32.const $push127=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop127 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop19 -; NO-SIMD128-NEXT: i32.const $push20=, 12 -; NO-SIMD128-NEXT: i32.add 
$push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.add $push22=, $13, $29 -; NO-SIMD128-NEXT: i32.const $push126=, 1 -; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop126 -; NO-SIMD128-NEXT: i32.const $push125=, 254 -; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop125 -; NO-SIMD128-NEXT: i32.const $push124=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop124 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $pop25 -; NO-SIMD128-NEXT: i32.const $push26=, 11 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.add $push28=, $12, $28 -; NO-SIMD128-NEXT: i32.const $push123=, 1 -; NO-SIMD128-NEXT: i32.add $push29=, $pop28, $pop123 -; NO-SIMD128-NEXT: i32.const $push122=, 254 -; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $pop122 -; NO-SIMD128-NEXT: i32.const $push121=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push31=, $pop30, $pop121 -; NO-SIMD128-NEXT: i32.store8 0($pop27), $pop31 -; NO-SIMD128-NEXT: i32.const $push32=, 10 -; NO-SIMD128-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-NEXT: i32.add $push34=, $11, $27 -; NO-SIMD128-NEXT: i32.const $push120=, 1 -; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop120 -; NO-SIMD128-NEXT: i32.const $push119=, 254 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop119 -; NO-SIMD128-NEXT: i32.const $push118=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop118 -; NO-SIMD128-NEXT: i32.store8 0($pop33), $pop37 -; NO-SIMD128-NEXT: i32.const $push38=, 9 -; NO-SIMD128-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-NEXT: i32.add $push40=, $10, $26 -; NO-SIMD128-NEXT: i32.const $push117=, 1 -; NO-SIMD128-NEXT: i32.add $push41=, $pop40, $pop117 -; NO-SIMD128-NEXT: i32.const $push116=, 254 -; NO-SIMD128-NEXT: i32.and $push42=, $pop41, $pop116 -; NO-SIMD128-NEXT: i32.const $push115=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop115 -; NO-SIMD128-NEXT: i32.store8 0($pop39), $pop43 -; NO-SIMD128-NEXT: i32.add $push44=, $9, $25 -; NO-SIMD128-NEXT: i32.const $push114=, 1 -; NO-SIMD128-NEXT: i32.add $push45=, $pop44, $pop114 -; NO-SIMD128-NEXT: i32.const $push113=, 254 -; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $pop113 -; NO-SIMD128-NEXT: i32.const $push112=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push47=, $pop46, $pop112 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop47 -; NO-SIMD128-NEXT: i32.const $push48=, 7 -; NO-SIMD128-NEXT: i32.add $push49=, $0, $pop48 -; NO-SIMD128-NEXT: i32.add $push50=, $8, $24 +; NO-SIMD128-NEXT: i32.add $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.const $push1=, 1 +; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-NEXT: i32.const $push3=, 254 +; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3 ; NO-SIMD128-NEXT: i32.const $push111=, 1 -; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop111 -; NO-SIMD128-NEXT: i32.const $push110=, 254 -; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop110 -; NO-SIMD128-NEXT: i32.const $push109=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop109 -; NO-SIMD128-NEXT: i32.store8 0($pop49), $pop53 -; NO-SIMD128-NEXT: i32.const $push54=, 6 -; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-NEXT: i32.add $push56=, $7, $23 +; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop111 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $15, $31 +; NO-SIMD128-NEXT: i32.const $push110=, 1 +; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop110 +; NO-SIMD128-NEXT: i32.const $push109=, 254 +; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop109 ; NO-SIMD128-NEXT: i32.const $push108=, 1 -; NO-SIMD128-NEXT: i32.add $push57=, $pop56, $pop108 -; NO-SIMD128-NEXT: 
i32.const $push107=, 254 -; NO-SIMD128-NEXT: i32.and $push58=, $pop57, $pop107 -; NO-SIMD128-NEXT: i32.const $push106=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push59=, $pop58, $pop106 -; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop59 -; NO-SIMD128-NEXT: i32.const $push60=, 5 -; NO-SIMD128-NEXT: i32.add $push61=, $0, $pop60 -; NO-SIMD128-NEXT: i32.add $push62=, $6, $22 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop108 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop9 +; NO-SIMD128-NEXT: i32.add $push10=, $14, $30 +; NO-SIMD128-NEXT: i32.const $push107=, 1 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop107 +; NO-SIMD128-NEXT: i32.const $push106=, 254 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop106 ; NO-SIMD128-NEXT: i32.const $push105=, 1 -; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop105 -; NO-SIMD128-NEXT: i32.const $push104=, 254 -; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop104 -; NO-SIMD128-NEXT: i32.const $push103=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop103 -; NO-SIMD128-NEXT: i32.store8 0($pop61), $pop65 -; NO-SIMD128-NEXT: i32.add $push66=, $5, $21 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop105 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-NEXT: i32.add $push14=, $13, $29 +; NO-SIMD128-NEXT: i32.const $push104=, 1 +; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop104 +; NO-SIMD128-NEXT: i32.const $push103=, 254 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop103 ; NO-SIMD128-NEXT: i32.const $push102=, 1 -; NO-SIMD128-NEXT: i32.add $push67=, $pop66, $pop102 -; NO-SIMD128-NEXT: i32.const $push101=, 254 -; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $pop101 -; NO-SIMD128-NEXT: i32.const $push100=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push69=, $pop68, $pop100 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop69 -; NO-SIMD128-NEXT: i32.const $push70=, 3 -; NO-SIMD128-NEXT: i32.add $push71=, $0, $pop70 -; NO-SIMD128-NEXT: i32.add $push72=, $4, $20 +; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop102 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop17 +; NO-SIMD128-NEXT: i32.add $push18=, $12, $28 +; NO-SIMD128-NEXT: i32.const $push101=, 1 +; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop101 +; NO-SIMD128-NEXT: i32.const $push100=, 254 +; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop100 ; NO-SIMD128-NEXT: i32.const $push99=, 1 -; NO-SIMD128-NEXT: i32.add $push73=, $pop72, $pop99 -; NO-SIMD128-NEXT: i32.const $push98=, 254 -; NO-SIMD128-NEXT: i32.and $push74=, $pop73, $pop98 -; NO-SIMD128-NEXT: i32.const $push97=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push75=, $pop74, $pop97 -; NO-SIMD128-NEXT: i32.store8 0($pop71), $pop75 -; NO-SIMD128-NEXT: i32.add $push76=, $3, $19 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop99 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop21 +; NO-SIMD128-NEXT: i32.add $push22=, $11, $27 +; NO-SIMD128-NEXT: i32.const $push98=, 1 +; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop98 +; NO-SIMD128-NEXT: i32.const $push97=, 254 +; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop97 ; NO-SIMD128-NEXT: i32.const $push96=, 1 -; NO-SIMD128-NEXT: i32.add $push77=, $pop76, $pop96 -; NO-SIMD128-NEXT: i32.const $push95=, 254 -; NO-SIMD128-NEXT: i32.and $push78=, $pop77, $pop95 -; NO-SIMD128-NEXT: i32.const $push94=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push79=, $pop78, $pop94 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop79 -; NO-SIMD128-NEXT: i32.add $push80=, $2, $18 +; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop96 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop25 +; NO-SIMD128-NEXT: i32.add $push26=, $10, $26 +; NO-SIMD128-NEXT: i32.const $push95=, 1 
+; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop95 +; NO-SIMD128-NEXT: i32.const $push94=, 254 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop94 ; NO-SIMD128-NEXT: i32.const $push93=, 1 -; NO-SIMD128-NEXT: i32.add $push81=, $pop80, $pop93 -; NO-SIMD128-NEXT: i32.const $push92=, 254 -; NO-SIMD128-NEXT: i32.and $push82=, $pop81, $pop92 -; NO-SIMD128-NEXT: i32.const $push91=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push83=, $pop82, $pop91 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop83 -; NO-SIMD128-NEXT: i32.add $push84=, $1, $17 +; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop93 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop29 +; NO-SIMD128-NEXT: i32.add $push30=, $9, $25 +; NO-SIMD128-NEXT: i32.const $push92=, 1 +; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop92 +; NO-SIMD128-NEXT: i32.const $push91=, 254 +; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop91 ; NO-SIMD128-NEXT: i32.const $push90=, 1 -; NO-SIMD128-NEXT: i32.add $push85=, $pop84, $pop90 -; NO-SIMD128-NEXT: i32.const $push89=, 254 -; NO-SIMD128-NEXT: i32.and $push86=, $pop85, $pop89 -; NO-SIMD128-NEXT: i32.const $push88=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push87=, $pop86, $pop88 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop87 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop90 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop33 +; NO-SIMD128-NEXT: i32.add $push34=, $8, $24 +; NO-SIMD128-NEXT: i32.const $push89=, 1 +; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop89 +; NO-SIMD128-NEXT: i32.const $push88=, 254 +; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop88 +; NO-SIMD128-NEXT: i32.const $push87=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop87 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop37 +; NO-SIMD128-NEXT: i32.add $push38=, $7, $23 +; NO-SIMD128-NEXT: i32.const $push86=, 1 +; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop86 +; NO-SIMD128-NEXT: i32.const $push85=, 254 +; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop85 +; NO-SIMD128-NEXT: i32.const $push84=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop84 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop41 +; NO-SIMD128-NEXT: i32.add $push42=, $6, $22 +; NO-SIMD128-NEXT: i32.const $push83=, 1 +; NO-SIMD128-NEXT: i32.add $push43=, $pop42, $pop83 +; NO-SIMD128-NEXT: i32.const $push82=, 254 +; NO-SIMD128-NEXT: i32.and $push44=, $pop43, $pop82 +; NO-SIMD128-NEXT: i32.const $push81=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push45=, $pop44, $pop81 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop45 +; NO-SIMD128-NEXT: i32.add $push46=, $5, $21 +; NO-SIMD128-NEXT: i32.const $push80=, 1 +; NO-SIMD128-NEXT: i32.add $push47=, $pop46, $pop80 +; NO-SIMD128-NEXT: i32.const $push79=, 254 +; NO-SIMD128-NEXT: i32.and $push48=, $pop47, $pop79 +; NO-SIMD128-NEXT: i32.const $push78=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push49=, $pop48, $pop78 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop49 +; NO-SIMD128-NEXT: i32.add $push50=, $4, $20 +; NO-SIMD128-NEXT: i32.const $push77=, 1 +; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop77 +; NO-SIMD128-NEXT: i32.const $push76=, 254 +; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop76 +; NO-SIMD128-NEXT: i32.const $push75=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop75 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop53 +; NO-SIMD128-NEXT: i32.add $push54=, $3, $19 +; NO-SIMD128-NEXT: i32.const $push74=, 1 +; NO-SIMD128-NEXT: i32.add $push55=, $pop54, $pop74 +; NO-SIMD128-NEXT: i32.const $push73=, 254 +; NO-SIMD128-NEXT: i32.and $push56=, $pop55, $pop73 +; NO-SIMD128-NEXT: i32.const $push72=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push57=, $pop56, $pop72 +; NO-SIMD128-NEXT: 
i32.store8 2($0), $pop57 +; NO-SIMD128-NEXT: i32.add $push58=, $2, $18 +; NO-SIMD128-NEXT: i32.const $push71=, 1 +; NO-SIMD128-NEXT: i32.add $push59=, $pop58, $pop71 +; NO-SIMD128-NEXT: i32.const $push70=, 254 +; NO-SIMD128-NEXT: i32.and $push60=, $pop59, $pop70 +; NO-SIMD128-NEXT: i32.const $push69=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push61=, $pop60, $pop69 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop61 +; NO-SIMD128-NEXT: i32.add $push62=, $1, $17 +; NO-SIMD128-NEXT: i32.const $push68=, 1 +; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop68 +; NO-SIMD128-NEXT: i32.const $push67=, 254 +; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop67 +; NO-SIMD128-NEXT: i32.const $push66=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop66 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop65 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: avgr_u_v16i8_wrap: @@ -2109,151 +1735,129 @@ define <16 x i8> @avgr_u_v16i8_wrap(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 ; NO-SIMD128-FAST-NEXT: i32.const $push3=, 254 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push133=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop133 +; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop111 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop5 ; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $18 -; NO-SIMD128-FAST-NEXT: i32.const $push132=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop132 -; NO-SIMD128-FAST-NEXT: i32.const $push131=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop131 -; NO-SIMD128-FAST-NEXT: i32.const $push130=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop130 +; NO-SIMD128-FAST-NEXT: i32.const $push110=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop110 +; NO-SIMD128-FAST-NEXT: i32.const $push109=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop109 +; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop108 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $19 -; NO-SIMD128-FAST-NEXT: i32.const $push129=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop129 -; NO-SIMD128-FAST-NEXT: i32.const $push128=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop128 -; NO-SIMD128-FAST-NEXT: i32.const $push127=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop127 -; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.const $push126=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop126 -; NO-SIMD128-FAST-NEXT: i32.const $push125=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop125 -; NO-SIMD128-FAST-NEXT: i32.const $push124=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop124 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop19 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.const $push123=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop123 -; NO-SIMD128-FAST-NEXT: i32.const $push122=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop122 -; NO-SIMD128-FAST-NEXT: i32.const $push121=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop121 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 5 -; NO-SIMD128-FAST-NEXT: 
i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.const $push120=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop120 -; NO-SIMD128-FAST-NEXT: i32.const $push119=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop119 -; NO-SIMD128-FAST-NEXT: i32.const $push118=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop118 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.const $push117=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop117 -; NO-SIMD128-FAST-NEXT: i32.const $push116=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop116 -; NO-SIMD128-FAST-NEXT: i32.const $push115=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop115 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.const $push114=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop114 -; NO-SIMD128-FAST-NEXT: i32.const $push113=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop113 -; NO-SIMD128-FAST-NEXT: i32.const $push112=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop112 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop41 -; NO-SIMD128-FAST-NEXT: i32.add $push42=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.const $push111=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop111 -; NO-SIMD128-FAST-NEXT: i32.const $push110=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop110 -; NO-SIMD128-FAST-NEXT: i32.const $push109=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop109 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop45 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46 -; NO-SIMD128-FAST-NEXT: i32.add $push48=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.const $push108=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push49=, $pop48, $pop108 -; NO-SIMD128-FAST-NEXT: i32.const $push107=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push50=, $pop49, $pop107 -; NO-SIMD128-FAST-NEXT: i32.const $push106=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push51=, $pop50, $pop106 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop51 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push53=, $0, $pop52 -; NO-SIMD128-FAST-NEXT: i32.add $push54=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push107=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop107 +; NO-SIMD128-FAST-NEXT: i32.const $push106=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop106 ; NO-SIMD128-FAST-NEXT: i32.const $push105=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop105 -; NO-SIMD128-FAST-NEXT: i32.const $push104=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop104 -; NO-SIMD128-FAST-NEXT: i32.const $push103=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop103 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop53), $pop57 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push59=, $0, $pop58 -; NO-SIMD128-FAST-NEXT: i32.add $push60=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop105 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $20 +; 
NO-SIMD128-FAST-NEXT: i32.const $push104=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop104 +; NO-SIMD128-FAST-NEXT: i32.const $push103=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop103 ; NO-SIMD128-FAST-NEXT: i32.const $push102=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push61=, $pop60, $pop102 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push62=, $pop61, $pop101 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop100 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop59), $pop63 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64 -; NO-SIMD128-FAST-NEXT: i32.add $push66=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop102 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push101=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push100=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop100 ; NO-SIMD128-FAST-NEXT: i32.const $push99=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push67=, $pop66, $pop99 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push69=, $pop68, $pop97 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop69 -; NO-SIMD128-FAST-NEXT: i32.const $push70=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push71=, $0, $pop70 -; NO-SIMD128-FAST-NEXT: i32.add $push72=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop99 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push98=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop98 +; NO-SIMD128-FAST-NEXT: i32.const $push97=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop97 ; NO-SIMD128-FAST-NEXT: i32.const $push96=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push73=, $pop72, $pop96 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push74=, $pop73, $pop95 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push75=, $pop74, $pop94 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop71), $pop75 -; NO-SIMD128-FAST-NEXT: i32.const $push76=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push77=, $0, $pop76 -; NO-SIMD128-FAST-NEXT: i32.add $push78=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop96 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop25 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop95 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop94 ; NO-SIMD128-FAST-NEXT: i32.const $push93=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push79=, $pop78, $pop93 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push80=, $pop79, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push81=, $pop80, $pop91 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop77), $pop81 -; NO-SIMD128-FAST-NEXT: i32.const $push82=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push83=, $0, $pop82 -; NO-SIMD128-FAST-NEXT: i32.add $push84=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop93 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), 
$pop29 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop91 ; NO-SIMD128-FAST-NEXT: i32.const $push90=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push85=, $pop84, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 254 -; NO-SIMD128-FAST-NEXT: i32.and $push86=, $pop85, $pop89 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push87=, $pop86, $pop88 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop83), $pop87 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop90 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop33 +; NO-SIMD128-FAST-NEXT: i32.add $push34=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push35=, $pop34, $pop89 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push36=, $pop35, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push37=, $pop36, $pop87 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop37 +; NO-SIMD128-FAST-NEXT: i32.add $push38=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop86 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop85 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop84 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop41 +; NO-SIMD128-FAST-NEXT: i32.add $push42=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push43=, $pop42, $pop83 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push44=, $pop43, $pop82 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop81 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.add $push46=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push47=, $pop46, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $pop79 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push49=, $pop48, $pop78 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop49 +; NO-SIMD128-FAST-NEXT: i32.add $push50=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push51=, $pop50, $pop77 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push52=, $pop51, $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop75 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop53 +; NO-SIMD128-FAST-NEXT: i32.add $push54=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push55=, $pop54, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push56=, $pop55, $pop73 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push57=, $pop56, $pop72 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop57 +; NO-SIMD128-FAST-NEXT: i32.add $push58=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push59=, $pop58, $pop71 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push60=, $pop59, $pop70 +; 
NO-SIMD128-FAST-NEXT: i32.const $push69=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push61=, $pop60, $pop69 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop61 +; NO-SIMD128-FAST-NEXT: i32.add $push62=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push63=, $pop62, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 254 +; NO-SIMD128-FAST-NEXT: i32.and $push64=, $pop63, $pop67 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push65=, $pop64, $pop66 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop65 ; NO-SIMD128-FAST-NEXT: return %a = add <16 x i8> %x, %y %b = add <16 x i8> %a, @abs_v16i8(<16 x i8> %x) { ; NO-SIMD128-LABEL: abs_v16i8: ; NO-SIMD128: .functype abs_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 15 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend8_s $push0=, $16 ; NO-SIMD128-NEXT: i32.const $push1=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push117=, $pop0, $pop1 -; NO-SIMD128-NEXT: local.tee $push116=, $17=, $pop117 -; NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop116 +; NO-SIMD128-NEXT: i32.shr_s $push95=, $pop0, $pop1 +; NO-SIMD128-NEXT: local.tee $push94=, $17=, $pop95 +; NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop94 ; NO-SIMD128-NEXT: i32.sub $push3=, $pop2, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.extend8_s $push6=, $15 -; NO-SIMD128-NEXT: i32.const $push115=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push114=, $pop6, $pop115 -; NO-SIMD128-NEXT: local.tee $push113=, $16=, $pop114 -; NO-SIMD128-NEXT: i32.xor $push7=, $15, $pop113 -; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push14=, 13 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.extend8_s $push11=, $14 -; NO-SIMD128-NEXT: i32.const $push112=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push111=, $pop11, $pop112 -; NO-SIMD128-NEXT: local.tee $push110=, $16=, $pop111 -; NO-SIMD128-NEXT: i32.xor $push12=, $14, $pop110 -; NO-SIMD128-NEXT: i32.sub $push13=, $pop12, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop13 -; NO-SIMD128-NEXT: i32.const $push19=, 12 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.extend8_s $push16=, $13 -; NO-SIMD128-NEXT: i32.const $push109=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push108=, $pop16, $pop109 -; NO-SIMD128-NEXT: local.tee $push107=, $16=, $pop108 -; NO-SIMD128-NEXT: i32.xor $push17=, $13, $pop107 -; NO-SIMD128-NEXT: i32.sub $push18=, $pop17, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push24=, 11 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.extend8_s $push21=, $12 -; NO-SIMD128-NEXT: i32.const $push106=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push105=, $pop21, $pop106 -; NO-SIMD128-NEXT: local.tee $push104=, $16=, $pop105 -; NO-SIMD128-NEXT: i32.xor $push22=, $12, $pop104 -; NO-SIMD128-NEXT: i32.sub $push23=, $pop22, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push29=, 10 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.extend8_s $push26=, $11 -; NO-SIMD128-NEXT: i32.const $push103=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push102=, $pop26, $pop103 -; NO-SIMD128-NEXT: local.tee $push101=, $16=, $pop102 -; NO-SIMD128-NEXT: i32.xor 
$push27=, $11, $pop101 -; NO-SIMD128-NEXT: i32.sub $push28=, $pop27, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push34=, 9 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.extend8_s $push31=, $10 -; NO-SIMD128-NEXT: i32.const $push100=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push99=, $pop31, $pop100 -; NO-SIMD128-NEXT: local.tee $push98=, $16=, $pop99 -; NO-SIMD128-NEXT: i32.xor $push32=, $10, $pop98 -; NO-SIMD128-NEXT: i32.sub $push33=, $pop32, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.extend8_s $push36=, $9 -; NO-SIMD128-NEXT: i32.const $push97=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push96=, $pop36, $pop97 -; NO-SIMD128-NEXT: local.tee $push95=, $16=, $pop96 -; NO-SIMD128-NEXT: i32.xor $push37=, $9, $pop95 -; NO-SIMD128-NEXT: i32.sub $push38=, $pop37, $16 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop38 -; NO-SIMD128-NEXT: i32.const $push94=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop94 -; NO-SIMD128-NEXT: i32.extend8_s $push39=, $8 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $15 ; NO-SIMD128-NEXT: i32.const $push93=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push92=, $pop39, $pop93 +; NO-SIMD128-NEXT: i32.shr_s $push92=, $pop4, $pop93 ; NO-SIMD128-NEXT: local.tee $push91=, $16=, $pop92 -; NO-SIMD128-NEXT: i32.xor $push40=, $8, $pop91 -; NO-SIMD128-NEXT: i32.sub $push41=, $pop40, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop41 -; NO-SIMD128-NEXT: i32.const $push46=, 6 -; NO-SIMD128-NEXT: i32.add $push47=, $0, $pop46 -; NO-SIMD128-NEXT: i32.extend8_s $push43=, $7 +; NO-SIMD128-NEXT: i32.xor $push5=, $15, $pop91 +; NO-SIMD128-NEXT: i32.sub $push6=, $pop5, $16 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop6 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $14 ; NO-SIMD128-NEXT: i32.const $push90=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push89=, $pop43, $pop90 +; NO-SIMD128-NEXT: i32.shr_s $push89=, $pop7, $pop90 ; NO-SIMD128-NEXT: local.tee $push88=, $16=, $pop89 -; NO-SIMD128-NEXT: i32.xor $push44=, $7, $pop88 -; NO-SIMD128-NEXT: i32.sub $push45=, $pop44, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop47), $pop45 -; NO-SIMD128-NEXT: i32.const $push51=, 5 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.extend8_s $push48=, $6 +; NO-SIMD128-NEXT: i32.xor $push8=, $14, $pop88 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop8, $16 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop9 +; NO-SIMD128-NEXT: i32.extend8_s $push10=, $13 ; NO-SIMD128-NEXT: i32.const $push87=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push86=, $pop48, $pop87 +; NO-SIMD128-NEXT: i32.shr_s $push86=, $pop10, $pop87 ; NO-SIMD128-NEXT: local.tee $push85=, $16=, $pop86 -; NO-SIMD128-NEXT: i32.xor $push49=, $6, $pop85 -; NO-SIMD128-NEXT: i32.sub $push50=, $pop49, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.extend8_s $push53=, $5 +; NO-SIMD128-NEXT: i32.xor $push11=, $13, $pop85 +; NO-SIMD128-NEXT: i32.sub $push12=, $pop11, $16 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $12 ; NO-SIMD128-NEXT: i32.const $push84=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push83=, $pop53, $pop84 +; NO-SIMD128-NEXT: i32.shr_s $push83=, $pop13, $pop84 ; NO-SIMD128-NEXT: local.tee $push82=, $16=, $pop83 -; NO-SIMD128-NEXT: i32.xor $push54=, $5, $pop82 -; NO-SIMD128-NEXT: i32.sub $push55=, $pop54, $16 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop55 -; NO-SIMD128-NEXT: i32.const $push59=, 3 -; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 -; NO-SIMD128-NEXT: i32.extend8_s 
$push56=, $4 +; NO-SIMD128-NEXT: i32.xor $push14=, $12, $pop82 +; NO-SIMD128-NEXT: i32.sub $push15=, $pop14, $16 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop15 +; NO-SIMD128-NEXT: i32.extend8_s $push16=, $11 ; NO-SIMD128-NEXT: i32.const $push81=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push80=, $pop56, $pop81 +; NO-SIMD128-NEXT: i32.shr_s $push80=, $pop16, $pop81 ; NO-SIMD128-NEXT: local.tee $push79=, $16=, $pop80 -; NO-SIMD128-NEXT: i32.xor $push57=, $4, $pop79 -; NO-SIMD128-NEXT: i32.sub $push58=, $pop57, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-NEXT: i32.extend8_s $push61=, $3 +; NO-SIMD128-NEXT: i32.xor $push17=, $11, $pop79 +; NO-SIMD128-NEXT: i32.sub $push18=, $pop17, $16 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop18 +; NO-SIMD128-NEXT: i32.extend8_s $push19=, $10 ; NO-SIMD128-NEXT: i32.const $push78=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push77=, $pop61, $pop78 +; NO-SIMD128-NEXT: i32.shr_s $push77=, $pop19, $pop78 ; NO-SIMD128-NEXT: local.tee $push76=, $16=, $pop77 -; NO-SIMD128-NEXT: i32.xor $push62=, $3, $pop76 -; NO-SIMD128-NEXT: i32.sub $push63=, $pop62, $16 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop63 -; NO-SIMD128-NEXT: i32.extend8_s $push64=, $2 +; NO-SIMD128-NEXT: i32.xor $push20=, $10, $pop76 +; NO-SIMD128-NEXT: i32.sub $push21=, $pop20, $16 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop21 +; NO-SIMD128-NEXT: i32.extend8_s $push22=, $9 ; NO-SIMD128-NEXT: i32.const $push75=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push74=, $pop64, $pop75 +; NO-SIMD128-NEXT: i32.shr_s $push74=, $pop22, $pop75 ; NO-SIMD128-NEXT: local.tee $push73=, $16=, $pop74 -; NO-SIMD128-NEXT: i32.xor $push65=, $2, $pop73 -; NO-SIMD128-NEXT: i32.sub $push66=, $pop65, $16 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop66 -; NO-SIMD128-NEXT: i32.extend8_s $push67=, $1 +; NO-SIMD128-NEXT: i32.xor $push23=, $9, $pop73 +; NO-SIMD128-NEXT: i32.sub $push24=, $pop23, $16 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop24 +; NO-SIMD128-NEXT: i32.extend8_s $push25=, $8 ; NO-SIMD128-NEXT: i32.const $push72=, 7 -; NO-SIMD128-NEXT: i32.shr_s $push71=, $pop67, $pop72 +; NO-SIMD128-NEXT: i32.shr_s $push71=, $pop25, $pop72 ; NO-SIMD128-NEXT: local.tee $push70=, $16=, $pop71 -; NO-SIMD128-NEXT: i32.xor $push68=, $1, $pop70 -; NO-SIMD128-NEXT: i32.sub $push69=, $pop68, $16 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop69 +; NO-SIMD128-NEXT: i32.xor $push26=, $8, $pop70 +; NO-SIMD128-NEXT: i32.sub $push27=, $pop26, $16 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop27 +; NO-SIMD128-NEXT: i32.extend8_s $push28=, $7 +; NO-SIMD128-NEXT: i32.const $push69=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push68=, $pop28, $pop69 +; NO-SIMD128-NEXT: local.tee $push67=, $16=, $pop68 +; NO-SIMD128-NEXT: i32.xor $push29=, $7, $pop67 +; NO-SIMD128-NEXT: i32.sub $push30=, $pop29, $16 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop30 +; NO-SIMD128-NEXT: i32.extend8_s $push31=, $6 +; NO-SIMD128-NEXT: i32.const $push66=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push65=, $pop31, $pop66 +; NO-SIMD128-NEXT: local.tee $push64=, $16=, $pop65 +; NO-SIMD128-NEXT: i32.xor $push32=, $6, $pop64 +; NO-SIMD128-NEXT: i32.sub $push33=, $pop32, $16 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop33 +; NO-SIMD128-NEXT: i32.extend8_s $push34=, $5 +; NO-SIMD128-NEXT: i32.const $push63=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push62=, $pop34, $pop63 +; NO-SIMD128-NEXT: local.tee $push61=, $16=, $pop62 +; NO-SIMD128-NEXT: i32.xor $push35=, $5, $pop61 +; NO-SIMD128-NEXT: i32.sub $push36=, $pop35, $16 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop36 +; NO-SIMD128-NEXT: i32.extend8_s $push37=, $4 +; NO-SIMD128-NEXT: 
i32.const $push60=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push59=, $pop37, $pop60 +; NO-SIMD128-NEXT: local.tee $push58=, $16=, $pop59 +; NO-SIMD128-NEXT: i32.xor $push38=, $4, $pop58 +; NO-SIMD128-NEXT: i32.sub $push39=, $pop38, $16 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push40=, $3 +; NO-SIMD128-NEXT: i32.const $push57=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push56=, $pop40, $pop57 +; NO-SIMD128-NEXT: local.tee $push55=, $16=, $pop56 +; NO-SIMD128-NEXT: i32.xor $push41=, $3, $pop55 +; NO-SIMD128-NEXT: i32.sub $push42=, $pop41, $16 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop42 +; NO-SIMD128-NEXT: i32.extend8_s $push43=, $2 +; NO-SIMD128-NEXT: i32.const $push54=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push53=, $pop43, $pop54 +; NO-SIMD128-NEXT: local.tee $push52=, $16=, $pop53 +; NO-SIMD128-NEXT: i32.xor $push44=, $2, $pop52 +; NO-SIMD128-NEXT: i32.sub $push45=, $pop44, $16 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop45 +; NO-SIMD128-NEXT: i32.extend8_s $push46=, $1 +; NO-SIMD128-NEXT: i32.const $push51=, 7 +; NO-SIMD128-NEXT: i32.shr_s $push50=, $pop46, $pop51 +; NO-SIMD128-NEXT: local.tee $push49=, $16=, $pop50 +; NO-SIMD128-NEXT: i32.xor $push47=, $1, $pop49 +; NO-SIMD128-NEXT: i32.sub $push48=, $pop47, $16 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop48 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: abs_v16i8: @@ -2420,138 +2002,116 @@ define <16 x i8> @abs_v16i8(<16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push0=, $1 ; NO-SIMD128-FAST-NEXT: i32.const $push1=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push117=, $pop0, $pop1 -; NO-SIMD128-FAST-NEXT: local.tee $push116=, $17=, $pop117 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop116 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push95=, $pop0, $pop1 +; NO-SIMD128-FAST-NEXT: local.tee $push94=, $17=, $pop95 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop94 ; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop2, $17 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push4=, $2 -; NO-SIMD128-FAST-NEXT: i32.const $push115=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push114=, $pop4, $pop115 -; NO-SIMD128-FAST-NEXT: local.tee $push113=, $1=, $pop114 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop113 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push92=, $pop4, $pop93 +; NO-SIMD128-FAST-NEXT: local.tee $push91=, $1=, $pop92 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop91 ; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push7=, $3 -; NO-SIMD128-FAST-NEXT: i32.const $push112=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push111=, $pop7, $pop112 -; NO-SIMD128-FAST-NEXT: local.tee $push110=, $2=, $pop111 -; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop110 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push89=, $pop7, $pop90 +; NO-SIMD128-FAST-NEXT: local.tee $push88=, $2=, $pop89 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop88 ; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop8, $2 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push10=, $4 -; NO-SIMD128-FAST-NEXT: i32.const $push109=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push108=, $pop10, $pop109 -; NO-SIMD128-FAST-NEXT: local.tee $push107=, $3=, $pop108 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop107 -; 
NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop11, $3 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $5 -; NO-SIMD128-FAST-NEXT: i32.const $push106=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push105=, $pop15, $pop106 -; NO-SIMD128-FAST-NEXT: local.tee $push104=, $4=, $pop105 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $5, $pop104 -; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop16, $4 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $6 -; NO-SIMD128-FAST-NEXT: i32.const $push103=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push102=, $pop18, $pop103 -; NO-SIMD128-FAST-NEXT: local.tee $push101=, $5=, $pop102 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $6, $pop101 -; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop19, $5 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $7 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push99=, $pop23, $pop100 -; NO-SIMD128-FAST-NEXT: local.tee $push98=, $6=, $pop99 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $7, $pop98 -; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $pop24, $6 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop97 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $8 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push95=, $pop28, $pop96 -; NO-SIMD128-FAST-NEXT: local.tee $push94=, $7=, $pop95 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop94 -; NO-SIMD128-FAST-NEXT: i32.sub $push30=, $pop29, $7 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop30 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $9 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push92=, $pop32, $pop93 -; NO-SIMD128-FAST-NEXT: local.tee $push91=, $8=, $pop92 -; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $9, $pop91 -; NO-SIMD128-FAST-NEXT: i32.sub $push34=, $pop33, $8 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push89=, $pop35, $pop90 -; NO-SIMD128-FAST-NEXT: local.tee $push88=, $9=, $pop89 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $10, $pop88 -; NO-SIMD128-FAST-NEXT: i32.sub $push37=, $pop36, $9 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $11 ; NO-SIMD128-FAST-NEXT: i32.const $push87=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push86=, $pop40, $pop87 -; NO-SIMD128-FAST-NEXT: local.tee $push85=, $10=, $pop86 -; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $11, $pop85 -; NO-SIMD128-FAST-NEXT: i32.sub $push42=, $pop41, $10 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $12 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push86=, $pop10, $pop87 +; NO-SIMD128-FAST-NEXT: local.tee $push85=, $3=, $pop86 +; 
NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop85 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop11, $3 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $5 ; NO-SIMD128-FAST-NEXT: i32.const $push84=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push83=, $pop45, $pop84 -; NO-SIMD128-FAST-NEXT: local.tee $push82=, $11=, $pop83 -; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $12, $pop82 -; NO-SIMD128-FAST-NEXT: i32.sub $push47=, $pop46, $11 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $13 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push83=, $pop13, $pop84 +; NO-SIMD128-FAST-NEXT: local.tee $push82=, $4=, $pop83 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $5, $pop82 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $pop14, $4 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $6 ; NO-SIMD128-FAST-NEXT: i32.const $push81=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push80=, $pop50, $pop81 -; NO-SIMD128-FAST-NEXT: local.tee $push79=, $12=, $pop80 -; NO-SIMD128-FAST-NEXT: i32.xor $push51=, $13, $pop79 -; NO-SIMD128-FAST-NEXT: i32.sub $push52=, $pop51, $12 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop54), $pop52 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push59=, $0, $pop58 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push55=, $14 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push80=, $pop16, $pop81 +; NO-SIMD128-FAST-NEXT: local.tee $push79=, $5=, $pop80 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $6, $pop79 +; NO-SIMD128-FAST-NEXT: i32.sub $push18=, $pop17, $5 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $7 ; NO-SIMD128-FAST-NEXT: i32.const $push78=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push77=, $pop55, $pop78 -; NO-SIMD128-FAST-NEXT: local.tee $push76=, $13=, $pop77 -; NO-SIMD128-FAST-NEXT: i32.xor $push56=, $14, $pop76 -; NO-SIMD128-FAST-NEXT: i32.sub $push57=, $pop56, $13 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop59), $pop57 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push60=, $15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push77=, $pop19, $pop78 +; NO-SIMD128-FAST-NEXT: local.tee $push76=, $6=, $pop77 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $7, $pop76 +; NO-SIMD128-FAST-NEXT: i32.sub $push21=, $pop20, $6 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $8 ; NO-SIMD128-FAST-NEXT: i32.const $push75=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push74=, $pop60, $pop75 -; NO-SIMD128-FAST-NEXT: local.tee $push73=, $14=, $pop74 -; NO-SIMD128-FAST-NEXT: i32.xor $push61=, $15, $pop73 -; NO-SIMD128-FAST-NEXT: i32.sub $push62=, $pop61, $14 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-FAST-NEXT: i32.const $push68=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push69=, $0, $pop68 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push65=, $16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push74=, $pop22, $pop75 +; NO-SIMD128-FAST-NEXT: local.tee $push73=, $7=, $pop74 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $8, $pop73 +; NO-SIMD128-FAST-NEXT: i32.sub $push24=, $pop23, $7 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $9 ; NO-SIMD128-FAST-NEXT: i32.const $push72=, 7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push71=, $pop65, $pop72 
-; NO-SIMD128-FAST-NEXT: local.tee $push70=, $0=, $pop71 -; NO-SIMD128-FAST-NEXT: i32.xor $push66=, $16, $pop70 -; NO-SIMD128-FAST-NEXT: i32.sub $push67=, $pop66, $0 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop69), $pop67 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push71=, $pop25, $pop72 +; NO-SIMD128-FAST-NEXT: local.tee $push70=, $8=, $pop71 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $9, $pop70 +; NO-SIMD128-FAST-NEXT: i32.sub $push27=, $pop26, $8 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $10 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push68=, $pop28, $pop69 +; NO-SIMD128-FAST-NEXT: local.tee $push67=, $9=, $pop68 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $10, $pop67 +; NO-SIMD128-FAST-NEXT: i32.sub $push30=, $pop29, $9 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push31=, $11 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push65=, $pop31, $pop66 +; NO-SIMD128-FAST-NEXT: local.tee $push64=, $10=, $pop65 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $11, $pop64 +; NO-SIMD128-FAST-NEXT: i32.sub $push33=, $pop32, $10 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop33 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $12 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push62=, $pop34, $pop63 +; NO-SIMD128-FAST-NEXT: local.tee $push61=, $11=, $pop62 +; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $12, $pop61 +; NO-SIMD128-FAST-NEXT: i32.sub $push36=, $pop35, $11 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $13 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push59=, $pop37, $pop60 +; NO-SIMD128-FAST-NEXT: local.tee $push58=, $12=, $pop59 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $13, $pop58 +; NO-SIMD128-FAST-NEXT: i32.sub $push39=, $pop38, $12 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $14 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push56=, $pop40, $pop57 +; NO-SIMD128-FAST-NEXT: local.tee $push55=, $13=, $pop56 +; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $14, $pop55 +; NO-SIMD128-FAST-NEXT: i32.sub $push42=, $pop41, $13 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop42 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push43=, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push53=, $pop43, $pop54 +; NO-SIMD128-FAST-NEXT: local.tee $push52=, $14=, $pop53 +; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $15, $pop52 +; NO-SIMD128-FAST-NEXT: i32.sub $push45=, $pop44, $14 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push46=, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop46, $pop51 +; NO-SIMD128-FAST-NEXT: local.tee $push49=, $15=, $pop50 +; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $16, $pop49 +; NO-SIMD128-FAST-NEXT: i32.sub $push48=, $pop47, $15 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop48 ; NO-SIMD128-FAST-NEXT: return %a = sub <16 x i8> zeroinitializer, %x %b = icmp slt <16 x i8> %x, zeroinitializer @@ -2576,75 +2136,53 @@ define <16 x i8> @neg_v16i8(<16 x i8> %x) { ; NO-SIMD128: .functype neg_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 0 -; NO-SIMD128-NEXT: i32.sub 
$push1=, $pop0, $9 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push53=, 0 -; NO-SIMD128-NEXT: i32.sub $push2=, $pop53, $5 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push52=, 0 -; NO-SIMD128-NEXT: i32.sub $push3=, $pop52, $3 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push51=, 0 -; NO-SIMD128-NEXT: i32.sub $push4=, $pop51, $2 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push50=, 0 -; NO-SIMD128-NEXT: i32.sub $push5=, $pop50, $1 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push7=, 15 -; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-NEXT: i32.const $push49=, 0 -; NO-SIMD128-NEXT: i32.sub $push6=, $pop49, $16 -; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.const $push48=, 0 -; NO-SIMD128-NEXT: i32.sub $push9=, $pop48, $15 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push47=, 0 -; NO-SIMD128-NEXT: i32.sub $push12=, $pop47, $14 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 12 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push46=, 0 -; NO-SIMD128-NEXT: i32.sub $push15=, $pop46, $13 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 11 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push45=, 0 -; NO-SIMD128-NEXT: i32.sub $push18=, $pop45, $12 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 10 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.const $push44=, 0 -; NO-SIMD128-NEXT: i32.sub $push21=, $pop44, $11 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push25=, 9 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push43=, 0 -; NO-SIMD128-NEXT: i32.sub $push24=, $pop43, $10 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push28=, 7 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.const $push42=, 0 -; NO-SIMD128-NEXT: i32.sub $push27=, $pop42, $8 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.const $push41=, 0 -; NO-SIMD128-NEXT: i32.sub $push30=, $pop41, $7 -; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-NEXT: i32.const $push34=, 5 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.const $push40=, 0 -; NO-SIMD128-NEXT: i32.sub $push33=, $pop40, $6 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push37=, 3 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push39=, 0 -; NO-SIMD128-NEXT: i32.sub $push36=, $pop39, $4 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $16 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push31=, 0 +; NO-SIMD128-NEXT: i32.sub $push2=, $pop31, $15 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push30=, 0 +; NO-SIMD128-NEXT: i32.sub $push3=, $pop30, $14 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push29=, 0 +; 
NO-SIMD128-NEXT: i32.sub $push4=, $pop29, $13 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push28=, 0 +; NO-SIMD128-NEXT: i32.sub $push5=, $pop28, $12 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push27=, 0 +; NO-SIMD128-NEXT: i32.sub $push6=, $pop27, $11 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push26=, 0 +; NO-SIMD128-NEXT: i32.sub $push7=, $pop26, $10 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push25=, 0 +; NO-SIMD128-NEXT: i32.sub $push8=, $pop25, $9 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push24=, 0 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop24, $8 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push23=, 0 +; NO-SIMD128-NEXT: i32.sub $push10=, $pop23, $7 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push22=, 0 +; NO-SIMD128-NEXT: i32.sub $push11=, $pop22, $6 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop11 +; NO-SIMD128-NEXT: i32.const $push21=, 0 +; NO-SIMD128-NEXT: i32.sub $push12=, $pop21, $5 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push20=, 0 +; NO-SIMD128-NEXT: i32.sub $push13=, $pop20, $4 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop13 +; NO-SIMD128-NEXT: i32.const $push19=, 0 +; NO-SIMD128-NEXT: i32.sub $push14=, $pop19, $3 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, 0 +; NO-SIMD128-NEXT: i32.sub $push15=, $pop18, $2 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push17=, 0 +; NO-SIMD128-NEXT: i32.sub $push16=, $pop17, $1 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: neg_v16i8: @@ -2653,73 +2191,51 @@ define <16 x i8> @neg_v16i8(<16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 ; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop53, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop31, $2 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop52, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop30, $3 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop51, $4 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop50, $5 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop49, $6 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop48, $7 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 0 -; 
NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop47, $8 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop46, $9 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop45, $10 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push23=, $pop44, $11 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push26=, $pop43, $12 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push29=, $pop42, $13 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push32=, $pop41, $14 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push35=, $pop40, $15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push38=, $pop39, $16 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop29, $4 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $pop28, $5 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop27, $6 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop26, $7 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop25, $8 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop24, $9 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop23, $10 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push11=, $pop22, $11 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop21, $12 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop20, $13 +; NO-SIMD128-FAST-NEXT: i32.store8 
12($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push14=, $pop19, $14 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $pop18, $15 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop17, $16 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %a = sub <16 x i8> , @@ -2744,124 +2260,80 @@ define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push40=, $17, $pop0 -; NO-SIMD128-NEXT: local.tee $push39=, $17=, $pop40 -; NO-SIMD128-NEXT: i32.shl $push1=, $9, $pop39 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 -; NO-SIMD128-NEXT: i32.shl $push2=, $5, $17 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 -; NO-SIMD128-NEXT: i32.shl $push3=, $3, $17 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-NEXT: i32.shl $push4=, $2, $17 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-NEXT: i32.shl $push5=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push7=, 15 -; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-NEXT: i32.shl $push6=, $16, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.shl $push9=, $15, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.shl $push12=, $14, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 12 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.shl $push15=, $13, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 11 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.shl $push18=, $12, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 10 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.shl $push21=, $11, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push25=, 9 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.shl $push24=, $10, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push28=, 7 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.shl $push27=, $8, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.shl $push30=, $7, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-NEXT: i32.const $push34=, 5 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.shl $push33=, $6, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push37=, 3 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.shl $push36=, $4, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.and $push18=, $17, $pop0 +; NO-SIMD128-NEXT: local.tee $push17=, $17=, $pop18 +; NO-SIMD128-NEXT: i32.shl $push1=, $16, $pop17 
+; NO-SIMD128-NEXT: i32.store8 15($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $15, $17 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop2 +; NO-SIMD128-NEXT: i32.shl $push3=, $14, $17 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop3 +; NO-SIMD128-NEXT: i32.shl $push4=, $13, $17 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop4 +; NO-SIMD128-NEXT: i32.shl $push5=, $12, $17 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop5 +; NO-SIMD128-NEXT: i32.shl $push6=, $11, $17 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop6 +; NO-SIMD128-NEXT: i32.shl $push7=, $10, $17 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop7 +; NO-SIMD128-NEXT: i32.shl $push8=, $9, $17 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-NEXT: i32.shl $push9=, $8, $17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop9 +; NO-SIMD128-NEXT: i32.shl $push10=, $7, $17 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop10 +; NO-SIMD128-NEXT: i32.shl $push11=, $6, $17 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop11 +; NO-SIMD128-NEXT: i32.shl $push12=, $5, $17 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-NEXT: i32.shl $push13=, $4, $17 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop13 +; NO-SIMD128-NEXT: i32.shl $push14=, $3, $17 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop14 +; NO-SIMD128-NEXT: i32.shl $push15=, $2, $17 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop15 +; NO-SIMD128-NEXT: i32.shl $push16=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_v16i8: ; NO-SIMD128-FAST: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push40=, $17, $pop0 -; NO-SIMD128-FAST-NEXT: local.tee $push39=, $17=, $pop40 -; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop39 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push17=, $17=, $pop18 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop17 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $17 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 ; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $17 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 -; NO-SIMD128-FAST-NEXT: i32.shl $push17=, $9, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $10, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: 
i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.shl $push23=, $11, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $12, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.shl $push29=, $13, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.shl $push32=, $14, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.shl $push35=, $15, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.shl $push38=, $16, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $9, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $10, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $11, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $12, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $13, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $14, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $15, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $16, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, @@ -2890,75 +2362,53 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) { ; NO-SIMD128: .functype shl_const_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 5 -; NO-SIMD128-NEXT: i32.shl $push1=, $9, $pop0 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push53=, 5 -; NO-SIMD128-NEXT: i32.shl $push2=, $5, $pop53 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push52=, 5 -; NO-SIMD128-NEXT: i32.shl $push3=, $3, $pop52 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push51=, 5 -; NO-SIMD128-NEXT: i32.shl $push4=, $2, $pop51 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push50=, 5 -; NO-SIMD128-NEXT: i32.shl $push5=, $1, $pop50 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const 
$push7=, 15 -; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.shl $push6=, $16, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.const $push48=, 5 -; NO-SIMD128-NEXT: i32.shl $push9=, $15, $pop48 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push47=, 5 -; NO-SIMD128-NEXT: i32.shl $push12=, $14, $pop47 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 12 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push46=, 5 -; NO-SIMD128-NEXT: i32.shl $push15=, $13, $pop46 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 11 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push45=, 5 -; NO-SIMD128-NEXT: i32.shl $push18=, $12, $pop45 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 10 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.const $push44=, 5 -; NO-SIMD128-NEXT: i32.shl $push21=, $11, $pop44 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push25=, 9 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push43=, 5 -; NO-SIMD128-NEXT: i32.shl $push24=, $10, $pop43 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push28=, 7 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.const $push42=, 5 -; NO-SIMD128-NEXT: i32.shl $push27=, $8, $pop42 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.const $push41=, 5 -; NO-SIMD128-NEXT: i32.shl $push30=, $7, $pop41 -; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-NEXT: i32.const $push40=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop40 -; NO-SIMD128-NEXT: i32.const $push39=, 5 -; NO-SIMD128-NEXT: i32.shl $push33=, $6, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop33 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.const $push38=, 5 -; NO-SIMD128-NEXT: i32.shl $push35=, $4, $pop38 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.shl $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push31=, 5 +; NO-SIMD128-NEXT: i32.shl $push2=, $15, $pop31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push30=, 5 +; NO-SIMD128-NEXT: i32.shl $push3=, $14, $pop30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push29=, 5 +; NO-SIMD128-NEXT: i32.shl $push4=, $13, $pop29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push28=, 5 +; NO-SIMD128-NEXT: i32.shl $push5=, $12, $pop28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push27=, 5 +; NO-SIMD128-NEXT: i32.shl $push6=, $11, $pop27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push26=, 5 +; NO-SIMD128-NEXT: i32.shl $push7=, $10, $pop26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push25=, 5 +; NO-SIMD128-NEXT: i32.shl $push8=, $9, $pop25 +; NO-SIMD128-NEXT: i32.store8 8($0), 
$pop8 +; NO-SIMD128-NEXT: i32.const $push24=, 5 +; NO-SIMD128-NEXT: i32.shl $push9=, $8, $pop24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push23=, 5 +; NO-SIMD128-NEXT: i32.shl $push10=, $7, $pop23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push22=, 5 +; NO-SIMD128-NEXT: i32.shl $push11=, $6, $pop22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop11 +; NO-SIMD128-NEXT: i32.const $push21=, 5 +; NO-SIMD128-NEXT: i32.shl $push12=, $5, $pop21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push20=, 5 +; NO-SIMD128-NEXT: i32.shl $push13=, $4, $pop20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop13 +; NO-SIMD128-NEXT: i32.const $push19=, 5 +; NO-SIMD128-NEXT: i32.shl $push14=, $3, $pop19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, 5 +; NO-SIMD128-NEXT: i32.shl $push15=, $2, $pop18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push17=, 5 +; NO-SIMD128-NEXT: i32.shl $push16=, $1, $pop17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_const_v16i8: @@ -2967,73 +2417,51 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5 ; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop31 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop30 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $pop50 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $6, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $7, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $8, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $9, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push19=, $10, $pop44 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; 
NO-SIMD128-FAST-NEXT: i32.const $push43=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push22=, $11, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push25=, $12, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push28=, $13, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push31=, $14, $pop40 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $15, $pop39 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push37=, $16, $pop38 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $9, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $10, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $11, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $12, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $14, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $15, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $16, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %a = shl <16 x i8> %v, @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128: .functype shl_vec_v16i8 (i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop0 -; NO-SIMD128-NEXT: i32.shl $push2=, $9, $pop1 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push69=, 255 -; NO-SIMD128-NEXT: i32.and $push3=, $21, $pop69 -; NO-SIMD128-NEXT: i32.shl $push4=, $5, $pop3 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push68=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $19, $pop68 -; NO-SIMD128-NEXT: i32.shl $push6=, $3, $pop5 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push67=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $18, $pop67 -; NO-SIMD128-NEXT: i32.shl $push8=, $2, $pop7 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push66=, 255 -; NO-SIMD128-NEXT: i32.and $push9=, $17, $pop66 -; NO-SIMD128-NEXT: i32.shl $push10=, $1, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push13=, 15 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push65=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $32, $pop65 -; NO-SIMD128-NEXT: i32.shl $push12=, $16, $pop11 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push17=, 14 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push64=, 255 -; NO-SIMD128-NEXT: i32.and $push15=, $31, $pop64 -; NO-SIMD128-NEXT: i32.shl $push16=, $15, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push21=, 13 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.const $push63=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $30, $pop63 -; NO-SIMD128-NEXT: i32.shl $push20=, $14, $pop19 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push25=, 12 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push62=, 255 -; NO-SIMD128-NEXT: i32.and $push23=, $29, $pop62 -; NO-SIMD128-NEXT: i32.shl $push24=, $13, $pop23 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push61=, 255 -; NO-SIMD128-NEXT: i32.and $push27=, $28, $pop61 -; NO-SIMD128-NEXT: i32.shl $push28=, $12, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push33=, 10 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.const $push60=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop60 -; NO-SIMD128-NEXT: i32.shl $push32=, $11, $pop31 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push37=, 9 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push59=, 255 -; NO-SIMD128-NEXT: i32.and $push35=, $26, $pop59 -; NO-SIMD128-NEXT: i32.shl $push36=, $10, $pop35 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const $push58=, 255 -; NO-SIMD128-NEXT: i32.and $push39=, $24, $pop58 -; NO-SIMD128-NEXT: i32.shl $push40=, $8, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push45=, 6 -; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-NEXT: i32.const $push57=, 255 -; NO-SIMD128-NEXT: i32.and 
$push43=, $23, $pop57 -; NO-SIMD128-NEXT: i32.shl $push44=, $7, $pop43 -; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $22, $pop56 -; NO-SIMD128-NEXT: i32.shl $push48=, $6, $pop47 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push53=, 3 -; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-NEXT: i32.const $push55=, 255 -; NO-SIMD128-NEXT: i32.and $push51=, $20, $pop55 -; NO-SIMD128-NEXT: i32.shl $push52=, $4, $pop51 -; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop0 +; NO-SIMD128-NEXT: i32.shl $push2=, $16, $pop1 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push47=, 255 +; NO-SIMD128-NEXT: i32.and $push3=, $31, $pop47 +; NO-SIMD128-NEXT: i32.shl $push4=, $15, $pop3 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push46=, 255 +; NO-SIMD128-NEXT: i32.and $push5=, $30, $pop46 +; NO-SIMD128-NEXT: i32.shl $push6=, $14, $pop5 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push45=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $29, $pop45 +; NO-SIMD128-NEXT: i32.shl $push8=, $13, $pop7 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push44=, 255 +; NO-SIMD128-NEXT: i32.and $push9=, $28, $pop44 +; NO-SIMD128-NEXT: i32.shl $push10=, $12, $pop9 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push43=, 255 +; NO-SIMD128-NEXT: i32.and $push11=, $27, $pop43 +; NO-SIMD128-NEXT: i32.shl $push12=, $11, $pop11 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push42=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $26, $pop42 +; NO-SIMD128-NEXT: i32.shl $push14=, $10, $pop13 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push41=, 255 +; NO-SIMD128-NEXT: i32.and $push15=, $25, $pop41 +; NO-SIMD128-NEXT: i32.shl $push16=, $9, $pop15 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $24, $pop40 +; NO-SIMD128-NEXT: i32.shl $push18=, $8, $pop17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push39=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $23, $pop39 +; NO-SIMD128-NEXT: i32.shl $push20=, $7, $pop19 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $22, $pop38 +; NO-SIMD128-NEXT: i32.shl $push22=, $6, $pop21 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push37=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $21, $pop37 +; NO-SIMD128-NEXT: i32.shl $push24=, $5, $pop23 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $20, $pop36 +; NO-SIMD128-NEXT: i32.shl $push26=, $4, $pop25 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop26 +; NO-SIMD128-NEXT: i32.const $push35=, 255 +; NO-SIMD128-NEXT: i32.and $push27=, $19, $pop35 +; NO-SIMD128-NEXT: i32.shl $push28=, $3, $pop27 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $18, $pop34 +; NO-SIMD128-NEXT: i32.shl $push30=, $2, $pop29 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop30 +; NO-SIMD128-NEXT: i32.const $push33=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $17, $pop33 +; 
NO-SIMD128-NEXT: i32.shl $push32=, $1, $pop31 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_vec_v16i8: @@ -3342,88 +2748,66 @@ define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop0 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push3=, $18, $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $18, $pop47 ; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $19, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $19, $pop46 ; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $20, $pop67 -; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $21, $pop66 -; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $5, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $22, $pop65 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $6, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop64 -; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $7, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $24, $pop63 -; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $8, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop62 -; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $9, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $26, $pop61 -; NO-SIMD128-FAST-NEXT: i32.shl $push30=, $10, $pop29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $27, $pop60 -; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $11, $pop33 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $28, $pop59 -; NO-SIMD128-FAST-NEXT: i32.shl $push38=, $12, 
$pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $29, $pop58 -; NO-SIMD128-FAST-NEXT: i32.shl $push42=, $13, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push45=, $30, $pop57 -; NO-SIMD128-FAST-NEXT: i32.shl $push46=, $14, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push49=, $31, $pop56 -; NO-SIMD128-FAST-NEXT: i32.shl $push50=, $15, $pop49 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $32, $pop55 -; NO-SIMD128-FAST-NEXT: i32.shl $push54=, $16, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $20, $pop45 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $21, $pop44 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $5, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $22, $pop43 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $23, $pop42 +; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $7, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $24, $pop41 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $25, $pop40 +; NO-SIMD128-FAST-NEXT: i32.shl $push18=, $9, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $10, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $27, $pop38 +; NO-SIMD128-FAST-NEXT: i32.shl $push22=, $11, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $28, $pop37 +; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $12, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $29, $pop36 +; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $13, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $30, $pop35 +; NO-SIMD128-FAST-NEXT: i32.shl $push28=, $14, 
$pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $31, $pop34 +; NO-SIMD128-FAST-NEXT: i32.shl $push30=, $15, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $32, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shl $push32=, $16, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %a = shl <16 x i8> %v, %x ret <16 x i8> %a @@ -3445,79 +2829,57 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-LABEL: shr_s_v16i8: ; NO-SIMD128: .functype shr_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend8_s $push1=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16 ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push56=, $17, $pop0 -; NO-SIMD128-NEXT: local.tee $push55=, $17=, $pop56 -; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop55 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend8_s $push3=, $5 +; NO-SIMD128-NEXT: i32.and $push34=, $17, $pop0 +; NO-SIMD128-NEXT: local.tee $push33=, $17=, $pop34 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop33 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.extend8_s $push3=, $15 ; NO-SIMD128-NEXT: i32.shr_s $push4=, $pop3, $17 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 -; NO-SIMD128-NEXT: i32.extend8_s $push5=, $3 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop4 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $14 ; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $17 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-NEXT: i32.extend8_s $push7=, $2 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop6 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $13 ; NO-SIMD128-NEXT: i32.shr_s $push8=, $pop7, $17 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-NEXT: i32.extend8_s $push9=, $1 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop8 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $12 ; NO-SIMD128-NEXT: i32.shr_s $push10=, $pop9, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push13=, 15 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.extend8_s $push11=, $16 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop10 +; NO-SIMD128-NEXT: i32.extend8_s $push11=, $11 ; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push17=, 14 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.extend8_s $push15=, $15 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop12 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $10 +; NO-SIMD128-NEXT: i32.shr_s $push14=, $pop13, $17 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop14 +; NO-SIMD128-NEXT: i32.extend8_s $push15=, $9 ; NO-SIMD128-NEXT: i32.shr_s $push16=, $pop15, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push21=, 13 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.extend8_s $push19=, $14 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $8 +; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop18 +; NO-SIMD128-NEXT: i32.extend8_s $push19=, $7 ; NO-SIMD128-NEXT: i32.shr_s $push20=, $pop19, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: 
i32.const $push25=, 12 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.extend8_s $push23=, $13 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $6 +; NO-SIMD128-NEXT: i32.shr_s $push22=, $pop21, $17 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop22 +; NO-SIMD128-NEXT: i32.extend8_s $push23=, $5 ; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.extend8_s $push27=, $12 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop24 +; NO-SIMD128-NEXT: i32.extend8_s $push25=, $4 +; NO-SIMD128-NEXT: i32.shr_s $push26=, $pop25, $17 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop26 +; NO-SIMD128-NEXT: i32.extend8_s $push27=, $3 ; NO-SIMD128-NEXT: i32.shr_s $push28=, $pop27, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push33=, 10 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop28 +; NO-SIMD128-NEXT: i32.extend8_s $push29=, $2 +; NO-SIMD128-NEXT: i32.shr_s $push30=, $pop29, $17 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop30 +; NO-SIMD128-NEXT: i32.extend8_s $push31=, $1 ; NO-SIMD128-NEXT: i32.shr_s $push32=, $pop31, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push37=, 9 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.extend8_s $push35=, $10 -; NO-SIMD128-NEXT: i32.shr_s $push36=, $pop35, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.extend8_s $push39=, $8 -; NO-SIMD128-NEXT: i32.shr_s $push40=, $pop39, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push45=, 6 -; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-NEXT: i32.extend8_s $push43=, $7 -; NO-SIMD128-NEXT: i32.shr_s $push44=, $pop43, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.extend8_s $push47=, $6 -; NO-SIMD128-NEXT: i32.shr_s $push48=, $pop47, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push53=, 3 -; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-NEXT: i32.extend8_s $push51=, $4 -; NO-SIMD128-NEXT: i32.shr_s $push52=, $pop51, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_v16i8: @@ -3525,9 +2887,9 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push1=, $1 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push56=, $17, $pop0 -; NO-SIMD128-FAST-NEXT: local.tee $push55=, $1=, $pop56 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop55 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push33=, $1=, $pop34 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop33 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push3=, $2 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $1 @@ -3535,67 +2897,45 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push5=, $3 ; 
NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push7=, $4 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push8=, $pop7, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $5 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $pop9, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $5 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $6 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $6 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $8 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $pop15, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $7 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $9 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $10 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $8 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $11 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $12 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $9 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $13 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push26=, $pop25, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $10 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push27=, $14 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push28=, $pop27, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $15 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $11 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push34=, $pop33, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; 
NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $12 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push38=, $pop37, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $13 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push42=, $pop41, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $14 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push46=, $pop45, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop49, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $16 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push54=, $pop53, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push31=, $16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, @@ -3811,108 +3151,86 @@ define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128-LABEL: shr_s_vec_v16i8: ; NO-SIMD128: .functype shr_s_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend8_s $push2=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push2=, $16 ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop0 ; NO-SIMD128-NEXT: i32.shr_s $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop3 -; NO-SIMD128-NEXT: i32.extend8_s $push5=, $5 -; NO-SIMD128-NEXT: i32.const $push85=, 255 -; NO-SIMD128-NEXT: i32.and $push4=, $21, $pop85 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $15 +; NO-SIMD128-NEXT: i32.const $push63=, 255 +; NO-SIMD128-NEXT: i32.and $push4=, $31, $pop63 ; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-NEXT: i32.extend8_s $push8=, $3 -; NO-SIMD128-NEXT: i32.const $push84=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $19, $pop84 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop6 +; NO-SIMD128-NEXT: i32.extend8_s $push8=, $14 +; NO-SIMD128-NEXT: i32.const $push62=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $30, $pop62 ; NO-SIMD128-NEXT: i32.shr_s $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-NEXT: i32.extend8_s $push11=, $2 -; NO-SIMD128-NEXT: i32.const $push83=, 255 -; NO-SIMD128-NEXT: i32.and $push10=, $18, $pop83 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop9 +; NO-SIMD128-NEXT: i32.extend8_s $push11=, $13 +; NO-SIMD128-NEXT: i32.const $push61=, 255 +; NO-SIMD128-NEXT: i32.and $push10=, $29, $pop61 ; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop12 -; NO-SIMD128-NEXT: i32.extend8_s 
$push14=, $1 -; NO-SIMD128-NEXT: i32.const $push82=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $17, $pop82 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-NEXT: i32.extend8_s $push14=, $12 +; NO-SIMD128-NEXT: i32.const $push60=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $28, $pop60 ; NO-SIMD128-NEXT: i32.shr_s $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 15 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.extend8_s $push17=, $16 -; NO-SIMD128-NEXT: i32.const $push81=, 255 -; NO-SIMD128-NEXT: i32.and $push16=, $32, $pop81 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop15 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $11 +; NO-SIMD128-NEXT: i32.const $push59=, 255 +; NO-SIMD128-NEXT: i32.and $push16=, $27, $pop59 ; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push24=, 14 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.extend8_s $push22=, $15 -; NO-SIMD128-NEXT: i32.const $push80=, 255 -; NO-SIMD128-NEXT: i32.and $push21=, $31, $pop80 -; NO-SIMD128-NEXT: i32.shr_s $push23=, $pop22, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push29=, 13 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.extend8_s $push27=, $14 -; NO-SIMD128-NEXT: i32.const $push79=, 255 -; NO-SIMD128-NEXT: i32.and $push26=, $30, $pop79 -; NO-SIMD128-NEXT: i32.shr_s $push28=, $pop27, $pop26 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push34=, 12 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.extend8_s $push32=, $13 -; NO-SIMD128-NEXT: i32.const $push78=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $29, $pop78 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop18 +; NO-SIMD128-NEXT: i32.extend8_s $push20=, $10 +; NO-SIMD128-NEXT: i32.const $push58=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $26, $pop58 +; NO-SIMD128-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop21 +; NO-SIMD128-NEXT: i32.extend8_s $push23=, $9 +; NO-SIMD128-NEXT: i32.const $push57=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $25, $pop57 +; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop24 +; NO-SIMD128-NEXT: i32.extend8_s $push26=, $8 +; NO-SIMD128-NEXT: i32.const $push56=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $24, $pop56 +; NO-SIMD128-NEXT: i32.shr_s $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop27 +; NO-SIMD128-NEXT: i32.extend8_s $push29=, $7 +; NO-SIMD128-NEXT: i32.const $push55=, 255 +; NO-SIMD128-NEXT: i32.and $push28=, $23, $pop55 +; NO-SIMD128-NEXT: i32.shr_s $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop30 +; NO-SIMD128-NEXT: i32.extend8_s $push32=, $6 +; NO-SIMD128-NEXT: i32.const $push54=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $22, $pop54 ; NO-SIMD128-NEXT: i32.shr_s $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push39=, 11 -; NO-SIMD128-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-NEXT: i32.extend8_s $push37=, $12 -; NO-SIMD128-NEXT: i32.const $push77=, 255 -; NO-SIMD128-NEXT: i32.and $push36=, $28, $pop77 -; NO-SIMD128-NEXT: i32.shr_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-NEXT: i32.const $push44=, 10 -; NO-SIMD128-NEXT: i32.add $push45=, $0, $pop44 -; NO-SIMD128-NEXT: i32.extend8_s 
$push42=, $11 -; NO-SIMD128-NEXT: i32.const $push76=, 255 -; NO-SIMD128-NEXT: i32.and $push41=, $27, $pop76 -; NO-SIMD128-NEXT: i32.shr_s $push43=, $pop42, $pop41 -; NO-SIMD128-NEXT: i32.store8 0($pop45), $pop43 -; NO-SIMD128-NEXT: i32.const $push49=, 9 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.extend8_s $push47=, $10 -; NO-SIMD128-NEXT: i32.const $push75=, 255 -; NO-SIMD128-NEXT: i32.and $push46=, $26, $pop75 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop33 +; NO-SIMD128-NEXT: i32.extend8_s $push35=, $5 +; NO-SIMD128-NEXT: i32.const $push53=, 255 +; NO-SIMD128-NEXT: i32.and $push34=, $21, $pop53 +; NO-SIMD128-NEXT: i32.shr_s $push36=, $pop35, $pop34 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop36 +; NO-SIMD128-NEXT: i32.extend8_s $push38=, $4 +; NO-SIMD128-NEXT: i32.const $push52=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $20, $pop52 +; NO-SIMD128-NEXT: i32.shr_s $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push41=, $3 +; NO-SIMD128-NEXT: i32.const $push51=, 255 +; NO-SIMD128-NEXT: i32.and $push40=, $19, $pop51 +; NO-SIMD128-NEXT: i32.shr_s $push42=, $pop41, $pop40 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop42 +; NO-SIMD128-NEXT: i32.extend8_s $push44=, $2 +; NO-SIMD128-NEXT: i32.const $push50=, 255 +; NO-SIMD128-NEXT: i32.and $push43=, $18, $pop50 +; NO-SIMD128-NEXT: i32.shr_s $push45=, $pop44, $pop43 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop45 +; NO-SIMD128-NEXT: i32.extend8_s $push47=, $1 +; NO-SIMD128-NEXT: i32.const $push49=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $17, $pop49 ; NO-SIMD128-NEXT: i32.shr_s $push48=, $pop47, $pop46 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push54=, 7 -; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-NEXT: i32.extend8_s $push52=, $8 -; NO-SIMD128-NEXT: i32.const $push74=, 255 -; NO-SIMD128-NEXT: i32.and $push51=, $24, $pop74 -; NO-SIMD128-NEXT: i32.shr_s $push53=, $pop52, $pop51 -; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-NEXT: i32.const $push59=, 6 -; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 -; NO-SIMD128-NEXT: i32.extend8_s $push57=, $7 -; NO-SIMD128-NEXT: i32.const $push73=, 255 -; NO-SIMD128-NEXT: i32.and $push56=, $23, $pop73 -; NO-SIMD128-NEXT: i32.shr_s $push58=, $pop57, $pop56 -; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-NEXT: i32.const $push64=, 5 -; NO-SIMD128-NEXT: i32.add $push65=, $0, $pop64 -; NO-SIMD128-NEXT: i32.extend8_s $push62=, $6 -; NO-SIMD128-NEXT: i32.const $push72=, 255 -; NO-SIMD128-NEXT: i32.and $push61=, $22, $pop72 -; NO-SIMD128-NEXT: i32.shr_s $push63=, $pop62, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop65), $pop63 -; NO-SIMD128-NEXT: i32.const $push69=, 3 -; NO-SIMD128-NEXT: i32.add $push70=, $0, $pop69 -; NO-SIMD128-NEXT: i32.extend8_s $push67=, $4 -; NO-SIMD128-NEXT: i32.const $push71=, 255 -; NO-SIMD128-NEXT: i32.and $push66=, $20, $pop71 -; NO-SIMD128-NEXT: i32.shr_s $push68=, $pop67, $pop66 -; NO-SIMD128-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop48 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_vec_v16i8: @@ -3924,102 +3242,80 @@ define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push5=, $2 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop85 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 +; 
NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop63 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push8=, $3 -; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop84 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop62 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4 -; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $20, $pop83 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $pop12 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop14 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $5 -; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop82 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6 -; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $22, $pop81 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push26=, $7 -; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop80 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $4 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop61 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $21, $pop60 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $22, $pop59 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop58 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $24, $pop57 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push26=, $9 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop56 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push28=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push31=, $8 -; 
NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $24, $pop79 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop29), $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $9 -; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop78 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push35=, $pop34, $pop33 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push39=, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push38=, $26, $pop77 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push40=, $pop39, $pop38 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $11 -; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push43=, $27, $pop76 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $10 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $26, $pop55 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $11 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $27, $pop54 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push33=, $pop32, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop33 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $12 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $28, $pop53 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push36=, $pop35, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push38=, $13 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $29, $pop52 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $14 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $30, $pop51 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push42=, $pop41, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop42 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $31, $pop50 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push45=, $pop44, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop42), $pop45 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $12 -; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push48=, $28, $pop75 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop49, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push54=, $13 -; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $29, $pop74 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push55=, $pop54, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop55 -; NO-SIMD128-FAST-NEXT: i32.const 
$push56=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push57=, $0, $pop56 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push59=, $14 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push58=, $30, $pop73 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push60=, $pop59, $pop58 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop57), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push64=, $15 -; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $31, $pop72 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push65=, $pop64, $pop63 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop65 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $16 -; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push68=, $32, $pop71 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push70=, $pop69, $pop68 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop70 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push47=, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $32, $pop49 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push48=, $pop47, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop48 ; NO-SIMD128-FAST-NEXT: return %a = ashr <16 x i8> %v, %x ret <16 x i8> %a @@ -4042,94 +3338,72 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128: .functype shr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $9, $pop0 -; NO-SIMD128-NEXT: i32.const $push72=, 255 -; NO-SIMD128-NEXT: i32.and $push71=, $17, $pop72 -; NO-SIMD128-NEXT: local.tee $push70=, $17=, $pop71 -; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop70 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push69=, 255 -; NO-SIMD128-NEXT: i32.and $push3=, $5, $pop69 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.const $push50=, 255 +; NO-SIMD128-NEXT: i32.and $push49=, $17, $pop50 +; NO-SIMD128-NEXT: local.tee $push48=, $17=, $pop49 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop48 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push47=, 255 +; NO-SIMD128-NEXT: i32.and $push3=, $15, $pop47 ; NO-SIMD128-NEXT: i32.shr_u $push4=, $pop3, $17 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push68=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop68 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push46=, 255 +; NO-SIMD128-NEXT: i32.and $push5=, $14, $pop46 ; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $17 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push67=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $2, $pop67 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push45=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop45 ; NO-SIMD128-NEXT: i32.shr_u $push8=, $pop7, $17 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push66=, 255 -; NO-SIMD128-NEXT: i32.and $push9=, $1, $pop66 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push44=, 255 +; NO-SIMD128-NEXT: i32.and $push9=, $12, $pop44 ; NO-SIMD128-NEXT: i32.shr_u $push10=, $pop9, $17 -; 
NO-SIMD128-NEXT: i32.store8 0($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push13=, 15 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push65=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $16, $pop65 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push43=, 255 +; NO-SIMD128-NEXT: i32.and $push11=, $11, $pop43 ; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push17=, 14 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push64=, 255 -; NO-SIMD128-NEXT: i32.and $push15=, $15, $pop64 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push42=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $10, $pop42 +; NO-SIMD128-NEXT: i32.shr_u $push14=, $pop13, $17 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push41=, 255 +; NO-SIMD128-NEXT: i32.and $push15=, $9, $pop41 ; NO-SIMD128-NEXT: i32.shr_u $push16=, $pop15, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push21=, 13 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.const $push63=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $14, $pop63 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $8, $pop40 +; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push39=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop39 ; NO-SIMD128-NEXT: i32.shr_u $push20=, $pop19, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push25=, 12 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push62=, 255 -; NO-SIMD128-NEXT: i32.and $push23=, $13, $pop62 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $6, $pop38 +; NO-SIMD128-NEXT: i32.shr_u $push22=, $pop21, $17 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push37=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $5, $pop37 ; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push61=, 255 -; NO-SIMD128-NEXT: i32.and $push27=, $12, $pop61 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $4, $pop36 +; NO-SIMD128-NEXT: i32.shr_u $push26=, $pop25, $17 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop26 +; NO-SIMD128-NEXT: i32.const $push35=, 255 +; NO-SIMD128-NEXT: i32.and $push27=, $3, $pop35 ; NO-SIMD128-NEXT: i32.shr_u $push28=, $pop27, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push33=, 10 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.const $push60=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $11, $pop60 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $2, $pop34 +; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $17 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop30 +; NO-SIMD128-NEXT: i32.const $push33=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $1, $pop33 ; NO-SIMD128-NEXT: i32.shr_u $push32=, $pop31, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; 
NO-SIMD128-NEXT: i32.const $push37=, 9 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push59=, 255 -; NO-SIMD128-NEXT: i32.and $push35=, $10, $pop59 -; NO-SIMD128-NEXT: i32.shr_u $push36=, $pop35, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const $push58=, 255 -; NO-SIMD128-NEXT: i32.and $push39=, $8, $pop58 -; NO-SIMD128-NEXT: i32.shr_u $push40=, $pop39, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push45=, 6 -; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-NEXT: i32.const $push57=, 255 -; NO-SIMD128-NEXT: i32.and $push43=, $7, $pop57 -; NO-SIMD128-NEXT: i32.shr_u $push44=, $pop43, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $6, $pop56 -; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push53=, 3 -; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-NEXT: i32.const $push55=, 255 -; NO-SIMD128-NEXT: i32.and $push51=, $4, $pop55 -; NO-SIMD128-NEXT: i32.shr_u $push52=, $pop51, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_v16i8: @@ -4137,93 +3411,71 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push71=, $17, $pop72 -; NO-SIMD128-FAST-NEXT: local.tee $push70=, $1=, $pop71 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $17, $pop50 +; NO-SIMD128-FAST-NEXT: local.tee $push48=, $1=, $pop49 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop48 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop47 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push4=, $pop3, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop46 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop67 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop45 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push8=, $pop7, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $5, $pop66 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $5, $pop44 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push10=, $pop9, 
$1 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $6, $pop43 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push15=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $6, $pop65 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $7, $pop42 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push14=, $pop13, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push17=, $7, $pop64 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $pop41 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push16=, $pop15, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $9, $pop40 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $8, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $10, $pop39 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $11, $pop38 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push22=, $pop21, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $9, $pop62 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $12, $pop37 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push24=, $pop23, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $13, $pop36 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push26=, $pop25, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push27=, $10, $pop61 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $14, $pop35 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push28=, $pop27, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push31=, $11, $pop60 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop34 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $1 +; 
NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $16, $pop33 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push32=, $pop31, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push37=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $12, $pop59 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push39=, $13, $pop58 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push40=, $pop39, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push43=, $14, $pop57 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push44=, $pop43, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push47=, $15, $pop56 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $16, $pop55 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push52=, $pop51, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, @@ -4440,123 +3692,101 @@ define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128: .functype shr_u_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push2=, $9, $pop0 -; NO-SIMD128-NEXT: i32.const $push101=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop101 -; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push100=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $5, $pop100 -; NO-SIMD128-NEXT: i32.const $push99=, 255 -; NO-SIMD128-NEXT: i32.and $push4=, $21, $pop99 -; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push98=, 255 -; NO-SIMD128-NEXT: i32.and $push8=, $3, $pop98 -; NO-SIMD128-NEXT: i32.const $push97=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $19, $pop97 -; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push96=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $2, $pop96 -; NO-SIMD128-NEXT: i32.const $push95=, 255 -; NO-SIMD128-NEXT: i32.and $push10=, $18, $pop95 -; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop12 -; NO-SIMD128-NEXT: i32.const $push94=, 255 -; NO-SIMD128-NEXT: i32.and 
$push14=, $1, $pop94 -; NO-SIMD128-NEXT: i32.const $push93=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $17, $pop93 -; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 15 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push92=, 255 -; NO-SIMD128-NEXT: i32.and $push17=, $16, $pop92 -; NO-SIMD128-NEXT: i32.const $push91=, 255 -; NO-SIMD128-NEXT: i32.and $push16=, $32, $pop91 -; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push24=, 14 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.const $push90=, 255 -; NO-SIMD128-NEXT: i32.and $push22=, $15, $pop90 -; NO-SIMD128-NEXT: i32.const $push89=, 255 -; NO-SIMD128-NEXT: i32.and $push21=, $31, $pop89 -; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push29=, 13 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push88=, 255 -; NO-SIMD128-NEXT: i32.and $push27=, $14, $pop88 -; NO-SIMD128-NEXT: i32.const $push87=, 255 -; NO-SIMD128-NEXT: i32.and $push26=, $30, $pop87 -; NO-SIMD128-NEXT: i32.shr_u $push28=, $pop27, $pop26 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push34=, 12 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.const $push86=, 255 -; NO-SIMD128-NEXT: i32.and $push32=, $13, $pop86 -; NO-SIMD128-NEXT: i32.const $push85=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $29, $pop85 -; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push39=, 11 -; NO-SIMD128-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-NEXT: i32.const $push84=, 255 -; NO-SIMD128-NEXT: i32.and $push37=, $12, $pop84 -; NO-SIMD128-NEXT: i32.const $push83=, 255 -; NO-SIMD128-NEXT: i32.and $push36=, $28, $pop83 -; NO-SIMD128-NEXT: i32.shr_u $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-NEXT: i32.const $push44=, 10 -; NO-SIMD128-NEXT: i32.add $push45=, $0, $pop44 -; NO-SIMD128-NEXT: i32.const $push82=, 255 -; NO-SIMD128-NEXT: i32.and $push42=, $11, $pop82 -; NO-SIMD128-NEXT: i32.const $push81=, 255 -; NO-SIMD128-NEXT: i32.and $push41=, $27, $pop81 -; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop41 -; NO-SIMD128-NEXT: i32.store8 0($pop45), $pop43 -; NO-SIMD128-NEXT: i32.const $push49=, 9 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.const $push80=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $10, $pop80 +; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0 ; NO-SIMD128-NEXT: i32.const $push79=, 255 -; NO-SIMD128-NEXT: i32.and $push46=, $26, $pop79 -; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $pop46 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push54=, 7 -; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop79 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push78=, 255 -; NO-SIMD128-NEXT: i32.and $push52=, $8, $pop78 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop78 ; NO-SIMD128-NEXT: i32.const $push77=, 255 -; NO-SIMD128-NEXT: i32.and $push51=, $24, $pop77 -; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop51 -; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-NEXT: i32.const 
$push59=, 6 -; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 +; NO-SIMD128-NEXT: i32.and $push4=, $31, $pop77 +; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push76=, 255 -; NO-SIMD128-NEXT: i32.and $push57=, $7, $pop76 +; NO-SIMD128-NEXT: i32.and $push8=, $14, $pop76 ; NO-SIMD128-NEXT: i32.const $push75=, 255 -; NO-SIMD128-NEXT: i32.and $push56=, $23, $pop75 -; NO-SIMD128-NEXT: i32.shr_u $push58=, $pop57, $pop56 -; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-NEXT: i32.const $push64=, 5 -; NO-SIMD128-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-NEXT: i32.and $push7=, $30, $pop75 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop9 ; NO-SIMD128-NEXT: i32.const $push74=, 255 -; NO-SIMD128-NEXT: i32.and $push62=, $6, $pop74 +; NO-SIMD128-NEXT: i32.and $push11=, $13, $pop74 ; NO-SIMD128-NEXT: i32.const $push73=, 255 -; NO-SIMD128-NEXT: i32.and $push61=, $22, $pop73 -; NO-SIMD128-NEXT: i32.shr_u $push63=, $pop62, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop65), $pop63 -; NO-SIMD128-NEXT: i32.const $push69=, 3 -; NO-SIMD128-NEXT: i32.add $push70=, $0, $pop69 +; NO-SIMD128-NEXT: i32.and $push10=, $29, $pop73 +; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push72=, 255 -; NO-SIMD128-NEXT: i32.and $push67=, $4, $pop72 +; NO-SIMD128-NEXT: i32.and $push14=, $12, $pop72 ; NO-SIMD128-NEXT: i32.const $push71=, 255 -; NO-SIMD128-NEXT: i32.and $push66=, $20, $pop71 -; NO-SIMD128-NEXT: i32.shr_u $push68=, $pop67, $pop66 -; NO-SIMD128-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-NEXT: i32.and $push13=, $28, $pop71 +; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push70=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $11, $pop70 +; NO-SIMD128-NEXT: i32.const $push69=, 255 +; NO-SIMD128-NEXT: i32.and $push16=, $27, $pop69 +; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push68=, 255 +; NO-SIMD128-NEXT: i32.and $push20=, $10, $pop68 +; NO-SIMD128-NEXT: i32.const $push67=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $26, $pop67 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push66=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $9, $pop66 +; NO-SIMD128-NEXT: i32.const $push65=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $25, $pop65 +; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push64=, 255 +; NO-SIMD128-NEXT: i32.and $push26=, $8, $pop64 +; NO-SIMD128-NEXT: i32.const $push63=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $24, $pop63 +; NO-SIMD128-NEXT: i32.shr_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop27 +; NO-SIMD128-NEXT: i32.const $push62=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $7, $pop62 +; NO-SIMD128-NEXT: i32.const $push61=, 255 +; NO-SIMD128-NEXT: i32.and $push28=, $23, $pop61 +; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop30 +; NO-SIMD128-NEXT: i32.const $push60=, 255 +; NO-SIMD128-NEXT: i32.and $push32=, $6, $pop60 +; NO-SIMD128-NEXT: i32.const $push59=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $22, $pop59 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop31 +; NO-SIMD128-NEXT: 
i32.store8 5($0), $pop33 +; NO-SIMD128-NEXT: i32.const $push58=, 255 +; NO-SIMD128-NEXT: i32.and $push35=, $5, $pop58 +; NO-SIMD128-NEXT: i32.const $push57=, 255 +; NO-SIMD128-NEXT: i32.and $push34=, $21, $pop57 +; NO-SIMD128-NEXT: i32.shr_u $push36=, $pop35, $pop34 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop36 +; NO-SIMD128-NEXT: i32.const $push56=, 255 +; NO-SIMD128-NEXT: i32.and $push38=, $4, $pop56 +; NO-SIMD128-NEXT: i32.const $push55=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $20, $pop55 +; NO-SIMD128-NEXT: i32.shr_u $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop39 +; NO-SIMD128-NEXT: i32.const $push54=, 255 +; NO-SIMD128-NEXT: i32.and $push41=, $3, $pop54 +; NO-SIMD128-NEXT: i32.const $push53=, 255 +; NO-SIMD128-NEXT: i32.and $push40=, $19, $pop53 +; NO-SIMD128-NEXT: i32.shr_u $push42=, $pop41, $pop40 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop42 +; NO-SIMD128-NEXT: i32.const $push52=, 255 +; NO-SIMD128-NEXT: i32.and $push44=, $2, $pop52 +; NO-SIMD128-NEXT: i32.const $push51=, 255 +; NO-SIMD128-NEXT: i32.and $push43=, $18, $pop51 +; NO-SIMD128-NEXT: i32.shr_u $push45=, $pop44, $pop43 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop45 +; NO-SIMD128-NEXT: i32.const $push50=, 255 +; NO-SIMD128-NEXT: i32.and $push47=, $1, $pop50 +; NO-SIMD128-NEXT: i32.const $push49=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $17, $pop49 +; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $pop46 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop48 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_vec_v16i8: @@ -4564,122 +3794,100 @@ define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop79 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop100 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop99 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop97 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop96 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop95 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop94 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop93 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: 
i32.const $push92=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $22, $pop91 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $23, $pop89 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop88 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $24, $pop87 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop86 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop85 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop33 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $10, $pop84 -; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $26, $pop83 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push38=, $pop37, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push45=, $0, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop82 -; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop81 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push43=, $pop42, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop45), $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push47=, $12, $pop80 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push46=, $28, $pop79 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54 ; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push52=, $13, $pop78 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop78 ; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $29, $pop77 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push60=, $0, $pop59 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop77 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 -; NO-SIMD128-FAST-NEXT: i32.and 
$push57=, $14, $pop76 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop76 ; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push56=, $30, $pop75 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push58=, $pop57, $pop56 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop75 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push62=, $15, $pop74 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop74 ; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push61=, $31, $pop73 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop61 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop63 -; NO-SIMD128-FAST-NEXT: i32.const $push69=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push70=, $0, $pop69 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop73 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop12 ; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push67=, $16, $pop72 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $5, $pop72 ; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push66=, $32, $pop71 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push68=, $pop67, $pop66 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $21, $pop71 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $6, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $22, $pop69 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop67 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $8, $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $24, $pop65 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop64 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop63 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $10, $pop62 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $26, $pop61 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $11, $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $27, $pop59 +; NO-SIMD128-FAST-NEXT: 
i32.shr_u $push33=, $pop32, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $12, $pop58 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $28, $pop57 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $13, $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $29, $pop55 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $14, $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $30, $pop53 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push42=, $pop41, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push44=, $15, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $31, $pop51 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $16, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $32, $pop49 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop48 ; NO-SIMD128-FAST-NEXT: return %a = lshr <16 x i8> %v, %x ret <16 x i8> %a @@ -4701,60 +3909,38 @@ define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: and_v16i8: ; NO-SIMD128: .functype and_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.and $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.and $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.and $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.and $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.and $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.and $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.and $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.and $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.and $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.and $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add 
$push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.and $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.and $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.and $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.and $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.and $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.and $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.and $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.and $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.and $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.and $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.and $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.and $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.and $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.and $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.and $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.and $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.and $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.and $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.and $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.and $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.and $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: and_v16i8: @@ -4766,54 +3952,32 @@ define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; 
NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.and $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.and $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = and <16 x i8> %x, %y ret <16 x i8> %a @@ -4835,60 +3999,38 @@ define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: or_v16i8: ; NO-SIMD128: .functype or_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.or $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: 
i32.or $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.or $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.or $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.or $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.or $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.or $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.or $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.or $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.or $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.or $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.or $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.or $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.or $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.or $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.or $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.or $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.or $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.or $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.or $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.or $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.or $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.or $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.or $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.or $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.or $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.or $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.or $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.or $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: 
i32.or $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.or $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.or $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: or_v16i8: @@ -4900,54 +4042,32 @@ define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.or $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.or $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.or $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.or $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.or $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.or $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.or $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.or $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.or $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.or $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.or $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.or $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.or $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.or $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.or $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.or $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.or $push8=, $9, $25 +; 
NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.or $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.or $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.or $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.or $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.or $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.or $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = or <16 x i8> %x, %y ret <16 x i8> %a @@ -4969,60 +4089,38 @@ define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: xor_v16i8: ; NO-SIMD128: .functype xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.xor $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.xor $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.xor $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.xor $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.xor $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.xor $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.xor $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.xor $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.xor $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.xor $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.xor $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.xor $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.xor $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.xor $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.xor $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add 
$push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.xor $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.xor $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.xor $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.xor $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.xor $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.xor $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.xor $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.xor $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.xor $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.xor $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.xor $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.xor $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.xor $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: xor_v16i8: @@ -5034,54 +4132,32 @@ define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; 
NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = xor <16 x i8> %x, %y ret <16 x i8> %a @@ -5104,75 +4180,53 @@ define <16 x i8> @not_v16i8(<16 x i8> %x) { ; NO-SIMD128: .functype not_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $9, $pop0 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push53=, -1 -; NO-SIMD128-NEXT: i32.xor $push2=, $5, $pop53 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push52=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $3, $pop52 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push51=, -1 -; NO-SIMD128-NEXT: i32.xor $push4=, $2, $pop51 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push50=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $1, $pop50 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push7=, 15 -; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-NEXT: i32.const $push49=, -1 -; NO-SIMD128-NEXT: i32.xor $push6=, $16, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.const $push48=, -1 -; NO-SIMD128-NEXT: i32.xor $push9=, $15, $pop48 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const 
$push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push47=, -1 -; NO-SIMD128-NEXT: i32.xor $push12=, $14, $pop47 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 12 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push46=, -1 -; NO-SIMD128-NEXT: i32.xor $push15=, $13, $pop46 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 11 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push45=, -1 -; NO-SIMD128-NEXT: i32.xor $push18=, $12, $pop45 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 10 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.const $push44=, -1 -; NO-SIMD128-NEXT: i32.xor $push21=, $11, $pop44 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push25=, 9 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push43=, -1 -; NO-SIMD128-NEXT: i32.xor $push24=, $10, $pop43 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push28=, 7 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.const $push42=, -1 -; NO-SIMD128-NEXT: i32.xor $push27=, $8, $pop42 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.const $push41=, -1 -; NO-SIMD128-NEXT: i32.xor $push30=, $7, $pop41 -; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-NEXT: i32.const $push34=, 5 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.const $push40=, -1 -; NO-SIMD128-NEXT: i32.xor $push33=, $6, $pop40 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push37=, 3 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push39=, -1 -; NO-SIMD128-NEXT: i32.xor $push36=, $4, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push31=, -1 +; NO-SIMD128-NEXT: i32.xor $push2=, $15, $pop31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push30=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $14, $pop30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push29=, -1 +; NO-SIMD128-NEXT: i32.xor $push4=, $13, $pop29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push28=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $12, $pop28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push27=, -1 +; NO-SIMD128-NEXT: i32.xor $push6=, $11, $pop27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push26=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $10, $pop26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push25=, -1 +; NO-SIMD128-NEXT: i32.xor $push8=, $9, $pop25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push24=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $8, $pop24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push23=, -1 +; NO-SIMD128-NEXT: i32.xor $push10=, $7, $pop23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push22=, -1 +; NO-SIMD128-NEXT: i32.xor $push11=, $6, $pop22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop11 +; NO-SIMD128-NEXT: i32.const 
$push21=, -1 +; NO-SIMD128-NEXT: i32.xor $push12=, $5, $pop21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push20=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $4, $pop20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop13 +; NO-SIMD128-NEXT: i32.const $push19=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $3, $pop19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push15=, $2, $pop18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push16=, $1, $pop17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: not_v16i8: @@ -5181,73 +4235,51 @@ define <16 x i8> @not_v16i8(<16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop31 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop30 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $5, $pop50 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $6, $pop49 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $7, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $8, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $9, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $10, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $11, $pop44 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $12, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 -; NO-SIMD128-FAST-NEXT: 
i32.const $push27=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $13, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $14, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $15, $pop40 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $16, $pop39 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $4, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $5, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $7, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $8, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $9, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $10, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $11, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $12, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $14, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $15, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $16, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %a = xor <16 x i8> %x, @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128: .functype andnot_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $25, $pop0 -; NO-SIMD128-NEXT: i32.and $push2=, $9, $pop1 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push69=, -1 -; 
NO-SIMD128-NEXT: i32.xor $push3=, $21, $pop69 -; NO-SIMD128-NEXT: i32.and $push4=, $5, $pop3 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push68=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $19, $pop68 -; NO-SIMD128-NEXT: i32.and $push6=, $3, $pop5 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push67=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $18, $pop67 -; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop7 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push66=, -1 -; NO-SIMD128-NEXT: i32.xor $push9=, $17, $pop66 -; NO-SIMD128-NEXT: i32.and $push10=, $1, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push13=, 15 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push65=, -1 -; NO-SIMD128-NEXT: i32.xor $push11=, $32, $pop65 -; NO-SIMD128-NEXT: i32.and $push12=, $16, $pop11 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push17=, 14 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push64=, -1 -; NO-SIMD128-NEXT: i32.xor $push15=, $31, $pop64 -; NO-SIMD128-NEXT: i32.and $push16=, $15, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push21=, 13 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.const $push63=, -1 -; NO-SIMD128-NEXT: i32.xor $push19=, $30, $pop63 -; NO-SIMD128-NEXT: i32.and $push20=, $14, $pop19 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push25=, 12 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push62=, -1 -; NO-SIMD128-NEXT: i32.xor $push23=, $29, $pop62 -; NO-SIMD128-NEXT: i32.and $push24=, $13, $pop23 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push61=, -1 -; NO-SIMD128-NEXT: i32.xor $push27=, $28, $pop61 -; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push33=, 10 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.const $push60=, -1 -; NO-SIMD128-NEXT: i32.xor $push31=, $27, $pop60 -; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop31 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push37=, 9 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push59=, -1 -; NO-SIMD128-NEXT: i32.xor $push35=, $26, $pop59 -; NO-SIMD128-NEXT: i32.and $push36=, $10, $pop35 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const $push58=, -1 -; NO-SIMD128-NEXT: i32.xor $push39=, $24, $pop58 -; NO-SIMD128-NEXT: i32.and $push40=, $8, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push45=, 6 -; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-NEXT: i32.const $push57=, -1 -; NO-SIMD128-NEXT: i32.xor $push43=, $23, $pop57 -; NO-SIMD128-NEXT: i32.and $push44=, $7, $pop43 -; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, -1 -; NO-SIMD128-NEXT: i32.xor $push47=, $22, $pop56 -; NO-SIMD128-NEXT: i32.and $push48=, $6, $pop47 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const 
$push53=, 3 -; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-NEXT: i32.const $push55=, -1 -; NO-SIMD128-NEXT: i32.xor $push51=, $20, $pop55 -; NO-SIMD128-NEXT: i32.and $push52=, $4, $pop51 -; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: i32.xor $push1=, $32, $pop0 +; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop1 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push47=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $31, $pop47 +; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop3 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push46=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $30, $pop46 +; NO-SIMD128-NEXT: i32.and $push6=, $14, $pop5 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push45=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $29, $pop45 +; NO-SIMD128-NEXT: i32.and $push8=, $13, $pop7 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push44=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $28, $pop44 +; NO-SIMD128-NEXT: i32.and $push10=, $12, $pop9 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push43=, -1 +; NO-SIMD128-NEXT: i32.xor $push11=, $27, $pop43 +; NO-SIMD128-NEXT: i32.and $push12=, $11, $pop11 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push42=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $26, $pop42 +; NO-SIMD128-NEXT: i32.and $push14=, $10, $pop13 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push41=, -1 +; NO-SIMD128-NEXT: i32.xor $push15=, $25, $pop41 +; NO-SIMD128-NEXT: i32.and $push16=, $9, $pop15 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, -1 +; NO-SIMD128-NEXT: i32.xor $push17=, $24, $pop40 +; NO-SIMD128-NEXT: i32.and $push18=, $8, $pop17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push39=, -1 +; NO-SIMD128-NEXT: i32.xor $push19=, $23, $pop39 +; NO-SIMD128-NEXT: i32.and $push20=, $7, $pop19 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, -1 +; NO-SIMD128-NEXT: i32.xor $push21=, $22, $pop38 +; NO-SIMD128-NEXT: i32.and $push22=, $6, $pop21 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push37=, -1 +; NO-SIMD128-NEXT: i32.xor $push23=, $21, $pop37 +; NO-SIMD128-NEXT: i32.and $push24=, $5, $pop23 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, -1 +; NO-SIMD128-NEXT: i32.xor $push25=, $20, $pop36 +; NO-SIMD128-NEXT: i32.and $push26=, $4, $pop25 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop26 +; NO-SIMD128-NEXT: i32.const $push35=, -1 +; NO-SIMD128-NEXT: i32.xor $push27=, $19, $pop35 +; NO-SIMD128-NEXT: i32.and $push28=, $3, $pop27 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, -1 +; NO-SIMD128-NEXT: i32.xor $push29=, $18, $pop34 +; NO-SIMD128-NEXT: i32.and $push30=, $2, $pop29 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop30 +; NO-SIMD128-NEXT: i32.const $push33=, -1 +; NO-SIMD128-NEXT: i32.xor $push31=, $17, $pop33 +; NO-SIMD128-NEXT: i32.and $push32=, $1, $pop31 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: andnot_v16i8: @@ -5368,88 +4378,66 @@ define <16 x i8> @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $17, $pop0 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1 -; 
NO-SIMD128-FAST-NEXT: i32.xor $push3=, $18, $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $18, $pop47 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $19, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $19, $pop46 ; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $20, $pop67 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $21, $pop66 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $5, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $22, $pop65 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $6, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $23, $pop64 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $24, $pop63 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $8, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $25, $pop62 -; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $26, $pop61 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $10, $pop29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $27, $pop60 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $11, $pop33 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $28, $pop59 -; NO-SIMD128-FAST-NEXT: i32.and $push38=, $12, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $29, $pop58 -; NO-SIMD128-FAST-NEXT: i32.and $push42=, $13, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 -; NO-SIMD128-FAST-NEXT: i32.add 
$push44=, $0, $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $30, $pop57 -; NO-SIMD128-FAST-NEXT: i32.and $push46=, $14, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $31, $pop56 -; NO-SIMD128-FAST-NEXT: i32.and $push50=, $15, $pop49 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push53=, $32, $pop55 -; NO-SIMD128-FAST-NEXT: i32.and $push54=, $16, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $20, $pop45 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $21, $pop44 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $5, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $22, $pop43 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $23, $pop42 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $7, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $24, $pop41 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $8, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $25, $pop40 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $10, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $27, $pop38 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $11, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $28, $pop37 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $12, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $29, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $13, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push27=, $30, $pop35 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $14, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $31, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $15, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $32, $pop33 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $16, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), 
$pop32 ; NO-SIMD128-FAST-NEXT: return %inv_y = xor <16 x i8> %y, @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-LABEL: bitselect_v16i8: ; NO-SIMD128: .functype bitselect_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 15 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.and $push0=, $16, $32 ; NO-SIMD128-NEXT: i32.const $push1=, -1 ; NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop1 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $48 ; NO-SIMD128-NEXT: i32.or $push4=, $pop0, $pop3 -; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.and $push7=, $15, $31 -; NO-SIMD128-NEXT: i32.const $push101=, -1 -; NO-SIMD128-NEXT: i32.xor $push8=, $15, $pop101 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $47 -; NO-SIMD128-NEXT: i32.or $push10=, $pop7, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 13 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.and $push13=, $14, $30 -; NO-SIMD128-NEXT: i32.const $push100=, -1 -; NO-SIMD128-NEXT: i32.xor $push14=, $14, $pop100 -; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $46 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop4 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $31 +; NO-SIMD128-NEXT: i32.const $push79=, -1 +; NO-SIMD128-NEXT: i32.xor $push6=, $15, $pop79 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $47 +; NO-SIMD128-NEXT: i32.or $push8=, $pop5, $pop7 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop8 +; NO-SIMD128-NEXT: i32.and $push9=, $14, $30 +; NO-SIMD128-NEXT: i32.const $push78=, -1 +; NO-SIMD128-NEXT: i32.xor $push10=, $14, $pop78 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $46 +; NO-SIMD128-NEXT: i32.or $push12=, $pop9, $pop11 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop12 +; NO-SIMD128-NEXT: i32.and $push13=, $13, $29 +; NO-SIMD128-NEXT: i32.const $push77=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $13, $pop77 +; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $45 ; NO-SIMD128-NEXT: i32.or $push16=, $pop13, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push23=, 12 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.and $push19=, $13, $29 -; NO-SIMD128-NEXT: i32.const $push99=, -1 -; NO-SIMD128-NEXT: i32.xor $push20=, $13, $pop99 -; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $45 -; NO-SIMD128-NEXT: i32.or $push22=, $pop19, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.and $push25=, $12, $28 -; NO-SIMD128-NEXT: i32.const $push98=, -1 -; NO-SIMD128-NEXT: i32.xor $push26=, $12, $pop98 -; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $44 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop16 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $28 +; NO-SIMD128-NEXT: i32.const $push76=, -1 +; NO-SIMD128-NEXT: i32.xor $push18=, $12, $pop76 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $44 +; NO-SIMD128-NEXT: i32.or $push20=, $pop17, $pop19 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop20 +; NO-SIMD128-NEXT: i32.and $push21=, $11, $27 +; NO-SIMD128-NEXT: i32.const $push75=, -1 +; NO-SIMD128-NEXT: i32.xor $push22=, $11, $pop75 +; 
NO-SIMD128-NEXT: i32.and $push23=, $pop22, $43 +; NO-SIMD128-NEXT: i32.or $push24=, $pop21, $pop23 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop24 +; NO-SIMD128-NEXT: i32.and $push25=, $10, $26 +; NO-SIMD128-NEXT: i32.const $push74=, -1 +; NO-SIMD128-NEXT: i32.xor $push26=, $10, $pop74 +; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $42 ; NO-SIMD128-NEXT: i32.or $push28=, $pop25, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push35=, 10 -; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-NEXT: i32.and $push31=, $11, $27 -; NO-SIMD128-NEXT: i32.const $push97=, -1 -; NO-SIMD128-NEXT: i32.xor $push32=, $11, $pop97 -; NO-SIMD128-NEXT: i32.and $push33=, $pop32, $43 -; NO-SIMD128-NEXT: i32.or $push34=, $pop31, $pop33 -; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 -; NO-SIMD128-NEXT: i32.const $push41=, 9 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.and $push37=, $10, $26 -; NO-SIMD128-NEXT: i32.const $push96=, -1 -; NO-SIMD128-NEXT: i32.xor $push38=, $10, $pop96 -; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $42 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop28 +; NO-SIMD128-NEXT: i32.and $push29=, $9, $25 +; NO-SIMD128-NEXT: i32.const $push73=, -1 +; NO-SIMD128-NEXT: i32.xor $push30=, $9, $pop73 +; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $41 +; NO-SIMD128-NEXT: i32.or $push32=, $pop29, $pop31 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop32 +; NO-SIMD128-NEXT: i32.and $push33=, $8, $24 +; NO-SIMD128-NEXT: i32.const $push72=, -1 +; NO-SIMD128-NEXT: i32.xor $push34=, $8, $pop72 +; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $40 +; NO-SIMD128-NEXT: i32.or $push36=, $pop33, $pop35 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop36 +; NO-SIMD128-NEXT: i32.and $push37=, $7, $23 +; NO-SIMD128-NEXT: i32.const $push71=, -1 +; NO-SIMD128-NEXT: i32.xor $push38=, $7, $pop71 +; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $39 ; NO-SIMD128-NEXT: i32.or $push40=, $pop37, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.and $push43=, $9, $25 -; NO-SIMD128-NEXT: i32.const $push95=, -1 -; NO-SIMD128-NEXT: i32.xor $push44=, $9, $pop95 -; NO-SIMD128-NEXT: i32.and $push45=, $pop44, $41 -; NO-SIMD128-NEXT: i32.or $push46=, $pop43, $pop45 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 -; NO-SIMD128-NEXT: i32.const $push51=, 7 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.and $push47=, $8, $24 -; NO-SIMD128-NEXT: i32.const $push94=, -1 -; NO-SIMD128-NEXT: i32.xor $push48=, $8, $pop94 -; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $40 -; NO-SIMD128-NEXT: i32.or $push50=, $pop47, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.const $push57=, 6 -; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 -; NO-SIMD128-NEXT: i32.and $push53=, $7, $23 -; NO-SIMD128-NEXT: i32.const $push93=, -1 -; NO-SIMD128-NEXT: i32.xor $push54=, $7, $pop93 -; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop40 +; NO-SIMD128-NEXT: i32.and $push41=, $6, $22 +; NO-SIMD128-NEXT: i32.const $push70=, -1 +; NO-SIMD128-NEXT: i32.xor $push42=, $6, $pop70 +; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $38 +; NO-SIMD128-NEXT: i32.or $push44=, $pop41, $pop43 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop44 +; NO-SIMD128-NEXT: i32.and $push45=, $5, $21 +; NO-SIMD128-NEXT: i32.const $push69=, -1 +; NO-SIMD128-NEXT: i32.xor $push46=, $5, $pop69 +; NO-SIMD128-NEXT: i32.and $push47=, $pop46, $37 +; NO-SIMD128-NEXT: i32.or $push48=, $pop45, $pop47 +; NO-SIMD128-NEXT: i32.store8 4($0), 
$pop48 +; NO-SIMD128-NEXT: i32.and $push49=, $4, $20 +; NO-SIMD128-NEXT: i32.const $push68=, -1 +; NO-SIMD128-NEXT: i32.xor $push50=, $4, $pop68 +; NO-SIMD128-NEXT: i32.and $push51=, $pop50, $36 +; NO-SIMD128-NEXT: i32.or $push52=, $pop49, $pop51 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop52 +; NO-SIMD128-NEXT: i32.and $push53=, $3, $19 +; NO-SIMD128-NEXT: i32.const $push67=, -1 +; NO-SIMD128-NEXT: i32.xor $push54=, $3, $pop67 +; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $35 ; NO-SIMD128-NEXT: i32.or $push56=, $pop53, $pop55 -; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 -; NO-SIMD128-NEXT: i32.const $push63=, 5 -; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-NEXT: i32.and $push59=, $6, $22 -; NO-SIMD128-NEXT: i32.const $push92=, -1 -; NO-SIMD128-NEXT: i32.xor $push60=, $6, $pop92 -; NO-SIMD128-NEXT: i32.and $push61=, $pop60, $38 -; NO-SIMD128-NEXT: i32.or $push62=, $pop59, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-NEXT: i32.and $push65=, $5, $21 -; NO-SIMD128-NEXT: i32.const $push91=, -1 -; NO-SIMD128-NEXT: i32.xor $push66=, $5, $pop91 -; NO-SIMD128-NEXT: i32.and $push67=, $pop66, $37 -; NO-SIMD128-NEXT: i32.or $push68=, $pop65, $pop67 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 -; NO-SIMD128-NEXT: i32.const $push73=, 3 -; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-NEXT: i32.and $push69=, $4, $20 -; NO-SIMD128-NEXT: i32.const $push90=, -1 -; NO-SIMD128-NEXT: i32.xor $push70=, $4, $pop90 -; NO-SIMD128-NEXT: i32.and $push71=, $pop70, $36 -; NO-SIMD128-NEXT: i32.or $push72=, $pop69, $pop71 -; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-NEXT: i32.and $push75=, $3, $19 -; NO-SIMD128-NEXT: i32.const $push89=, -1 -; NO-SIMD128-NEXT: i32.xor $push76=, $3, $pop89 -; NO-SIMD128-NEXT: i32.and $push77=, $pop76, $35 -; NO-SIMD128-NEXT: i32.or $push78=, $pop75, $pop77 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 -; NO-SIMD128-NEXT: i32.and $push79=, $2, $18 -; NO-SIMD128-NEXT: i32.const $push88=, -1 -; NO-SIMD128-NEXT: i32.xor $push80=, $2, $pop88 -; NO-SIMD128-NEXT: i32.and $push81=, $pop80, $34 -; NO-SIMD128-NEXT: i32.or $push82=, $pop79, $pop81 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 -; NO-SIMD128-NEXT: i32.and $push83=, $1, $17 -; NO-SIMD128-NEXT: i32.const $push87=, -1 -; NO-SIMD128-NEXT: i32.xor $push84=, $1, $pop87 -; NO-SIMD128-NEXT: i32.and $push85=, $pop84, $33 -; NO-SIMD128-NEXT: i32.or $push86=, $pop83, $pop85 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop56 +; NO-SIMD128-NEXT: i32.and $push57=, $2, $18 +; NO-SIMD128-NEXT: i32.const $push66=, -1 +; NO-SIMD128-NEXT: i32.xor $push58=, $2, $pop66 +; NO-SIMD128-NEXT: i32.and $push59=, $pop58, $34 +; NO-SIMD128-NEXT: i32.or $push60=, $pop57, $pop59 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop60 +; NO-SIMD128-NEXT: i32.and $push61=, $1, $17 +; NO-SIMD128-NEXT: i32.const $push65=, -1 +; NO-SIMD128-NEXT: i32.xor $push62=, $1, $pop65 +; NO-SIMD128-NEXT: i32.and $push63=, $pop62, $33 +; NO-SIMD128-NEXT: i32.or $push64=, $pop61, $pop63 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop64 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_v16i8: @@ -5607,117 +4573,95 @@ define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $18 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop101 +; NO-SIMD128-FAST-NEXT: 
i32.const $push79=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop79 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $34 ; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $19 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop100 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop78 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $35 ; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop99 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop77 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $36 ; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $5, $pop98 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $37 -; NO-SIMD128-FAST-NEXT: i32.or $push22=, $pop19, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $6, $pop97 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $38 -; NO-SIMD128-FAST-NEXT: i32.or $push26=, $pop23, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $7, $pop96 -; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $39 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $5, $pop76 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $37 +; NO-SIMD128-FAST-NEXT: i32.or $push20=, $pop17, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $6, $pop75 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $38 +; NO-SIMD128-FAST-NEXT: i32.or $push24=, $pop21, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $7, $pop74 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $39 +; NO-SIMD128-FAST-NEXT: i32.or $push28=, $pop25, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $8, $pop73 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $40 ; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const 
$push39=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $8, $pop95 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $40 -; NO-SIMD128-FAST-NEXT: i32.or $push38=, $pop35, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $9, $pop94 -; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $41 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $9, $pop72 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $pop34, $41 +; NO-SIMD128-FAST-NEXT: i32.or $push36=, $pop33, $pop35 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $10, $pop71 +; NO-SIMD128-FAST-NEXT: i32.and $push39=, $pop38, $42 +; NO-SIMD128-FAST-NEXT: i32.or $push40=, $pop37, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $11, $pop70 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $43 ; NO-SIMD128-FAST-NEXT: i32.or $push44=, $pop41, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.and $push45=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $10, $pop93 -; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $42 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $12, $pop69 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $44 ; NO-SIMD128-FAST-NEXT: i32.or $push48=, $pop45, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $11, $pop92 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $43 -; NO-SIMD128-FAST-NEXT: i32.or $push54=, $pop51, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.and $push57=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $12, $pop91 -; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $44 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push50=, $13, $pop68 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $pop50, $45 +; NO-SIMD128-FAST-NEXT: i32.or $push52=, $pop49, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $14, $pop67 +; 
NO-SIMD128-FAST-NEXT: i32.and $push55=, $pop54, $46 +; NO-SIMD128-FAST-NEXT: i32.or $push56=, $pop53, $pop55 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $15, $pop66 +; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $47 ; NO-SIMD128-FAST-NEXT: i32.or $push60=, $pop57, $pop59 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $13, $pop90 -; NO-SIMD128-FAST-NEXT: i32.and $push65=, $pop64, $45 -; NO-SIMD128-FAST-NEXT: i32.or $push66=, $pop63, $pop65 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-FAST-NEXT: i32.and $push69=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push70=, $14, $pop89 -; NO-SIMD128-FAST-NEXT: i32.and $push71=, $pop70, $46 -; NO-SIMD128-FAST-NEXT: i32.or $push72=, $pop69, $pop71 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 -; NO-SIMD128-FAST-NEXT: i32.and $push75=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push76=, $15, $pop88 -; NO-SIMD128-FAST-NEXT: i32.and $push77=, $pop76, $47 -; NO-SIMD128-FAST-NEXT: i32.or $push78=, $pop75, $pop77 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 -; NO-SIMD128-FAST-NEXT: i32.and $push81=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push82=, $16, $pop87 -; NO-SIMD128-FAST-NEXT: i32.and $push83=, $pop82, $48 -; NO-SIMD128-FAST-NEXT: i32.or $push84=, $pop81, $pop83 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60 +; NO-SIMD128-FAST-NEXT: i32.and $push61=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $16, $pop65 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $48 +; NO-SIMD128-FAST-NEXT: i32.or $push64=, $pop61, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64 ; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <16 x i8> %c, %v1 %inv_mask = xor <16 x i8> %c, @@ -5746,92 +4690,70 @@ define <16 x i8> @bitselect_xor_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2 ; NO-SIMD128-LABEL: bitselect_xor_v16i8: ; NO-SIMD128: .functype bitselect_xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push3=, 15 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 ; NO-SIMD128-NEXT: i32.xor $push0=, $32, $48 ; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $16 ; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $48 -; NO-SIMD128-NEXT: i32.store8 0($pop4), $pop2 -; NO-SIMD128-NEXT: i32.const $push8=, 14 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.xor $push5=, $31, $47 -; NO-SIMD128-NEXT: i32.and $push6=, 
$pop5, $15 -; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $47 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.xor $push10=, $30, $46 -; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $14 -; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $46 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push18=, 12 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.xor $push15=, $29, $45 -; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $13 -; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $45 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push23=, 11 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.xor $push20=, $28, $44 -; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $12 -; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $44 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push28=, 10 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.xor $push25=, $27, $43 -; NO-SIMD128-NEXT: i32.and $push26=, $pop25, $11 -; NO-SIMD128-NEXT: i32.xor $push27=, $pop26, $43 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push33=, 9 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.xor $push30=, $26, $42 -; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $10 -; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $42 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.xor $push35=, $25, $41 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $9 -; NO-SIMD128-NEXT: i32.xor $push37=, $pop36, $41 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop37 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.xor $push38=, $24, $40 -; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $8 -; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $40 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push46=, 6 -; NO-SIMD128-NEXT: i32.add $push47=, $0, $pop46 -; NO-SIMD128-NEXT: i32.xor $push43=, $23, $39 -; NO-SIMD128-NEXT: i32.and $push44=, $pop43, $7 -; NO-SIMD128-NEXT: i32.xor $push45=, $pop44, $39 -; NO-SIMD128-NEXT: i32.store8 0($pop47), $pop45 -; NO-SIMD128-NEXT: i32.const $push51=, 5 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.xor $push48=, $22, $38 -; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $6 -; NO-SIMD128-NEXT: i32.xor $push50=, $pop49, $38 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.xor $push53=, $21, $37 -; NO-SIMD128-NEXT: i32.and $push54=, $pop53, $5 -; NO-SIMD128-NEXT: i32.xor $push55=, $pop54, $37 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop55 -; NO-SIMD128-NEXT: i32.const $push59=, 3 -; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 -; NO-SIMD128-NEXT: i32.xor $push56=, $20, $36 -; NO-SIMD128-NEXT: i32.and $push57=, $pop56, $4 -; NO-SIMD128-NEXT: i32.xor $push58=, $pop57, $36 -; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-NEXT: i32.xor $push61=, $19, $35 -; NO-SIMD128-NEXT: i32.and $push62=, $pop61, $3 -; NO-SIMD128-NEXT: i32.xor $push63=, $pop62, $35 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop63 -; NO-SIMD128-NEXT: i32.xor $push64=, $18, $34 -; NO-SIMD128-NEXT: i32.and $push65=, $pop64, $2 -; NO-SIMD128-NEXT: i32.xor $push66=, $pop65, $34 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop66 -; NO-SIMD128-NEXT: i32.xor $push67=, $17, $33 -; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $1 -; 
NO-SIMD128-NEXT: i32.xor $push69=, $pop68, $33 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop69 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $31, $47 +; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $15 +; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $47 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $30, $46 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $14 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $46 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push9=, $29, $45 +; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $13 +; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $45 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop11 +; NO-SIMD128-NEXT: i32.xor $push12=, $28, $44 +; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $12 +; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $44 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop14 +; NO-SIMD128-NEXT: i32.xor $push15=, $27, $43 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $11 +; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $43 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop17 +; NO-SIMD128-NEXT: i32.xor $push18=, $26, $42 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $10 +; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $42 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-NEXT: i32.xor $push21=, $25, $41 +; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $9 +; NO-SIMD128-NEXT: i32.xor $push23=, $pop22, $41 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop23 +; NO-SIMD128-NEXT: i32.xor $push24=, $24, $40 +; NO-SIMD128-NEXT: i32.and $push25=, $pop24, $8 +; NO-SIMD128-NEXT: i32.xor $push26=, $pop25, $40 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop26 +; NO-SIMD128-NEXT: i32.xor $push27=, $23, $39 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $7 +; NO-SIMD128-NEXT: i32.xor $push29=, $pop28, $39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop29 +; NO-SIMD128-NEXT: i32.xor $push30=, $22, $38 +; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $6 +; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $38 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop32 +; NO-SIMD128-NEXT: i32.xor $push33=, $21, $37 +; NO-SIMD128-NEXT: i32.and $push34=, $pop33, $5 +; NO-SIMD128-NEXT: i32.xor $push35=, $pop34, $37 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop35 +; NO-SIMD128-NEXT: i32.xor $push36=, $20, $36 +; NO-SIMD128-NEXT: i32.and $push37=, $pop36, $4 +; NO-SIMD128-NEXT: i32.xor $push38=, $pop37, $36 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop38 +; NO-SIMD128-NEXT: i32.xor $push39=, $19, $35 +; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $3 +; NO-SIMD128-NEXT: i32.xor $push41=, $pop40, $35 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop41 +; NO-SIMD128-NEXT: i32.xor $push42=, $18, $34 +; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $2 +; NO-SIMD128-NEXT: i32.xor $push44=, $pop43, $34 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop44 +; NO-SIMD128-NEXT: i32.xor $push45=, $17, $33 +; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $1 +; NO-SIMD128-NEXT: i32.xor $push47=, $pop46, $33 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop47 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_v16i8: @@ -5849,80 +4771,58 @@ define <16 x i8> @bitselect_xor_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $35 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $20, $36 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4 -; 
NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop10), $pop13 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $21, $37 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $5 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $37 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $22, $38 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $6 -; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $pop20, $38 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $23, $39 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $7 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $39 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop23), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $24, $40 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $pop29, $8 -; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $pop30, $40 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop31 -; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $25, $41 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $pop32, $9 -; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $pop33, $41 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $26, $42 -; NO-SIMD128-FAST-NEXT: i32.and $push38=, $pop37, $10 -; NO-SIMD128-FAST-NEXT: i32.xor $push39=, $pop38, $42 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push41=, $0, $pop40 -; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $27, $43 -; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $11 -; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop41), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $28, $44 -; NO-SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $12 -; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $pop48, $44 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop46), $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push51=, $0, $pop50 -; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $29, $45 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $13 -; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $pop53, $45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop51), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $30, $46 -; NO-SIMD128-FAST-NEXT: i32.and $push58=, $pop57, $14 -; NO-SIMD128-FAST-NEXT: i32.xor $push59=, $pop58, $46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60 -; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $31, $47 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $15 -; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $pop63, $47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop64 -; NO-SIMD128-FAST-NEXT: i32.const $push65=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push66=, $0, $pop65 -; NO-SIMD128-FAST-NEXT: i32.xor $push67=, $32, $48 -; 
NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $16 -; NO-SIMD128-FAST-NEXT: i32.xor $push69=, $pop68, $48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop66), $pop69 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $20, $36 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $36 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $21, $37 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $5 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $pop13, $37 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $22, $38 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $6 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $pop16, $38 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $23, $39 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $7 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $39 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $24, $40 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $8 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $pop22, $40 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $25, $41 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $9 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $41 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.xor $push27=, $26, $42 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $10 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $pop28, $42 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop29 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $27, $43 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $11 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $43 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $28, $44 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $12 +; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $pop34, $44 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop35 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $29, $45 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $13 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $45 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop38 +; NO-SIMD128-FAST-NEXT: i32.xor $push39=, $30, $46 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $14 +; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $pop40, $46 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop41 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $31, $47 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $15 +; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $47 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $32, $48 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $pop45, $16 +; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $pop46, $48 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop47 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <16 x i8> %v1, %v2 %and = and <16 x i8> %xor1, %c @@ -5949,124 +4849,102 @@ define <16 x i8> @bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 ; NO-SIMD128-LABEL: bitselect_xor_reversed_v16i8: ; NO-SIMD128: .functype bitselect_xor_reversed_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 15 -; 
NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.xor $push2=, $32, $48 ; NO-SIMD128-NEXT: i32.const $push0=, -1 ; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $48 -; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.xor $push8=, $31, $47 -; NO-SIMD128-NEXT: i32.const $push101=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $15, $pop101 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $47 -; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 13 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.xor $push14=, $30, $46 -; NO-SIMD128-NEXT: i32.const $push100=, -1 -; NO-SIMD128-NEXT: i32.xor $push13=, $14, $pop100 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push6=, $31, $47 +; NO-SIMD128-NEXT: i32.const $push79=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $15, $pop79 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $47 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push10=, $30, $46 +; NO-SIMD128-NEXT: i32.const $push78=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $14, $pop78 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $46 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop12 +; NO-SIMD128-NEXT: i32.xor $push14=, $29, $45 +; NO-SIMD128-NEXT: i32.const $push77=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $13, $pop77 ; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $46 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push23=, 12 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.xor $push20=, $29, $45 -; NO-SIMD128-NEXT: i32.const $push99=, -1 -; NO-SIMD128-NEXT: i32.xor $push19=, $13, $pop99 -; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $45 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.xor $push26=, $28, $44 -; NO-SIMD128-NEXT: i32.const $push98=, -1 -; NO-SIMD128-NEXT: i32.xor $push25=, $12, $pop98 +; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $45 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop16 +; NO-SIMD128-NEXT: i32.xor $push18=, $28, $44 +; NO-SIMD128-NEXT: i32.const $push76=, -1 +; NO-SIMD128-NEXT: i32.xor $push17=, $12, $pop76 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $44 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop20 +; NO-SIMD128-NEXT: i32.xor $push22=, $27, $43 +; NO-SIMD128-NEXT: i32.const $push75=, -1 +; NO-SIMD128-NEXT: i32.xor $push21=, $11, $pop75 +; NO-SIMD128-NEXT: i32.and $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.xor $push24=, $pop23, $43 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop24 +; NO-SIMD128-NEXT: i32.xor $push26=, $26, $42 +; NO-SIMD128-NEXT: i32.const $push74=, -1 +; NO-SIMD128-NEXT: i32.xor $push25=, $10, $pop74 ; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $pop25 -; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $44 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push35=, 10 -; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-NEXT: i32.xor $push32=, $27, 
$43 -; NO-SIMD128-NEXT: i32.const $push97=, -1 -; NO-SIMD128-NEXT: i32.xor $push31=, $11, $pop97 -; NO-SIMD128-NEXT: i32.and $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.xor $push34=, $pop33, $43 -; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 -; NO-SIMD128-NEXT: i32.const $push41=, 9 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.xor $push38=, $26, $42 -; NO-SIMD128-NEXT: i32.const $push96=, -1 -; NO-SIMD128-NEXT: i32.xor $push37=, $10, $pop96 +; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $42 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop28 +; NO-SIMD128-NEXT: i32.xor $push30=, $25, $41 +; NO-SIMD128-NEXT: i32.const $push73=, -1 +; NO-SIMD128-NEXT: i32.xor $push29=, $9, $pop73 +; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $41 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop32 +; NO-SIMD128-NEXT: i32.xor $push34=, $24, $40 +; NO-SIMD128-NEXT: i32.const $push72=, -1 +; NO-SIMD128-NEXT: i32.xor $push33=, $8, $pop72 +; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $pop33 +; NO-SIMD128-NEXT: i32.xor $push36=, $pop35, $40 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop36 +; NO-SIMD128-NEXT: i32.xor $push38=, $23, $39 +; NO-SIMD128-NEXT: i32.const $push71=, -1 +; NO-SIMD128-NEXT: i32.xor $push37=, $7, $pop71 ; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $42 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.xor $push44=, $25, $41 -; NO-SIMD128-NEXT: i32.const $push95=, -1 -; NO-SIMD128-NEXT: i32.xor $push43=, $9, $pop95 -; NO-SIMD128-NEXT: i32.and $push45=, $pop44, $pop43 -; NO-SIMD128-NEXT: i32.xor $push46=, $pop45, $41 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 -; NO-SIMD128-NEXT: i32.const $push51=, 7 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.xor $push48=, $24, $40 -; NO-SIMD128-NEXT: i32.const $push94=, -1 -; NO-SIMD128-NEXT: i32.xor $push47=, $8, $pop94 -; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $pop47 -; NO-SIMD128-NEXT: i32.xor $push50=, $pop49, $40 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.const $push57=, 6 -; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 -; NO-SIMD128-NEXT: i32.xor $push54=, $23, $39 -; NO-SIMD128-NEXT: i32.const $push93=, -1 -; NO-SIMD128-NEXT: i32.xor $push53=, $7, $pop93 +; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop40 +; NO-SIMD128-NEXT: i32.xor $push42=, $22, $38 +; NO-SIMD128-NEXT: i32.const $push70=, -1 +; NO-SIMD128-NEXT: i32.xor $push41=, $6, $pop70 +; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $pop41 +; NO-SIMD128-NEXT: i32.xor $push44=, $pop43, $38 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop44 +; NO-SIMD128-NEXT: i32.xor $push46=, $21, $37 +; NO-SIMD128-NEXT: i32.const $push69=, -1 +; NO-SIMD128-NEXT: i32.xor $push45=, $5, $pop69 +; NO-SIMD128-NEXT: i32.and $push47=, $pop46, $pop45 +; NO-SIMD128-NEXT: i32.xor $push48=, $pop47, $37 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop48 +; NO-SIMD128-NEXT: i32.xor $push50=, $20, $36 +; NO-SIMD128-NEXT: i32.const $push68=, -1 +; NO-SIMD128-NEXT: i32.xor $push49=, $4, $pop68 +; NO-SIMD128-NEXT: i32.and $push51=, $pop50, $pop49 +; NO-SIMD128-NEXT: i32.xor $push52=, $pop51, $36 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop52 +; NO-SIMD128-NEXT: i32.xor $push54=, $19, $35 +; NO-SIMD128-NEXT: i32.const $push67=, -1 +; NO-SIMD128-NEXT: i32.xor $push53=, $3, $pop67 ; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $pop53 -; NO-SIMD128-NEXT: i32.xor $push56=, $pop55, $39 -; NO-SIMD128-NEXT: 
i32.store8 0($pop58), $pop56 -; NO-SIMD128-NEXT: i32.const $push63=, 5 -; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-NEXT: i32.xor $push60=, $22, $38 -; NO-SIMD128-NEXT: i32.const $push92=, -1 -; NO-SIMD128-NEXT: i32.xor $push59=, $6, $pop92 -; NO-SIMD128-NEXT: i32.and $push61=, $pop60, $pop59 -; NO-SIMD128-NEXT: i32.xor $push62=, $pop61, $38 -; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-NEXT: i32.xor $push66=, $21, $37 -; NO-SIMD128-NEXT: i32.const $push91=, -1 -; NO-SIMD128-NEXT: i32.xor $push65=, $5, $pop91 -; NO-SIMD128-NEXT: i32.and $push67=, $pop66, $pop65 -; NO-SIMD128-NEXT: i32.xor $push68=, $pop67, $37 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 -; NO-SIMD128-NEXT: i32.const $push73=, 3 -; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-NEXT: i32.xor $push70=, $20, $36 -; NO-SIMD128-NEXT: i32.const $push90=, -1 -; NO-SIMD128-NEXT: i32.xor $push69=, $4, $pop90 -; NO-SIMD128-NEXT: i32.and $push71=, $pop70, $pop69 -; NO-SIMD128-NEXT: i32.xor $push72=, $pop71, $36 -; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-NEXT: i32.xor $push76=, $19, $35 -; NO-SIMD128-NEXT: i32.const $push89=, -1 -; NO-SIMD128-NEXT: i32.xor $push75=, $3, $pop89 -; NO-SIMD128-NEXT: i32.and $push77=, $pop76, $pop75 -; NO-SIMD128-NEXT: i32.xor $push78=, $pop77, $35 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 -; NO-SIMD128-NEXT: i32.xor $push80=, $18, $34 -; NO-SIMD128-NEXT: i32.const $push88=, -1 -; NO-SIMD128-NEXT: i32.xor $push79=, $2, $pop88 -; NO-SIMD128-NEXT: i32.and $push81=, $pop80, $pop79 -; NO-SIMD128-NEXT: i32.xor $push82=, $pop81, $34 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 -; NO-SIMD128-NEXT: i32.xor $push84=, $17, $33 -; NO-SIMD128-NEXT: i32.const $push87=, -1 -; NO-SIMD128-NEXT: i32.xor $push83=, $1, $pop87 -; NO-SIMD128-NEXT: i32.and $push85=, $pop84, $pop83 -; NO-SIMD128-NEXT: i32.xor $push86=, $pop85, $33 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: i32.xor $push56=, $pop55, $35 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop56 +; NO-SIMD128-NEXT: i32.xor $push58=, $18, $34 +; NO-SIMD128-NEXT: i32.const $push66=, -1 +; NO-SIMD128-NEXT: i32.xor $push57=, $2, $pop66 +; NO-SIMD128-NEXT: i32.and $push59=, $pop58, $pop57 +; NO-SIMD128-NEXT: i32.xor $push60=, $pop59, $34 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop60 +; NO-SIMD128-NEXT: i32.xor $push62=, $17, $33 +; NO-SIMD128-NEXT: i32.const $push65=, -1 +; NO-SIMD128-NEXT: i32.xor $push61=, $1, $pop65 +; NO-SIMD128-NEXT: i32.and $push63=, $pop62, $pop61 +; NO-SIMD128-NEXT: i32.xor $push64=, $pop63, $33 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop64 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v16i8: @@ -6079,117 +4957,95 @@ define <16 x i8> @bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 ; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $33 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $18, $34 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop79 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $34 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $19, $35 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop100 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, 
$pop78 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $35 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $20, $36 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop99 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop77 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $21, $37 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $5, $pop98 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $pop21, $37 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $22, $38 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $6, $pop97 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $38 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $23, $39 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $7, $pop96 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $21, $37 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $5, $pop76 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $37 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $22, $38 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $6, $pop75 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $pop23, $38 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $23, $39 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $7, $pop74 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $pop27, $39 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $24, $40 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop73 ; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $39 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $24, $40 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $8, $pop95 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $40 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $25, $41 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, -1 -; NO-SIMD128-FAST-NEXT: 
i32.xor $push41=, $9, $pop94 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $40 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $25, $41 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $9, $pop72 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $pop34, $pop33 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $pop35, $41 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $26, $42 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $10, $pop71 +; NO-SIMD128-FAST-NEXT: i32.and $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.xor $push40=, $pop39, $42 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $27, $43 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $11, $pop70 ; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $pop41 -; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $41 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $26, $42 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $10, $pop93 +; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $43 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $28, $44 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $12, $pop69 ; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $pop45 -; NO-SIMD128-FAST-NEXT: i32.xor $push48=, $pop47, $42 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $27, $43 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push51=, $11, $pop92 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $pop51 -; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $pop53, $43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $28, $44 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $12, $pop91 +; NO-SIMD128-FAST-NEXT: i32.xor $push48=, $pop47, $44 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48 +; NO-SIMD128-FAST-NEXT: i32.xor $push50=, $29, $45 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $13, $pop68 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $pop50, $pop49 +; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $pop51, $45 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52 +; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $30, $46 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push53=, $14, $pop67 +; NO-SIMD128-FAST-NEXT: i32.and $push55=, $pop54, $pop53 +; NO-SIMD128-FAST-NEXT: i32.xor $push56=, $pop55, $46 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $31, $47 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $15, $pop66 ; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $pop57 -; NO-SIMD128-FAST-NEXT: i32.xor $push60=, $pop59, $44 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const 
$push67=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 -; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $29, $45 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push63=, $13, $pop90 -; NO-SIMD128-FAST-NEXT: i32.and $push65=, $pop64, $pop63 -; NO-SIMD128-FAST-NEXT: i32.xor $push66=, $pop65, $45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-FAST-NEXT: i32.xor $push70=, $30, $46 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push69=, $14, $pop89 -; NO-SIMD128-FAST-NEXT: i32.and $push71=, $pop70, $pop69 -; NO-SIMD128-FAST-NEXT: i32.xor $push72=, $pop71, $46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 -; NO-SIMD128-FAST-NEXT: i32.xor $push76=, $31, $47 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push75=, $15, $pop88 -; NO-SIMD128-FAST-NEXT: i32.and $push77=, $pop76, $pop75 -; NO-SIMD128-FAST-NEXT: i32.xor $push78=, $pop77, $47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 -; NO-SIMD128-FAST-NEXT: i32.xor $push82=, $32, $48 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push81=, $16, $pop87 -; NO-SIMD128-FAST-NEXT: i32.and $push83=, $pop82, $pop81 -; NO-SIMD128-FAST-NEXT: i32.xor $push84=, $pop83, $48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: i32.xor $push60=, $pop59, $47 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60 +; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $32, $48 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push61=, $16, $pop65 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $pop61 +; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $pop63, $48 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <16 x i8> %v1, %v2 %notc = xor <16 x i8> %c, @add_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: add_v8i16: ; NO-SIMD128: .functype add_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.add $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.add $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.add $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.add $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.add $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.add $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.add $push0=, $8, $16 +; 
NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.add $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.add $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.add $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.add $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.add $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.add $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: add_v8i16: @@ -6253,24 +5101,16 @@ define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.add $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = add <8 x i16> %x, %y ret <8 x i16> %a @@ -6292,30 +5132,22 @@ define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: sub_v8i16: ; NO-SIMD128: .functype sub_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.sub $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.sub $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.sub $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.sub $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.sub $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.sub $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; 
NO-SIMD128-NEXT: i32.sub $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.sub $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.sub $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.sub $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.sub $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.sub $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.sub $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.sub $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.sub $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.sub $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sub_v8i16: @@ -6327,24 +5159,16 @@ define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = sub <8 x i16> %x, %y ret <8 x i16> %a @@ -6366,30 +5190,22 @@ define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: mul_v8i16: ; NO-SIMD128: .functype mul_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.mul $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.mul $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.mul $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.mul $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.mul $push4=, $8, $16 -; 
NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.mul $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.mul $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.mul $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.mul $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.mul $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.mul $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.mul $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.mul $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.mul $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.mul $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.mul $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: mul_v8i16: @@ -6401,24 +5217,16 @@ define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.mul $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.mul $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = mul <8 x i16> %x, %y ret <8 x i16> %a @@ -6440,54 +5248,46 @@ define <8 x i16> @min_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: min_s_v8i16: ; NO-SIMD128: .functype min_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 14 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 ; NO-SIMD128-NEXT: i32.extend16_s 
$push0=, $16 ; NO-SIMD128-NEXT: i32.lt_s $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $8, $16, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push10=, 12 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $7 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $15 -; NO-SIMD128-NEXT: i32.lt_s $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $7, $15, $pop8 -; NO-SIMD128-NEXT: i32.store16 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push16=, 10 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend16_s $push13=, $6 -; NO-SIMD128-NEXT: i32.extend16_s $push12=, $14 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 +; NO-SIMD128-NEXT: i32.extend16_s $push5=, $7 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $15 +; NO-SIMD128-NEXT: i32.lt_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $7, $15, $pop6 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $6 +; NO-SIMD128-NEXT: i32.extend16_s $push8=, $14 +; NO-SIMD128-NEXT: i32.lt_s $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $6, $14, $pop10 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop11 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push12=, $13 ; NO-SIMD128-NEXT: i32.lt_s $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.select $push15=, $6, $14, $pop14 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.extend16_s $push19=, $5 -; NO-SIMD128-NEXT: i32.extend16_s $push18=, $13 -; NO-SIMD128-NEXT: i32.lt_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.select $push21=, $5, $13, $pop20 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop21 -; NO-SIMD128-NEXT: i32.const $push26=, 6 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.extend16_s $push23=, $4 -; NO-SIMD128-NEXT: i32.extend16_s $push22=, $12 -; NO-SIMD128-NEXT: i32.lt_s $push24=, $pop23, $pop22 -; NO-SIMD128-NEXT: i32.select $push25=, $4, $12, $pop24 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.extend16_s $push29=, $3 -; NO-SIMD128-NEXT: i32.extend16_s $push28=, $11 +; NO-SIMD128-NEXT: i32.select $push15=, $5, $13, $pop14 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push17=, $4 +; NO-SIMD128-NEXT: i32.extend16_s $push16=, $12 +; NO-SIMD128-NEXT: i32.lt_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.select $push19=, $4, $12, $pop18 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop19 +; NO-SIMD128-NEXT: i32.extend16_s $push21=, $3 +; NO-SIMD128-NEXT: i32.extend16_s $push20=, $11 +; NO-SIMD128-NEXT: i32.lt_s $push22=, $pop21, $pop20 +; NO-SIMD128-NEXT: i32.select $push23=, $3, $11, $pop22 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop23 +; NO-SIMD128-NEXT: i32.extend16_s $push25=, $2 +; NO-SIMD128-NEXT: i32.extend16_s $push24=, $10 +; NO-SIMD128-NEXT: i32.lt_s $push26=, $pop25, $pop24 +; NO-SIMD128-NEXT: i32.select $push27=, $2, $10, $pop26 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop27 +; NO-SIMD128-NEXT: i32.extend16_s $push29=, $1 +; NO-SIMD128-NEXT: i32.extend16_s $push28=, $9 ; NO-SIMD128-NEXT: i32.lt_s $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.select $push31=, $3, $11, $pop30 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop31 -; NO-SIMD128-NEXT: i32.extend16_s $push33=, $2 -; NO-SIMD128-NEXT: i32.extend16_s $push32=, $10 -; NO-SIMD128-NEXT: i32.lt_s $push34=, $pop33, $pop32 -; NO-SIMD128-NEXT: i32.select $push35=, $2, $10, $pop34 -; NO-SIMD128-NEXT: i32.store16 
2($0), $pop35 -; NO-SIMD128-NEXT: i32.extend16_s $push37=, $1 -; NO-SIMD128-NEXT: i32.extend16_s $push36=, $9 -; NO-SIMD128-NEXT: i32.lt_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.select $push39=, $1, $9, $pop38 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop39 +; NO-SIMD128-NEXT: i32.select $push31=, $1, $9, $pop30 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop31 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_s_v8i16: @@ -6508,39 +5308,31 @@ define <8 x i16> @min_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.lt_s $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $11, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $12 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $12, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $13 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $13, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $14 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push24=, $pop23, $pop22 -; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $14, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $7 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push17=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $13 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $13, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop19 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push20=, $14 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $14, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push25=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push24=, $15 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push26=, $pop25, $pop24 +; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $15, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $16 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $15, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop33), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push35=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push34=, $16 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push36=, $pop35, $pop34 -; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $16, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop39), $pop37 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $16, 
$pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop31 ; NO-SIMD128-FAST-NEXT: return %c = icmp slt <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y @@ -6563,70 +5355,62 @@ define <8 x i16> @min_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: min_u_v8i16: ; NO-SIMD128: .functype min_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 -; NO-SIMD128-NEXT: i32.const $push55=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop55 +; NO-SIMD128-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop47 ; NO-SIMD128-NEXT: i32.lt_u $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.select $push4=, $8, $16, $pop3 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 12 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push54=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $7, $pop54 -; NO-SIMD128-NEXT: i32.const $push53=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $15, $pop53 -; NO-SIMD128-NEXT: i32.lt_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.select $push10=, $7, $15, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 10 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push52=, 65535 -; NO-SIMD128-NEXT: i32.and $push14=, $6, $pop52 -; NO-SIMD128-NEXT: i32.const $push51=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $14, $pop51 -; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.select $push16=, $6, $14, $pop15 -; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push50=, 65535 -; NO-SIMD128-NEXT: i32.and $push20=, $5, $pop50 -; NO-SIMD128-NEXT: i32.const $push49=, 65535 -; NO-SIMD128-NEXT: i32.and $push19=, $13, $pop49 -; NO-SIMD128-NEXT: i32.lt_u $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.select $push22=, $5, $13, $pop21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-NEXT: i32.const $push27=, 6 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.const $push48=, 65535 -; NO-SIMD128-NEXT: i32.and $push24=, $4, $pop48 -; NO-SIMD128-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-NEXT: i32.and $push23=, $12, $pop47 -; NO-SIMD128-NEXT: i32.lt_u $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.select $push26=, $4, $12, $pop25 -; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-NEXT: i32.and $push30=, $3, $pop46 +; NO-SIMD128-NEXT: i32.and $push6=, $7, $pop46 ; NO-SIMD128-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-NEXT: i32.and $push29=, $11, $pop45 -; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29 -; NO-SIMD128-NEXT: i32.select $push32=, $3, $11, $pop31 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop45 +; NO-SIMD128-NEXT: i32.lt_u $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.select $push8=, $7, $15, $pop7 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop8 ; NO-SIMD128-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-NEXT: i32.and $push34=, $2, $pop44 +; NO-SIMD128-NEXT: i32.and $push10=, $6, $pop44 ; NO-SIMD128-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-NEXT: i32.and $push33=, $10, $pop43 -; NO-SIMD128-NEXT: i32.lt_u 
$push35=, $pop34, $pop33 -; NO-SIMD128-NEXT: i32.select $push36=, $2, $10, $pop35 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 +; NO-SIMD128-NEXT: i32.and $push9=, $14, $pop43 +; NO-SIMD128-NEXT: i32.lt_u $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.select $push12=, $6, $14, $pop11 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-NEXT: i32.and $push38=, $1, $pop42 +; NO-SIMD128-NEXT: i32.and $push14=, $5, $pop42 ; NO-SIMD128-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-NEXT: i32.and $push37=, $9, $pop41 -; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.select $push40=, $1, $9, $pop39 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: i32.and $push13=, $13, $pop41 +; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $5, $13, $pop15 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-NEXT: i32.and $push18=, $4, $pop40 +; NO-SIMD128-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $pop39 +; NO-SIMD128-NEXT: i32.lt_u $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.select $push20=, $4, $12, $pop19 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-NEXT: i32.and $push22=, $3, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-NEXT: i32.and $push21=, $11, $pop37 +; NO-SIMD128-NEXT: i32.lt_u $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.select $push24=, $3, $11, $pop23 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-NEXT: i32.and $push26=, $2, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-NEXT: i32.and $push25=, $10, $pop35 +; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.select $push28=, $2, $10, $pop27 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-NEXT: i32.and $push30=, $1, $pop34 +; NO-SIMD128-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-NEXT: i32.and $push29=, $9, $pop33 +; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.select $push32=, $1, $9, $pop31 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_u_v8i16: @@ -6634,68 +5418,60 @@ define <8 x i16> @min_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $9, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop45 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $10, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), 
$pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop52 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop43 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $11, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop41 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $12, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $pop47 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $13, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $pop45 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $14, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $13, $pop39 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $13, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $14, $pop37 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $14, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop35 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push27=, 
$pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $15, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop33 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $15, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $pop41 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $16, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $16, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %c = icmp ult <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y @@ -6718,54 +5494,46 @@ define <8 x i16> @max_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: max_s_v8i16: ; NO-SIMD128: .functype max_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 14 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 ; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16 ; NO-SIMD128-NEXT: i32.gt_s $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $8, $16, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push10=, 12 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $7 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $15 -; NO-SIMD128-NEXT: i32.gt_s $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $7, $15, $pop8 -; NO-SIMD128-NEXT: i32.store16 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push16=, 10 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend16_s $push13=, $6 -; NO-SIMD128-NEXT: i32.extend16_s $push12=, $14 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 +; NO-SIMD128-NEXT: i32.extend16_s $push5=, $7 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $15 +; NO-SIMD128-NEXT: i32.gt_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $7, $15, $pop6 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $6 +; NO-SIMD128-NEXT: i32.extend16_s $push8=, $14 +; NO-SIMD128-NEXT: i32.gt_s $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $6, $14, $pop10 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop11 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push12=, $13 ; NO-SIMD128-NEXT: i32.gt_s $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.select $push15=, $6, $14, $pop14 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.extend16_s $push19=, $5 -; NO-SIMD128-NEXT: i32.extend16_s $push18=, $13 -; NO-SIMD128-NEXT: i32.gt_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.select $push21=, $5, $13, $pop20 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop21 -; NO-SIMD128-NEXT: i32.const $push26=, 6 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.extend16_s 
$push23=, $4 -; NO-SIMD128-NEXT: i32.extend16_s $push22=, $12 -; NO-SIMD128-NEXT: i32.gt_s $push24=, $pop23, $pop22 -; NO-SIMD128-NEXT: i32.select $push25=, $4, $12, $pop24 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.extend16_s $push29=, $3 -; NO-SIMD128-NEXT: i32.extend16_s $push28=, $11 +; NO-SIMD128-NEXT: i32.select $push15=, $5, $13, $pop14 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push17=, $4 +; NO-SIMD128-NEXT: i32.extend16_s $push16=, $12 +; NO-SIMD128-NEXT: i32.gt_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.select $push19=, $4, $12, $pop18 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop19 +; NO-SIMD128-NEXT: i32.extend16_s $push21=, $3 +; NO-SIMD128-NEXT: i32.extend16_s $push20=, $11 +; NO-SIMD128-NEXT: i32.gt_s $push22=, $pop21, $pop20 +; NO-SIMD128-NEXT: i32.select $push23=, $3, $11, $pop22 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop23 +; NO-SIMD128-NEXT: i32.extend16_s $push25=, $2 +; NO-SIMD128-NEXT: i32.extend16_s $push24=, $10 +; NO-SIMD128-NEXT: i32.gt_s $push26=, $pop25, $pop24 +; NO-SIMD128-NEXT: i32.select $push27=, $2, $10, $pop26 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop27 +; NO-SIMD128-NEXT: i32.extend16_s $push29=, $1 +; NO-SIMD128-NEXT: i32.extend16_s $push28=, $9 ; NO-SIMD128-NEXT: i32.gt_s $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.select $push31=, $3, $11, $pop30 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop31 -; NO-SIMD128-NEXT: i32.extend16_s $push33=, $2 -; NO-SIMD128-NEXT: i32.extend16_s $push32=, $10 -; NO-SIMD128-NEXT: i32.gt_s $push34=, $pop33, $pop32 -; NO-SIMD128-NEXT: i32.select $push35=, $2, $10, $pop34 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop35 -; NO-SIMD128-NEXT: i32.extend16_s $push37=, $1 -; NO-SIMD128-NEXT: i32.extend16_s $push36=, $9 -; NO-SIMD128-NEXT: i32.gt_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.select $push39=, $1, $9, $pop38 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop39 +; NO-SIMD128-NEXT: i32.select $push31=, $1, $9, $pop30 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop31 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_s_v8i16: @@ -6786,39 +5554,31 @@ define <8 x i16> @max_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.gt_s $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $11, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $12 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $12, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $13 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $13, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $14 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push24=, $pop23, $pop22 -; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $14, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend16_s 
$push29=, $7 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push17=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $13 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $13, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop19 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push20=, $14 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $14, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push25=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push24=, $15 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push26=, $pop25, $pop24 +; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $15, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $16 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $15, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop33), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push35=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push34=, $16 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push36=, $pop35, $pop34 -; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $16, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop39), $pop37 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $16, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop31 ; NO-SIMD128-FAST-NEXT: return %c = icmp sgt <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y @@ -6841,70 +5601,62 @@ define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: max_u_v8i16: ; NO-SIMD128: .functype max_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 -; NO-SIMD128-NEXT: i32.const $push55=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop55 +; NO-SIMD128-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop47 ; NO-SIMD128-NEXT: i32.gt_u $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.select $push4=, $8, $16, $pop3 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 12 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push54=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $7, $pop54 -; NO-SIMD128-NEXT: i32.const $push53=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $15, $pop53 -; NO-SIMD128-NEXT: i32.gt_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.select $push10=, $7, $15, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 10 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push52=, 65535 -; NO-SIMD128-NEXT: i32.and $push14=, $6, $pop52 -; NO-SIMD128-NEXT: i32.const $push51=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $14, $pop51 -; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.select $push16=, $6, $14, $pop15 -; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-NEXT: 
i32.const $push50=, 65535 -; NO-SIMD128-NEXT: i32.and $push20=, $5, $pop50 -; NO-SIMD128-NEXT: i32.const $push49=, 65535 -; NO-SIMD128-NEXT: i32.and $push19=, $13, $pop49 -; NO-SIMD128-NEXT: i32.gt_u $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.select $push22=, $5, $13, $pop21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-NEXT: i32.const $push27=, 6 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.const $push48=, 65535 -; NO-SIMD128-NEXT: i32.and $push24=, $4, $pop48 -; NO-SIMD128-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-NEXT: i32.and $push23=, $12, $pop47 -; NO-SIMD128-NEXT: i32.gt_u $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.select $push26=, $4, $12, $pop25 -; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-NEXT: i32.and $push30=, $3, $pop46 +; NO-SIMD128-NEXT: i32.and $push6=, $7, $pop46 ; NO-SIMD128-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-NEXT: i32.and $push29=, $11, $pop45 -; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29 -; NO-SIMD128-NEXT: i32.select $push32=, $3, $11, $pop31 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop45 +; NO-SIMD128-NEXT: i32.gt_u $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.select $push8=, $7, $15, $pop7 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop8 ; NO-SIMD128-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-NEXT: i32.and $push34=, $2, $pop44 +; NO-SIMD128-NEXT: i32.and $push10=, $6, $pop44 ; NO-SIMD128-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-NEXT: i32.and $push33=, $10, $pop43 -; NO-SIMD128-NEXT: i32.gt_u $push35=, $pop34, $pop33 -; NO-SIMD128-NEXT: i32.select $push36=, $2, $10, $pop35 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 +; NO-SIMD128-NEXT: i32.and $push9=, $14, $pop43 +; NO-SIMD128-NEXT: i32.gt_u $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.select $push12=, $6, $14, $pop11 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-NEXT: i32.and $push38=, $1, $pop42 +; NO-SIMD128-NEXT: i32.and $push14=, $5, $pop42 ; NO-SIMD128-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-NEXT: i32.and $push37=, $9, $pop41 -; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.select $push40=, $1, $9, $pop39 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: i32.and $push13=, $13, $pop41 +; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $5, $13, $pop15 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-NEXT: i32.and $push18=, $4, $pop40 +; NO-SIMD128-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $pop39 +; NO-SIMD128-NEXT: i32.gt_u $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.select $push20=, $4, $12, $pop19 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-NEXT: i32.and $push22=, $3, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-NEXT: i32.and $push21=, $11, $pop37 +; NO-SIMD128-NEXT: i32.gt_u $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.select $push24=, $3, $11, $pop23 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-NEXT: i32.and $push26=, $2, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-NEXT: i32.and $push25=, $10, $pop35 +; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25 
+; NO-SIMD128-NEXT: i32.select $push28=, $2, $10, $pop27 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-NEXT: i32.and $push30=, $1, $pop34 +; NO-SIMD128-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-NEXT: i32.and $push29=, $9, $pop33 +; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.select $push32=, $1, $9, $pop31 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_u_v8i16: @@ -6912,68 +5664,60 @@ define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $9, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop45 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $10, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop52 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop43 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $11, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop41 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $12, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $pop47 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $13, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, 
$pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $pop45 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $14, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $13, $pop39 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $13, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $14, $pop37 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $14, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop35 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $15, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop33 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $15, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $pop41 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $16, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $16, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %c = icmp ugt <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y @@ -6996,78 +5740,70 @@ define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: avgr_u_v8i16: ; NO-SIMD128: .functype avgr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.add $push2=, $8, $16 -; NO-SIMD128-NEXT: i32.const $push3=, 1 -; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 65534 -; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 -; NO-SIMD128-NEXT: i32.const $push63=, 1 -; 
NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop63 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $pop7 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push10=, $7, $15 -; NO-SIMD128-NEXT: i32.const $push62=, 1 -; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop62 -; NO-SIMD128-NEXT: i32.const $push61=, 65534 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop61 -; NO-SIMD128-NEXT: i32.const $push60=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop60 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop13 -; NO-SIMD128-NEXT: i32.const $push14=, 10 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push16=, $6, $14 -; NO-SIMD128-NEXT: i32.const $push59=, 1 -; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop59 -; NO-SIMD128-NEXT: i32.const $push58=, 65534 -; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop58 -; NO-SIMD128-NEXT: i32.const $push57=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop57 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop19 -; NO-SIMD128-NEXT: i32.add $push20=, $5, $13 -; NO-SIMD128-NEXT: i32.const $push56=, 1 -; NO-SIMD128-NEXT: i32.add $push21=, $pop20, $pop56 -; NO-SIMD128-NEXT: i32.const $push55=, 65534 -; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $pop55 +; NO-SIMD128-NEXT: i32.add $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.const $push1=, 1 +; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-NEXT: i32.const $push3=, 65534 +; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3 +; NO-SIMD128-NEXT: i32.const $push55=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop55 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $7, $15 ; NO-SIMD128-NEXT: i32.const $push54=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop54 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop23 -; NO-SIMD128-NEXT: i32.const $push24=, 6 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.add $push26=, $4, $12 -; NO-SIMD128-NEXT: i32.const $push53=, 1 -; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop53 -; NO-SIMD128-NEXT: i32.const $push52=, 65534 -; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop52 +; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop54 +; NO-SIMD128-NEXT: i32.const $push53=, 65534 +; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop53 +; NO-SIMD128-NEXT: i32.const $push52=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop52 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop9 +; NO-SIMD128-NEXT: i32.add $push10=, $6, $14 ; NO-SIMD128-NEXT: i32.const $push51=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop51 -; NO-SIMD128-NEXT: i32.store16 0($pop25), $pop29 -; NO-SIMD128-NEXT: i32.add $push30=, $3, $11 -; NO-SIMD128-NEXT: i32.const $push50=, 1 -; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop50 -; NO-SIMD128-NEXT: i32.const $push49=, 65534 -; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop49 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop51 +; NO-SIMD128-NEXT: i32.const $push50=, 65534 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop50 +; NO-SIMD128-NEXT: i32.const $push49=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop49 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop13 +; NO-SIMD128-NEXT: i32.add $push14=, $5, $13 ; NO-SIMD128-NEXT: i32.const $push48=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop48 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop33 -; NO-SIMD128-NEXT: i32.add $push34=, $2, $10 -; NO-SIMD128-NEXT: i32.const $push47=, 1 -; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop47 -; NO-SIMD128-NEXT: 
i32.const $push46=, 65534 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop46 +; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop48 +; NO-SIMD128-NEXT: i32.const $push47=, 65534 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop47 +; NO-SIMD128-NEXT: i32.const $push46=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop46 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop17 +; NO-SIMD128-NEXT: i32.add $push18=, $4, $12 ; NO-SIMD128-NEXT: i32.const $push45=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop45 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop37 -; NO-SIMD128-NEXT: i32.add $push38=, $1, $9 -; NO-SIMD128-NEXT: i32.const $push44=, 1 -; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop44 -; NO-SIMD128-NEXT: i32.const $push43=, 65534 -; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop45 +; NO-SIMD128-NEXT: i32.const $push44=, 65534 +; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop44 +; NO-SIMD128-NEXT: i32.const $push43=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop43 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop21 +; NO-SIMD128-NEXT: i32.add $push22=, $3, $11 ; NO-SIMD128-NEXT: i32.const $push42=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop42 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop41 +; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop42 +; NO-SIMD128-NEXT: i32.const $push41=, 65534 +; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop41 +; NO-SIMD128-NEXT: i32.const $push40=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop40 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop25 +; NO-SIMD128-NEXT: i32.add $push26=, $2, $10 +; NO-SIMD128-NEXT: i32.const $push39=, 1 +; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop39 +; NO-SIMD128-NEXT: i32.const $push38=, 65534 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop37 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop29 +; NO-SIMD128-NEXT: i32.add $push30=, $1, $9 +; NO-SIMD128-NEXT: i32.const $push36=, 1 +; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 65534 +; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop35 +; NO-SIMD128-NEXT: i32.const $push34=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop34 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop33 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: avgr_u_v8i16: @@ -7078,73 +5814,65 @@ define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 ; NO-SIMD128-FAST-NEXT: i32.const $push3=, 65534 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop63 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop55 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop5 ; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop62 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop61 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 1 +; 
NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop52 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $11 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop58 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop57 -; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop56 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop54 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop19 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop52 ; NO-SIMD128-FAST-NEXT: i32.const $push51=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop49 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $12 ; NO-SIMD128-FAST-NEXT: i32.const $push48=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop25), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop46 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $13 ; NO-SIMD128-FAST-NEXT: i32.const $push45=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop31), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop44 -; 
NO-SIMD128-FAST-NEXT: i32.const $push43=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $14 ; NO-SIMD128-FAST-NEXT: i32.const $push42=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop37), $pop41 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop41 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop25 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop29 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop33 ; NO-SIMD128-FAST-NEXT: return %a = add nuw <8 x i16> %x, %y %b = add nuw <8 x i16> %a, @@ -7176,78 +5904,70 @@ define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: avgr_u_v8i16_wrap: ; NO-SIMD128: .functype avgr_u_v8i16_wrap (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.add $push2=, $8, $16 -; NO-SIMD128-NEXT: i32.const $push3=, 1 -; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 65534 -; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 -; NO-SIMD128-NEXT: i32.const $push63=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop63 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $pop7 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push10=, $7, $15 -; NO-SIMD128-NEXT: i32.const $push62=, 1 -; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop62 -; NO-SIMD128-NEXT: i32.const $push61=, 65534 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop61 -; NO-SIMD128-NEXT: i32.const $push60=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop60 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop13 -; NO-SIMD128-NEXT: i32.const $push14=, 10 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push16=, $6, $14 -; NO-SIMD128-NEXT: i32.const $push59=, 1 -; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop59 -; NO-SIMD128-NEXT: i32.const $push58=, 65534 -; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop58 -; NO-SIMD128-NEXT: i32.const $push57=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push19=, 
$pop18, $pop57
-; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop19
-; NO-SIMD128-NEXT: i32.add $push20=, $5, $13
-; NO-SIMD128-NEXT: i32.const $push56=, 1
-; NO-SIMD128-NEXT: i32.add $push21=, $pop20, $pop56
-; NO-SIMD128-NEXT: i32.const $push55=, 65534
-; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $pop55
+; NO-SIMD128-NEXT: i32.add $push0=, $8, $16
+; NO-SIMD128-NEXT: i32.const $push1=, 1
+; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1
+; NO-SIMD128-NEXT: i32.const $push3=, 65534
+; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3
+; NO-SIMD128-NEXT: i32.const $push55=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop55
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop5
+; NO-SIMD128-NEXT: i32.add $push6=, $7, $15
 ; NO-SIMD128-NEXT: i32.const $push54=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop54
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop23
-; NO-SIMD128-NEXT: i32.const $push24=, 6
-; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-NEXT: i32.add $push26=, $4, $12
-; NO-SIMD128-NEXT: i32.const $push53=, 1
-; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop53
-; NO-SIMD128-NEXT: i32.const $push52=, 65534
-; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop52
+; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop54
+; NO-SIMD128-NEXT: i32.const $push53=, 65534
+; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop53
+; NO-SIMD128-NEXT: i32.const $push52=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop52
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop9
+; NO-SIMD128-NEXT: i32.add $push10=, $6, $14
 ; NO-SIMD128-NEXT: i32.const $push51=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop51
-; NO-SIMD128-NEXT: i32.store16 0($pop25), $pop29
-; NO-SIMD128-NEXT: i32.add $push30=, $3, $11
-; NO-SIMD128-NEXT: i32.const $push50=, 1
-; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop50
-; NO-SIMD128-NEXT: i32.const $push49=, 65534
-; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop49
+; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop51
+; NO-SIMD128-NEXT: i32.const $push50=, 65534
+; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop50
+; NO-SIMD128-NEXT: i32.const $push49=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop49
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop13
+; NO-SIMD128-NEXT: i32.add $push14=, $5, $13
 ; NO-SIMD128-NEXT: i32.const $push48=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop48
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop33
-; NO-SIMD128-NEXT: i32.add $push34=, $2, $10
-; NO-SIMD128-NEXT: i32.const $push47=, 1
-; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop47
-; NO-SIMD128-NEXT: i32.const $push46=, 65534
-; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop46
+; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop48
+; NO-SIMD128-NEXT: i32.const $push47=, 65534
+; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop47
+; NO-SIMD128-NEXT: i32.const $push46=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop46
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop17
+; NO-SIMD128-NEXT: i32.add $push18=, $4, $12
 ; NO-SIMD128-NEXT: i32.const $push45=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop45
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop37
-; NO-SIMD128-NEXT: i32.add $push38=, $1, $9
-; NO-SIMD128-NEXT: i32.const $push44=, 1
-; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop44
-; NO-SIMD128-NEXT: i32.const $push43=, 65534
-; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop43
+; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop45
+; NO-SIMD128-NEXT: i32.const $push44=, 65534
+; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop44
+; NO-SIMD128-NEXT: i32.const $push43=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop43
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop21
+; NO-SIMD128-NEXT: i32.add $push22=, $3, $11
 ; NO-SIMD128-NEXT: i32.const $push42=, 1
-; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop42
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop41
+; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop42
+; NO-SIMD128-NEXT: i32.const $push41=, 65534
+; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop41
+; NO-SIMD128-NEXT: i32.const $push40=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop40
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop25
+; NO-SIMD128-NEXT: i32.add $push26=, $2, $10
+; NO-SIMD128-NEXT: i32.const $push39=, 1
+; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop39
+; NO-SIMD128-NEXT: i32.const $push38=, 65534
+; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop38
+; NO-SIMD128-NEXT: i32.const $push37=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop37
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop29
+; NO-SIMD128-NEXT: i32.add $push30=, $1, $9
+; NO-SIMD128-NEXT: i32.const $push36=, 1
+; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop36
+; NO-SIMD128-NEXT: i32.const $push35=, 65534
+; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop35
+; NO-SIMD128-NEXT: i32.const $push34=, 1
+; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop34
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop33
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: avgr_u_v8i16_wrap:
@@ -7258,73 +5978,65 @@ define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) {
 ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1
 ; NO-SIMD128-FAST-NEXT: i32.const $push3=, 65534
 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push63=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop63
+; NO-SIMD128-FAST-NEXT: i32.const $push55=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop55
 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop5
 ; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $10
-; NO-SIMD128-FAST-NEXT: i32.const $push62=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop62
-; NO-SIMD128-FAST-NEXT: i32.const $push61=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop61
-; NO-SIMD128-FAST-NEXT: i32.const $push60=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop60
+; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop54
+; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop53
+; NO-SIMD128-FAST-NEXT: i32.const $push52=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop52
 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop9
 ; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $11
-; NO-SIMD128-FAST-NEXT: i32.const $push59=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop59
-; NO-SIMD128-FAST-NEXT: i32.const $push58=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop58
-; NO-SIMD128-FAST-NEXT: i32.const $push57=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop57
-; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $12
-; NO-SIMD128-FAST-NEXT: i32.const $push56=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop56
-; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop55
-; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop54
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop19
-; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $13
-; NO-SIMD128-FAST-NEXT: i32.const $push53=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop53
-; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop52
 ; NO-SIMD128-FAST-NEXT: i32.const $push51=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop51
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop23
-; NO-SIMD128-FAST-NEXT: i32.const $push24=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24
-; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $14
-; NO-SIMD128-FAST-NEXT: i32.const $push50=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop50
-; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop49
+; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop51
+; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop50
+; NO-SIMD128-FAST-NEXT: i32.const $push49=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop49
+; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13
+; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $12
 ; NO-SIMD128-FAST-NEXT: i32.const $push48=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop48
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop25), $pop29
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $15
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop47
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop46
+; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop48
+; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push46=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop46
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop17
+; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $13
 ; NO-SIMD128-FAST-NEXT: i32.const $push45=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop45
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop31), $pop35
-; NO-SIMD128-FAST-NEXT: i32.const $push36=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36
-; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $16
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 1
-; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65534
-; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop43
+; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop45
+; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop44
+; NO-SIMD128-FAST-NEXT: i32.const $push43=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop43
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $14
 ; NO-SIMD128-FAST-NEXT: i32.const $push42=, 1
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop42
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop37), $pop41
+; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop42
+; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop41
+; NO-SIMD128-FAST-NEXT: i32.const $push40=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop40
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop25
+; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $15
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop39
+; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop38
+; NO-SIMD128-FAST-NEXT: i32.const $push37=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop37
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop29
+; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $16
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, 1
+; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop36
+; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65534
+; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop35
+; NO-SIMD128-FAST-NEXT: i32.const $push34=, 1
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop34
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop33
 ; NO-SIMD128-FAST-NEXT: return
 %a = add <8 x i16> %x, %y
 %b = add <8 x i16> %a,
@@ -7348,70 +6060,62 @@ define <8 x i16> @abs_v8i16(<8 x i16> %x) {
 ; NO-SIMD128-LABEL: abs_v8i16:
 ; NO-SIMD128: .functype abs_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push4=, 14
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
 ; NO-SIMD128-NEXT: i32.extend16_s $push0=, $8
 ; NO-SIMD128-NEXT: i32.const $push1=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push55=, $pop0, $pop1
-; NO-SIMD128-NEXT: local.tee $push54=, $9=, $pop55
-; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop54
+; NO-SIMD128-NEXT: i32.shr_s $push47=, $pop0, $pop1
+; NO-SIMD128-NEXT: local.tee $push46=, $9=, $pop47
+; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop46
 ; NO-SIMD128-NEXT: i32.sub $push3=, $pop2, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.extend16_s $push6=, $7
-; NO-SIMD128-NEXT: i32.const $push53=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push52=, $pop6, $pop53
-; NO-SIMD128-NEXT: local.tee $push51=, $8=, $pop52
-; NO-SIMD128-NEXT: i32.xor $push7=, $7, $pop51
-; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $8
-; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push14=, 10
-; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-NEXT: i32.extend16_s $push11=, $6
-; NO-SIMD128-NEXT: i32.const $push50=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push49=, $pop11, $pop50
-; NO-SIMD128-NEXT: local.tee $push48=, $8=, $pop49
-; NO-SIMD128-NEXT: i32.xor $push12=, $6, $pop48
-; NO-SIMD128-NEXT: i32.sub $push13=, $pop12, $8
-; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13
-; NO-SIMD128-NEXT: i32.extend16_s $push16=, $5
-; NO-SIMD128-NEXT: i32.const $push47=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push46=, $pop16, $pop47
-; NO-SIMD128-NEXT: local.tee $push45=, $8=, $pop46
-; NO-SIMD128-NEXT: i32.xor $push17=, $5, $pop45
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop3
+; NO-SIMD128-NEXT: i32.extend16_s $push4=, $7
+; NO-SIMD128-NEXT: i32.const $push45=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push44=, $pop4, $pop45
+; NO-SIMD128-NEXT: local.tee $push43=, $8=, $pop44
+; NO-SIMD128-NEXT: i32.xor $push5=, $7, $pop43
+; NO-SIMD128-NEXT: i32.sub $push6=, $pop5, $8
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop6
+; NO-SIMD128-NEXT: i32.extend16_s $push7=, $6
+; NO-SIMD128-NEXT: i32.const $push42=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push41=, $pop7, $pop42
+; NO-SIMD128-NEXT: local.tee $push40=, $8=, $pop41
+; NO-SIMD128-NEXT: i32.xor $push8=, $6, $pop40
+; NO-SIMD128-NEXT: i32.sub $push9=, $pop8, $8
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop9
+; NO-SIMD128-NEXT: i32.extend16_s $push10=, $5
+; NO-SIMD128-NEXT: i32.const $push39=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push38=, $pop10, $pop39
+; NO-SIMD128-NEXT: local.tee $push37=, $8=, $pop38
+; NO-SIMD128-NEXT: i32.xor $push11=, $5, $pop37
+; NO-SIMD128-NEXT: i32.sub $push12=, $pop11, $8
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop12
+; NO-SIMD128-NEXT: i32.extend16_s $push13=, $4
+; NO-SIMD128-NEXT: i32.const $push36=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push35=, $pop13, $pop36
+; NO-SIMD128-NEXT: local.tee $push34=, $8=, $pop35
+; NO-SIMD128-NEXT: i32.xor $push14=, $4, $pop34
+; NO-SIMD128-NEXT: i32.sub $push15=, $pop14, $8
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop15
+; NO-SIMD128-NEXT: i32.extend16_s $push16=, $3
+; NO-SIMD128-NEXT: i32.const $push33=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push32=, $pop16, $pop33
+; NO-SIMD128-NEXT: local.tee $push31=, $8=, $pop32
+; NO-SIMD128-NEXT: i32.xor $push17=, $3, $pop31
 ; NO-SIMD128-NEXT: i32.sub $push18=, $pop17, $8
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop18
-; NO-SIMD128-NEXT: i32.const $push22=, 6
-; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-NEXT: i32.extend16_s $push19=, $4
-; NO-SIMD128-NEXT: i32.const $push44=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push43=, $pop19, $pop44
-; NO-SIMD128-NEXT: local.tee $push42=, $8=, $pop43
-; NO-SIMD128-NEXT: i32.xor $push20=, $4, $pop42
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop18
+; NO-SIMD128-NEXT: i32.extend16_s $push19=, $2
+; NO-SIMD128-NEXT: i32.const $push30=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push29=, $pop19, $pop30
+; NO-SIMD128-NEXT: local.tee $push28=, $8=, $pop29
+; NO-SIMD128-NEXT: i32.xor $push20=, $2, $pop28
 ; NO-SIMD128-NEXT: i32.sub $push21=, $pop20, $8
-; NO-SIMD128-NEXT: i32.store16 0($pop23), $pop21
-; NO-SIMD128-NEXT: i32.extend16_s $push24=, $3
-; NO-SIMD128-NEXT: i32.const $push41=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push40=, $pop24, $pop41
-; NO-SIMD128-NEXT: local.tee $push39=, $8=, $pop40
-; NO-SIMD128-NEXT: i32.xor $push25=, $3, $pop39
-; NO-SIMD128-NEXT: i32.sub $push26=, $pop25, $8
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop26
-; NO-SIMD128-NEXT: i32.extend16_s $push27=, $2
-; NO-SIMD128-NEXT: i32.const $push38=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push37=, $pop27, $pop38
-; NO-SIMD128-NEXT: local.tee $push36=, $8=, $pop37
-; NO-SIMD128-NEXT: i32.xor $push28=, $2, $pop36
-; NO-SIMD128-NEXT: i32.sub $push29=, $pop28, $8
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop29
-; NO-SIMD128-NEXT: i32.extend16_s $push30=, $1
-; NO-SIMD128-NEXT: i32.const $push35=, 15
-; NO-SIMD128-NEXT: i32.shr_s $push34=, $pop30, $pop35
-; NO-SIMD128-NEXT: local.tee $push33=, $8=, $pop34
-; NO-SIMD128-NEXT: i32.xor $push31=, $1, $pop33
-; NO-SIMD128-NEXT: i32.sub $push32=, $pop31, $8
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop32
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop21
+; NO-SIMD128-NEXT: i32.extend16_s $push22=, $1
+; NO-SIMD128-NEXT: i32.const $push27=, 15
+; NO-SIMD128-NEXT: i32.shr_s $push26=, $pop22, $pop27
+; NO-SIMD128-NEXT: local.tee $push25=, $8=, $pop26
+; NO-SIMD128-NEXT: i32.xor $push23=, $1, $pop25
+; NO-SIMD128-NEXT: i32.sub $push24=, $pop23, $8
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop24
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: abs_v8i16:
@@ -7419,68 +6123,60 @@ define <8 x i16> @abs_v8i16(<8 x i16> %x) {
 ; NO-SIMD128-FAST-NEXT: # %bb.0:
 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push0=, $1
 ; NO-SIMD128-FAST-NEXT: i32.const $push1=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push55=, $pop0, $pop1
-; NO-SIMD128-FAST-NEXT: local.tee $push54=, $9=, $pop55
-; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop54
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push47=, $pop0, $pop1
+; NO-SIMD128-FAST-NEXT: local.tee $push46=, $9=, $pop47
+; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop46
 ; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop2, $9
 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3
 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push4=, $2
-; NO-SIMD128-FAST-NEXT: i32.const $push53=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push52=, $pop4, $pop53
-; NO-SIMD128-FAST-NEXT: local.tee $push51=, $1=, $pop52
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop51
+; NO-SIMD128-FAST-NEXT: i32.const $push45=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push44=, $pop4, $pop45
+; NO-SIMD128-FAST-NEXT: local.tee $push43=, $1=, $pop44
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop43
 ; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $1
 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push7=, $3
-; NO-SIMD128-FAST-NEXT: i32.const $push50=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push49=, $pop7, $pop50
-; NO-SIMD128-FAST-NEXT: local.tee $push48=, $2=, $pop49
-; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop48
+; NO-SIMD128-FAST-NEXT: i32.const $push42=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push41=, $pop7, $pop42
+; NO-SIMD128-FAST-NEXT: local.tee $push40=, $2=, $pop41
+; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop40
 ; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop8, $2
 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $4
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push46=, $pop10, $pop47
-; NO-SIMD128-FAST-NEXT: local.tee $push45=, $3=, $pop46
-; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop45
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push38=, $pop10, $pop39
+; NO-SIMD128-FAST-NEXT: local.tee $push37=, $3=, $pop38
+; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop37
 ; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop11, $3
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $5
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push43=, $pop15, $pop44
-; NO-SIMD128-FAST-NEXT: local.tee $push42=, $4=, $pop43
-; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $5, $pop42
-; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop16, $4
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $6
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push40=, $pop18, $pop41
-; NO-SIMD128-FAST-NEXT: local.tee $push39=, $5=, $pop40
-; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $6, $pop39
-; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop19, $5
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $7
-; NO-SIMD128-FAST-NEXT: i32.const $push38=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push37=, $pop23, $pop38
-; NO-SIMD128-FAST-NEXT: local.tee $push36=, $6=, $pop37
-; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $7, $pop36
-; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $pop24, $6
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $8
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push34=, $pop28, $pop35
-; NO-SIMD128-FAST-NEXT: local.tee $push33=, $0=, $pop34
-; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop33
-; NO-SIMD128-FAST-NEXT: i32.sub $push30=, $pop29, $0
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $5
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push35=, $pop13, $pop36
+; NO-SIMD128-FAST-NEXT: local.tee $push34=, $4=, $pop35
+; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $5, $pop34
+; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $pop14, $4
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $6
+; NO-SIMD128-FAST-NEXT: i32.const $push33=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop16, $pop33
+; NO-SIMD128-FAST-NEXT: local.tee $push31=, $5=, $pop32
+; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $6, $pop31
+; NO-SIMD128-FAST-NEXT: i32.sub $push18=, $pop17, $5
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $7
+; NO-SIMD128-FAST-NEXT: i32.const $push30=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push29=, $pop19, $pop30
+; NO-SIMD128-FAST-NEXT: local.tee $push28=, $6=, $pop29
+; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $7, $pop28
+; NO-SIMD128-FAST-NEXT: i32.sub $push21=, $pop20, $6
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $8
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, 15
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push26=, $pop22, $pop27
+; NO-SIMD128-FAST-NEXT: local.tee $push25=, $7=, $pop26
+; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $8, $pop25
+; NO-SIMD128-FAST-NEXT: i32.sub $push24=, $pop23, $7
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24
 ; NO-SIMD128-FAST-NEXT: return
 %a = sub <8 x i16> zeroinitializer, %x
 %b = icmp slt <8 x i16> %x, zeroinitializer
@@ -7505,37 +6201,29 @@ define <8 x i16> @neg_v8i16(<8 x i16> %x) {
 ; NO-SIMD128: .functype neg_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
 ; NO-SIMD128-NEXT: i32.const $push0=, 0
-; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $5
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push23=, 0
-; NO-SIMD128-NEXT: i32.sub $push2=, $pop23, $3
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push22=, 0
-; NO-SIMD128-NEXT: i32.sub $push3=, $pop22, $2
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push21=, 0
-; NO-SIMD128-NEXT: i32.sub $push4=, $pop21, $1
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 14
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.const $push20=, 0
-; NO-SIMD128-NEXT: i32.sub $push5=, $pop20, $8
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.const $push19=, 0
-; NO-SIMD128-NEXT: i32.sub $push8=, $pop19, $7
-; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 10
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.const $push18=, 0
-; NO-SIMD128-NEXT: i32.sub $push11=, $pop18, $6
-; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 6
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.const $push17=, 0
-; NO-SIMD128-NEXT: i32.sub $push14=, $pop17, $4
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
+; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $8
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop1
+; NO-SIMD128-NEXT: i32.const $push15=, 0
+; NO-SIMD128-NEXT: i32.sub $push2=, $pop15, $7
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push14=, 0
+; NO-SIMD128-NEXT: i32.sub $push3=, $pop14, $6
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push13=, 0
+; NO-SIMD128-NEXT: i32.sub $push4=, $pop13, $5
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push12=, 0
+; NO-SIMD128-NEXT: i32.sub $push5=, $pop12, $4
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop5
+; NO-SIMD128-NEXT: i32.const $push11=, 0
+; NO-SIMD128-NEXT: i32.sub $push6=, $pop11, $3
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push10=, 0
+; NO-SIMD128-NEXT: i32.sub $push7=, $pop10, $2
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop7
+; NO-SIMD128-NEXT: i32.const $push9=, 0
+; NO-SIMD128-NEXT: i32.sub $push8=, $pop9, $1
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: neg_v8i16:
@@ -7544,35 +6232,27 @@ define <8 x i16> @neg_v8i16(<8 x i16> %x) {
 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0
 ; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1
 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop23, $2
+; NO-SIMD128-FAST-NEXT: i32.const $push15=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop15, $2
 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push22=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop22, $3
+; NO-SIMD128-FAST-NEXT: i32.const $push14=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop14, $3
 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop21, $4
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop20, $5
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop19, $6
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push18=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop18, $7
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop17, $8
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push13=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop13, $4
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push12=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $pop12, $5
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.const $push11=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop11, $6
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push10=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop10, $7
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.const $push9=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop9, $8
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8
 ; NO-SIMD128-FAST-NEXT: return
 %a = sub <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, %x
@@ -7596,64 +6276,48 @@ define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) {
 ; NO-SIMD128: .functype shl_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
 ; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push18=, $9, $pop0
-; NO-SIMD128-NEXT: local.tee $push17=, $9=, $pop18
-; NO-SIMD128-NEXT: i32.shl $push1=, $5, $pop17
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop1
-; NO-SIMD128-NEXT: i32.shl $push2=, $3, $9
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-NEXT: i32.shl $push3=, $2, $9
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop3
-; NO-SIMD128-NEXT: i32.shl $push4=, $1, $9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 14
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.shl $push5=, $8, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.shl $push8=, $7, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 10
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.shl $push11=, $6, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 6
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.shl $push14=, $4, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
+; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop0
+; NO-SIMD128-NEXT: local.tee $push9=, $9=, $pop10
+; NO-SIMD128-NEXT: i32.shl $push1=, $8, $pop9
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop1
+; NO-SIMD128-NEXT: i32.shl $push2=, $7, $9
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop2
+; NO-SIMD128-NEXT: i32.shl $push3=, $6, $9
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop3
+; NO-SIMD128-NEXT: i32.shl $push4=, $5, $9
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-NEXT: i32.shl $push5=, $4, $9
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop5
+; NO-SIMD128-NEXT: i32.shl $push6=, $3, $9
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
+; NO-SIMD128-NEXT: i32.shl $push7=, $2, $9
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop7
+; NO-SIMD128-NEXT: i32.shl $push8=, $1, $9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: shl_v8i16:
 ; NO-SIMD128-FAST: .functype shl_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-FAST-NEXT: # %bb.0:
 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop0
-; NO-SIMD128-FAST-NEXT: local.tee $push17=, $9=, $pop18
-; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop17
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $9, $pop0
+; NO-SIMD128-FAST-NEXT: local.tee $push9=, $9=, $pop10
+; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop9
 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1
 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $9
 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2
 ; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $9
 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $9
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6
-; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $9
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $9
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $9
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $9
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16
+; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $9
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $9
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $9
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $9
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $9
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8
 ; NO-SIMD128-FAST-NEXT: return
 %t = insertelement <8 x i16> undef, i16 %x, i32 0
 %s = shufflevector <8 x i16> %t, <8 x i16> undef,
@@ -7681,37 +6345,29 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
 ; NO-SIMD128: .functype shl_const_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
 ; NO-SIMD128-NEXT: i32.const $push0=, 5
-; NO-SIMD128-NEXT: i32.shl $push1=, $5, $pop0
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push23=, 5
-; NO-SIMD128-NEXT: i32.shl $push2=, $3, $pop23
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push22=, 5
-; NO-SIMD128-NEXT: i32.shl $push3=, $2, $pop22
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push21=, 5
-; NO-SIMD128-NEXT: i32.shl $push4=, $1, $pop21
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push6=, 14
-; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6
-; NO-SIMD128-NEXT: i32.const $push20=, 5
-; NO-SIMD128-NEXT: i32.shl $push5=, $8, $pop20
-; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5
-; NO-SIMD128-NEXT: i32.const $push9=, 12
-; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-NEXT: i32.const $push19=, 5
-; NO-SIMD128-NEXT: i32.shl $push8=, $7, $pop19
-; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8
-; NO-SIMD128-NEXT: i32.const $push12=, 10
-; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12
-; NO-SIMD128-NEXT: i32.const $push18=, 5
-; NO-SIMD128-NEXT: i32.shl $push11=, $6, $pop18
-; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 6
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.const $push17=, 5
-; NO-SIMD128-NEXT: i32.shl $push14=, $4, $pop17
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
+; NO-SIMD128-NEXT: i32.shl $push1=, $8, $pop0
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop1
+; NO-SIMD128-NEXT: i32.const $push15=, 5
+; NO-SIMD128-NEXT: i32.shl $push2=, $7, $pop15
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push14=, 5
+; NO-SIMD128-NEXT: i32.shl $push3=, $6, $pop14
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push13=, 5
+; NO-SIMD128-NEXT: i32.shl $push4=, $5, $pop13
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push12=, 5
+; NO-SIMD128-NEXT: i32.shl $push5=, $4, $pop12
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop5
+; NO-SIMD128-NEXT: i32.const $push11=, 5
+; NO-SIMD128-NEXT: i32.shl $push6=, $3, $pop11
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push10=, 5
+; NO-SIMD128-NEXT: i32.shl $push7=, $2, $pop10
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop7
+; NO-SIMD128-NEXT: i32.const $push9=, 5
+; NO-SIMD128-NEXT: i32.shl $push8=, $1, $pop9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: shl_const_v8i16:
@@ -7720,35 +6376,27 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) {
 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5
 ; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0
 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop23
+; NO-SIMD128-FAST-NEXT: i32.const $push15=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop15
 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push22=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop22
+; NO-SIMD128-FAST-NEXT: i32.const $push14=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop14
 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop21
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push20=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $pop20
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $pop19
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $pop18
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop17
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16
+; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4
+; NO-SIMD128-FAST-NEXT: i32.const $push12=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $pop12
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: i32.const $push11=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $pop11
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push10=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $pop10
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: i32.const $push9=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8
 ; NO-SIMD128-FAST-NEXT: return
 %a = shl <8 x i16> %v,
@@ -7866,45 +6514,37 @@ define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 ; NO-SIMD128: .functype shl_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
 ; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop0
-; NO-SIMD128-NEXT: i32.shl $push2=, $5, $pop1
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push31=, 65535
-; NO-SIMD128-NEXT: i32.and $push3=, $11, $pop31
-; NO-SIMD128-NEXT: i32.shl $push4=, $3, $pop3
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push30=, 65535
-; NO-SIMD128-NEXT: i32.and $push5=, $10, $pop30
-; NO-SIMD128-NEXT: i32.shl $push6=, $2, $pop5
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push29=, 65535
-; NO-SIMD128-NEXT: i32.and $push7=, $9, $pop29
-; NO-SIMD128-NEXT: i32.shl $push8=, $1, $pop7
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push11=, 14
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.const $push28=, 65535
-; NO-SIMD128-NEXT: i32.and $push9=, $16, $pop28
-; NO-SIMD128-NEXT: i32.shl $push10=, $8, $pop9
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.const $push27=, 65535
-; NO-SIMD128-NEXT: i32.and $push13=, $15, $pop27
-; NO-SIMD128-NEXT: i32.shl $push14=, $7, $pop13
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push19=, 10
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.const $push26=, 65535
-; NO-SIMD128-NEXT: i32.and $push17=, $14, $pop26
-; NO-SIMD128-NEXT: i32.shl $push18=, $6, $pop17
-; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push23=, 6
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.const $push25=, 65535
-; NO-SIMD128-NEXT: i32.and $push21=, $12, $pop25
-; NO-SIMD128-NEXT: i32.shl $push22=, $4, $pop21
-; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22
+; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0
+; NO-SIMD128-NEXT: i32.shl $push2=, $8, $pop1
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push23=, 65535
+; NO-SIMD128-NEXT: i32.and $push3=, $15, $pop23
+; NO-SIMD128-NEXT: i32.shl $push4=, $7, $pop3
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push22=, 65535
+; NO-SIMD128-NEXT: i32.and $push5=, $14, $pop22
+; NO-SIMD128-NEXT: i32.shl $push6=, $6, $pop5
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push21=, 65535
+; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop21
+; NO-SIMD128-NEXT: i32.shl $push8=, $5, $pop7
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop8
+; NO-SIMD128-NEXT: i32.const $push20=, 65535
+; NO-SIMD128-NEXT: i32.and $push9=, $12, $pop20
+; NO-SIMD128-NEXT: i32.shl $push10=, $4, $pop9
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop10
+; NO-SIMD128-NEXT: i32.const $push19=, 65535
+; NO-SIMD128-NEXT: i32.and $push11=, $11, $pop19
+; NO-SIMD128-NEXT: i32.shl $push12=, $3, $pop11
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push18=, 65535
+; NO-SIMD128-NEXT: i32.and $push13=, $10, $pop18
+; NO-SIMD128-NEXT: i32.shl $push14=, $2, $pop13
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop14
+; NO-SIMD128-NEXT: i32.const $push17=, 65535
+; NO-SIMD128-NEXT: i32.and $push15=, $9, $pop17
+; NO-SIMD128-NEXT: i32.shl $push16=, $1, $pop15
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop16
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: shl_vec_v8i16:
@@ -7914,42 +6554,34 @@ define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 ; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop0
 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1
 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push3=, $10, $pop31
+; NO-SIMD128-FAST-NEXT: i32.const $push23=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push3=, $10, $pop23
 ; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3
 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $11, $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push22=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $11, $pop22
 ; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $3, $pop5
 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push9=, $12, $pop29
-; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $4, $pop9
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10
-; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $13, $pop28
-; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $5, $pop11
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $14, $pop27
-; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $6, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop26
-; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $7, $pop19
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $16, $pop25
-; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $8, $pop23
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24
+; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $12, $pop21
+; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $4, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push9=, $13, $pop20
+; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $5, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $14, $pop19
+; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $6, $pop11
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $15, $pop18
+; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $7, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push15=, $16, $pop17
+; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop15
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16
 ; NO-SIMD128-FAST-NEXT: return
 %a = shl <8 x i16> %v, %x
 ret <8 x i16> %a
@@ -7971,41 +6603,33 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
 ; NO-SIMD128-LABEL: shr_s_v8i16:
 ; NO-SIMD128: .functype shr_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.extend16_s $push1=, $5
+; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8
 ; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push26=, $9, $pop0
-; NO-SIMD128-NEXT: local.tee $push25=, $9=, $pop26
-; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop25
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop2
-; NO-SIMD128-NEXT: i32.extend16_s $push3=, $3
+; NO-SIMD128-NEXT: i32.and $push18=, $9, $pop0
+; NO-SIMD128-NEXT: local.tee $push17=, $9=, $pop18
+; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop17
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
+; NO-SIMD128-NEXT: i32.extend16_s $push3=, $7
 ; NO-SIMD128-NEXT: i32.shr_s $push4=, $pop3, $9
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop4
-; NO-SIMD128-NEXT: i32.extend16_s $push5=, $2
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop4
+; NO-SIMD128-NEXT: i32.extend16_s $push5=, $6
 ; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $9
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
-; NO-SIMD128-NEXT: i32.extend16_s $push7=, $1
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-NEXT: i32.extend16_s $push7=, $5
 ; NO-SIMD128-NEXT: i32.shr_s $push8=, $pop7, $9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push11=, 14
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.extend16_s $push9=, $8
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop8
+; NO-SIMD128-NEXT: i32.extend16_s $push9=, $4
 ; NO-SIMD128-NEXT: i32.shr_s $push10=, $pop9, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.extend16_s $push13=, $7
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop10
+; NO-SIMD128-NEXT: i32.extend16_s $push11=, $3
+; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $9
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop12
+; NO-SIMD128-NEXT: i32.extend16_s $push13=, $2
 ; NO-SIMD128-NEXT: i32.shr_s $push14=, $pop13, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push19=, 10
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.extend16_s $push17=, $6
-; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push23=, 6
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.extend16_s $push21=, $4
-; NO-SIMD128-NEXT: i32.shr_s $push22=, $pop21, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop14
+; NO-SIMD128-NEXT: i32.extend16_s $push15=, $1
+; NO-SIMD128-NEXT: i32.shr_s $push16=, $pop15, $9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop16
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: shr_s_v8i16:
@@ -8013,9 +6637,9 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
 ; NO-SIMD128-FAST-NEXT: # %bb.0:
 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push1=, $1
 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop0
-; NO-SIMD128-FAST-NEXT: local.tee $push25=, $1=, $pop26
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop25
+; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop0
+; NO-SIMD128-FAST-NEXT: local.tee $push17=, $1=, $pop18
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop17
 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2
 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push3=, $2
 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $1
@@ -8023,29 +6647,21 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) {
 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push5=, $3
 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $1
 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $4
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push7=, $4
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push8=, $pop7, $1
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $5
 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $pop9, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $5
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $6
 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $6
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $7
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $1
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $8
 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $pop15, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $7
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $8
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16
 ; NO-SIMD128-FAST-NEXT: return
 %t = insertelement <8 x i16> undef, i16 %x, i32 0
 %s = shufflevector <8 x i16> %t, <8 x i16> undef,
@@ -8164,54 +6780,46 @@ define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 ; NO-SIMD128-LABEL: shr_s_vec_v8i16:
 ; NO-SIMD128: .functype shr_s_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.extend16_s $push2=, $5
+; NO-SIMD128-NEXT: i32.extend16_s $push2=, $8
 ; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop0
+; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0
 ; NO-SIMD128-NEXT: i32.shr_s $push3=, $pop2, $pop1
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
-; NO-SIMD128-NEXT: i32.extend16_s $push5=, $3
-; NO-SIMD128-NEXT: i32.const $push39=, 65535
-; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop39
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop3
+; NO-SIMD128-NEXT: i32.extend16_s $push5=, $7
+; NO-SIMD128-NEXT: i32.const $push31=, 65535
+; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop31
 ; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $pop4
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-NEXT: i32.extend16_s $push8=, $2
-; NO-SIMD128-NEXT: i32.const $push38=, 65535
-; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop38
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop6
+; NO-SIMD128-NEXT: i32.extend16_s $push8=, $6
+; NO-SIMD128-NEXT: i32.const $push30=, 65535
+; NO-SIMD128-NEXT: i32.and $push7=, $14, $pop30
 ; NO-SIMD128-NEXT: i32.shr_s $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop9
-; NO-SIMD128-NEXT: i32.extend16_s $push11=, $1
-; NO-SIMD128-NEXT: i32.const $push37=, 65535
-; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop37
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop9
+; NO-SIMD128-NEXT: i32.extend16_s $push11=, $5
+; NO-SIMD128-NEXT: i32.const $push29=, 65535
+; NO-SIMD128-NEXT: i32.and $push10=, $13, $pop29
 ; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop12
-; NO-SIMD128-NEXT: i32.const $push16=, 14
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.extend16_s $push14=, $8
-; NO-SIMD128-NEXT: i32.const $push36=, 65535
-; NO-SIMD128-NEXT: i32.and $push13=, $16, $pop36
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop12
+; NO-SIMD128-NEXT: i32.extend16_s $push14=, $4
+; NO-SIMD128-NEXT: i32.const $push28=, 65535
+; NO-SIMD128-NEXT: i32.and $push13=, $12, $pop28
 ; NO-SIMD128-NEXT: i32.shr_s $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push21=, 12
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
-; NO-SIMD128-NEXT: i32.extend16_s $push19=, $7
-; NO-SIMD128-NEXT: i32.const $push35=, 65535
-; NO-SIMD128-NEXT: i32.and $push18=, $15, $pop35
-; NO-SIMD128-NEXT: i32.shr_s $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push26=, 10
-; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26
-; NO-SIMD128-NEXT: i32.extend16_s $push24=, $6
-; NO-SIMD128-NEXT: i32.const $push34=, 65535
-; NO-SIMD128-NEXT: i32.and $push23=, $14, $pop34
-; NO-SIMD128-NEXT: i32.shr_s $push25=, $pop24, $pop23
-; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-NEXT: i32.const $push31=, 6
-; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31
-; NO-SIMD128-NEXT: i32.extend16_s $push29=, $4
-; NO-SIMD128-NEXT: i32.const $push33=, 65535
-; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop33
-; NO-SIMD128-NEXT: i32.shr_s $push30=, $pop29, $pop28
-; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop15
+; NO-SIMD128-NEXT: i32.extend16_s $push17=, $3
+; NO-SIMD128-NEXT: i32.const $push27=, 65535
+; NO-SIMD128-NEXT: i32.and $push16=, $11, $pop27
+; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop18
+; NO-SIMD128-NEXT: i32.extend16_s $push20=, $2
+; NO-SIMD128-NEXT: i32.const $push26=, 65535
+; NO-SIMD128-NEXT: i32.and $push19=, $10, $pop26
+; NO-SIMD128-NEXT: i32.shr_s $push21=, $pop20, $pop19
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop21
+; NO-SIMD128-NEXT: i32.extend16_s $push23=, $1
+; NO-SIMD128-NEXT: i32.const $push25=, 65535
+; NO-SIMD128-NEXT: i32.and $push22=, $9, $pop25
+; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $pop22
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop24
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: shr_s_vec_v8i16:
@@ -8223,48 +6831,40 @@ define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop1
 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3
 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push5=, $2
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop39
+; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop31
 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4
 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push8=, $3
-; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop38
+; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop30
 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop7
 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4
-; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push12=, $12, $pop37
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $pop12
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop14
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $5
-; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $13, $pop36
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push18=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6
-; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push20=, $14, $pop35
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop20
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop19), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push26=, $7
-; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop34
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop25
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop24), $pop27
-; NO-SIMD128-FAST-NEXT: i32.const $push28=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push29=, $0, $pop28
-; NO-SIMD128-FAST-NEXT: i32.extend16_s $push31=, $8
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push30=, $16, $pop33
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop30
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop29), $pop32
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $4
+; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop29
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $pop10
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push14=, $5
+; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $13, $pop28
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $pop14, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push17=, $6
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $14, $pop27
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push20=, $7
+; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop26
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $pop20, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $8
+; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $16, $pop25
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24
 ; NO-SIMD128-FAST-NEXT: return
 %a = ashr <8 x i16> %v, %x
 ret <8 x i16> %a
@@ -8287,48 +6887,40 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
 ; NO-SIMD128: .functype shr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
 ; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push1=, $5, $pop0
-; NO-SIMD128-NEXT: i32.const $push34=, 65535
-; NO-SIMD128-NEXT: i32.and $push33=, $9, $pop34
-; NO-SIMD128-NEXT: local.tee $push32=, $9=, $pop33
-; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop32
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push31=, 65535
-; NO-SIMD128-NEXT: i32.and $push3=, $3, $pop31
+; NO-SIMD128-NEXT: i32.and $push1=, $8, $pop0
+; NO-SIMD128-NEXT: i32.const $push26=, 65535
+; NO-SIMD128-NEXT: i32.and $push25=, $9, $pop26
+; NO-SIMD128-NEXT: local.tee $push24=, $9=, $pop25
+; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop24
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push23=, 65535
+; NO-SIMD128-NEXT: i32.and $push3=, $7, $pop23
 ; NO-SIMD128-NEXT: i32.shr_u $push4=, $pop3, $9
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop4
-; NO-SIMD128-NEXT: i32.const $push30=, 65535
-; NO-SIMD128-NEXT: i32.and $push5=, $2, $pop30
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push22=, 65535
+; NO-SIMD128-NEXT: i32.and $push5=, $6, $pop22
 ; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $9
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push29=, 65535
-; NO-SIMD128-NEXT: i32.and $push7=, $1, $pop29
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push21=, 65535
+; NO-SIMD128-NEXT: i32.and $push7=, $5, $pop21
 ; NO-SIMD128-NEXT: i32.shr_u $push8=, $pop7, $9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push11=, 14
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.const $push28=, 65535
-; NO-SIMD128-NEXT: i32.and $push9=, $8, $pop28
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop8
+; NO-SIMD128-NEXT: i32.const $push20=, 65535
+; NO-SIMD128-NEXT: i32.and $push9=, $4, $pop20
 ; NO-SIMD128-NEXT: i32.shr_u $push10=, $pop9, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push15=, 12
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.const $push27=, 65535
-; NO-SIMD128-NEXT: i32.and $push13=, $7, $pop27
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop10
+; NO-SIMD128-NEXT: i32.const $push19=, 65535
+; NO-SIMD128-NEXT: i32.and $push11=, $3, $pop19
+; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $9
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push18=, 65535
+; NO-SIMD128-NEXT: i32.and $push13=, $2, $pop18
 ; NO-SIMD128-NEXT: i32.shr_u $push14=, $pop13, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push19=, 10
-; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-NEXT: i32.const $push26=, 65535
-; NO-SIMD128-NEXT: i32.and $push17=, $6, $pop26
-; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18
-; NO-SIMD128-NEXT: i32.const $push23=, 6
-; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-NEXT: i32.const $push25=, 65535
-; NO-SIMD128-NEXT: i32.and $push21=, $4, $pop25
-; NO-SIMD128-NEXT: i32.shr_u $push22=, $pop21, $9
-; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop14
+; NO-SIMD128-NEXT: i32.const $push17=, 65535
+; NO-SIMD128-NEXT: i32.and $push15=, $1, $pop17
+; NO-SIMD128-NEXT: i32.shr_u $push16=, $pop15, $9
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop16
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: shr_u_v8i16:
@@ -8336,47 +6928,39 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) {
 ; NO-SIMD128-FAST-NEXT: # %bb.0:
 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535
 ; NO-SIMD128-FAST-NEXT: i32.and $push1=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push33=, $9, $pop34
-; NO-SIMD128-FAST-NEXT: local.tee $push32=, $1=, $pop33
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop32
+; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push25=, $9, $pop26
+; NO-SIMD128-FAST-NEXT: local.tee $push24=, $1=, $pop25
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop24
 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop31
+; NO-SIMD128-FAST-NEXT: i32.const $push23=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop23
 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push4=, $pop3, $1
 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push22=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop22
 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $1
 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop29
+; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop21
 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push8=, $pop7, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $5, $pop28
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push9=, $5, $pop20
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push10=, $pop9, $1
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10
+; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $6, $pop19
 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push15=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push13=, $6, $pop27
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $7, $pop18
 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push14=, $pop13, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push20=, $0, $pop19
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push17=, $7, $pop26
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop20), $pop18
-; NO-SIMD128-FAST-NEXT: i32.const $push23=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23
-; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535
-; NO-SIMD128-FAST-NEXT: i32.and $push21=, $8, $pop25
-; NO-SIMD128-FAST-NEXT: i32.shr_u $push22=, $pop21, $1
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop24), $pop22
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535
+; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $pop17
+; NO-SIMD128-FAST-NEXT: i32.shr_u $push16=, $pop15, $1
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16
 ; NO-SIMD128-FAST-NEXT: return
 %t = insertelement <8 x i16> undef, i16 %x, i32 0
 %s = shufflevector <8 x i16> %t, <8 x i16> undef,
@@ -8496,61 +7080,53 @@ define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) {
 ; NO-SIMD128: .functype shr_u_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
 ; NO-SIMD128-NEXT: i32.const $push0=, 65535
-; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop0
-; NO-SIMD128-NEXT: i32.const $push47=, 65535
-; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop47
-; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push46=, 65535
-; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop46
-; NO-SIMD128-NEXT: i32.const $push45=, 65535
-; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop45
-; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push44=, 65535
-; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop44
-; NO-SIMD128-NEXT: i32.const $push43=, 65535
-; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop43
-; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop9
-; NO-SIMD128-NEXT: i32.const $push42=, 65535
-; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop42
-; NO-SIMD128-NEXT: i32.const $push41=, 65535
-; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop41
-; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop12
-; NO-SIMD128-NEXT: i32.const $push16=, 14
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.const $push40=, 65535
-; NO-SIMD128-NEXT: i32.and $push14=, $8, $pop40
+; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0
 ; NO-SIMD128-NEXT: i32.const $push39=, 65535
-; NO-SIMD128-NEXT: i32.and $push13=, $16, $pop39
-; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push21=, 12
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
+; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop39
+; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop3
 ; NO-SIMD128-NEXT: i32.const $push38=, 65535
-; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop38
+; NO-SIMD128-NEXT: i32.and $push5=, $7, $pop38
 ; NO-SIMD128-NEXT: i32.const $push37=, 65535
-; NO-SIMD128-NEXT: i32.and $push18=, $15, $pop37
-; NO-SIMD128-NEXT: i32.shr_u $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push26=, 10
-; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26
+; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop37
+; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop6
 ; NO-SIMD128-NEXT: i32.const $push36=, 65535
-; NO-SIMD128-NEXT: i32.and $push24=, $6, $pop36
+; NO-SIMD128-NEXT: i32.and $push8=, $6, $pop36
 ; NO-SIMD128-NEXT: i32.const $push35=, 65535
-; NO-SIMD128-NEXT: i32.and $push23=, $14, $pop35
-; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop23
-; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-NEXT: i32.const $push31=, 6
-; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31
+; NO-SIMD128-NEXT: i32.and $push7=, $14, $pop35
+; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7
+;
NO-SIMD128-NEXT: i32.store16 10($0), $pop9 ; NO-SIMD128-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-NEXT: i32.and $push29=, $4, $pop34 +; NO-SIMD128-NEXT: i32.and $push11=, $5, $pop34 ; NO-SIMD128-NEXT: i32.const $push33=, 65535 -; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop33 -; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.and $push10=, $13, $pop33 +; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push32=, 65535 +; NO-SIMD128-NEXT: i32.and $push14=, $4, $pop32 +; NO-SIMD128-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $12, $pop31 +; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-NEXT: i32.and $push17=, $3, $pop30 +; NO-SIMD128-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-NEXT: i32.and $push16=, $11, $pop29 +; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-NEXT: i32.and $push20=, $2, $pop28 +; NO-SIMD128-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-NEXT: i32.and $push19=, $10, $pop27 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-NEXT: i32.and $push23=, $1, $pop26 +; NO-SIMD128-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-NEXT: i32.and $push22=, $9, $pop25 +; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop24 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_vec_v8i16: @@ -8558,60 +7134,52 @@ define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop39 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop45 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop43 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop41 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 -; NO-SIMD128-FAST-NEXT: 
i32.and $push15=, $13, $pop39 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 ; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop38 ; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $14, $pop37 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop37 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop36 ; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $15, $pop35 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop35 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop34 ; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $16, $pop33 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $5, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $13, $pop31 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $6, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $14, $pop29 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop27 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $8, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $16, $pop25 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24 ; NO-SIMD128-FAST-NEXT: return %a = lshr <8 x i16> %v, %x ret <8 x i16> %a @@ -8633,30 +7201,22 @@ define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: and_v8i16: ; 
NO-SIMD128: .functype and_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.and $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.and $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.and $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.and $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.and $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.and $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.and $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.and $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.and $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.and $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.and $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.and $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.and $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.and $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.and $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: and_v8i16: @@ -8668,24 +7228,16 @@ define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $7, $15 +; 
NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = and <8 x i16> %x, %y ret <8 x i16> %a @@ -8707,30 +7259,22 @@ define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: or_v8i16: ; NO-SIMD128: .functype or_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.or $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.or $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.or $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.or $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.or $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.or $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.or $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.or $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.or $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.or $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.or $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.or $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.or $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.or $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.or $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.or $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: or_v8i16: @@ -8742,24 +7286,16 @@ define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.or $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.or $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.or $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.or $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.or $push3=, $4, $12 +; 
NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.or $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.or $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.or $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = or <8 x i16> %x, %y ret <8 x i16> %a @@ -8781,30 +7317,22 @@ define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: xor_v8i16: ; NO-SIMD128: .functype xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.xor $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.xor $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.xor $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.xor $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.xor $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.xor $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.xor $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.xor $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.xor $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.xor $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.xor $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.xor $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.xor $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: xor_v8i16: @@ -8816,24 +7344,16 @@ define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $7, $15 
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = xor <8 x i16> %x, %y ret <8 x i16> %a @@ -8856,37 +7376,29 @@ define <8 x i16> @not_v8i16(<8 x i16> %x) { ; NO-SIMD128: .functype not_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $5, $pop0 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push23=, -1 -; NO-SIMD128-NEXT: i32.xor $push2=, $3, $pop23 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push22=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $2, $pop22 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push21=, -1 -; NO-SIMD128-NEXT: i32.xor $push4=, $1, $pop21 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 14 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.const $push20=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $8, $pop20 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.const $push19=, -1 -; NO-SIMD128-NEXT: i32.xor $push8=, $7, $pop19 -; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 10 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.const $push18=, -1 -; NO-SIMD128-NEXT: i32.xor $push11=, $6, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 6 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push17=, -1 -; NO-SIMD128-NEXT: i32.xor $push14=, $4, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push15=, -1 +; NO-SIMD128-NEXT: i32.xor $push2=, $7, $pop15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push14=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $6, $pop14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push13=, -1 +; NO-SIMD128-NEXT: i32.xor $push4=, $5, $pop13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push12=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $4, $pop12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push11=, -1 +; NO-SIMD128-NEXT: i32.xor $push6=, $3, $pop11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push10=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $2, $pop10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push9=, -1 +; NO-SIMD128-NEXT: i32.xor $push8=, $1, $pop9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 ; NO-SIMD128-NEXT: 
return ; ; NO-SIMD128-FAST-LABEL: not_v8i16: @@ -8895,35 +7407,27 @@ define <8 x i16> @not_v8i16(<8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop15 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop14 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $5, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $6, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $7, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $8, $pop17 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $4, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push12=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $5, $pop12 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $7, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $8, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %a = xor <8 x i16> %x, @@ -8948,45 +7452,37 @@ define <8 x i16> @andnot_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128: .functype andnot_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $13, $pop0 -; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push31=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $11, $pop31 -; NO-SIMD128-NEXT: i32.and $push4=, $3, $pop3 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push30=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $10, $pop30 -; NO-SIMD128-NEXT: i32.and $push6=, $2, $pop5 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 -; 
NO-SIMD128-NEXT: i32.const $push29=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $9, $pop29 -; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop7 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push28=, -1 -; NO-SIMD128-NEXT: i32.xor $push9=, $16, $pop28 -; NO-SIMD128-NEXT: i32.and $push10=, $8, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push27=, -1 -; NO-SIMD128-NEXT: i32.xor $push13=, $15, $pop27 -; NO-SIMD128-NEXT: i32.and $push14=, $7, $pop13 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push19=, 10 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push26=, -1 -; NO-SIMD128-NEXT: i32.xor $push17=, $14, $pop26 -; NO-SIMD128-NEXT: i32.and $push18=, $6, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push23=, 6 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.const $push25=, -1 -; NO-SIMD128-NEXT: i32.xor $push21=, $12, $pop25 -; NO-SIMD128-NEXT: i32.and $push22=, $4, $pop21 -; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop1 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push23=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $15, $pop23 +; NO-SIMD128-NEXT: i32.and $push4=, $7, $pop3 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push22=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $14, $pop22 +; NO-SIMD128-NEXT: i32.and $push6=, $6, $pop5 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push21=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $13, $pop21 +; NO-SIMD128-NEXT: i32.and $push8=, $5, $pop7 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push20=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $12, $pop20 +; NO-SIMD128-NEXT: i32.and $push10=, $4, $pop9 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push19=, -1 +; NO-SIMD128-NEXT: i32.xor $push11=, $11, $pop19 +; NO-SIMD128-NEXT: i32.and $push12=, $3, $pop11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $10, $pop18 +; NO-SIMD128-NEXT: i32.and $push14=, $2, $pop13 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push15=, $9, $pop17 +; NO-SIMD128-NEXT: i32.and $push16=, $1, $pop15 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: andnot_v8i16: @@ -8996,42 +7492,34 @@ define <8 x i16> @andnot_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $9, $pop0 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $10, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $10, $pop23 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $11, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor 
$push5=, $11, $pop22 ; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $12, $pop29 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push28=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $13, $pop28 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $5, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $14, $pop27 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $6, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $15, $pop26 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push25=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $16, $pop25 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $8, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $12, $pop21 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $5, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $14, $pop19 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $15, $pop18 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $7, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $16, $pop17 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $8, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %inv_y = xor <8 x i16> %y, @@ -9058,62 +7546,54 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-LABEL: bitselect_v8i16: ; NO-SIMD128: .functype bitselect_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.and $push0=, $16, $8 ; NO-SIMD128-NEXT: i32.const $push1=, -1 ; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop1 ; NO-SIMD128-NEXT: i32.and $push3=, $24, $pop2 ; NO-SIMD128-NEXT: i32.or $push4=, $pop0, $pop3 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 12 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.and $push7=, $15, $7 -; NO-SIMD128-NEXT: i32.const $push47=, 
-1 -; NO-SIMD128-NEXT: i32.xor $push8=, $7, $pop47 -; NO-SIMD128-NEXT: i32.and $push9=, $23, $pop8 -; NO-SIMD128-NEXT: i32.or $push10=, $pop7, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 10 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.and $push13=, $14, $6 -; NO-SIMD128-NEXT: i32.const $push46=, -1 -; NO-SIMD128-NEXT: i32.xor $push14=, $6, $pop46 -; NO-SIMD128-NEXT: i32.and $push15=, $22, $pop14 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop4 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $7 +; NO-SIMD128-NEXT: i32.const $push39=, -1 +; NO-SIMD128-NEXT: i32.xor $push6=, $7, $pop39 +; NO-SIMD128-NEXT: i32.and $push7=, $23, $pop6 +; NO-SIMD128-NEXT: i32.or $push8=, $pop5, $pop7 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop8 +; NO-SIMD128-NEXT: i32.and $push9=, $14, $6 +; NO-SIMD128-NEXT: i32.const $push38=, -1 +; NO-SIMD128-NEXT: i32.xor $push10=, $6, $pop38 +; NO-SIMD128-NEXT: i32.and $push11=, $22, $pop10 +; NO-SIMD128-NEXT: i32.or $push12=, $pop9, $pop11 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-NEXT: i32.and $push13=, $13, $5 +; NO-SIMD128-NEXT: i32.const $push37=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $5, $pop37 +; NO-SIMD128-NEXT: i32.and $push15=, $21, $pop14 ; NO-SIMD128-NEXT: i32.or $push16=, $pop13, $pop15 -; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.and $push19=, $13, $5 -; NO-SIMD128-NEXT: i32.const $push45=, -1 -; NO-SIMD128-NEXT: i32.xor $push20=, $5, $pop45 -; NO-SIMD128-NEXT: i32.and $push21=, $21, $pop20 -; NO-SIMD128-NEXT: i32.or $push22=, $pop19, $pop21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-NEXT: i32.const $push27=, 6 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.and $push23=, $12, $4 -; NO-SIMD128-NEXT: i32.const $push44=, -1 -; NO-SIMD128-NEXT: i32.xor $push24=, $4, $pop44 -; NO-SIMD128-NEXT: i32.and $push25=, $20, $pop24 -; NO-SIMD128-NEXT: i32.or $push26=, $pop23, $pop25 -; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.and $push29=, $11, $3 -; NO-SIMD128-NEXT: i32.const $push43=, -1 -; NO-SIMD128-NEXT: i32.xor $push30=, $3, $pop43 -; NO-SIMD128-NEXT: i32.and $push31=, $19, $pop30 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $4 +; NO-SIMD128-NEXT: i32.const $push36=, -1 +; NO-SIMD128-NEXT: i32.xor $push18=, $4, $pop36 +; NO-SIMD128-NEXT: i32.and $push19=, $20, $pop18 +; NO-SIMD128-NEXT: i32.or $push20=, $pop17, $pop19 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop20 +; NO-SIMD128-NEXT: i32.and $push21=, $11, $3 +; NO-SIMD128-NEXT: i32.const $push35=, -1 +; NO-SIMD128-NEXT: i32.xor $push22=, $3, $pop35 +; NO-SIMD128-NEXT: i32.and $push23=, $19, $pop22 +; NO-SIMD128-NEXT: i32.or $push24=, $pop21, $pop23 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop24 +; NO-SIMD128-NEXT: i32.and $push25=, $10, $2 +; NO-SIMD128-NEXT: i32.const $push34=, -1 +; NO-SIMD128-NEXT: i32.xor $push26=, $2, $pop34 +; NO-SIMD128-NEXT: i32.and $push27=, $18, $pop26 +; NO-SIMD128-NEXT: i32.or $push28=, $pop25, $pop27 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 +; NO-SIMD128-NEXT: i32.and $push29=, $9, $1 +; NO-SIMD128-NEXT: i32.const $push33=, -1 +; NO-SIMD128-NEXT: i32.xor $push30=, $1, $pop33 +; NO-SIMD128-NEXT: i32.and $push31=, $17, $pop30 ; NO-SIMD128-NEXT: i32.or $push32=, $pop29, $pop31 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 -; NO-SIMD128-NEXT: i32.and $push33=, $10, $2 -; NO-SIMD128-NEXT: i32.const $push42=, -1 -; NO-SIMD128-NEXT: i32.xor $push34=, $2, 
$pop42 -; NO-SIMD128-NEXT: i32.and $push35=, $18, $pop34 -; NO-SIMD128-NEXT: i32.or $push36=, $pop33, $pop35 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 -; NO-SIMD128-NEXT: i32.and $push37=, $9, $1 -; NO-SIMD128-NEXT: i32.const $push41=, -1 -; NO-SIMD128-NEXT: i32.xor $push38=, $1, $pop41 -; NO-SIMD128-NEXT: i32.and $push39=, $17, $pop38 -; NO-SIMD128-NEXT: i32.or $push40=, $pop37, $pop39 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_v8i16: @@ -9126,55 +7606,47 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $2 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop39 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $18, $pop6 ; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $3 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop38 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $19, $pop10 ; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $4 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop37 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $20, $pop14 ; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $5 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $5, $pop44 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $21, $pop20 -; NO-SIMD128-FAST-NEXT: i32.or $push22=, $pop19, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $6 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $6, $pop43 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $22, $pop24 -; NO-SIMD128-FAST-NEXT: i32.or $push26=, $pop23, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $7 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $7, $pop42 -; NO-SIMD128-FAST-NEXT: i32.and $push31=, $23, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $13, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $5, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop18 +; NO-SIMD128-FAST-NEXT: i32.or $push20=, $pop17, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.and 
$push21=, $14, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $6, $pop35 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop22 +; NO-SIMD128-FAST-NEXT: i32.or $push24=, $pop21, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $7, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $23, $pop26 +; NO-SIMD128-FAST-NEXT: i32.or $push28=, $pop25, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $8, $pop33 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $24, $pop30 ; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $8 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $8, $pop41 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $24, $pop36 -; NO-SIMD128-FAST-NEXT: i32.or $push38=, $pop35, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <8 x i16> %v1, %c %inv_mask = xor <8 x i16> @@ -9203,46 +7675,38 @@ define <8 x i16> @bitselect_xor_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2 ; NO-SIMD128-LABEL: bitselect_xor_v8i16: ; NO-SIMD128: .functype bitselect_xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push3=, 14 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 ; NO-SIMD128-NEXT: i32.xor $push0=, $16, $24 ; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $8 ; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $24 -; NO-SIMD128-NEXT: i32.store16 0($pop4), $pop2 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.xor $push5=, $15, $23 -; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $7 -; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $23 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push13=, 10 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.xor $push10=, $14, $22 -; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $6 -; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $22 -; NO-SIMD128-NEXT: i32.store16 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.xor $push15=, $13, $21 -; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $5 -; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 6 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.xor $push18=, $12, $20 -; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $4 -; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $20 -; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.xor $push23=, $11, $19 -; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $3 -; NO-SIMD128-NEXT: i32.xor $push25=, $pop24, $19 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop25 -; NO-SIMD128-NEXT: i32.xor $push26=, $10, $18 -; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $2 -; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $18 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 -; 
NO-SIMD128-NEXT: i32.xor $push29=, $9, $17 -; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $1 -; NO-SIMD128-NEXT: i32.xor $push31=, $pop30, $17 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop31 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $15, $23 +; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $7 +; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $23 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $14, $22 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $6 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $22 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push9=, $13, $21 +; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $5 +; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $21 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop11 +; NO-SIMD128-NEXT: i32.xor $push12=, $12, $20 +; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $4 +; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $20 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop14 +; NO-SIMD128-NEXT: i32.xor $push15=, $11, $19 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $3 +; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $19 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop17 +; NO-SIMD128-NEXT: i32.xor $push18=, $10, $18 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $2 +; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $18 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop20 +; NO-SIMD128-NEXT: i32.xor $push21=, $9, $17 +; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $1 +; NO-SIMD128-NEXT: i32.xor $push23=, $pop22, $17 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop23 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_v8i16: @@ -9260,34 +7724,26 @@ define <8 x i16> @bitselect_xor_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $19 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $12, $20 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $20 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $13, $21 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $5 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $14, $22 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $6 -; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $pop20, $22 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $15, $23 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $7 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $16, $24 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $pop29, $8 -; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $pop30, $24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $12, $20 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $20 +; 
NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $13, $21
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $5
+; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $pop13, $21
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $14, $22
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $6
+; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $pop16, $22
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17
+; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $15, $23
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $7
+; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $23
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $16, $24
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $8
+; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $pop22, $24
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23
 ; NO-SIMD128-FAST-NEXT: return
 %xor1 = xor <8 x i16> %v1, %v2
 %and = and <8 x i16> %xor1, %c
@@ -9314,62 +7770,54 @@ define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x
 ; NO-SIMD128-LABEL: bitselect_xor_reversed_v8i16:
 ; NO-SIMD128: .functype bitselect_xor_reversed_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push5=, 14
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
 ; NO-SIMD128-NEXT: i32.xor $push2=, $16, $24
 ; NO-SIMD128-NEXT: i32.const $push0=, -1
 ; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0
 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1
 ; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $24
-; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4
-; NO-SIMD128-NEXT: i32.const $push11=, 12
-; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11
-; NO-SIMD128-NEXT: i32.xor $push8=, $15, $23
-; NO-SIMD128-NEXT: i32.const $push47=, -1
-; NO-SIMD128-NEXT: i32.xor $push7=, $7, $pop47
-; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $23
-; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10
-; NO-SIMD128-NEXT: i32.const $push17=, 10
-; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-NEXT: i32.xor $push14=, $14, $22
-; NO-SIMD128-NEXT: i32.const $push46=, -1
-; NO-SIMD128-NEXT: i32.xor $push13=, $6, $pop46
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop4
+; NO-SIMD128-NEXT: i32.xor $push6=, $15, $23
+; NO-SIMD128-NEXT: i32.const $push39=, -1
+; NO-SIMD128-NEXT: i32.xor $push5=, $7, $pop39
+; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $pop5
+; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $23
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop8
+; NO-SIMD128-NEXT: i32.xor $push10=, $14, $22
+; NO-SIMD128-NEXT: i32.const $push38=, -1
+; NO-SIMD128-NEXT: i32.xor $push9=, $6, $pop38
+; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $pop9
+; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $22
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop12
+; NO-SIMD128-NEXT: i32.xor $push14=, $13, $21
+; NO-SIMD128-NEXT: i32.const $push37=, -1
+; NO-SIMD128-NEXT: i32.xor $push13=, $5, $pop37
 ; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $22
-; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16
-; NO-SIMD128-NEXT: i32.xor $push20=, $13, $21
-; NO-SIMD128-NEXT: i32.const $push45=, -1
-; NO-SIMD128-NEXT: i32.xor $push19=, $5, $pop45
-; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $pop19
-; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $21
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop22
-; NO-SIMD128-NEXT: i32.const $push27=, 6
-; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-NEXT: i32.xor $push24=, $12, $20
-; NO-SIMD128-NEXT: i32.const $push44=, -1
-; NO-SIMD128-NEXT: i32.xor $push23=, $4, $pop44
-; NO-SIMD128-NEXT: i32.and $push25=, $pop24, $pop23
-; NO-SIMD128-NEXT: i32.xor $push26=, $pop25, $20
-; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26
-; NO-SIMD128-NEXT: i32.xor $push30=, $11, $19
-; NO-SIMD128-NEXT: i32.const $push43=, -1
-; NO-SIMD128-NEXT: i32.xor $push29=, $3, $pop43
+; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $21
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop16
+; NO-SIMD128-NEXT: i32.xor $push18=, $12, $20
+; NO-SIMD128-NEXT: i32.const $push36=, -1
+; NO-SIMD128-NEXT: i32.xor $push17=, $4, $pop36
+; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $pop17
+; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $20
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop20
+; NO-SIMD128-NEXT: i32.xor $push22=, $11, $19
+; NO-SIMD128-NEXT: i32.const $push35=, -1
+; NO-SIMD128-NEXT: i32.xor $push21=, $3, $pop35
+; NO-SIMD128-NEXT: i32.and $push23=, $pop22, $pop21
+; NO-SIMD128-NEXT: i32.xor $push24=, $pop23, $19
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop24
+; NO-SIMD128-NEXT: i32.xor $push26=, $10, $18
+; NO-SIMD128-NEXT: i32.const $push34=, -1
+; NO-SIMD128-NEXT: i32.xor $push25=, $2, $pop34
+; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $pop25
+; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $18
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop28
+; NO-SIMD128-NEXT: i32.xor $push30=, $9, $17
+; NO-SIMD128-NEXT: i32.const $push33=, -1
+; NO-SIMD128-NEXT: i32.xor $push29=, $1, $pop33
 ; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $pop29
-; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $19
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop32
-; NO-SIMD128-NEXT: i32.xor $push34=, $10, $18
-; NO-SIMD128-NEXT: i32.const $push42=, -1
-; NO-SIMD128-NEXT: i32.xor $push33=, $2, $pop42
-; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $pop33
-; NO-SIMD128-NEXT: i32.xor $push36=, $pop35, $18
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop36
-; NO-SIMD128-NEXT: i32.xor $push38=, $9, $17
-; NO-SIMD128-NEXT: i32.const $push41=, -1
-; NO-SIMD128-NEXT: i32.xor $push37=, $1, $pop41
-; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $pop37
-; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $17
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop40
+; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $17
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop32
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v8i16:
@@ -9382,55 +7830,47 @@ define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x
 ; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $17
 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4
 ; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $10, $18
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop39
 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5
 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $18
 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8
 ; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $11, $19
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop46
+; NO-SIMD128-FAST-NEXT: i32.const $push38=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop38
 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9
 ; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $19
 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
 ; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $12, $20
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop45
+; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop37
 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13
 ; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $20
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16
-; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $13, $21
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $5, $pop44
-; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $pop19
-; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $pop21, $21
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $14, $22
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $6, $pop43
-; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $22
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33
-; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $15, $23
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $7, $pop42
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16
+; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $13, $21
+; NO-SIMD128-FAST-NEXT: i32.const $push36=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $5, $pop36
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $pop17
+; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $21
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $14, $22
+; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $6, $pop35
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop21
+; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $pop23, $22
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24
+; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $15, $23
+; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $7, $pop34
+; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $pop25
+; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $pop27, $23
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28
+; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $16, $24
+; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1
+; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop33
 ; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $pop29
-; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $23
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39
-; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $16, $24
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1
-; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $8, $pop41
-; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $pop35
-; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $24
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38
+; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $24
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32
 ; NO-SIMD128-FAST-NEXT: return
 %xor1 = xor <8 x i16> %v1, %v2
 %notc = xor <8 x i16> %c,
@extmul_low_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
 ; NO-SIMD128-LABEL: extmul_low_s_v8i16:
 ; NO-SIMD128: .functype extmul_low_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.extend8_s $push1=, $5
-; NO-SIMD128-NEXT: i32.extend8_s $push0=, $21
+; NO-SIMD128-NEXT: i32.extend8_s $push1=, $8
+; NO-SIMD128-NEXT: i32.extend8_s $push0=, $24
 ; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop2
-; NO-SIMD128-NEXT: i32.extend8_s $push4=, $3
-; NO-SIMD128-NEXT: i32.extend8_s $push3=, $19
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
+; NO-SIMD128-NEXT: i32.extend8_s $push4=, $7
+; NO-SIMD128-NEXT: i32.extend8_s $push3=, $23
 ; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop5
-; NO-SIMD128-NEXT: i32.extend8_s $push7=, $2
-; NO-SIMD128-NEXT: i32.extend8_s $push6=, $18
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop5
+; NO-SIMD128-NEXT: i32.extend8_s $push7=, $6
+; NO-SIMD128-NEXT: i32.extend8_s $push6=, $22
 ; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop8
-; NO-SIMD128-NEXT: i32.extend8_s $push10=, $1
-; NO-SIMD128-NEXT: i32.extend8_s $push9=, $17
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop8
+; NO-SIMD128-NEXT: i32.extend8_s $push10=, $5
+; NO-SIMD128-NEXT: i32.extend8_s $push9=, $21
 ; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 14
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.extend8_s $push13=, $8
-; NO-SIMD128-NEXT: i32.extend8_s $push12=, $24
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop11
+; NO-SIMD128-NEXT: i32.extend8_s $push13=, $4
+; NO-SIMD128-NEXT: i32.extend8_s $push12=, $20
 ; NO-SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push20=, 12
-; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-NEXT: i32.extend8_s $push18=, $7
-; NO-SIMD128-NEXT: i32.extend8_s $push17=, $23
-; NO-SIMD128-NEXT: i32.mul $push19=, $pop18, $pop17
-; NO-SIMD128-NEXT: i32.store16 0($pop21), $pop19
-; NO-SIMD128-NEXT: i32.const $push25=, 10
-; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25
-; NO-SIMD128-NEXT: i32.extend8_s $push23=, $6
-; NO-SIMD128-NEXT: i32.extend8_s $push22=, $22
-; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22
-; NO-SIMD128-NEXT: i32.store16 0($pop26), $pop24
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.extend8_s $push28=, $4
-; NO-SIMD128-NEXT: i32.extend8_s $push27=, $20
-; NO-SIMD128-NEXT: i32.mul $push29=, $pop28, $pop27
-; NO-SIMD128-NEXT: i32.store16 0($pop31), $pop29
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop14
+; NO-SIMD128-NEXT: i32.extend8_s $push16=, $3
+; NO-SIMD128-NEXT: i32.extend8_s $push15=, $19
+; NO-SIMD128-NEXT: i32.mul $push17=, $pop16, $pop15
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop17
+; NO-SIMD128-NEXT: i32.extend8_s $push19=, $2
+; NO-SIMD128-NEXT: i32.extend8_s $push18=, $18
+; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop20
+; NO-SIMD128-NEXT: i32.extend8_s $push22=, $1
+; NO-SIMD128-NEXT: i32.extend8_s $push21=, $17
+; NO-SIMD128-NEXT: i32.mul $push23=, $pop22, $pop21
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop23
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: extmul_low_s_v8i16:
@@ -9515,34 +7947,26 @@ define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push6=, $19
 ; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6
 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $4
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $20
-; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $5
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $21
-; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $pop15, $pop14
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $6
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $22
-; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23
-; NO-SIMD128-FAST-NEXT: i32.mul $push26=, $pop25, $pop24
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push30=, $8
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $24
-; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push10=, $4
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $20
+; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $5
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $21
+; NO-SIMD128-FAST-NEXT: i32.mul $push14=, $pop13, $pop12
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $6
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $22
+; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $7
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $23
+; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $8
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $24
+; NO-SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23
 ; NO-SIMD128-FAST-NEXT: return
 %low1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32>
@@ -9572,46 +7996,38 @@ define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
 ; NO-SIMD128-LABEL: extmul_high_s_v8i16:
 ; NO-SIMD128: .functype extmul_high_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.extend8_s $push1=, $13
-; NO-SIMD128-NEXT: i32.extend8_s $push0=, $29
+; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16
+; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32
 ; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop2
-; NO-SIMD128-NEXT: i32.extend8_s $push4=, $11
-; NO-SIMD128-NEXT: i32.extend8_s $push3=, $27
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop2
+; NO-SIMD128-NEXT: i32.extend8_s $push4=, $15
+; NO-SIMD128-NEXT: i32.extend8_s $push3=, $31
 ; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop5
-; NO-SIMD128-NEXT: i32.extend8_s $push7=, $10
-; NO-SIMD128-NEXT: i32.extend8_s $push6=, $26
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop5
+; NO-SIMD128-NEXT: i32.extend8_s $push7=, $14
+; NO-SIMD128-NEXT: i32.extend8_s $push6=, $30
 ; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop8
-; NO-SIMD128-NEXT: i32.extend8_s $push10=, $9
-; NO-SIMD128-NEXT: i32.extend8_s $push9=, $25
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop8
+; NO-SIMD128-NEXT: i32.extend8_s $push10=, $13
+; NO-SIMD128-NEXT: i32.extend8_s $push9=, $29
 ; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop11
-; NO-SIMD128-NEXT: i32.const $push15=, 14
-; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15
-; NO-SIMD128-NEXT: i32.extend8_s $push13=, $16
-; NO-SIMD128-NEXT: i32.extend8_s $push12=, $32
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop11
+; NO-SIMD128-NEXT: i32.extend8_s $push13=, $12
+; NO-SIMD128-NEXT: i32.extend8_s $push12=, $28
 ; NO-SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12
-; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14
-; NO-SIMD128-NEXT: i32.const $push20=, 12
-; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20
-; NO-SIMD128-NEXT: i32.extend8_s $push18=, $15
-; NO-SIMD128-NEXT: i32.extend8_s $push17=, $31
-; NO-SIMD128-NEXT: i32.mul $push19=, $pop18, $pop17
-; NO-SIMD128-NEXT: i32.store16 0($pop21), $pop19
-; NO-SIMD128-NEXT: i32.const $push25=, 10
-; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25
-; NO-SIMD128-NEXT: i32.extend8_s $push23=, $14
-; NO-SIMD128-NEXT: i32.extend8_s $push22=, $30
-; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22
-; NO-SIMD128-NEXT: i32.store16 0($pop26), $pop24
-; NO-SIMD128-NEXT: i32.const $push30=, 6
-; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30
-; NO-SIMD128-NEXT: i32.extend8_s $push28=, $12
-; NO-SIMD128-NEXT: i32.extend8_s $push27=, $28
-; NO-SIMD128-NEXT: i32.mul $push29=, $pop28, $pop27
-; NO-SIMD128-NEXT: i32.store16 0($pop31), $pop29
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop14
+; NO-SIMD128-NEXT: i32.extend8_s $push16=, $11
+; NO-SIMD128-NEXT: i32.extend8_s $push15=, $27
+; NO-SIMD128-NEXT: i32.mul $push17=, $pop16, $pop15
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop17
+; NO-SIMD128-NEXT: i32.extend8_s $push19=, $10
+; NO-SIMD128-NEXT: i32.extend8_s $push18=, $26
+; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop20
+; NO-SIMD128-NEXT: i32.extend8_s $push22=, $9
+; NO-SIMD128-NEXT: i32.extend8_s $push21=, $25
+; NO-SIMD128-NEXT: i32.mul $push23=, $pop22, $pop21
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop23
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: extmul_high_s_v8i16:
@@ -9629,34 +8045,26 @@ define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push6=, $27
 ; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6
 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $12
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $28
-; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $13
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $29
-; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $pop15, $pop14
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16
-; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $14
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $30
-; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21
-; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $15
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $31
-; NO-SIMD128-FAST-NEXT: i32.mul $push26=, $pop25, $pop24
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26
-; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push30=, $16
-; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $32
-; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push10=, $12
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $28
+; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $13
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $29
+; NO-SIMD128-FAST-NEXT: i32.mul $push14=, $pop13, $pop12
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $14
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $30
+; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $15
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $31
+; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $16
+; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $32
+; NO-SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23
 ; NO-SIMD128-FAST-NEXT: return
 %high1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32>
@@ -9687,61 +8095,53 @@ define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
 ; NO-SIMD128: .functype extmul_low_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
 ; NO-SIMD128-NEXT: i32.const $push0=, 255
-; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop0
-; NO-SIMD128-NEXT: i32.const $push47=, 255
-; NO-SIMD128-NEXT: i32.and $push1=, $21, $pop47
-; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push46=, 255
-; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop46
-; NO-SIMD128-NEXT: i32.const $push45=, 255
-; NO-SIMD128-NEXT: i32.and $push4=, $19, $pop45
-; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push44=, 255
-; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop44
-; NO-SIMD128-NEXT: i32.const $push43=, 255
-; NO-SIMD128-NEXT: i32.and $push7=, $18, $pop43
-; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop9
-; NO-SIMD128-NEXT: i32.const $push42=, 255
-; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop42
-; NO-SIMD128-NEXT: i32.const $push41=, 255
-; NO-SIMD128-NEXT: i32.and $push10=, $17, $pop41
-; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop12
-; NO-SIMD128-NEXT: i32.const $push16=, 14
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.const $push40=, 255
-; NO-SIMD128-NEXT: i32.and $push14=, $8, $pop40
+; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0
 ; NO-SIMD128-NEXT: i32.const $push39=, 255
-; NO-SIMD128-NEXT: i32.and $push13=, $24, $pop39
-; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push21=, 12
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
+; NO-SIMD128-NEXT: i32.and $push1=, $24, $pop39
+; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop3
 ; NO-SIMD128-NEXT: i32.const $push38=, 255
-; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop38
+; NO-SIMD128-NEXT: i32.and $push5=, $7, $pop38
 ; NO-SIMD128-NEXT: i32.const $push37=, 255
-; NO-SIMD128-NEXT: i32.and $push18=, $23, $pop37
-; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push26=, 10
-; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26
+; NO-SIMD128-NEXT: i32.and $push4=, $23, $pop37
+; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop6
 ; NO-SIMD128-NEXT: i32.const $push36=, 255
-; NO-SIMD128-NEXT: i32.and $push24=, $6, $pop36
+; NO-SIMD128-NEXT: i32.and $push8=, $6, $pop36
 ; NO-SIMD128-NEXT: i32.const $push35=, 255
-; NO-SIMD128-NEXT: i32.and $push23=, $22, $pop35
-; NO-SIMD128-NEXT: i32.mul $push25=, $pop24, $pop23
-; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-NEXT: i32.const $push31=, 6
-; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31
+; NO-SIMD128-NEXT: i32.and $push7=, $22, $pop35
+; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop9
 ; NO-SIMD128-NEXT: i32.const $push34=, 255
-; NO-SIMD128-NEXT: i32.and $push29=, $4, $pop34
+; NO-SIMD128-NEXT: i32.and $push11=, $5, $pop34
 ; NO-SIMD128-NEXT: i32.const $push33=, 255
-; NO-SIMD128-NEXT: i32.and $push28=, $20, $pop33
-; NO-SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28
-; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-NEXT: i32.and $push10=, $21, $pop33
+; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push32=, 255
+; NO-SIMD128-NEXT: i32.and $push14=, $4, $pop32
+; NO-SIMD128-NEXT: i32.const $push31=, 255
+; NO-SIMD128-NEXT: i32.and $push13=, $20, $pop31
+; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop15
+; NO-SIMD128-NEXT: i32.const $push30=, 255
+; NO-SIMD128-NEXT: i32.and $push17=, $3, $pop30
+; NO-SIMD128-NEXT: i32.const $push29=, 255
+; NO-SIMD128-NEXT: i32.and $push16=, $19, $pop29
+; NO-SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop18
+; NO-SIMD128-NEXT: i32.const $push28=, 255
+; NO-SIMD128-NEXT: i32.and $push20=, $2, $pop28
+; NO-SIMD128-NEXT: i32.const $push27=, 255
+; NO-SIMD128-NEXT: i32.and $push19=, $18, $pop27
+; NO-SIMD128-NEXT: i32.mul $push21=, $pop20, $pop19
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop21
+; NO-SIMD128-NEXT: i32.const $push26=, 255
+; NO-SIMD128-NEXT: i32.and $push23=, $1, $pop26
+; NO-SIMD128-NEXT: i32.const $push25=, 255
+; NO-SIMD128-NEXT: i32.and $push22=, $17, $pop25
+; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop24
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: extmul_low_u_v8i16:
@@ -9749,60 +8149,52 @@ define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
 ; NO-SIMD128-FAST-NEXT: # %bb.0:
 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop39
 ; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1
 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop46
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop45
-; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
-; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop43
-; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7
-; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop42
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop41
-; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop40
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop39
-; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
 ; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop38
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop38
 ; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $22, $pop37
-; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop37
+; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
+; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
 ; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop36
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop36
 ; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $23, $pop35
-; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop35
+; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
 ; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop34
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop34
 ; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push28=, $24, $pop33
-; NO-SIMD128-FAST-NEXT: i32.mul $push30=, $pop29, $pop28
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop33
+; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push32=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $5, $pop32
+; NO-SIMD128-FAST-NEXT: i32.const $push31=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $21, $pop31
+; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.const $push30=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $6, $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push29=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $22, $pop29
+; NO-SIMD128-FAST-NEXT: i32.mul $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.const $push28=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop27
+; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.const $push26=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $8, $pop26
+; NO-SIMD128-FAST-NEXT: i32.const $push25=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $24, $pop25
+; NO-SIMD128-FAST-NEXT: i32.mul $push24=, $pop23, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24
 ; NO-SIMD128-FAST-NEXT: return
 %low1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32>
@@ -9833,61 +8225,53 @@ define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
 ; NO-SIMD128: .functype extmul_high_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
 ; NO-SIMD128-NEXT: i32.const $push0=, 255
-; NO-SIMD128-NEXT: i32.and $push2=, $13, $pop0
-; NO-SIMD128-NEXT: i32.const $push47=, 255
-; NO-SIMD128-NEXT: i32.and $push1=, $29, $pop47
-; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1
-; NO-SIMD128-NEXT: i32.store16 8($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push46=, 255
-; NO-SIMD128-NEXT: i32.and $push5=, $11, $pop46
-; NO-SIMD128-NEXT: i32.const $push45=, 255
-; NO-SIMD128-NEXT: i32.and $push4=, $27, $pop45
-; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4
-; NO-SIMD128-NEXT: i32.store16 4($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push44=, 255
-; NO-SIMD128-NEXT: i32.and $push8=, $10, $pop44
-; NO-SIMD128-NEXT: i32.const $push43=, 255
-; NO-SIMD128-NEXT: i32.and $push7=, $26, $pop43
-; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
-; NO-SIMD128-NEXT: i32.store16 2($0), $pop9
-; NO-SIMD128-NEXT: i32.const $push42=, 255
-; NO-SIMD128-NEXT: i32.and $push11=, $9, $pop42
-; NO-SIMD128-NEXT: i32.const $push41=, 255
-; NO-SIMD128-NEXT: i32.and $push10=, $25, $pop41
-; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10
-; NO-SIMD128-NEXT: i32.store16 0($0), $pop12
-; NO-SIMD128-NEXT: i32.const $push16=, 14
-; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16
-; NO-SIMD128-NEXT: i32.const $push40=, 255
-; NO-SIMD128-NEXT: i32.and $push14=, $16, $pop40
+; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0
 ; NO-SIMD128-NEXT: i32.const $push39=, 255
-; NO-SIMD128-NEXT: i32.and $push13=, $32, $pop39
-; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13
-; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15
-; NO-SIMD128-NEXT: i32.const $push21=, 12
-; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21
+; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop39
+; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1
+; NO-SIMD128-NEXT: i32.store16 14($0), $pop3
 ; NO-SIMD128-NEXT: i32.const $push38=, 255
-; NO-SIMD128-NEXT: i32.and $push19=, $15, $pop38
+; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop38
 ; NO-SIMD128-NEXT: i32.const $push37=, 255
-; NO-SIMD128-NEXT: i32.and $push18=, $31, $pop37
-; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18
-; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-NEXT: i32.const $push26=, 10
-; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26
+; NO-SIMD128-NEXT: i32.and $push4=, $31, $pop37
+; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4
+; NO-SIMD128-NEXT: i32.store16 12($0), $pop6
 ; NO-SIMD128-NEXT: i32.const $push36=, 255
-; NO-SIMD128-NEXT: i32.and $push24=, $14, $pop36
+; NO-SIMD128-NEXT: i32.and $push8=, $14, $pop36
 ; NO-SIMD128-NEXT: i32.const $push35=, 255
-; NO-SIMD128-NEXT: i32.and $push23=, $30, $pop35
-; NO-SIMD128-NEXT: i32.mul $push25=, $pop24, $pop23
-; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-NEXT: i32.const $push31=, 6
-; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31
+; NO-SIMD128-NEXT: i32.and $push7=, $30, $pop35
+; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
+; NO-SIMD128-NEXT: i32.store16 10($0), $pop9
 ; NO-SIMD128-NEXT: i32.const $push34=, 255
-; NO-SIMD128-NEXT: i32.and $push29=, $12, $pop34
+; NO-SIMD128-NEXT: i32.and $push11=, $13, $pop34
 ; NO-SIMD128-NEXT: i32.const $push33=, 255
-; NO-SIMD128-NEXT: i32.and $push28=, $28, $pop33
-; NO-SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28
-; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-NEXT: i32.and $push10=, $29, $pop33
+; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10
+; NO-SIMD128-NEXT: i32.store16 8($0), $pop12
+; NO-SIMD128-NEXT: i32.const $push32=, 255
+; NO-SIMD128-NEXT: i32.and $push14=, $12, $pop32
+; NO-SIMD128-NEXT: i32.const $push31=, 255
+; NO-SIMD128-NEXT: i32.and $push13=, $28, $pop31
+; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13
+; NO-SIMD128-NEXT: i32.store16 6($0), $pop15
+; NO-SIMD128-NEXT: i32.const $push30=, 255
+; NO-SIMD128-NEXT: i32.and $push17=, $11, $pop30
+; NO-SIMD128-NEXT: i32.const $push29=, 255
+; NO-SIMD128-NEXT: i32.and $push16=, $27, $pop29
+; NO-SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16
+; NO-SIMD128-NEXT: i32.store16 4($0), $pop18
+; NO-SIMD128-NEXT: i32.const $push28=, 255
+; NO-SIMD128-NEXT: i32.and $push20=, $10, $pop28
+; NO-SIMD128-NEXT: i32.const $push27=, 255
+; NO-SIMD128-NEXT: i32.and $push19=, $26, $pop27
+; NO-SIMD128-NEXT: i32.mul $push21=, $pop20, $pop19
+; NO-SIMD128-NEXT: i32.store16 2($0), $pop21
+; NO-SIMD128-NEXT: i32.const $push26=, 255
+; NO-SIMD128-NEXT: i32.and $push23=, $9, $pop26
+; NO-SIMD128-NEXT: i32.const $push25=, 255
+; NO-SIMD128-NEXT: i32.and $push22=, $25, $pop25
+; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22
+; NO-SIMD128-NEXT: i32.store16 0($0), $pop24
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: extmul_high_u_v8i16:
@@ -9895,60 +8279,52 @@ define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) {
 ; NO-SIMD128-FAST-NEXT: # %bb.0:
 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255
 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $9, $pop0
-; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push1=, $25, $pop47
+; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push1=, $25, $pop39
 ; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1
 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop46
-; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push4=, $26, $pop45
-; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
-; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $11, $pop44
-; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push7=, $27, $pop43
-; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7
-; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6
-; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13
-; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $pop42
-; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push10=, $28, $pop41
-; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12
-; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push16=, $13, $pop40
-; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push15=, $29, $pop39
-; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15
-; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17
-; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10
-; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21
 ; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push19=, $14, $pop38
+; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop38
 ; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push18=, $30, $pop37
-; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20
-; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26
+; NO-SIMD128-FAST-NEXT: i32.and $push4=, $26, $pop37
+; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
+; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6
 ; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push24=, $15, $pop36
+; NO-SIMD128-FAST-NEXT: i32.and $push8=, $11, $pop36
 ; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push23=, $31, $pop35
-; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $pop24, $pop23
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14
-; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31
+; NO-SIMD128-FAST-NEXT: i32.and $push7=, $27, $pop35
+; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7
+; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9
 ; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop34
+; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $pop34
 ; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255
-; NO-SIMD128-FAST-NEXT: i32.and $push28=, $32, $pop33
-; NO-SIMD128-FAST-NEXT: i32.mul $push30=, $pop29, $pop28
-; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30
+; NO-SIMD128-FAST-NEXT: i32.and $push10=, $28, $pop33
+; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10
+; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12
+; NO-SIMD128-FAST-NEXT: i32.const $push32=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push14=, $13, $pop32
+; NO-SIMD128-FAST-NEXT: i32.const $push31=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push13=, $29, $pop31
+; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13
+; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15
+; NO-SIMD128-FAST-NEXT: i32.const $push30=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push17=, $14, $pop30
+; NO-SIMD128-FAST-NEXT: i32.const $push29=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push16=, $30, $pop29
+; NO-SIMD128-FAST-NEXT: i32.mul $push18=, $pop17, $pop16
+; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18
+; NO-SIMD128-FAST-NEXT: i32.const $push28=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push20=, $15, $pop28
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push19=, $31, $pop27
+; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19
+; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21
+; NO-SIMD128-FAST-NEXT: i32.const $push26=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push23=, $16, $pop26
+; NO-SIMD128-FAST-NEXT: i32.const $push25=, 255
+; NO-SIMD128-FAST-NEXT: i32.and $push22=, $32, $pop25
+; NO-SIMD128-FAST-NEXT: i32.mul $push24=, $pop23, $pop22
+; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24
 ; NO-SIMD128-FAST-NEXT: return
 %high1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32>
@@ -9979,16 +8355,14 @@ define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-LABEL: add_v4i32:
 ; NO-SIMD128: .functype add_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.add $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.add $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.add $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.add $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.add $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.add $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.add $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.add $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: add_v4i32:
@@ -10000,10 +8374,8 @@ define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
 ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $7
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.add $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
 ; NO-SIMD128-FAST-NEXT: return
 %a = add <4 x i32> %x, %y
 ret <4 x i32> %a
@@ -10025,16 +8397,14 @@ define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-LABEL: sub_v4i32:
 ; NO-SIMD128: .functype sub_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.sub $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.sub $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.sub $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.sub $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.sub $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.sub $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.sub $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.sub $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: sub_v4i32:
@@ -10046,10 +8416,8 @@ define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
 ; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $7
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
 ; NO-SIMD128-FAST-NEXT: return
 %a = sub <4 x i32> %x, %y
 ret <4 x i32> %a
@@ -10071,16 +8439,14 @@ define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-LABEL: mul_v4i32:
 ; NO-SIMD128: .functype mul_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.mul $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.mul $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.mul $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.mul $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.mul $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.mul $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.mul $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.mul $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: mul_v4i32:
@@ -10092,10 +8458,8 @@ define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
 ; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $7
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
 ; NO-SIMD128-FAST-NEXT: return
 %a = mul <4 x i32> %x, %y
 ret <4 x i32> %a
@@ -10117,20 +8481,18 @@ define <4 x i32> @min_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-LABEL: min_s_v4i32:
 ; NO-SIMD128: .functype min_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.lt_s $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.lt_s $push2=, $2, $6
-; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2
-; NO-SIMD128-NEXT: i32.store 4($0), $pop3
-; NO-SIMD128-NEXT: i32.lt_s $push4=, $1, $5
-; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4
-; NO-SIMD128-NEXT: i32.store 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.lt_s $push6=, $4, $8
-; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-NEXT: i32.lt_s $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
+; NO-SIMD128-NEXT: i32.lt_s $push2=, $3, $7
+; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: i32.store 8($0), $pop3
+; NO-SIMD128-NEXT: i32.lt_s $push4=, $2, $6
+; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: i32.store 4($0), $pop5
+; NO-SIMD128-NEXT: i32.lt_s $push6=, $1, $5
+; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: i32.store 0($0), $pop7
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: min_s_v4i32:
@@ -10145,11 +8507,9 @@ define <4 x i32> @min_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push4=, $3, $7
 ; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push6=, $4, $8
 ; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7
 ; NO-SIMD128-FAST-NEXT: return
 %c = icmp slt <4 x i32> %x, %y
 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
@@ -10172,20 +8532,18 @@ define <4 x i32> @min_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-LABEL: min_u_v4i32:
 ; NO-SIMD128: .functype min_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.lt_u $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.lt_u $push2=, $2, $6
-; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2
-; NO-SIMD128-NEXT: i32.store 4($0), $pop3
-; NO-SIMD128-NEXT: i32.lt_u $push4=, $1, $5
-; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4
-; NO-SIMD128-NEXT: i32.store 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.lt_u $push6=, $4, $8
-; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-NEXT: i32.lt_u $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
+; NO-SIMD128-NEXT: i32.lt_u $push2=, $3, $7
+; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: i32.store 8($0), $pop3
+; NO-SIMD128-NEXT: i32.lt_u $push4=, $2, $6
+; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: i32.store 4($0), $pop5
+; NO-SIMD128-NEXT: i32.lt_u $push6=, $1, $5
+; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: i32.store 0($0), $pop7
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: min_u_v4i32:
@@ -10200,11 +8558,9 @@ define <4 x i32> @min_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push4=, $3, $7
 ; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push6=, $4, $8
 ; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7
 ; NO-SIMD128-FAST-NEXT: return
 %c = icmp ult <4 x i32> %x, %y
 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
@@ -10227,20 +8583,18 @@ define <4 x i32> @max_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-LABEL: max_s_v4i32:
 ; NO-SIMD128: .functype max_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.gt_s $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.gt_s $push2=, $2, $6
-; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2
-; NO-SIMD128-NEXT: i32.store 4($0), $pop3
-; NO-SIMD128-NEXT: i32.gt_s $push4=, $1, $5
-; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4
-; NO-SIMD128-NEXT: i32.store 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.gt_s $push6=, $4, $8
-; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-NEXT: i32.gt_s $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
+; NO-SIMD128-NEXT: i32.gt_s $push2=, $3, $7
+; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: i32.store 8($0), $pop3
+; NO-SIMD128-NEXT: i32.gt_s $push4=, $2, $6
+; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: i32.store 4($0), $pop5
+; NO-SIMD128-NEXT: i32.gt_s $push6=, $1, $5
+; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: i32.store 0($0), $pop7
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: max_s_v4i32:
@@ -10255,11 +8609,9 @@ define <4 x i32> @max_s_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push4=, $3, $7
 ; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push6=, $4, $8
 ; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7
 ; NO-SIMD128-FAST-NEXT: return
 %c = icmp sgt <4 x i32> %x, %y
 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
@@ -10282,20 +8634,18 @@ define <4 x i32> @max_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-LABEL: max_u_v4i32:
 ; NO-SIMD128: .functype max_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.gt_u $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.gt_u $push2=, $2, $6
-; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2
-; NO-SIMD128-NEXT: i32.store 4($0), $pop3
-; NO-SIMD128-NEXT: i32.gt_u $push4=, $1, $5
-; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4
-; NO-SIMD128-NEXT: i32.store 0($0), $pop5
-; NO-SIMD128-NEXT: i32.const $push8=, 12
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8
-; NO-SIMD128-NEXT: i32.gt_u $push6=, $4, $8
-; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-NEXT: i32.gt_u $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
+; NO-SIMD128-NEXT: i32.gt_u $push2=, $3, $7
+; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: i32.store 8($0), $pop3
+; NO-SIMD128-NEXT: i32.gt_u $push4=, $2, $6
+; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: i32.store 4($0), $pop5
+; NO-SIMD128-NEXT: i32.gt_u $push6=, $1, $5
+; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: i32.store 0($0), $pop7
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: max_u_v4i32:
@@ -10310,11 +8660,9 @@ define <4 x i32> @max_u_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push4=, $3, $7
 ; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8
 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push6=, $4, $8
 ; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7
 ; NO-SIMD128-FAST-NEXT: return
 %c = icmp ugt <4 x i32> %x, %y
 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
@@ -10337,63 +8685,59 @@ define <4 x i32> @abs_v4i32(<4 x i32> %x) {
 ; NO-SIMD128-LABEL: abs_v4i32:
 ; NO-SIMD128: .functype abs_v4i32 (i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.const $push3=, 12
-; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3
 ; NO-SIMD128-NEXT: i32.const $push0=, 31
-; NO-SIMD128-NEXT: i32.shr_s $push21=, $4, $pop0
-; NO-SIMD128-NEXT: local.tee $push20=, $5=, $pop21
-; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop20
+; NO-SIMD128-NEXT: i32.shr_s $push19=, $4, $pop0
+; NO-SIMD128-NEXT: local.tee $push18=, $5=, $pop19
+; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop18
 ; NO-SIMD128-NEXT: i32.sub $push2=, $pop1, $5
-; NO-SIMD128-NEXT: i32.store 0($pop4), $pop2
-; NO-SIMD128-NEXT: i32.const $push19=, 31
-; NO-SIMD128-NEXT: i32.shr_s $push18=, $3, $pop19
-; NO-SIMD128-NEXT: local.tee $push17=, $4=, $pop18
-; NO-SIMD128-NEXT: i32.xor $push5=, $3, $pop17
+; NO-SIMD128-NEXT: i32.store 12($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push17=, 31
+; NO-SIMD128-NEXT: i32.shr_s $push16=, $3, $pop17
+; NO-SIMD128-NEXT: local.tee $push15=, $4=, $pop16
+; NO-SIMD128-NEXT: i32.xor $push3=, $3, $pop15
+; NO-SIMD128-NEXT: i32.sub $push4=, $pop3, $4
+; NO-SIMD128-NEXT: i32.store 8($0), $pop4
+; NO-SIMD128-NEXT: i32.const $push14=, 31
+; NO-SIMD128-NEXT: i32.shr_s $push13=, $2, $pop14
+; NO-SIMD128-NEXT: local.tee $push12=, $4=, $pop13
+; NO-SIMD128-NEXT: i32.xor $push5=, $2, $pop12
 ; NO-SIMD128-NEXT: i32.sub $push6=, $pop5, $4
-; NO-SIMD128-NEXT: i32.store 8($0), $pop6
-; NO-SIMD128-NEXT: i32.const $push16=, 31
-; NO-SIMD128-NEXT: i32.shr_s $push15=, $2, $pop16
-; NO-SIMD128-NEXT: local.tee $push14=, $4=, $pop15
-; NO-SIMD128-NEXT: i32.xor $push7=, $2, $pop14
+; NO-SIMD128-NEXT: i32.store 4($0), $pop6
+; NO-SIMD128-NEXT: i32.const $push11=, 31
+; NO-SIMD128-NEXT: i32.shr_s $push10=, $1, $pop11
+; NO-SIMD128-NEXT: local.tee $push9=, $4=, $pop10
+; NO-SIMD128-NEXT: i32.xor $push7=, $1, $pop9
 ; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $4
-; NO-SIMD128-NEXT: i32.store 4($0), $pop8
-; NO-SIMD128-NEXT: i32.const $push13=, 31
-; NO-SIMD128-NEXT: i32.shr_s $push12=, $1, $pop13
-; NO-SIMD128-NEXT: local.tee $push11=, $4=, $pop12
-; NO-SIMD128-NEXT: i32.xor $push9=, $1, $pop11
-; NO-SIMD128-NEXT: i32.sub $push10=, $pop9, $4
-; NO-SIMD128-NEXT: i32.store 0($0), $pop10
+; NO-SIMD128-NEXT: i32.store 0($0), $pop8
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: abs_v4i32:
 ; NO-SIMD128-FAST: .functype abs_v4i32 (i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-FAST-NEXT: # %bb.0:
 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 31
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $1, $pop0
-; NO-SIMD128-FAST-NEXT: local.tee $push20=, $5=, $pop21
-; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop20
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push19=, $1, $pop0
+; NO-SIMD128-FAST-NEXT: local.tee $push18=, $5=, $pop19
+; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop18
 ; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop1, $5
 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push19=, 31
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $2, $pop19
-; NO-SIMD128-FAST-NEXT: local.tee $push17=, $1=, $pop18
-; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $2, $pop17
+; NO-SIMD128-FAST-NEXT: i32.const $push17=, 31
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $2, $pop17
+; NO-SIMD128-FAST-NEXT: local.tee $push15=, $1=, $pop16
+; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $2, $pop15
 ; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop3, $1
 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push16=, 31
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $3, $pop16
-; NO-SIMD128-FAST-NEXT: local.tee $push14=, $2=, $pop15
-; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $3, $pop14
+; NO-SIMD128-FAST-NEXT: i32.const $push14=, 31
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push13=, $3, $pop14
+; NO-SIMD128-FAST-NEXT: local.tee $push12=, $2=, $pop13
+; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $3, $pop12
 ; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $2
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop6
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9
-; NO-SIMD128-FAST-NEXT: i32.const $push13=, 31
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $4, $pop13
-; NO-SIMD128-FAST-NEXT: local.tee $push11=, $0=, $pop12
-; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $4, $pop11
-; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop7, $0
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push11=, 31
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $4, $pop11
+; NO-SIMD128-FAST-NEXT: local.tee $push9=, $3=, $pop10
+; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $4, $pop9
+; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop7, $3
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop8
 ; NO-SIMD128-FAST-NEXT: return
 %a = sub <4 x i32> zeroinitializer, %x
 %b = icmp slt <4 x i32> %x, zeroinitializer
@@ -10418,19 +8762,17 @@ define <4 x i32> @neg_v4i32(<4 x i32> %x) {
 ; NO-SIMD128: .functype neg_v4i32 (i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
 ; NO-SIMD128-NEXT: i32.const $push0=, 0
-; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $3
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push9=, 0
-; NO-SIMD128-NEXT: i32.sub $push2=, $pop9, $2
-; NO-SIMD128-NEXT: i32.store 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push8=, 0
-; NO-SIMD128-NEXT: i32.sub $push3=, $pop8, $1
-; NO-SIMD128-NEXT: i32.store 0($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 12
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
+; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $4
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
 ; NO-SIMD128-NEXT: i32.const $push7=, 0
-; NO-SIMD128-NEXT: i32.sub $push4=, $pop7, $4
-; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4
+; NO-SIMD128-NEXT: i32.sub $push2=, $pop7, $3
+; NO-SIMD128-NEXT: i32.store 8($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push6=, 0
+; NO-SIMD128-NEXT: i32.sub $push3=, $pop6, $2
+; NO-SIMD128-NEXT: i32.store 4($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push5=, 0
+; NO-SIMD128-NEXT: i32.sub $push4=, $pop5, $1
+; NO-SIMD128-NEXT: i32.store 0($0), $pop4
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: neg_v4i32:
@@ -10439,17 +8781,15 @@ define <4 x i32> @neg_v4i32(<4 x i32> %x) {
 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0
 ; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1
 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop9, $2
+; NO-SIMD128-FAST-NEXT: i32.const $push7=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop7, $2
 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop8, $3
+; NO-SIMD128-FAST-NEXT: i32.const $push6=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop6, $3
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 0
-; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop7, $4
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push5=, 0
+; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop5, $4
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop4
 ; NO-SIMD128-FAST-NEXT: return
 %a = sub <4 x i32> , %x
 ret <4 x i32> %a
@@ -10471,16 +8811,14 @@ define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
 ; NO-SIMD128-LABEL: shl_v4i32:
 ; NO-SIMD128: .functype shl_v4i32 (i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.shl $push0=, $3, $5
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.shl $push1=, $2, $5
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.shl $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.shl $push3=, $4, $5
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.shl $push0=, $4, $5
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.shl $push1=, $3, $5
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.shl $push2=, $2, $5
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.shl $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: shl_v4i32:
@@ -10492,10 +8830,8 @@ define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) {
 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $5
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $4, $5
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $4, $5
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
 ; NO-SIMD128-FAST-NEXT: return
 %t = insertelement <4 x i32> undef, i32 %x, i32 0
 %s = shufflevector <4 x i32> %t, <4 x i32> undef,
@@ -10523,19 +8859,17 @@ define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
 ; NO-SIMD128: .functype shl_const_v4i32 (i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
 ; NO-SIMD128-NEXT: i32.const $push0=, 5
-; NO-SIMD128-NEXT: i32.shl $push1=, $3, $pop0
-; NO-SIMD128-NEXT: i32.store 8($0), $pop1
-; NO-SIMD128-NEXT: i32.const $push9=, 5
-; NO-SIMD128-NEXT: i32.shl $push2=, $2, $pop9
-; NO-SIMD128-NEXT: i32.store 4($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push8=, 5
-; NO-SIMD128-NEXT: i32.shl $push3=, $1, $pop8
-; NO-SIMD128-NEXT: i32.store 0($0), $pop3
-; NO-SIMD128-NEXT: i32.const $push5=, 12
-; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5
+; NO-SIMD128-NEXT: i32.shl $push1=, $4, $pop0
+; NO-SIMD128-NEXT: i32.store 12($0), $pop1
 ; NO-SIMD128-NEXT: i32.const $push7=, 5
-; NO-SIMD128-NEXT: i32.shl $push4=, $4, $pop7
-; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4
+; NO-SIMD128-NEXT: i32.shl $push2=, $3, $pop7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop2
+; NO-SIMD128-NEXT: i32.const $push6=, 5
+; NO-SIMD128-NEXT: i32.shl $push3=, $2, $pop6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop3
+; NO-SIMD128-NEXT: i32.const $push5=, 5
+; NO-SIMD128-NEXT: i32.shl $push4=, $1, $pop5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop4
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: shl_const_v4i32:
@@ -10544,17 +8878,15 @@ define <4 x i32> @shl_const_v4i32(<4 x i32> %v) {
 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5
 ; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0
 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1
-; NO-SIMD128-FAST-NEXT: i32.const $push9=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop9
+; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop7
 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop8
+; NO-SIMD128-FAST-NEXT: i32.const $push6=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop6
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3
-; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5
-; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop7
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6
+; NO-SIMD128-FAST-NEXT: i32.const $push5=, 5
+; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $pop5
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop4
 ; NO-SIMD128-FAST-NEXT: return
 %a = shl <4 x i32> %v,
 ret <4 x i32> %a
@@ -10606,16 +8938,14 @@ define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 ; NO-SIMD128-LABEL: shl_vec_v4i32:
 ; NO-SIMD128: .functype shl_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.shl $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.shl $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.shl $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.shl $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.shl $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.shl $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.shl $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.shl $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: shl_vec_v4i32:
@@ -10627,10 +8957,8 @@ define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $7
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
 ; NO-SIMD128-FAST-NEXT: return
 %a = shl <4 x i32> %v, %x
 ret <4 x i32> %a
@@ -10652,16 +8980,14 @@ define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
 ; NO-SIMD128-LABEL: shr_s_v4i32:
 ; NO-SIMD128: .functype shr_s_v4i32 (i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.shr_s $push0=, $3, $5
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.shr_s $push1=, $2, $5
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.shr_s $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.shr_s $push3=, $4, $5
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.shr_s $push0=, $4, $5
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.shr_s $push1=, $3, $5
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.shr_s $push2=, $2, $5
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.shr_s $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: shr_s_v4i32:
@@ -10673,10 +8999,8 @@ define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) {
 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $3, $5
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $4, $5
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $4, $5
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
 ; NO-SIMD128-FAST-NEXT: return
 %t = insertelement <4 x i32> undef, i32 %x, i32 0
 %s = shufflevector <4 x i32> %t, <4 x i32> undef,
@@ -10731,16 +9055,14 @@ define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 ; NO-SIMD128-LABEL: shr_s_vec_v4i32:
 ; NO-SIMD128: .functype shr_s_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
 ; NO-SIMD128-NEXT: # %bb.0:
-; NO-SIMD128-NEXT: i32.shr_s $push0=, $3, $7
-; NO-SIMD128-NEXT: i32.store 8($0), $pop0
-; NO-SIMD128-NEXT: i32.shr_s $push1=, $2, $6
-; NO-SIMD128-NEXT: i32.store 4($0), $pop1
-; NO-SIMD128-NEXT: i32.shr_s $push2=, $1, $5
-; NO-SIMD128-NEXT: i32.store 0($0), $pop2
-; NO-SIMD128-NEXT: i32.const $push4=, 12
-; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4
-; NO-SIMD128-NEXT: i32.shr_s $push3=, $4, $8
-; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3
+; NO-SIMD128-NEXT: i32.shr_s $push0=, $4, $8
+; NO-SIMD128-NEXT: i32.store 12($0), $pop0
+; NO-SIMD128-NEXT: i32.shr_s $push1=, $3, $7
+; NO-SIMD128-NEXT: i32.store 8($0), $pop1
+; NO-SIMD128-NEXT: i32.shr_s $push2=, $2, $6
+; NO-SIMD128-NEXT: i32.store 4($0), $pop2
+; NO-SIMD128-NEXT: i32.shr_s $push3=, $1, $5
+; NO-SIMD128-NEXT: i32.store 0($0), $pop3
 ; NO-SIMD128-NEXT: return
 ;
 ; NO-SIMD128-FAST-LABEL: shr_s_vec_v4i32:
@@ -10752,10 +9074,8 @@ define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) {
 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1
 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $3, $7
 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2
-; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12
-; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3
-; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $4, $8
-; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5
+; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $4, $8
+; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3
 ; NO-SIMD128-FAST-NEXT: return
 %a = ashr <4 x i32> %v, %x
 ret <4 x i32> %a
@@ -10777,16 +9097,14 @@ define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) {
 ; NO-SIMD128-LABEL: shr_u_v4i32:
 ; NO-SIMD128: .functype shr_u_v4i32 (i32,
i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shr_u $push0=, $3, $5 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shr_u $push1=, $2, $5 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shr_u $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shr_u $push3=, $4, $5 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shr_u $push0=, $4, $5 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_u $push1=, $3, $5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $2, $5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_v4i32: @@ -10798,10 +9116,8 @@ define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $3, $5 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $4, $5 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $4, $5 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, @@ -10856,16 +9172,14 @@ define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-LABEL: shr_u_vec_v4i32: ; NO-SIMD128: .functype shr_u_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shr_u $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shr_u $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shr_u $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shr_u $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shr_u $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_u $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_vec_v4i32: @@ -10877,10 +9191,8 @@ define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = lshr <4 x i32> %v, %x ret <4 x i32> %a @@ -10902,16 +9214,14 @@ define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: and_v4i32: ; NO-SIMD128: .functype and_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; 
NO-SIMD128-NEXT: i32.and $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.and $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.and $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.and $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.and $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.and $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.and $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: and_v4i32: @@ -10923,10 +9233,8 @@ define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = and <4 x i32> %x, %y ret <4 x i32> %a @@ -10948,16 +9256,14 @@ define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: or_v4i32: ; NO-SIMD128: .functype or_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.or $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.or $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.or $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.or $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.or $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.or $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.or $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.or $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: or_v4i32: @@ -10969,10 +9275,8 @@ define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = or <4 x i32> %x, %y ret <4 x i32> %a @@ -10994,16 +9298,14 @@ define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: xor_v4i32: ; NO-SIMD128: .functype xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.xor $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.xor $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.xor $push2=, $1, $5 -; 
NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.xor $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.xor $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.xor $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.xor $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: xor_v4i32: @@ -11015,10 +9317,8 @@ define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = xor <4 x i32> %x, %y ret <4 x i32> %a @@ -11041,19 +9341,17 @@ define <4 x i32> @not_v4i32(<4 x i32> %x) { ; NO-SIMD128: .functype not_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $3, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push9=, -1 -; NO-SIMD128-NEXT: i32.xor $push2=, $2, $pop9 -; NO-SIMD128-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push8=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $1, $pop8 -; NO-SIMD128-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 ; NO-SIMD128-NEXT: i32.const $push7=, -1 -; NO-SIMD128-NEXT: i32.xor $push4=, $4, $pop7 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.xor $push2=, $3, $pop7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push6=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $2, $pop6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, -1 +; NO-SIMD128-NEXT: i32.xor $push4=, $1, $pop5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: not_v4i32: @@ -11062,17 +9360,15 @@ define <4 x i32> @not_v4i32(<4 x i32> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push6=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push5=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor 
$push4=, $4, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %a = xor <4 x i32> %x, ret <4 x i32> %a @@ -11096,23 +9392,21 @@ define <4 x i32> @andnot_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128: .functype andnot_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $7, $pop0 -; NO-SIMD128-NEXT: i32.and $push2=, $3, $pop1 -; NO-SIMD128-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push13=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $6, $pop13 -; NO-SIMD128-NEXT: i32.and $push4=, $2, $pop3 -; NO-SIMD128-NEXT: i32.store 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push12=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $5, $pop12 -; NO-SIMD128-NEXT: i32.and $push6=, $1, $pop5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.and $push2=, $4, $pop1 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 ; NO-SIMD128-NEXT: i32.const $push11=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $8, $pop11 -; NO-SIMD128-NEXT: i32.and $push8=, $4, $pop7 -; NO-SIMD128-NEXT: i32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.xor $push3=, $7, $pop11 +; NO-SIMD128-NEXT: i32.and $push4=, $3, $pop3 +; NO-SIMD128-NEXT: i32.store 8($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push10=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $6, $pop10 +; NO-SIMD128-NEXT: i32.and $push6=, $2, $pop5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push9=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $5, $pop9 +; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop7 +; NO-SIMD128-NEXT: i32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: andnot_v4i32: @@ -11122,20 +9416,18 @@ define <4 x i32> @andnot_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $5, $pop0 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $6, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $6, $pop11 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push12=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $7, $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $7, $pop10 ; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $8, $pop11 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop8), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $8, $pop9 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %inv_y = xor <4 x i32> %y, %a = and <4 x i32> %x, %inv_y @@ -11161,32 +9453,30 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { ; NO-SIMD128-LABEL: bitselect_v4i32: ; NO-SIMD128: .functype bitselect_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; 
NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push1=, -1 ; NO-SIMD128-NEXT: i32.xor $push2=, $4, $pop1 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $12 ; NO-SIMD128-NEXT: i32.and $push0=, $4, $8 ; NO-SIMD128-NEXT: i32.or $push4=, $pop3, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push21=, -1 -; NO-SIMD128-NEXT: i32.xor $push8=, $3, $pop21 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $11 -; NO-SIMD128-NEXT: i32.and $push7=, $3, $7 -; NO-SIMD128-NEXT: i32.or $push10=, $pop9, $pop7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push20=, -1 -; NO-SIMD128-NEXT: i32.xor $push12=, $2, $pop20 -; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $10 -; NO-SIMD128-NEXT: i32.and $push11=, $2, $6 -; NO-SIMD128-NEXT: i32.or $push14=, $pop13, $pop11 -; NO-SIMD128-NEXT: i32.store 4($0), $pop14 +; NO-SIMD128-NEXT: i32.store 12($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push19=, -1 -; NO-SIMD128-NEXT: i32.xor $push16=, $1, $pop19 -; NO-SIMD128-NEXT: i32.and $push17=, $pop16, $9 -; NO-SIMD128-NEXT: i32.and $push15=, $1, $5 -; NO-SIMD128-NEXT: i32.or $push18=, $pop17, $pop15 -; NO-SIMD128-NEXT: i32.store 0($0), $pop18 +; NO-SIMD128-NEXT: i32.xor $push6=, $3, $pop19 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $11 +; NO-SIMD128-NEXT: i32.and $push5=, $3, $7 +; NO-SIMD128-NEXT: i32.or $push8=, $pop7, $pop5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push10=, $2, $pop18 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $10 +; NO-SIMD128-NEXT: i32.and $push9=, $2, $6 +; NO-SIMD128-NEXT: i32.or $push12=, $pop11, $pop9 +; NO-SIMD128-NEXT: i32.store 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $1, $pop17 +; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $9 +; NO-SIMD128-NEXT: i32.and $push13=, $1, $5 +; NO-SIMD128-NEXT: i32.or $push16=, $pop15, $pop13 +; NO-SIMD128-NEXT: i32.store 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_v4i32: @@ -11198,26 +9488,24 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { ; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $5 ; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop3, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop19 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $10 ; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $6 ; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop7, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop18 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $11 ; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop11, $pop9 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop17 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, 
$pop14, $12 ; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $8 ; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop15, $pop13 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <4 x i32> %c, %v1 %inv_mask = xor <4 x i32> , %c @@ -11244,24 +9532,22 @@ define <4 x i32> @bitselect_xor_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2 ; NO-SIMD128-LABEL: bitselect_xor_v4i32: ; NO-SIMD128: .functype bitselect_xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 ; NO-SIMD128-NEXT: i32.xor $push0=, $8, $12 ; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $4 ; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $12 -; NO-SIMD128-NEXT: i32.store 0($pop4), $pop2 -; NO-SIMD128-NEXT: i32.xor $push5=, $7, $11 -; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $3 -; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $11 -; NO-SIMD128-NEXT: i32.store 8($0), $pop7 -; NO-SIMD128-NEXT: i32.xor $push8=, $6, $10 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $2 -; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $10 -; NO-SIMD128-NEXT: i32.store 4($0), $pop10 -; NO-SIMD128-NEXT: i32.xor $push11=, $5, $9 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $1 -; NO-SIMD128-NEXT: i32.xor $push13=, $pop12, $9 -; NO-SIMD128-NEXT: i32.store 0($0), $pop13 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $7, $11 +; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $3 +; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $11 +; NO-SIMD128-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $6, $10 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $2 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push9=, $5, $9 +; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $1 +; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $9 +; NO-SIMD128-NEXT: i32.store 0($0), $pop11 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_v4i32: @@ -11279,12 +9565,10 @@ define <4 x i32> @bitselect_xor_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $11 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $8, $12 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $12 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $8, $12 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $12 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <4 x i32> %v1, %v2 %and = and <4 x i32> %xor1, %c @@ -11311,32 +9595,30 @@ define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x ; NO-SIMD128-LABEL: bitselect_xor_reversed_v4i32: ; NO-SIMD128: .functype bitselect_xor_reversed_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.xor $push2=, $8, $12 ; NO-SIMD128-NEXT: i32.const $push0=, -1 ; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop0 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1 ; 
NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $12 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.xor $push8=, $7, $11 -; NO-SIMD128-NEXT: i32.const $push21=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $3, $pop21 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $11 -; NO-SIMD128-NEXT: i32.store 8($0), $pop10 -; NO-SIMD128-NEXT: i32.xor $push12=, $6, $10 -; NO-SIMD128-NEXT: i32.const $push20=, -1 -; NO-SIMD128-NEXT: i32.xor $push11=, $2, $pop20 -; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $pop11 -; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $10 -; NO-SIMD128-NEXT: i32.store 4($0), $pop14 -; NO-SIMD128-NEXT: i32.xor $push16=, $5, $9 +; NO-SIMD128-NEXT: i32.store 12($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push6=, $7, $11 ; NO-SIMD128-NEXT: i32.const $push19=, -1 -; NO-SIMD128-NEXT: i32.xor $push15=, $1, $pop19 -; NO-SIMD128-NEXT: i32.and $push17=, $pop16, $pop15 -; NO-SIMD128-NEXT: i32.xor $push18=, $pop17, $9 -; NO-SIMD128-NEXT: i32.store 0($0), $pop18 +; NO-SIMD128-NEXT: i32.xor $push5=, $3, $pop19 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $11 +; NO-SIMD128-NEXT: i32.store 8($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push10=, $6, $10 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $2, $pop18 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop12 +; NO-SIMD128-NEXT: i32.xor $push14=, $5, $9 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $1, $pop17 +; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $9 +; NO-SIMD128-NEXT: i32.store 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v4i32: @@ -11349,25 +9631,23 @@ define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x ; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $9 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop19 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $10 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $7, $11 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop18 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $11 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $8, $12 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop17 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $12 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <4 x i32> %v1, %v2 %notc = xor <4 x i32> %c, @@ -11394,24 
+9674,22 @@ define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-LABEL: extmul_low_s_v4i32: ; NO-SIMD128: .functype extmul_low_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend16_s $push1=, $3 -; NO-SIMD128-NEXT: i32.extend16_s $push0=, $11 +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $4 +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $12 ; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend16_s $push4=, $2 -; NO-SIMD128-NEXT: i32.extend16_s $push3=, $10 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $3 +; NO-SIMD128-NEXT: i32.extend16_s $push3=, $11 ; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; NO-SIMD128-NEXT: i32.store 4($0), $pop5 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $1 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $9 +; NO-SIMD128-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $2 +; NO-SIMD128-NEXT: i32.extend16_s $push6=, $10 ; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.store 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 12 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.extend16_s $push10=, $4 -; NO-SIMD128-NEXT: i32.extend16_s $push9=, $12 +; NO-SIMD128-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-NEXT: i32.extend16_s $push10=, $1 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $9 ; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 -; NO-SIMD128-NEXT: i32.store 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.store 0($0), $pop11 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_low_s_v4i32: @@ -11429,12 +9707,10 @@ define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push6=, $11 ; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $4 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $12 -; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $12 +; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11 ; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> @@ -11464,24 +9740,22 @@ define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-LABEL: extmul_high_s_v4i32: ; NO-SIMD128: .functype extmul_high_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend16_s $push1=, $7 -; NO-SIMD128-NEXT: i32.extend16_s $push0=, $15 +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16 ; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend16_s $push4=, $6 -; NO-SIMD128-NEXT: i32.extend16_s $push3=, $14 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $7 +; NO-SIMD128-NEXT: i32.extend16_s $push3=, $15 ; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; NO-SIMD128-NEXT: i32.store 4($0), $pop5 -; 
NO-SIMD128-NEXT: i32.extend16_s $push7=, $5 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $13 +; NO-SIMD128-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $6 +; NO-SIMD128-NEXT: i32.extend16_s $push6=, $14 ; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.store 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 12 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.extend16_s $push10=, $8 -; NO-SIMD128-NEXT: i32.extend16_s $push9=, $16 +; NO-SIMD128-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-NEXT: i32.extend16_s $push10=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $13 ; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 -; NO-SIMD128-NEXT: i32.store 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.store 0($0), $pop11 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_high_s_v4i32: @@ -11499,12 +9773,10 @@ define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push6=, $15 ; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $16 -; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $16 +; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11 ; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> @@ -11535,31 +9807,29 @@ define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128: .functype extmul_low_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push2=, $3, $pop0 -; NO-SIMD128-NEXT: i32.const $push21=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $11, $pop21 -; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push20=, 65535 -; NO-SIMD128-NEXT: i32.and $push5=, $2, $pop20 +; NO-SIMD128-NEXT: i32.and $push2=, $4, $pop0 ; NO-SIMD128-NEXT: i32.const $push19=, 65535 -; NO-SIMD128-NEXT: i32.and $push4=, $10, $pop19 -; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.and $push1=, $12, $pop19 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push18=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop18 +; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop18 ; NO-SIMD128-NEXT: i32.const $push17=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $9, $pop17 -; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store 0($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 12 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop17 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store 8($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push16=, 65535 -; NO-SIMD128-NEXT: i32.and $push11=, $4, $pop16 +; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop16 ; NO-SIMD128-NEXT: i32.const $push15=, 65535 -; NO-SIMD128-NEXT: i32.and $push10=, $12, $pop15 +; 
NO-SIMD128-NEXT: i32.and $push7=, $10, $pop15 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store 4($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push14=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop14 +; NO-SIMD128-NEXT: i32.const $push13=, 65535 +; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop13 ; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.store 0($0), $pop12 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_low_u_v4i32: @@ -11567,30 +9837,28 @@ define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop19 ; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop19 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop18 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop18 ; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop17 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop17 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push16=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop16 ; NO-SIMD128-FAST-NEXT: i32.const $push15=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop15 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop13 ; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop12 ; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> @@ -11621,31 +9889,29 @@ define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128: .functype extmul_high_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push2=, $7, $pop0 -; NO-SIMD128-NEXT: i32.const $push21=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $15, $pop21 -; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push20=, 65535 -; NO-SIMD128-NEXT: i32.and $push5=, $6, $pop20 +; NO-SIMD128-NEXT: 
i32.and $push2=, $8, $pop0 ; NO-SIMD128-NEXT: i32.const $push19=, 65535 -; NO-SIMD128-NEXT: i32.and $push4=, $14, $pop19 -; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop19 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push18=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $5, $pop18 +; NO-SIMD128-NEXT: i32.and $push5=, $7, $pop18 ; NO-SIMD128-NEXT: i32.const $push17=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop17 -; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store 0($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 12 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop17 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store 8($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push16=, 65535 -; NO-SIMD128-NEXT: i32.and $push11=, $8, $pop16 +; NO-SIMD128-NEXT: i32.and $push8=, $6, $pop16 ; NO-SIMD128-NEXT: i32.const $push15=, 65535 -; NO-SIMD128-NEXT: i32.and $push10=, $16, $pop15 +; NO-SIMD128-NEXT: i32.and $push7=, $14, $pop15 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store 4($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push14=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $5, $pop14 +; NO-SIMD128-NEXT: i32.const $push13=, 65535 +; NO-SIMD128-NEXT: i32.and $push10=, $13, $pop13 ; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.store 0($0), $pop12 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_high_u_v4i32: @@ -11653,30 +9919,28 @@ define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $5, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $13, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $13, $pop19 ; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $14, $pop19 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $7, $pop18 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $pop18 ; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $15, $pop17 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $14, $pop17 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push16=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $8, $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $7, $pop16 ; NO-SIMD128-FAST-NEXT: i32.const $push15=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $15, $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; 
NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $8, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $16, $pop13 ; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop12 ; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> @@ -13061,16 +11325,14 @@ define <4 x float> @neg_v4f32(<4 x float> %x) { ; NO-SIMD128-LABEL: neg_v4f32: ; NO-SIMD128: .functype neg_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.neg $push0=, $3 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.neg $push1=, $2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.neg $push2=, $1 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: f32.neg $push5=, $4 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: f32.neg $push0=, $4 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.neg $push1=, $3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.neg $push2=, $2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.neg $push3=, $1 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: neg_v4f32: @@ -13082,10 +11344,8 @@ define <4 x float> @neg_v4f32(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.neg $push2=, $3 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.neg $push5=, $4 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.neg $push3=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fsub nsz <4 x float> , %x ret <4 x float> %a @@ -13108,16 +11368,14 @@ define <4 x float> @abs_v4f32(<4 x float> %x) { ; NO-SIMD128-LABEL: abs_v4f32: ; NO-SIMD128: .functype abs_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.abs $push0=, $3 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.abs $push1=, $2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.abs $push2=, $1 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: f32.abs $push5=, $4 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: f32.abs $push0=, $4 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.abs $push1=, $3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.abs $push2=, $2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.abs $push3=, $1 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: abs_v4f32: @@ -13129,10 +11387,8 @@ define <4 x float> @abs_v4f32(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.abs $push2=, $3 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.abs $push5=, $4 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; 
NO-SIMD128-FAST-NEXT: f32.abs $push3=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x) ret <4 x float> %a @@ -13157,54 +11413,50 @@ define <4 x float> @min_unordered_v4f32(<4 x float> %x) { ; NO-SIMD128: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.gt $push1=, $3, $pop17 -; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 -; NO-SIMD128-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.gt $push3=, $2, $pop15 -; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.gt $push1=, $4, $pop15 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1 +; NO-SIMD128-NEXT: f32.store 12($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.gt $push5=, $1, $pop13 -; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.gt $push3=, $3, $pop13 +; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop4 ; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.gt $push7=, $4, $pop11 -; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: f32.gt $push5=, $2, $pop11 +; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5 +; NO-SIMD128-NEXT: f32.store 4($0), $pop6 +; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.gt $push7=, $1, $pop9 +; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7 +; NO-SIMD128-NEXT: f32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_unordered_v4f32: ; NO-SIMD128-FAST: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.gt $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.gt $push1=, $1, $pop15 ; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.gt $push3=, $2, $pop15 -; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.gt $push5=, $3, $pop13 -; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.gt $push3=, $2, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 ; 
NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.gt $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: f32.gt $push5=, $3, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.gt $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ule <4 x float> %x, %a = select <4 x i1> %cmps, <4 x float> %x, @@ -13231,54 +11483,50 @@ define <4 x float> @max_unordered_v4f32(<4 x float> %x) { ; NO-SIMD128: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.lt $push1=, $3, $pop17 -; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 -; NO-SIMD128-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.lt $push3=, $2, $pop15 -; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.lt $push1=, $4, $pop15 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1 +; NO-SIMD128-NEXT: f32.store 12($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.lt $push5=, $1, $pop13 -; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.lt $push3=, $3, $pop13 +; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop4 ; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.lt $push7=, $4, $pop11 -; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: f32.lt $push5=, $2, $pop11 +; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5 +; NO-SIMD128-NEXT: f32.store 4($0), $pop6 +; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.lt $push7=, $1, $pop9 +; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7 +; NO-SIMD128-NEXT: f32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_unordered_v4f32: ; NO-SIMD128-FAST: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.lt $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.lt $push1=, $1, $pop15 ; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.lt $push3=, $2, $pop15 -; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const 
$push14=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.lt $push5=, $3, $pop13 -; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.lt $push3=, $2, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.lt $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: f32.lt $push5=, $3, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.lt $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp uge <4 x float> %x, %a = select <4 x i1> %cmps, <4 x float> %x, @@ -13305,54 +11553,50 @@ define <4 x float> @min_ordered_v4f32(<4 x float> %x) { ; NO-SIMD128: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.ge $push1=, $3, $pop17 -; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 -; NO-SIMD128-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.ge $push3=, $2, $pop15 -; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.ge $push1=, $4, $pop15 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1 +; NO-SIMD128-NEXT: f32.store 12($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.ge $push5=, $1, $pop13 -; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.ge $push3=, $3, $pop13 +; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop4 ; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.ge $push7=, $4, $pop11 -; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: f32.ge $push5=, $2, $pop11 +; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5 +; NO-SIMD128-NEXT: f32.store 4($0), $pop6 +; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.ge $push7=, $1, $pop9 +; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7 +; NO-SIMD128-NEXT: f32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_ordered_v4f32: ; NO-SIMD128-FAST: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: 
f32.ge $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.ge $push1=, $1, $pop15 ; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.ge $push3=, $2, $pop15 -; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.ge $push5=, $3, $pop13 -; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.ge $push3=, $2, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.ge $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: f32.ge $push5=, $3, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.ge $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x %a = select <4 x i1> %cmps, @@ -13379,54 +11623,50 @@ define <4 x float> @max_ordered_v4f32(<4 x float> %x) { ; NO-SIMD128: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.le $push1=, $3, $pop17 -; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 -; NO-SIMD128-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.le $push3=, $2, $pop15 -; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.le $push1=, $4, $pop15 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1 +; NO-SIMD128-NEXT: f32.store 12($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.le $push5=, $1, $pop13 -; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.le $push3=, $3, $pop13 +; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop4 ; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.le $push7=, $4, $pop11 -; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: f32.le $push5=, $2, $pop11 +; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5 +; NO-SIMD128-NEXT: f32.store 4($0), $pop6 +; NO-SIMD128-NEXT: f32.const
$push10=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.le $push7=, $1, $pop9 +; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7 +; NO-SIMD128-NEXT: f32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_ordered_v4f32: ; NO-SIMD128-FAST: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.le $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.le $push1=, $1, $pop15 ; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.le $push3=, $2, $pop15 -; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.le $push5=, $3, $pop13 -; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.le $push3=, $2, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.le $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: f32.le $push5=, $3, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.le $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x %a = select <4 x i1> %cmps, @@ -13451,16 +11691,14 @@ define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: min_intrinsic_v4f32: ; NO-SIMD128: .functype min_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.min $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.min $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.min $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.min $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.min $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.min $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.min $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.min $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_intrinsic_v4f32: @@ -13472,10 +11710,8 @@ define <4
x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.min $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.min $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.min $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13552,16 +11788,14 @@ define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: minnum_intrinsic_v4f32: ; NO-SIMD128: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: call $push0=, fminf, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: call $push1=, fminf, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: call $push2=, fminf, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: call $push5=, fminf, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: call $push0=, fminf, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fminf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fminf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: call $push3=, fminf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: minnum_intrinsic_v4f32: @@ -13573,10 +11807,8 @@ define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13598,16 +11830,14 @@ define <4 x float> @minnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: minnum_nsz_intrinsic_v4f32: ; NO-SIMD128: .functype minnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: call $push0=, fminf, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: call $push1=, fminf, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: call $push2=, fminf, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: call $push5=, fminf, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: call $push0=, fminf, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fminf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fminf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: call $push3=, fminf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 
; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: minnum_nsz_intrinsic_v4f32: @@ -13619,10 +11849,8 @@ define <4 x float> @minnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan nsz <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13647,19 +11875,17 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128: .functype fminnumv432_non_zero_intrinsic (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 -; NO-SIMD128-NEXT: call $push1=, fminf, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: call $push2=, fminf, $2, $pop9 -; NO-SIMD128-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-NEXT: call $push3=, fminf, $1, $pop8 -; NO-SIMD128-NEXT: f32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: call $push1=, fminf, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 ; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 -; NO-SIMD128-NEXT: call $push6=, fminf, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-NEXT: call $push2=, fminf, $3, $pop7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: call $push3=, fminf, $2, $pop6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop3 +; NO-SIMD128-NEXT: f32.const $push5=, -0x1p0 +; NO-SIMD128-NEXT: call $push4=, fminf, $1, $pop5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: fminnumv432_non_zero_intrinsic: @@ -13668,17 +11894,15 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0 ; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $1, $pop0 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $2, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $2, $pop7 ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $3, $pop8 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $3, $pop6 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push6=, fminf, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push5=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $4, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>) ret <4 x float> %a @@ -13755,19 +11979,17 @@ define <4 x float>
@fminnumv432_one_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128: .functype fminnumv432_one_zero_intrinsic (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 -; NO-SIMD128-NEXT: call $push1=, fminf, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push2=, 0x0p0 -; NO-SIMD128-NEXT: call $push3=, fminf, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: call $push4=, fminf, $1, $pop9 -; NO-SIMD128-NEXT: f32.store 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-NEXT: call $push7=, fminf, $4, $pop8 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-NEXT: call $push1=, fminf, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-NEXT: call $push2=, fminf, $3, $pop7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0 +; NO-SIMD128-NEXT: call $push4=, fminf, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: call $push5=, fminf, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: fminnumv432_one_zero_intrinsic: @@ -13779,14 +12001,12 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0 ; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $2, $pop2 ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $3, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $3, $pop7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push7=, fminf, $4, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> <float -1.0, float 0.0, float -1.0, float -1.0>) ret <4 x float> %a @@ -13809,16 +12029,14 @@ define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: max_intrinsic_v4f32: ; NO-SIMD128: .functype max_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.max $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.max $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.max $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.max $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.max $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.max $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.max $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.max $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ;
NO-SIMD128-FAST-LABEL: max_intrinsic_v4f32: @@ -13830,10 +12048,8 @@ define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.max $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.max $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.max $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13910,16 +12126,14 @@ define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: maxnum_intrinsic_v4f32: ; NO-SIMD128: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: call $push0=, fmaxf, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: call $push1=, fmaxf, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: call $push2=, fmaxf, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: call $push5=, fmaxf, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: call $push0=, fmaxf, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: call $push3=, fmaxf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_intrinsic_v4f32: @@ -13931,10 +12145,8 @@ define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13956,16 +12168,14 @@ define <4 x float> @maxnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: maxnum_nsz_intrinsic_v4f32: ; NO-SIMD128: .functype maxnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: call $push0=, fmaxf, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: call $push1=, fmaxf, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: call $push2=, fmaxf, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: call $push5=, fmaxf, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: call $push0=, fmaxf, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $2, $6 +; NO-SIMD128-NEXT: 
f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: call $push3=, fmaxf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_nsz_intrinsic_v4f32: @@ -13977,10 +12187,8 @@ define <4 x float> @maxnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan nsz <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -14057,19 +12265,17 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128: .functype maxnum_one_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 -; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push2=, 0x0p0 -; NO-SIMD128-NEXT: call $push3=, fmaxf, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: call $push4=, fmaxf, $1, $pop9 -; NO-SIMD128-NEXT: f32.store 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-NEXT: call $push7=, fmaxf, $4, $pop8 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $3, $pop7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0 +; NO-SIMD128-NEXT: call $push4=, fmaxf, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: call $push5=, fmaxf, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_one_zero_intrinsic_v4f32: @@ -14081,14 +12287,12 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0 ; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2 ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push7=, fmaxf, $4, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> <float -1.0, float 0.0, float -1.0, float -1.0>) ret <4 x float> %a @@ -14113,19 +12317,17 @@ define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128:
.functype maxnum_non_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 -; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push2=, 0x1p0 -; NO-SIMD128-NEXT: call $push3=, fmaxf, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: call $push4=, fmaxf, $1, $pop9 -; NO-SIMD128-NEXT: f32.store 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-NEXT: call $push7=, fmaxf, $4, $pop8 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $3, $pop7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push3=, 0x1p0 +; NO-SIMD128-NEXT: call $push4=, fmaxf, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: call $push5=, fmaxf, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_non_zero_intrinsic_v4f32: @@ -14137,14 +12339,12 @@ define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x1p0 ; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2 ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push7=, fmaxf, $4, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> <float -1.0, float 1.0, float -1.0, float -1.0>) ret <4 x float> %a @@ -14240,20 +12440,18 @@ define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: pmin_v4f32: ; NO-SIMD128: .functype pmin_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.lt $push0=, $7, $3 -; NO-SIMD128-NEXT: f32.select $push1=, $7, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.lt $push2=, $6, $2 -; NO-SIMD128-NEXT: f32.select $push3=, $6, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.lt $push4=, $5, $1 -; NO-SIMD128-NEXT: f32.select $push5=, $5, $1, $pop4 -; NO-SIMD128-NEXT: f32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: f32.lt $push6=, $8, $4 -; NO-SIMD128-NEXT: f32.select $push7=, $8, $4, $pop6 -; NO-SIMD128-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: f32.lt $push0=, $8, $4 +; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.lt $push2=, $7, $3 +; NO-SIMD128-NEXT: f32.select $push3=, $7, $3,
$pop2 +; NO-SIMD128-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-NEXT: f32.lt $push4=, $6, $2 +; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4 +; NO-SIMD128-NEXT: f32.store 4($0), $pop5 +; NO-SIMD128-NEXT: f32.lt $push6=, $5, $1 +; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: pmin_v4f32: @@ -14268,11 +12466,9 @@ define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $7, $3 ; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $8, $4 ; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = fcmp olt <4 x float> %y, %x %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x @@ -14295,28 +12491,26 @@ define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: pmin_int_v4f32: ; NO-SIMD128: .functype pmin_int_v4f32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: f32.reinterpret_i32 $push1=, $8 ; NO-SIMD128-NEXT: f32.reinterpret_i32 $push0=, $4 ; NO-SIMD128-NEXT: f32.lt $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $8, $4, $pop2 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push7=, $7 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push6=, $3 -; NO-SIMD128-NEXT: f32.lt $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $7, $3, $pop8 -; NO-SIMD128-NEXT: i32.store 8($0), $pop9 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push11=, $6 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push10=, $2 -; NO-SIMD128-NEXT: f32.lt $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.select $push13=, $6, $2, $pop12 -; NO-SIMD128-NEXT: i32.store 4($0), $pop13 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push15=, $5 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push14=, $1 -; NO-SIMD128-NEXT: f32.lt $push16=, $pop15, $pop14 -; NO-SIMD128-NEXT: i32.select $push17=, $5, $1, $pop16 -; NO-SIMD128-NEXT: i32.store 0($0), $pop17 +; NO-SIMD128-NEXT: i32.store 12($0), $pop3 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push5=, $7 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push4=, $3 +; NO-SIMD128-NEXT: f32.lt $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $7, $3, $pop6 +; NO-SIMD128-NEXT: i32.store 8($0), $pop7 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push9=, $6 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push8=, $2 +; NO-SIMD128-NEXT: f32.lt $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $6, $2, $pop10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop11 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push13=, $5 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push12=, $1 +; NO-SIMD128-NEXT: f32.lt $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.select $push15=, $5, $1, $pop14 +; NO-SIMD128-NEXT: i32.store 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: pmin_int_v4f32: @@ -14337,13 +12531,11 @@ define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: f32.lt $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $7, $3, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop11 -; 
NO-SIMD128-FAST-NEXT: i32.const $push16=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push13=, $8 ; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push12=, $4 ; NO-SIMD128-FAST-NEXT: f32.lt $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $8, $4, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop17), $pop15 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %fx = bitcast <4 x i32> %x to <4 x float> %fy = bitcast <4 x i32> %y to <4 x float> @@ -14368,20 +12560,18 @@ define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: pmax_v4f32: ; NO-SIMD128: .functype pmax_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.lt $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.select $push1=, $7, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.lt $push2=, $2, $6 -; NO-SIMD128-NEXT: f32.select $push3=, $6, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.lt $push4=, $1, $5 -; NO-SIMD128-NEXT: f32.select $push5=, $5, $1, $pop4 -; NO-SIMD128-NEXT: f32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: f32.lt $push6=, $4, $8 -; NO-SIMD128-NEXT: f32.select $push7=, $8, $4, $pop6 -; NO-SIMD128-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: f32.lt $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.lt $push2=, $3, $7 +; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2 +; NO-SIMD128-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-NEXT: f32.lt $push4=, $2, $6 +; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4 +; NO-SIMD128-NEXT: f32.store 4($0), $pop5 +; NO-SIMD128-NEXT: f32.lt $push6=, $1, $5 +; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: pmax_v4f32: @@ -14396,11 +12586,9 @@ define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $4, $8 ; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = fcmp olt <4 x float> %x, %y %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x @@ -14423,28 +12611,26 @@ define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: pmax_int_v4f32: ; NO-SIMD128: .functype pmax_int_v4f32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: f32.reinterpret_i32 $push1=, $4 ; NO-SIMD128-NEXT: f32.reinterpret_i32 $push0=, $8 ; NO-SIMD128-NEXT: f32.lt $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $8, $4, $pop2 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push7=, $3 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push6=, $7 -; NO-SIMD128-NEXT: f32.lt $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $7, $3, $pop8 -; NO-SIMD128-NEXT: i32.store 8($0), 
$pop9 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push11=, $2 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push10=, $6 -; NO-SIMD128-NEXT: f32.lt $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.select $push13=, $6, $2, $pop12 -; NO-SIMD128-NEXT: i32.store 4($0), $pop13 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push15=, $1 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push14=, $5 -; NO-SIMD128-NEXT: f32.lt $push16=, $pop15, $pop14 -; NO-SIMD128-NEXT: i32.select $push17=, $5, $1, $pop16 -; NO-SIMD128-NEXT: i32.store 0($0), $pop17 +; NO-SIMD128-NEXT: i32.store 12($0), $pop3 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push5=, $3 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push4=, $7 +; NO-SIMD128-NEXT: f32.lt $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $7, $3, $pop6 +; NO-SIMD128-NEXT: i32.store 8($0), $pop7 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push9=, $2 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push8=, $6 +; NO-SIMD128-NEXT: f32.lt $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $6, $2, $pop10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop11 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push13=, $1 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push12=, $5 +; NO-SIMD128-NEXT: f32.lt $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.select $push15=, $5, $1, $pop14 +; NO-SIMD128-NEXT: i32.store 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: pmax_int_v4f32: @@ -14465,13 +12651,11 @@ define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: f32.lt $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $7, $3, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push13=, $4 ; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push12=, $8 ; NO-SIMD128-FAST-NEXT: f32.lt $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $8, $4, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop17), $pop15 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %fx = bitcast <4 x i32> %x to <4 x float> %fy = bitcast <4 x i32> %y to <4 x float> @@ -14496,16 +12680,14 @@ define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: add_v4f32: ; NO-SIMD128: .functype add_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.add $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.add $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.add $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.add $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.add $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.add $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.add $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.add $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: add_v4f32: @@ -14517,10 +12699,8 @@ define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.add $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const 
$push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.add $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.add $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fadd <4 x float> %x, %y ret <4 x float> %a @@ -14542,16 +12722,14 @@ define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: sub_v4f32: ; NO-SIMD128: .functype sub_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.sub $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.sub $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.sub $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.sub $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.sub $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.sub $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.sub $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.sub $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sub_v4f32: @@ -14563,10 +12741,8 @@ define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.sub $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.sub $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.sub $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fsub <4 x float> %x, %y ret <4 x float> %a @@ -14588,16 +12764,14 @@ define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: div_v4f32: ; NO-SIMD128: .functype div_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.div $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.div $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.div $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.div $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.div $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.div $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.div $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.div $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: div_v4f32: @@ -14609,10 +12783,8 @@ define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.div $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.div $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 
+; NO-SIMD128-FAST-NEXT: f32.div $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fdiv <4 x float> %x, %y ret <4 x float> %a @@ -14634,16 +12806,14 @@ define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: mul_v4f32: ; NO-SIMD128: .functype mul_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.mul $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.mul $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.mul $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.mul $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.mul $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.mul $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.mul $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.mul $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: mul_v4f32: @@ -14655,10 +12825,8 @@ define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.mul $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.mul $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.mul $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fmul <4 x float> %x, %y ret <4 x float> %a @@ -14681,16 +12849,14 @@ define <4 x float> @sqrt_v4f32(<4 x float> %x) { ; NO-SIMD128-LABEL: sqrt_v4f32: ; NO-SIMD128: .functype sqrt_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.sqrt $push0=, $3 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.sqrt $push1=, $2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.sqrt $push2=, $1 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: f32.sqrt $push5=, $4 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: f32.sqrt $push0=, $4 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.sqrt $push1=, $3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.sqrt $push2=, $2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.sqrt $push3=, $1 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sqrt_v4f32: @@ -14702,10 +12868,8 @@ define <4 x float> @sqrt_v4f32(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.sqrt $push2=, $3 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.sqrt $push5=, $4 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.sqrt $push3=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) ret <4 x float> %a diff --git 
a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll index d2a38de4cc855..5ec9f6a2a321b 100644 --- a/llvm/test/CodeGen/WebAssembly/simd.ll +++ b/llvm/test/CodeGen/WebAssembly/simd.ll @@ -38,44 +38,22 @@ define <16 x i8> @splat_v16i8(i8 %x) { ; NO-SIMD128-LABEL: splat_v16i8: ; NO-SIMD128: .functype splat_v16i8 (i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $1 +; NO-SIMD128-NEXT: i32.store8 14($0), $1 +; NO-SIMD128-NEXT: i32.store8 13($0), $1 +; NO-SIMD128-NEXT: i32.store8 12($0), $1 +; NO-SIMD128-NEXT: i32.store8 11($0), $1 +; NO-SIMD128-NEXT: i32.store8 10($0), $1 +; NO-SIMD128-NEXT: i32.store8 9($0), $1 ; NO-SIMD128-NEXT: i32.store8 8($0), $1 +; NO-SIMD128-NEXT: i32.store8 7($0), $1 +; NO-SIMD128-NEXT: i32.store8 6($0), $1 +; NO-SIMD128-NEXT: i32.store8 5($0), $1 ; NO-SIMD128-NEXT: i32.store8 4($0), $1 +; NO-SIMD128-NEXT: i32.store8 3($0), $1 ; NO-SIMD128-NEXT: i32.store8 2($0), $1 ; NO-SIMD128-NEXT: i32.store8 1($0), $1 ; NO-SIMD128-NEXT: i32.store8 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $1 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $1 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $1 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $1 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $1 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $1 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $1 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $1 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $1 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $1 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $1 ; NO-SIMD128-NEXT: return %v = insertelement <16 x i8> undef, i8 %x, i32 0 %res = shufflevector <16 x i8> %v, <16 x i8> undef, @@ -356,44 +334,22 @@ define <16 x i8> @replace_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-LABEL: replace_v16i8: ; NO-SIMD128: .functype replace_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $16 +; NO-SIMD128-NEXT: i32.store8 14($0), $15 +; NO-SIMD128-NEXT: i32.store8 13($0), $14 +; NO-SIMD128-NEXT: i32.store8 12($0), $13 +; NO-SIMD128-NEXT: i32.store8 11($0), $17 +; NO-SIMD128-NEXT: i32.store8 10($0), $11 +; NO-SIMD128-NEXT: i32.store8 9($0), $10 ; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 7($0), $8 +; NO-SIMD128-NEXT: i32.store8 6($0), $7 +; NO-SIMD128-NEXT: i32.store8 5($0), $6 ; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 3($0), $4 ; NO-SIMD128-NEXT: i32.store8 2($0), $3 ; NO-SIMD128-NEXT: i32.store8 1($0), $2 ; 
NO-SIMD128-NEXT: i32.store8 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $16 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $14 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $17 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $10 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $8 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $6 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $4 ; NO-SIMD128-NEXT: return %res = insertelement <16 x i8> %v, i8 %x, i32 11 ret <16 x i8> %res @@ -461,44 +417,22 @@ define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-LABEL: replace_zero_v16i8: ; NO-SIMD128: .functype replace_zero_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $16 +; NO-SIMD128-NEXT: i32.store8 14($0), $15 +; NO-SIMD128-NEXT: i32.store8 13($0), $14 +; NO-SIMD128-NEXT: i32.store8 12($0), $13 +; NO-SIMD128-NEXT: i32.store8 11($0), $12 +; NO-SIMD128-NEXT: i32.store8 10($0), $11 +; NO-SIMD128-NEXT: i32.store8 9($0), $10 ; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 7($0), $8 +; NO-SIMD128-NEXT: i32.store8 6($0), $7 +; NO-SIMD128-NEXT: i32.store8 5($0), $6 ; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 3($0), $4 ; NO-SIMD128-NEXT: i32.store8 2($0), $3 ; NO-SIMD128-NEXT: i32.store8 1($0), $2 ; NO-SIMD128-NEXT: i32.store8 0($0), $17 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $16 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $14 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $12 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $10 -; NO-SIMD128-NEXT: i32.const 
$push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $8 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $6 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $4 ; NO-SIMD128-NEXT: return %res = insertelement <16 x i8> %v, i8 %x, i32 0 ret <16 x i8> %res @@ -514,44 +448,22 @@ define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: shuffle_v16i8: ; NO-SIMD128: .functype shuffle_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $32 +; NO-SIMD128-NEXT: i32.store8 14($0), $15 +; NO-SIMD128-NEXT: i32.store8 13($0), $30 +; NO-SIMD128-NEXT: i32.store8 12($0), $13 +; NO-SIMD128-NEXT: i32.store8 11($0), $28 +; NO-SIMD128-NEXT: i32.store8 10($0), $11 +; NO-SIMD128-NEXT: i32.store8 9($0), $26 ; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 7($0), $24 +; NO-SIMD128-NEXT: i32.store8 6($0), $7 +; NO-SIMD128-NEXT: i32.store8 5($0), $22 ; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 3($0), $20 ; NO-SIMD128-NEXT: i32.store8 2($0), $3 ; NO-SIMD128-NEXT: i32.store8 1($0), $18 ; NO-SIMD128-NEXT: i32.store8 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $32 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $30 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $28 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $26 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $24 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $22 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $20 ; NO-SIMD128-NEXT: return %res = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: shuffle_undef_v16i8: ; NO-SIMD128: .functype shuffle_undef_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; 
NO-SIMD128-NEXT: i32.store8 15($0), $2 +; NO-SIMD128-NEXT: i32.store8 14($0), $2 +; NO-SIMD128-NEXT: i32.store8 13($0), $2 +; NO-SIMD128-NEXT: i32.store8 12($0), $2 +; NO-SIMD128-NEXT: i32.store8 11($0), $2 +; NO-SIMD128-NEXT: i32.store8 10($0), $2 +; NO-SIMD128-NEXT: i32.store8 9($0), $2 ; NO-SIMD128-NEXT: i32.store8 8($0), $2 +; NO-SIMD128-NEXT: i32.store8 7($0), $2 +; NO-SIMD128-NEXT: i32.store8 6($0), $2 +; NO-SIMD128-NEXT: i32.store8 5($0), $2 ; NO-SIMD128-NEXT: i32.store8 4($0), $2 +; NO-SIMD128-NEXT: i32.store8 3($0), $2 ; NO-SIMD128-NEXT: i32.store8 2($0), $2 ; NO-SIMD128-NEXT: i32.store8 1($0), $2 ; NO-SIMD128-NEXT: i32.store8 0($0), $2 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $2 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $2 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $2 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $2 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $2 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $2 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $2 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $2 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $2 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $2 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $2 ; NO-SIMD128-NEXT: return %res = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> @build_v16i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3, ; NO-SIMD128-LABEL: build_v16i8: ; NO-SIMD128: .functype build_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $16 +; NO-SIMD128-NEXT: i32.store8 14($0), $15 +; NO-SIMD128-NEXT: i32.store8 13($0), $14 +; NO-SIMD128-NEXT: i32.store8 12($0), $13 +; NO-SIMD128-NEXT: i32.store8 11($0), $12 +; NO-SIMD128-NEXT: i32.store8 10($0), $11 +; NO-SIMD128-NEXT: i32.store8 9($0), $10 ; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 7($0), $8 +; NO-SIMD128-NEXT: i32.store8 6($0), $7 +; NO-SIMD128-NEXT: i32.store8 5($0), $6 ; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 3($0), $4 ; NO-SIMD128-NEXT: i32.store8 2($0), $3 ; NO-SIMD128-NEXT: i32.store8 1($0), $2 ; NO-SIMD128-NEXT: i32.store8 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $16 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $14 -; NO-SIMD128-NEXT: i32.const 
$push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $12 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $10 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $8 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $6 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $4 ; NO-SIMD128-NEXT: return i8 %x4, i8 %x5, i8 %x6, i8 %x7, i8 %x8, i8 %x9, i8 %x10, i8 %x11, @@ -734,22 +602,14 @@ define <8 x i16> @splat_v8i16(i16 %x) { ; NO-SIMD128-LABEL: splat_v8i16: ; NO-SIMD128: .functype splat_v8i16 (i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $1 +; NO-SIMD128-NEXT: i32.store16 12($0), $1 +; NO-SIMD128-NEXT: i32.store16 10($0), $1 ; NO-SIMD128-NEXT: i32.store16 8($0), $1 +; NO-SIMD128-NEXT: i32.store16 6($0), $1 ; NO-SIMD128-NEXT: i32.store16 4($0), $1 ; NO-SIMD128-NEXT: i32.store16 2($0), $1 ; NO-SIMD128-NEXT: i32.store16 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $1 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $1 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $1 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $1 ; NO-SIMD128-NEXT: return %v = insertelement <8 x i16> undef, i16 %x, i32 0 %res = shufflevector <8 x i16> %v, <8 x i16> undef, @@ -1016,22 +876,14 @@ define <8 x i16> @replace_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-LABEL: replace_v8i16: ; NO-SIMD128: .functype replace_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $9 +; NO-SIMD128-NEXT: i32.store16 12($0), $7 +; NO-SIMD128-NEXT: i32.store16 10($0), $6 ; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 6($0), $4 ; NO-SIMD128-NEXT: i32.store16 4($0), $3 ; NO-SIMD128-NEXT: i32.store16 2($0), $2 ; NO-SIMD128-NEXT: i32.store16 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $9 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $6 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $4 ; NO-SIMD128-NEXT: return %res = insertelement <8 x i16> %v, i16 %x, i32 7 ret <8 x i16> %res @@ -1095,22 +947,14 @@ define 
<8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-LABEL: replace_zero_v8i16: ; NO-SIMD128: .functype replace_zero_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $8 +; NO-SIMD128-NEXT: i32.store16 12($0), $7 +; NO-SIMD128-NEXT: i32.store16 10($0), $6 ; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 6($0), $4 ; NO-SIMD128-NEXT: i32.store16 4($0), $3 ; NO-SIMD128-NEXT: i32.store16 2($0), $2 ; NO-SIMD128-NEXT: i32.store16 0($0), $9 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $8 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $6 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $4 ; NO-SIMD128-NEXT: return %res = insertelement <8 x i16> %v, i16 %x, i32 0 ret <8 x i16> %res @@ -1126,22 +970,14 @@ define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: shuffle_v8i16: ; NO-SIMD128: .functype shuffle_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $16 +; NO-SIMD128-NEXT: i32.store16 12($0), $7 +; NO-SIMD128-NEXT: i32.store16 10($0), $14 ; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 6($0), $12 ; NO-SIMD128-NEXT: i32.store16 4($0), $3 ; NO-SIMD128-NEXT: i32.store16 2($0), $10 ; NO-SIMD128-NEXT: i32.store16 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $16 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $14 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $12 ; NO-SIMD128-NEXT: return %res = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> @@ -1158,22 +994,14 @@ define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: shuffle_undef_v8i16: ; NO-SIMD128: .functype shuffle_undef_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $2 +; NO-SIMD128-NEXT: i32.store16 12($0), $2 +; NO-SIMD128-NEXT: i32.store16 10($0), $2 ; NO-SIMD128-NEXT: i32.store16 8($0), $2 +; NO-SIMD128-NEXT: i32.store16 6($0), $2 ; NO-SIMD128-NEXT: i32.store16 4($0), $2 ; NO-SIMD128-NEXT: i32.store16 2($0), $2 ; NO-SIMD128-NEXT: i32.store16 0($0), $2 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $2 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $2 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $2 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; 
NO-SIMD128-NEXT: i32.store16 0($pop7), $2 ; NO-SIMD128-NEXT: return %res = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> @build_v8i16(i16 %x0, i16 %x1, i16 %x2, i16 %x3, ; NO-SIMD128-LABEL: build_v8i16: ; NO-SIMD128: .functype build_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $8 +; NO-SIMD128-NEXT: i32.store16 12($0), $7 +; NO-SIMD128-NEXT: i32.store16 10($0), $6 ; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 6($0), $4 ; NO-SIMD128-NEXT: i32.store16 4($0), $3 ; NO-SIMD128-NEXT: i32.store16 2($0), $2 ; NO-SIMD128-NEXT: i32.store16 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $8 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $6 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $4 ; NO-SIMD128-NEXT: return i16 %x4, i16 %x5, i16 %x6, i16 %x7) { %t0 = insertelement <8 x i16> undef, i16 %x0, i32 0 @@ -1258,12 +1078,10 @@ define <4 x i32> @splat_v4i32(i32 %x) { ; NO-SIMD128-LABEL: splat_v4i32: ; NO-SIMD128: .functype splat_v4i32 (i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $1 ; NO-SIMD128-NEXT: i32.store 8($0), $1 ; NO-SIMD128-NEXT: i32.store 4($0), $1 ; NO-SIMD128-NEXT: i32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $1 ; NO-SIMD128-NEXT: return %v = insertelement <4 x i32> undef, i32 %x, i32 0 %res = shufflevector <4 x i32> %v, <4 x i32> undef, @@ -1368,12 +1186,10 @@ define <4 x i32> @replace_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-LABEL: replace_v4i32: ; NO-SIMD128: .functype replace_v4i32 (i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $4 ; NO-SIMD128-NEXT: i32.store 8($0), $5 ; NO-SIMD128-NEXT: i32.store 4($0), $2 ; NO-SIMD128-NEXT: i32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %res = insertelement <4 x i32> %v, i32 %x, i32 2 ret <4 x i32> %res @@ -1433,12 +1249,10 @@ define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-LABEL: replace_zero_v4i32: ; NO-SIMD128: .functype replace_zero_v4i32 (i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $4 ; NO-SIMD128-NEXT: i32.store 8($0), $3 ; NO-SIMD128-NEXT: i32.store 4($0), $2 ; NO-SIMD128-NEXT: i32.store 0($0), $5 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %res = insertelement <4 x i32> %v, i32 %x, i32 0 ret <4 x i32> %res @@ -1454,12 +1268,10 @@ define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: shuffle_v4i32: ; NO-SIMD128: .functype shuffle_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $8 ; NO-SIMD128-NEXT: i32.store 8($0), $3 ; NO-SIMD128-NEXT: i32.store 4($0), $6 ; NO-SIMD128-NEXT: i32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const 
$push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $8 ; NO-SIMD128-NEXT: return %res = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -1476,12 +1288,10 @@ define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: shuffle_undef_v4i32: ; NO-SIMD128: .functype shuffle_undef_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $2 ; NO-SIMD128-NEXT: i32.store 8($0), $2 ; NO-SIMD128-NEXT: i32.store 4($0), $2 ; NO-SIMD128-NEXT: i32.store 0($0), $2 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $2 ; NO-SIMD128-NEXT: return %res = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -1501,12 +1311,10 @@ define <4 x i32> @build_v4i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; NO-SIMD128-LABEL: build_v4i32: ; NO-SIMD128: .functype build_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $4 ; NO-SIMD128-NEXT: i32.store 8($0), $3 ; NO-SIMD128-NEXT: i32.store 4($0), $2 ; NO-SIMD128-NEXT: i32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %t0 = insertelement <4 x i32> undef, i32 %x0, i32 0 %t1 = insertelement <4 x i32> %t0, i32 %x1, i32 1 @@ -1801,12 +1609,10 @@ define <4 x float> @splat_v4f32(float %x) { ; NO-SIMD128-LABEL: splat_v4f32: ; NO-SIMD128: .functype splat_v4f32 (i32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $1 ; NO-SIMD128-NEXT: f32.store 8($0), $1 ; NO-SIMD128-NEXT: f32.store 4($0), $1 ; NO-SIMD128-NEXT: f32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $1 ; NO-SIMD128-NEXT: return %v = insertelement <4 x float> undef, float %x, i32 0 %res = shufflevector <4 x float> %v, <4 x float> undef, @@ -1911,12 +1717,10 @@ define <4 x float> @replace_v4f32(<4 x float> %v, float %x) { ; NO-SIMD128-LABEL: replace_v4f32: ; NO-SIMD128: .functype replace_v4f32 (i32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $4 ; NO-SIMD128-NEXT: f32.store 8($0), $5 ; NO-SIMD128-NEXT: f32.store 4($0), $2 ; NO-SIMD128-NEXT: f32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %res = insertelement <4 x float> %v, float %x, i32 2 ret <4 x float> %res @@ -1976,12 +1780,10 @@ define <4 x float> @replace_zero_v4f32(<4 x float> %v, float %x) { ; NO-SIMD128-LABEL: replace_zero_v4f32: ; NO-SIMD128: .functype replace_zero_v4f32 (i32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $4 ; NO-SIMD128-NEXT: f32.store 8($0), $3 ; NO-SIMD128-NEXT: f32.store 4($0), $2 ; NO-SIMD128-NEXT: f32.store 0($0), $5 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %res = insertelement <4 x float> %v, float %x, i32 0 ret <4 x float> %res @@ -1997,12 +1799,10 @@ define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: shuffle_v4f32: ; NO-SIMD128: .functype shuffle_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: 
+; NO-SIMD128-NEXT: f32.store 12($0), $8 ; NO-SIMD128-NEXT: f32.store 8($0), $3 ; NO-SIMD128-NEXT: f32.store 4($0), $6 ; NO-SIMD128-NEXT: f32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $8 ; NO-SIMD128-NEXT: return %res = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> @@ -2019,12 +1819,10 @@ define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: shuffle_undef_v4f32: ; NO-SIMD128: .functype shuffle_undef_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $2 ; NO-SIMD128-NEXT: f32.store 8($0), $2 ; NO-SIMD128-NEXT: f32.store 4($0), $2 ; NO-SIMD128-NEXT: f32.store 0($0), $2 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $2 ; NO-SIMD128-NEXT: return %res = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> @@ -2044,12 +1842,10 @@ define <4 x float> @build_v4f32(float %x0, float %x1, float %x2, float %x3) { ; NO-SIMD128-LABEL: build_v4f32: ; NO-SIMD128: .functype build_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $4 ; NO-SIMD128-NEXT: f32.store 8($0), $3 ; NO-SIMD128-NEXT: f32.store 4($0), $2 ; NO-SIMD128-NEXT: f32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %t0 = insertelement <4 x float> undef, float %x0, i32 0 %t1 = insertelement <4 x float> %t0, float %x1, i32 1 diff --git a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll index 609be3bb2e54f..50e736ac68d29 100644 --- a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll +++ b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s | FileCheck %s ; Check that the shr(shl X, 56), 48) is not mistakenly turned into @@ -16,11 +17,13 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-unknown-linux-gnu" define i64 @foo(i64 %b) nounwind readnone { -entry: ; CHECK-LABEL: foo: -; CHECK: movsbq %dil, %rax -; CHECK: shlq $8, %rax -; CHECK: orq $1, %rax +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movsbq %dil, %rax +; CHECK-NEXT: shlq $8, %rax +; CHECK-NEXT: incq %rax +; CHECK-NEXT: retq +entry: %shl = shl i64 %b, 56 ; [#uses=1] %shr = ashr i64 %shl, 48 ; [#uses=1] %add5 = or i64 %shr, 1 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll index d1a7339db9af5..55e73dc5d29ec 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll @@ -41,6 +41,7 @@ define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 % ; X86-NEXT: G_STORE [[TRUNC7]](s8), [[GV2]](p0) :: (store (s8) into @a8_8bit) ; X86-NEXT: $al = COPY [[TRUNC]](s8) ; X86-NEXT: RET 0, implicit $al + ; ; X64-LABEL: name: test_i8_args_8 ; X64: bb.1.entry: ; X64-NEXT: liveins: $ecx, $edi, $edx, $esi, $r8d, $r9d @@ -109,6 +110,7 @@ define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg ; X86-NEXT: G_STORE [[LOAD7]](s32), [[GV2]](p0) :: (store (s32) 
into @a8_32bit) ; X86-NEXT: $eax = COPY [[LOAD]](s32) ; X86-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_i32_args_8 ; X64: bb.1.entry: ; X64-NEXT: liveins: $ecx, $edi, $edx, $esi, $r8d, $r9d @@ -196,6 +198,7 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg ; X86-NEXT: $eax = COPY [[UV]](s32) ; X86-NEXT: $edx = COPY [[UV1]](s32) ; X86-NEXT: RET 0, implicit $eax, implicit $edx + ; ; X64-LABEL: name: test_i64_args_8 ; X64: bb.1.entry: ; X64-NEXT: liveins: $rcx, $rdi, $rdx, $rsi, $r8, $r9 @@ -234,6 +237,7 @@ define float @test_float_args(float %arg1, float %arg2) { ; X86-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0) ; X86-NEXT: $fp0 = COPY [[LOAD1]](s32) ; X86-NEXT: RET 0, implicit $fp0 + ; ; X64-LABEL: name: test_float_args ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $xmm0, $xmm1 @@ -254,6 +258,7 @@ define double @test_double_args(double %arg1, double %arg2) { ; X86-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.0) ; X86-NEXT: $fp0 = COPY [[LOAD1]](s64) ; X86-NEXT: RET 0, implicit $fp0 + ; ; X64-LABEL: name: test_double_args ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $xmm0, $xmm1 @@ -274,6 +279,7 @@ define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) { ; X86-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1 ; X86-NEXT: $xmm0 = COPY [[COPY1]](<4 x s32>) ; X86-NEXT: RET 0, implicit $xmm0 + ; ; X64-LABEL: name: test_v4i32_args ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $xmm0, $xmm1 @@ -297,6 +303,7 @@ define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) { ; X86-NEXT: $xmm0 = COPY [[UV]](<4 x s32>) ; X86-NEXT: $xmm1 = COPY [[UV1]](<4 x s32>) ; X86-NEXT: RET 0, implicit $xmm0, implicit $xmm1 + ; ; X64-LABEL: name: test_v8i32_args ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $xmm0, $xmm1 @@ -315,6 +322,7 @@ define void @test_void_return() { ; X86-LABEL: name: test_void_return ; X86: bb.1.entry: ; X86-NEXT: RET 0 + ; ; X64-LABEL: name: test_void_return ; X64: bb.1.entry: ; X64-NEXT: RET 0 @@ -329,6 +337,7 @@ define ptr @test_memop_i32(ptr %p1) { ; X86-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16) ; X86-NEXT: $eax = COPY [[LOAD]](p0) ; X86-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_memop_i32 ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $rdi @@ -347,6 +356,7 @@ define void @test_trivial_call() { ; X86-NEXT: CALLpcrel32 @trivial_callee, csr_32, implicit $esp, implicit $ssp ; X86-NEXT: ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86-NEXT: RET 0 + ; ; X64-LABEL: name: test_trivial_call ; X64: bb.1 (%ir-block.0): ; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp @@ -377,6 +387,7 @@ define void @test_simple_arg(i32 %in0, i32 %in1) { ; X86-NEXT: CALLpcrel32 @simple_arg_callee, csr_32, implicit $esp, implicit $ssp ; X86-NEXT: ADJCALLSTACKUP32 8, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86-NEXT: RET 0 + ; ; X64-LABEL: name: test_simple_arg ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $edi, $esi @@ -435,6 +446,7 @@ define void @test_simple_arg8_call(i32 %in0) { ; X86-NEXT: CALLpcrel32 @simple_arg8_callee, csr_32, implicit $esp, implicit $ssp ; X86-NEXT: ADJCALLSTACKUP32 32, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, 
implicit $esp, implicit $ssp ; X86-NEXT: RET 0 + ; ; X64-LABEL: name: test_simple_arg8_call ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $edi @@ -478,6 +490,7 @@ define i32 @test_simple_return_callee() { ; X86-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]] ; X86-NEXT: $eax = COPY [[ADD]](s32) ; X86-NEXT: RET 0, implicit $eax + ; ; X64-LABEL: name: test_simple_return_callee ; X64: bb.1 (%ir-block.0): ; X64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 @@ -521,6 +534,7 @@ define <8 x i32> @test_split_return_callee(<8 x i32> %arg1, <8 x i32> %arg2) { ; X86-NEXT: $xmm0 = COPY [[UV2]](<4 x s32>) ; X86-NEXT: $xmm1 = COPY [[UV3]](<4 x s32>) ; X86-NEXT: RET 0, implicit $xmm0, implicit $xmm1 + ; ; X64-LABEL: name: test_split_return_callee ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $xmm0, $xmm1, $xmm2, $xmm3 @@ -559,6 +573,7 @@ define void @test_indirect_call(ptr %func) { ; X86-NEXT: CALL32r [[LOAD]](p0), csr_32, implicit $esp, implicit $ssp ; X86-NEXT: ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86-NEXT: RET 0 + ; ; X64-LABEL: name: test_indirect_call ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $rdi @@ -603,6 +618,7 @@ define void @test_abi_exts_call(ptr %addr) { ; X86-NEXT: CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp ; X86-NEXT: ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86-NEXT: RET 0 + ; ; X64-LABEL: name: test_abi_exts_call ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $rdi @@ -654,6 +670,7 @@ define void @test_variadic_call_1(ptr %addr_ptr, ptr %val_ptr) { ; X86-NEXT: CALLpcrel32 @variadic_callee, csr_32, implicit $esp, implicit $ssp ; X86-NEXT: ADJCALLSTACKUP32 8, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86-NEXT: RET 0 + ; ; X64-LABEL: name: test_variadic_call_1 ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $rdi, $rsi @@ -696,6 +713,7 @@ define void @test_variadic_call_2(ptr %addr_ptr, ptr %val_ptr) { ; X86-NEXT: CALLpcrel32 @variadic_callee, csr_32, implicit $esp, implicit $ssp ; X86-NEXT: ADJCALLSTACKUP32 12, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp ; X86-NEXT: RET 0 + ; ; X64-LABEL: name: test_variadic_call_2 ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $rdi, $rsi @@ -728,6 +746,7 @@ define <32 x float> @test_return_v32f32() { ; X86-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[LOAD]](p0) :: (store (<32 x s32>)) ; X86-NEXT: $eax = COPY [[LOAD]](p0) ; X86-NEXT: RET 0 + ; ; X64-LABEL: name: test_return_v32f32 ; X64: bb.1 (%ir-block.0): ; X64-NEXT: liveins: $rdi @@ -757,6 +776,7 @@ define float @test_call_v32f32() { ; X86-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<32 x s32>), [[C]](s32) ; X86-NEXT: $fp0 = COPY [[EVEC]](s32) ; X86-NEXT: RET 0, implicit $fp0 + ; ; X64-LABEL: name: test_call_v32f32 ; X64: bb.1 (%ir-block.0): ; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 diff --git a/llvm/test/CodeGen/X86/atomicrmw-fadd-fp-vector.ll b/llvm/test/CodeGen/X86/atomicrmw-fadd-fp-vector.ll new file mode 100644 index 0000000000000..4f8cd5a52ed4c --- /dev/null +++ b/llvm/test/CodeGen/X86/atomicrmw-fadd-fp-vector.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck %s + +define <2 x half> @test_atomicrmw_fadd_v2f16_align4(ptr addrspace(1) %ptr, <2 x half> %value) #0 { +; 
CHECK-LABEL: test_atomicrmw_fadd_v2f16_align4: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: subq $88, %rsp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: psrld $16, %xmm0 +; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: pinsrw $0, 2(%rdi), %xmm1 +; CHECK-NEXT: pinsrw $0, (%rdi), %xmm0 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; CHECK-NEXT: callq __truncsfhf2@PLT +; CHECK-NEXT: pextrw $0, %xmm0, %eax +; CHECK-NEXT: movzwl %ax, %ebp +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: callq __extendhfsf2@PLT +; CHECK-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; CHECK-NEXT: callq __truncsfhf2@PLT +; CHECK-NEXT: pextrw $0, %xmm0, %ecx +; CHECK-NEXT: shll $16, %ecx +; CHECK-NEXT: orl %ebp, %ecx +; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: pextrw $0, %xmm0, %edx +; CHECK-NEXT: shll $16, %edx +; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: pextrw $0, %xmm0, %eax +; CHECK-NEXT: movzwl %ax, %eax +; CHECK-NEXT: orl %edx, %eax +; CHECK-NEXT: lock cmpxchgl %ecx, (%rbx) +; CHECK-NEXT: setne %cl +; CHECK-NEXT: pinsrw $0, %eax, %xmm0 +; CHECK-NEXT: shrl $16, %eax +; CHECK-NEXT: pinsrw $0, %eax, %xmm1 +; CHECK-NEXT: testb %cl, %cl +; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; CHECK-NEXT: addq $88, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq + %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value seq_cst, align 4 + ret <2 x half> %res +} + +define <2 x float> @test_atomicrmw_fadd_v2f32_align8(ptr addrspace(1) %ptr, <2 x float> %value) #0 { +; CHECK-LABEL: test_atomicrmw_fadd_v2f32_align8: +; CHECK: # %bb.0: +; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB1_1: # %atomicrmw.start +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq %xmm1, %rax +; CHECK-NEXT: addps %xmm0, %xmm1 +; CHECK-NEXT: movq %xmm1, %rcx +; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi) +; CHECK-NEXT: movq %rax, %xmm1 +; CHECK-NEXT: jne .LBB1_1 +; CHECK-NEXT: # %bb.2: # %atomicrmw.end +; CHECK-NEXT: movdqa %xmm1, %xmm0 +; CHECK-NEXT: retq + %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x float> %value seq_cst, align 8 + ret <2 x float> %res +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll index 3e40bfa1e791d..2a77d0238721c 100644 --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ 
b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -1050,11 +1050,9 @@ define zeroext i8 @test_extractelement_v32i1(<32 x i8> %a, <32 x i8> %b) nounwin ; KNL: ## %bb.0: ; KNL-NEXT: vpminub %xmm1, %xmm0, %xmm1 ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpextrb $2, %xmm0, %eax +; KNL-NEXT: notb %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -1081,11 +1079,9 @@ define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) nounwin ; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1 ; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vpextrb $15, %xmm0, %eax +; KNL-NEXT: notb %al +; KNL-NEXT: movzbl %al, %ecx ; KNL-NEXT: andl $1, %ecx ; KNL-NEXT: movl $4, %eax ; KNL-NEXT: subl %ecx, %eax @@ -1116,15 +1112,10 @@ define zeroext i8 @extractelement_v64i1_alt(<64 x i8> %a, <64 x i8> %b) nounwind ; KNL-NEXT: vpminub %ymm1, %ymm0, %ymm1 ; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: andb $1, %al -; KNL-NEXT: movb $4, %cl -; KNL-NEXT: subb %al, %cl -; KNL-NEXT: movzbl %cl, %eax +; KNL-NEXT: vpextrb $15, %xmm0, %eax +; KNL-NEXT: notb %al +; KNL-NEXT: addb $4, %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/bool-vector.ll b/llvm/test/CodeGen/X86/bool-vector.ll index e4deb878aa461..d52b455eb2e6b 100644 --- a/llvm/test/CodeGen/X86/bool-vector.ll +++ b/llvm/test/CodeGen/X86/bool-vector.ll @@ -6,7 +6,7 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 -define i32 @PR15215_bad(<4 x i32> %input) { +define i32 @PR15215_bad(<4 x i32> %input) nounwind { ; X86-LABEL: PR15215_bad: ; X86: # %bb.0: # %entry ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax @@ -59,12 +59,10 @@ entry: ret i32 %2 } -define i32 @PR15215_good(<4 x i32> %input) { +define i32 @PR15215_good(<4 x i32> %input) nounwind { ; X86-LABEL: PR15215_good: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -77,7 +75,6 @@ define i32 @PR15215_good(<4 x i32> %input) { ; X86-NEXT: leal (%edx,%ecx,4), %ecx ; X86-NEXT: leal (%ecx,%eax,8), %eax ; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; ; X64-LABEL: PR15215_good: diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir index 548cf24b9200a..13c9585873273 100644 --- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir +++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir @@ -869,13 +869,13 @@ body: | $ymm0 = VSHUFPSZ256rmi $ymm0, $rdi, 1, $noreg, 0, $noreg, -24 ; CHECK: $ymm0 = VSHUFPSYrri $ymm0, $ymm1, -24 $ymm0 = VSHUFPSZ256rri $ymm0, 
$ymm1, -24 - ; CHECK: $ymm0 = VROUNDPDYm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPDYmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $ymm0 = VROUNDPDYr $ymm0, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPDYri $ymm0, 15, implicit $mxcsr $ymm0 = VRNDSCALEPDZ256rri $ymm0, 15, implicit $mxcsr - ; CHECK: $ymm0 = VROUNDPSYm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPSYmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $ymm0 = VROUNDPSYr $ymm0, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPSYri $ymm0, 15, implicit $mxcsr $ymm0 = VRNDSCALEPSZ256rri $ymm0, 15, implicit $mxcsr ; CHECK: $ymm0 = VPERM2F128rm $ymm0, $rip, 1, $noreg, 0, $noreg, 32 $ymm0 = VSHUFF32X4Z256rmi $ymm0, $rip, 1, $noreg, 0, $noreg, 228 @@ -1751,13 +1751,13 @@ body: | $xmm0 = VALIGNQZ128rmi $xmm0, $rip, 1, $noreg, 0, $noreg, 1 ; CHECK: $xmm0 = VPALIGNRrri $xmm0, $xmm1, 8 $xmm0 = VALIGNQZ128rri $xmm0, $xmm1, 1 - ; CHECK: $xmm0 = VROUNDPDm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPDmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDPDr $xmm0, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPDri $xmm0, 15, implicit $mxcsr $xmm0 = VRNDSCALEPDZ128rri $xmm0, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDPSm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPSmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDPSr $xmm0, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPSri $xmm0, 15, implicit $mxcsr $xmm0 = VRNDSCALEPSZ128rri $xmm0, 15, implicit $mxcsr RET64 @@ -2308,21 +2308,21 @@ body: | $xmm0 = VINSERTPSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, 1 ; CHECK: $xmm0 = VINSERTPSrr $xmm0, $xmm0, 1 $xmm0 = VINSERTPSZrr $xmm0, $xmm0, 1 - ; CHECK: $xmm0 = VROUNDSDm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDmi $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSDr $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDri $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSSm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSmi $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSSr $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSri $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSDm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDmi_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSDr_Int $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDri_Int $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSSm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSmi_Int $xmm0, $rip, 1, $noreg, 0, 
$noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSSr_Int $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSri_Int $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 15, implicit $mxcsr RET64 diff --git a/llvm/test/CodeGen/X86/fsafdo_test1.ll b/llvm/test/CodeGen/X86/fsafdo_test1.ll index b5ae3915294cd..61c0f59aba6f8 100644 --- a/llvm/test/CodeGen/X86/fsafdo_test1.ll +++ b/llvm/test/CodeGen/X86/fsafdo_test1.ll @@ -6,7 +6,7 @@ ; V01: .loc 1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5 ; V0: .loc 1 9 5 is_stmt 0 discriminator 11266 # foo.c:9:5 ; V0: .loc 1 7 3 is_stmt 1 discriminator 11266 # foo.c:7:3 -; V1: .loc 1 9 5 is_stmt 0 discriminator 258 # foo.c:9:5 +; V1: .loc 1 9 5 is_stmt 0 discriminator 514 # foo.c:9:5 ; V1: .loc 1 7 3 is_stmt 1 discriminator 258 # foo.c:7:3 ; Check that variable __llvm_fs_discriminator__ is generated. ; V01: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__ diff --git a/llvm/test/CodeGen/X86/func-sanitizer.ll b/llvm/test/CodeGen/X86/func-sanitizer.ll index b421cb53ddfec..71f062ae2f8cd 100644 --- a/llvm/test/CodeGen/X86/func-sanitizer.ll +++ b/llvm/test/CodeGen/X86/func-sanitizer.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=MACHO ; CHECK: .type _Z3funv,@function ; CHECK-NEXT: .long 3238382334 # 0xc105cafe @@ -8,6 +9,15 @@ ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: retq +; MACHO: ltmp0: +; MACHO-NEXT: .long 3238382334 ## 0xc105cafe +; MACHO-NEXT: .long 42 ## 0x2a +; MACHO-NEXT: .alt_entry __Z3funv +; MACHO-NEXT: __Z3funv: +; MACHO-NEXT: .cfi_startproc +; MACHO-NEXT: # %bb.0: +; MACHO-NEXT: retq + define dso_local void @_Z3funv() !func_sanitize !0 { ret void } diff --git a/llvm/test/CodeGen/X86/i128-fpconv-win64-strict.ll b/llvm/test/CodeGen/X86/i128-fpconv-win64-strict.ll index 7f7ea09dbc0c2..64869da48e6c0 100644 --- a/llvm/test/CodeGen/X86/i128-fpconv-win64-strict.ll +++ b/llvm/test/CodeGen/X86/i128-fpconv-win64-strict.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64 ; RUN: llc < %s -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=WIN64 -define i64 @double_to_i128(double %d) nounwind { +define i64 @double_to_i128(double %d) nounwind strictfp { ; WIN64-LABEL: double_to_i128: ; WIN64: # %bb.0: ; WIN64-NEXT: subq $40, %rsp @@ -15,7 +15,7 @@ define i64 @double_to_i128(double %d) nounwind { ret i64 %2 } -define i64 @double_to_ui128(double %d) nounwind { +define i64 @double_to_ui128(double %d) nounwind strictfp { ; WIN64-LABEL: double_to_ui128: ; WIN64: # %bb.0: ; WIN64-NEXT: subq $40, %rsp @@ -28,7 +28,7 @@ define i64 @double_to_ui128(double %d) nounwind { ret i64 %2 } -define i64 @float_to_i128(float %d) nounwind { +define i64 @float_to_i128(float %d) nounwind strictfp { ; WIN64-LABEL: float_to_i128: ; WIN64: # %bb.0: ; WIN64-NEXT: subq $40, %rsp @@ -41,7 +41,7 @@ define i64 @float_to_i128(float %d) nounwind { ret i64 %2 } -define i64 @float_to_ui128(float %d) nounwind { +define i64 @float_to_ui128(float %d) nounwind strictfp { ; WIN64-LABEL: float_to_ui128: ; WIN64: # %bb.0: ; WIN64-NEXT: subq $40, %rsp @@ -54,12 +54,13 @@ define i64 @float_to_ui128(float %d) nounwind { ret i64 %2 } -define i64 @longdouble_to_i128(ptr nocapture readonly %0) nounwind { +define i64 @longdouble_to_i128(ptr nocapture readonly %0) nounwind strictfp { ; WIN64-LABEL: longdouble_to_i128: ; WIN64: # %bb.0: 
; WIN64-NEXT: subq $56, %rsp ; WIN64-NEXT: fldt (%rcx) ; WIN64-NEXT: fstpt {{[0-9]+}}(%rsp) +; WIN64-NEXT: wait ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN64-NEXT: callq __fixxfti ; WIN64-NEXT: movq %xmm0, %rax @@ -71,12 +72,13 @@ define i64 @longdouble_to_i128(ptr nocapture readonly %0) nounwind { ret i64 %4 } -define i64 @longdouble_to_ui128(ptr nocapture readonly %0) nounwind { +define i64 @longdouble_to_ui128(ptr nocapture readonly %0) nounwind strictfp { ; WIN64-LABEL: longdouble_to_ui128: ; WIN64: # %bb.0: ; WIN64-NEXT: subq $56, %rsp ; WIN64-NEXT: fldt (%rcx) ; WIN64-NEXT: fstpt {{[0-9]+}}(%rsp) +; WIN64-NEXT: wait ; WIN64-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN64-NEXT: callq __fixunsxfti ; WIN64-NEXT: movq %xmm0, %rax @@ -88,7 +90,7 @@ define i64 @longdouble_to_ui128(ptr nocapture readonly %0) nounwind { ret i64 %4 } -define double @i128_to_double(ptr nocapture readonly %0) nounwind { +define double @i128_to_double(ptr nocapture readonly %0) nounwind strictfp { ; WIN64-LABEL: i128_to_double: ; WIN64: # %bb.0: ; WIN64-NEXT: subq $56, %rsp @@ -103,7 +105,7 @@ define double @i128_to_double(ptr nocapture readonly %0) nounwind { ret double %3 } -define double @ui128_to_double(ptr nocapture readonly %0) nounwind { +define double @ui128_to_double(ptr nocapture readonly %0) nounwind strictfp { ; WIN64-LABEL: ui128_to_double: ; WIN64: # %bb.0: ; WIN64-NEXT: subq $56, %rsp @@ -118,7 +120,7 @@ define double @ui128_to_double(ptr nocapture readonly %0) nounwind { ret double %3 } -define float @i128_to_float(ptr nocapture readonly %0) nounwind { +define float @i128_to_float(ptr nocapture readonly %0) nounwind strictfp { ; WIN64-LABEL: i128_to_float: ; WIN64: # %bb.0: ; WIN64-NEXT: subq $56, %rsp @@ -133,7 +135,7 @@ define float @i128_to_float(ptr nocapture readonly %0) nounwind { ret float %3 } -define float @ui128_to_float(ptr nocapture readonly %0) nounwind { +define float @ui128_to_float(ptr nocapture readonly %0) nounwind strictfp { ; WIN64-LABEL: ui128_to_float: ; WIN64: # %bb.0: ; WIN64-NEXT: subq $56, %rsp @@ -148,7 +150,7 @@ define float @ui128_to_float(ptr nocapture readonly %0) nounwind { ret float %3 } -define void @i128_to_longdouble(ptr noalias nocapture sret(x86_fp80) align 16 %agg.result, ptr nocapture readonly %0) nounwind { +define void @i128_to_longdouble(ptr noalias nocapture sret(x86_fp80) align 16 %agg.result, ptr nocapture readonly %0) nounwind strictfp { ; WIN64-LABEL: i128_to_longdouble: ; WIN64: # %bb.0: ; WIN64-NEXT: pushq %rsi @@ -161,6 +163,7 @@ define void @i128_to_longdouble(ptr noalias nocapture sret(x86_fp80) align 16 %a ; WIN64-NEXT: callq __floattixf ; WIN64-NEXT: fldt {{[0-9]+}}(%rsp) ; WIN64-NEXT: fstpt (%rsi) +; WIN64-NEXT: wait ; WIN64-NEXT: movq %rsi, %rax ; WIN64-NEXT: addq $64, %rsp ; WIN64-NEXT: popq %rsi @@ -171,7 +174,7 @@ define void @i128_to_longdouble(ptr noalias nocapture sret(x86_fp80) align 16 %a ret void } -define void @ui128_to_longdouble(ptr noalias nocapture sret(x86_fp80) align 16 %agg.result, ptr nocapture readonly %0) nounwind { +define void @ui128_to_longdouble(ptr noalias nocapture sret(x86_fp80) align 16 %agg.result, ptr nocapture readonly %0) nounwind strictfp { ; WIN64-LABEL: ui128_to_longdouble: ; WIN64: # %bb.0: ; WIN64-NEXT: pushq %rsi @@ -184,6 +187,7 @@ define void @ui128_to_longdouble(ptr noalias nocapture sret(x86_fp80) align 16 % ; WIN64-NEXT: callq __floatuntixf ; WIN64-NEXT: fldt {{[0-9]+}}(%rsp) ; WIN64-NEXT: fstpt (%rsi) +; WIN64-NEXT: wait ; WIN64-NEXT: movq %rsi, %rax ; WIN64-NEXT: addq $64, %rsp ; WIN64-NEXT: popq 
%rsi diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll index 5420e6b5ce86f..16946caf9a328 100644 --- a/llvm/test/CodeGen/X86/insertelement-var-index.ll +++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll @@ -2294,13 +2294,13 @@ define i32 @PR44139(ptr %p) { ; ; AVX1-LABEL: PR44139: ; AVX1: # %bb.0: +; AVX1-NEXT: movq (%rdi), %rax ; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0 -; AVX1-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm1 +; AVX1-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 ; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: vmovaps %ymm0, 64(%rdi) ; AVX1-NEXT: vmovaps %ymm0, 96(%rdi) ; AVX1-NEXT: vmovaps %ymm0, 32(%rdi) -; AVX1-NEXT: movl (%rdi), %eax ; AVX1-NEXT: vmovaps %ymm1, (%rdi) ; AVX1-NEXT: leal 2147483647(%rax), %ecx ; AVX1-NEXT: testl %eax, %eax @@ -2315,13 +2315,13 @@ define i32 @PR44139(ptr %p) { ; ; AVX2-LABEL: PR44139: ; AVX2: # %bb.0: +; AVX2-NEXT: movq (%rdi), %rax ; AVX2-NEXT: vpbroadcastq (%rdi), %ymm0 -; AVX2-NEXT: vpinsrq $1, (%rdi), %xmm0, %xmm1 +; AVX2-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 ; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX2-NEXT: vmovdqa %ymm0, 64(%rdi) ; AVX2-NEXT: vmovdqa %ymm0, 96(%rdi) ; AVX2-NEXT: vmovdqa %ymm0, 32(%rdi) -; AVX2-NEXT: movl (%rdi), %eax ; AVX2-NEXT: vmovdqa %ymm1, (%rdi) ; AVX2-NEXT: leal 2147483647(%rax), %ecx ; AVX2-NEXT: testl %eax, %eax @@ -2336,14 +2336,12 @@ define i32 @PR44139(ptr %p) { ; ; AVX512-LABEL: PR44139: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 -; AVX512-NEXT: vpbroadcastq (%rdi), %zmm1 -; AVX512-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512-NEXT: vpinsrq $1, (%rdi), %xmm1, %xmm2 -; AVX512-NEXT: vinserti32x4 $0, %xmm2, %zmm1, %zmm2 -; AVX512-NEXT: vmovdqa64 %zmm1, 64(%rdi) -; AVX512-NEXT: vmovdqa64 %zmm2, (%rdi) -; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: movq (%rdi), %rax +; AVX512-NEXT: vpbroadcastq (%rdi), %zmm0 +; AVX512-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 +; AVX512-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1 +; AVX512-NEXT: vmovdqa64 %zmm0, 64(%rdi) +; AVX512-NEXT: vmovdqa64 %zmm1, (%rdi) ; AVX512-NEXT: leal 2147483647(%rax), %ecx ; AVX512-NEXT: testl %eax, %eax ; AVX512-NEXT: cmovnsl %eax, %ecx diff --git a/llvm/test/CodeGen/X86/load-local-v3i129.ll b/llvm/test/CodeGen/X86/load-local-v3i129.ll index 8fa7ce0664537..eb5d172a3b352 100644 --- a/llvm/test/CodeGen/X86/load-local-v3i129.ll +++ b/llvm/test/CodeGen/X86/load-local-v3i129.ll @@ -12,7 +12,7 @@ define void @_start() nounwind { ; FAST-SHLD-NEXT: shrq $2, %rcx ; FAST-SHLD-NEXT: shldq $2, %rdx, %rcx ; FAST-SHLD-NEXT: andq $-4, %rax -; FAST-SHLD-NEXT: orq $1, %rax +; FAST-SHLD-NEXT: incq %rax ; FAST-SHLD-NEXT: movq %rax, -40(%rsp) ; FAST-SHLD-NEXT: movq %rcx, -32(%rsp) ; FAST-SHLD-NEXT: orq $-2, -56(%rsp) @@ -23,7 +23,7 @@ define void @_start() nounwind { ; SLOW-SHLD: # %bb.0: # %Entry ; SLOW-SHLD-NEXT: movq -40(%rsp), %rax ; SLOW-SHLD-NEXT: andq $-4, %rax -; SLOW-SHLD-NEXT: orq $1, %rax +; SLOW-SHLD-NEXT: incq %rax ; SLOW-SHLD-NEXT: movq %rax, -40(%rsp) ; SLOW-SHLD-NEXT: orq $-2, -56(%rsp) ; SLOW-SHLD-NEXT: movq $-1, -48(%rsp) diff --git a/llvm/test/CodeGen/X86/memset-minsize.ll b/llvm/test/CodeGen/X86/memset-minsize.ll index 76d2928db3a9e..cc0f2156262bb 100644 --- a/llvm/test/CodeGen/X86/memset-minsize.ll +++ b/llvm/test/CodeGen/X86/memset-minsize.ll @@ -136,4 +136,17 @@ entry: ret void } +define void @small_memset_to_rep_stos_64(ptr %ptr) minsize nounwind { +; CHECK-LABEL: small_memset_to_rep_stos_64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
pushq $16 +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: rep;stosq %rax, %es:(%rdi) +; CHECK-NEXT: retq +entry: + call void @llvm.memset.p0.i64(ptr align 8 %ptr, i8 0, i64 128, i1 false) + ret void +} + declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1) diff --git a/llvm/test/CodeGen/X86/mmx-intrinsics.ll b/llvm/test/CodeGen/X86/mmx-intrinsics.ll index 54b1a31359180..a43d9400cde6c 100644 --- a/llvm/test/CodeGen/X86/mmx-intrinsics.ll +++ b/llvm/test/CodeGen/X86/mmx-intrinsics.ll @@ -1,13 +1,42 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86 -; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefixes=ALL,X86 +; RUN: llc < %s -mtriple=i686-- -mattr=+mmx,+avx | FileCheck %s --check-prefixes=ALL,X86 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefixes=ALL,X64 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+mmx,+avx | FileCheck %s --check-prefixes=ALL,X64 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test1 -; ALL: phaddw +; X86-LABEL: test1: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: phaddw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test1: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: phaddw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -23,8 +52,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test88 -; ALL: pcmpgtd +; X86-LABEL: test88: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pcmpgtd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; 
+; X64-LABEL: test88: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pcmpgtd %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -40,8 +97,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test87 -; ALL: pcmpgtw +; X86-LABEL: test87: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pcmpgtw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test87: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pcmpgtw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -57,8 +142,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test86 -; ALL: pcmpgtb +; X86-LABEL: test86: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pcmpgtb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test86: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pcmpgtb %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -74,8 +187,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test85 -; ALL: pcmpeqd +; X86-LABEL: test85: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pcmpeqd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, 
%esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test85: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pcmpeqd %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -91,8 +232,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test84 -; ALL: pcmpeqw +; X86-LABEL: test84: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pcmpeqw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test84: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pcmpeqw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -108,8 +277,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test83 -; ALL: pcmpeqb +; X86-LABEL: test83: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pcmpeqb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test83: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pcmpeqb %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -125,9 +322,36 @@ entry: declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test82 -; X86: punpckldq {{.*#+}} mm0 = mm0[0],mem[0] -; X64: punpckldq {{.*#+}} mm0 = mm0[0],mm1[0] +; X86-LABEL: test82: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: punpckldq {{[0-9]+}}(%esp), 
%mm0 # mm0 = mm0[0],mem[0] +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test82: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -143,9 +367,36 @@ entry: declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test81 -; X86: punpcklwd {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1] -; X64: punpcklwd {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1] +; X86-LABEL: test81: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: punpcklwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test81: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -161,9 +412,36 @@ entry: declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test80 -; X86: punpcklbw {{.*#+}} mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] -; X64: punpcklbw {{.*#+}} mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] +; X86-LABEL: test80: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: punpcklbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test80: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -179,9 +457,36 @@ entry: declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone 
optsize ssp { -; ALL-LABEL: @test79 -; X86: punpckhdq {{.*#+}} mm0 = mm0[1],mem[1] -; X64: punpckhdq {{.*#+}} mm0 = mm0[1],mm1[1] +; X86-LABEL: test79: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: punpckhdq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[1],mem[1] +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test79: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -197,27 +502,81 @@ entry: declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test78 -; X86: punpckhwd {{.*#+}} mm0 = mm0[2],mem[2],mm0[3],mem[3] -; X64: punpckhwd {{.*#+}} mm0 = mm0[2],mm1[2],mm0[3],mm1[3] -entry: - %0 = bitcast <1 x i64> %b to <4 x i16> - %1 = bitcast <1 x i64> %a to <4 x i16> - %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx - %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx - %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind - %3 = bitcast x86_mmx %2 to <4 x i16> - %4 = bitcast <4 x i16> %3 to <1 x i64> - %5 = extractelement <1 x i64> %4, i32 0 - ret i64 %5 -} - -declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone - +; X86-LABEL: test78: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: punpckhwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test78: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq +entry: + %0 = bitcast <1 x i64> %b to <4 x i16> + %1 = bitcast <1 x i64> %a to <4 x i16> + %mmx_var.i = bitcast <4 x i16> %1 to x86_mmx + %mmx_var1.i = bitcast <4 x i16> %0 to x86_mmx + %2 = tail call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind + %3 = bitcast x86_mmx %2 to <4 x i16> + %4 = bitcast <4 x i16> %3 to <1 x i64> + %5 = extractelement <1 x i64> %4, i32 0 + ret i64 %5 +} + +declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone + define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; 
ALL-LABEL: @test77 -; X86: punpckhbw {{.*#+}} mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] -; X64: punpckhbw {{.*#+}} mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] +; X86-LABEL: test77: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: punpckhbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test77: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -233,8 +592,36 @@ entry: declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test76 -; ALL: packuswb +; X86-LABEL: test76: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: packuswb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test76: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: packuswb %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -250,8 +637,36 @@ entry: declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test75 -; ALL: packssdw +; X86-LABEL: test75: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: packssdw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test75: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, 
%mm1 +; X64-NEXT: packssdw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -267,8 +682,36 @@ entry: declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test74 -; ALL: packsswb +; X86-LABEL: test74: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: packsswb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test74: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: packsswb %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -284,8 +727,31 @@ entry: declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test73 -; ALL: psrad +; X86-LABEL: test73: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psrad $3, %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test73: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: psrad $3, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -299,8 +765,31 @@ entry: declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test72 -; ALL: psraw +; X86-LABEL: test72: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psraw $3, %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test72: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: psraw $3, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -312,8 +801,28 @@ entry: } define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test72_2 -; ALL-NOT: psraw +; X86-LABEL: test72_2: +; X86: # %bb.0: # %entry +; 
X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test72_2: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -327,8 +836,27 @@ entry: declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone define i64 @test71(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test71 -; ALL: psrlq +; X86-LABEL: test71: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psrlq $3, %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test71: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: psrlq $3, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -340,8 +868,31 @@ entry: declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test70 -; ALL: psrld +; X86-LABEL: test70: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psrld $3, %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test70: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: psrld $3, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -353,8 +904,28 @@ entry: } define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test70_2 -; ALL-NOT: psrld +; X86-LABEL: test70_2: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test70_2: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -368,8 +939,31 @@ entry: declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test69 -; ALL: psrlw +; X86-LABEL: test69: +; 
X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psrlw $3, %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test69: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: psrlw $3, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -383,8 +977,27 @@ entry: declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone define i64 @test68(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test68 -; ALL: psllq +; X86-LABEL: test68: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psllq $3, %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test68: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: psllq $3, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -396,8 +1009,31 @@ entry: declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone define i64 @test67(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test67 -; ALL: pslld +; X86-LABEL: test67: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pslld $3, %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test67: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: pslld $3, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -411,8 +1047,31 @@ entry: declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone define i64 @test66(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test66 -; ALL: psllw +; X86-LABEL: test66: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psllw $3, %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test66: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: psllw $3, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = 
bitcast <4 x i16> %0 to x86_mmx @@ -424,8 +1083,28 @@ entry: } define i64 @test66_2(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test66_2 -; ALL-NOT: psllw +; X86-LABEL: test66_2: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test66_2: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -439,8 +1118,32 @@ entry: declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test65(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test65 -; ALL: psrad +; X86-LABEL: test65: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psrad 16(%ebp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test65: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psrad %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -456,8 +1159,32 @@ entry: declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test64(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test64 -; ALL: psraw +; X86-LABEL: test64: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psraw 16(%ebp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test64: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psraw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -473,8 +1200,28 @@ entry: declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone define i64 @test63(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test63 -; ALL: psrlq +; X86-LABEL: test63: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psrlq 16(%ebp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; 
X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test63: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psrlq %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -488,8 +1235,32 @@ entry: declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test62(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test62 -; ALL: psrld +; X86-LABEL: test62: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psrld 16(%ebp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test62: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psrld %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -505,8 +1276,32 @@ entry: declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test61(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test61 -; ALL: psrlw +; X86-LABEL: test61: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psrlw 16(%ebp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test61: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psrlw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -522,8 +1317,28 @@ entry: declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone define i64 @test60(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test60 -; ALL: psllq +; X86-LABEL: test60: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psllq 16(%ebp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test60: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psllq %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -537,8 +1352,32 @@ entry: declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test59(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test59 -; ALL: pslld +; X86-LABEL: test59: +; X86: # %bb.0: # %entry +; 
X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pslld 16(%ebp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test59: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pslld %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> %mmx_var.i = bitcast <2 x i32> %0 to x86_mmx @@ -554,8 +1393,32 @@ entry: declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test58(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test58 -; ALL: psllw +; X86-LABEL: test58: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psllw 16(%ebp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test58: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psllw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> %mmx_var.i = bitcast <4 x i16> %0 to x86_mmx @@ -571,8 +1434,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone define i64 @test56(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test56 -; ALL: pxor +; X86-LABEL: test56: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pxor {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test56: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pxor %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -588,8 +1479,36 @@ entry: declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone define i64 @test55(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test55 -; ALL: por +; X86-LABEL: test55: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 
16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: por {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test55: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: por %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -605,8 +1524,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone define i64 @test54(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test54 -; ALL: pandn +; X86-LABEL: test54: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pandn {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test54: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pandn %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -622,8 +1569,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone define i64 @test53(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test53 -; ALL: pand +; X86-LABEL: test53: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pand {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test53: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pand %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -639,8 +1614,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test52(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test52 -; ALL: pmullw +; X86-LABEL: test52: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, 
{{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pmullw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test52: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pmullw %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -654,8 +1657,36 @@ entry: } define i64 @test51(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test51 -; ALL: pmullw +; X86-LABEL: test51: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pmullw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test51: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pmullw %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -671,8 +1702,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test50(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test50 -; ALL: pmulhw +; X86-LABEL: test50: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pmulhw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test50: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pmulhw %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -688,8 +1747,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone define i64 @test49(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test49 -; ALL: pmaddwd +; X86-LABEL: test49: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; 
X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pmaddwd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test49: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pmaddwd %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -705,8 +1792,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test48(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test48 -; ALL: psubusw +; X86-LABEL: test48: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psubusw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test48: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psubusw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -722,8 +1837,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test47(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test47 -; ALL: psubusb +; X86-LABEL: test47: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psubusb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test47: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psubusb %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -739,8 +1882,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test46(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test46 -; ALL: psubsw +; X86-LABEL: test46: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, 
{{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psubsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test46: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psubsw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -756,8 +1927,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test45(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test45 -; ALL: psubsb +; X86-LABEL: test45: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psubsb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test45: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psubsb %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -771,8 +1970,28 @@ entry: } define i64 @test44(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test44 -; ALL: psubq +; X86-LABEL: test44: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: psubq 16(%ebp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test44: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psubq %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var = bitcast i64 %0 to x86_mmx @@ -788,8 +2007,36 @@ declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test43(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test43 -; ALL: psubd +; X86-LABEL: test43: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psubd 
{{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test43: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psubd %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -805,8 +2052,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test42(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test42 -; ALL: psubw +; X86-LABEL: test42: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psubw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test42: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psubw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -822,8 +2097,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test41(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test41 -; ALL: psubb +; X86-LABEL: test41: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psubb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test41: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psubb %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -839,8 +2142,36 @@ entry: declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test40(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test40 -; ALL: paddusw +; X86-LABEL: test40: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq 
{{[0-9]+}}(%esp), %mm0 +; X86-NEXT: paddusw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test40: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: paddusw %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -856,8 +2187,36 @@ entry: declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test39(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test39 -; ALL: paddusb +; X86-LABEL: test39: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: paddusb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test39: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: paddusb %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -873,8 +2232,36 @@ entry: declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test38(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test38 -; ALL: paddsw +; X86-LABEL: test38: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: paddsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test38: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: paddsw %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -890,8 +2277,36 @@ entry: declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test37(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test37 -; ALL: paddsb +; X86-LABEL: test37: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; 
X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: paddsb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test37: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: paddsb %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -907,8 +2322,28 @@ entry: declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone define i64 @test36(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test36 -; ALL: paddq +; X86-LABEL: test36: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: paddq 16(%ebp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test36: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: paddq %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var = bitcast i64 %0 to x86_mmx @@ -922,8 +2357,36 @@ entry: declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test35(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test35 -; ALL: paddd +; X86-LABEL: test35: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: paddd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test35: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: paddd %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -939,8 +2402,36 @@ entry: declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test34(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test34 -; ALL: paddw +; X86-LABEL: test34: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: paddw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; 
X86-NEXT: retl +; +; X64-LABEL: test34: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: paddw %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -956,8 +2447,36 @@ entry: declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test33(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test33 -; ALL: paddb +; X86-LABEL: test33: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: paddb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test33: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: paddb %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -973,8 +2492,36 @@ entry: declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone define i64 @test32(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test32 -; ALL: psadbw +; X86-LABEL: test32: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psadbw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test32: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psadbw %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -988,8 +2535,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test31(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test31 -; ALL: pminsw +; X86-LABEL: test31: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pminsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl 
%ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test31: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pminsw %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1005,8 +2580,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test30(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test30 -; ALL: pminub +; X86-LABEL: test30: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pminub {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test30: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pminub %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -1022,8 +2625,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test29(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test29 -; ALL: pmaxsw +; X86-LABEL: test29: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pmaxsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test29: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pmaxsw %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1039,8 +2670,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test28(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test28 -; ALL: pmaxub +; X86-LABEL: test28: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pmaxub {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: 
movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test28: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pmaxub %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -1056,8 +2715,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test27(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test27 -; ALL: pavgw +; X86-LABEL: test27: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pavgw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test27: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pavgw %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1073,8 +2760,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test26(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test26 -; ALL: pavgb +; X86-LABEL: test26: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pavgb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test26: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pavgb %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -1090,8 +2805,18 @@ entry: declare void @llvm.x86.mmx.movnt.dq(ptr, x86_mmx) nounwind define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp { -; ALL-LABEL: @test25 -; ALL: movntq +; X86-LABEL: test25: +; X86: # %bb.0: # %entry +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movntq %mm0, (%eax) +; X86-NEXT: retl +; +; X64-LABEL: test25: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rsi, %mm0 +; X64-NEXT: movntq %mm0, (%rdi) +; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var.i = bitcast i64 %0 to x86_mmx @@ -1102,8 +2827,27 @@ entry: declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone define i32 @test24(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test24 -; ALL: pmovmskb +; 
X86-LABEL: test24: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movq (%esp), %mm0 +; X86-NEXT: pmovmskb %mm0, %eax +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test24: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: pmovmskb %mm0, %eax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <8 x i8> %mmx_var.i = bitcast <8 x i8> %0 to x86_mmx @@ -1114,8 +2858,37 @@ entry: declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, ptr) nounwind define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp { -; ALL-LABEL: @test23 -; ALL: maskmovq +; X86-LABEL: test23: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %edi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl 24(%ebp), %edi +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq (%esp), %mm1 +; X86-NEXT: maskmovq %mm0, %mm1 +; X86-NEXT: leal -4(%ebp), %esp +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test23: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: movq %rdx, %rdi +; X64-NEXT: maskmovq %mm1, %mm0 +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %n to <8 x i8> %1 = bitcast <1 x i64> %d to <8 x i8> @@ -1128,8 +2901,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test22(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test22 -; ALL: pmulhuw +; X86-LABEL: test22: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pmulhuw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test22: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pmulhuw %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1145,9 +2946,30 @@ entry: declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone define i64 @test21(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test21 -; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0] -; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0] +; X86-LABEL: test21: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl 
%ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: pshufw $3, {{[0-9]+}}(%esp), %mm0 # mm0 = mem[3,0,0,0] +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test21: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: pshufw $3, %mm0, %mm0 # mm0 = mm0[3,0,0,0] +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> %1 = bitcast <4 x i16> %0 to x86_mmx @@ -1159,10 +2981,28 @@ entry: } define i32 @test21_2(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test21_2 -; X86: pshufw {{.*#+}} mm0 = mem[3,0,0,0] -; X64: pshufw {{.*#+}} mm0 = mm0[3,0,0,0] -; ALL: movd +; X86-LABEL: test21_2: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: pshufw $3, (%esp), %mm0 # mm0 = mem[3,0,0,0] +; X86-NEXT: movd %mm0, %eax +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test21_2: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: pshufw $3, %mm0, %mm0 # mm0 = mm0[3,0,0,0] +; X64-NEXT: movd %mm0, %eax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> %1 = bitcast <4 x i16> %0 to x86_mmx @@ -1176,8 +3016,36 @@ entry: declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone define i64 @test20(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test20 -; ALL: pmuludq +; X86-LABEL: test20: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pmuludq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test20: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pmuludq %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -1191,8 +3059,26 @@ entry: declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone define <2 x double> @test19(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test19 -; ALL: cvtpi2pd +; X86-LABEL: test19: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: cvtpi2pd (%esp), %xmm0 +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test19: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: cvtpi2pd %mm0, %xmm0 +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> %1 = bitcast <2 x i32> %0 to x86_mmx @@ 
-1203,8 +3089,25 @@ entry: declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone define i64 @test18(<2 x double> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test18 -; ALL: cvttpd2pi +; X86-LABEL: test18: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: cvttpd2pi %xmm0, %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test18: +; X64: # %bb.0: # %entry +; X64-NEXT: cvttpd2pi %xmm0, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = tail call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone %1 = bitcast x86_mmx %0 to <2 x i32> @@ -1216,8 +3119,25 @@ entry: declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone define i64 @test17(<2 x double> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test17 -; ALL: cvtpd2pi +; X86-LABEL: test17: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: cvtpd2pi %xmm0, %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test17: +; X64: # %bb.0: # %entry +; X64-NEXT: cvtpd2pi %xmm0, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = tail call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone %1 = bitcast x86_mmx %0 to <2 x i32> @@ -1229,8 +3149,28 @@ entry: declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone define i64 @test16(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test16 -; ALL: palignr +; X86-LABEL: test16: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movq 8(%ebp), %mm0 +; X86-NEXT: palignr $16, 16(%ebp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test16: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: palignr $16, %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = extractelement <1 x i64> %a, i32 0 %mmx_var = bitcast i64 %0 to x86_mmx @@ -1244,8 +3184,30 @@ entry: declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone define i64 @test15(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test15 -; ALL: pabsd +; X86-LABEL: test15: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: pabsd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test15: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: pabsd %mm0, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <2 x i32> %1 = bitcast <2 x i32> %0 to x86_mmx @@ -1259,8 +3221,30 @@ entry: 
declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone define i64 @test14(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test14 -; ALL: pabsw +; X86-LABEL: test14: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: pabsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test14: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: pabsw %mm0, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <4 x i16> %1 = bitcast <4 x i16> %0 to x86_mmx @@ -1274,8 +3258,30 @@ entry: declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone define i64 @test13(<1 x i64> %a) nounwind readnone optsize ssp { -; ALL-LABEL: @test13 -; ALL: pabsb +; X86-LABEL: test13: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: pabsb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test13: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: pabsb %mm0, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to x86_mmx @@ -1289,8 +3295,36 @@ entry: declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test12(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test12 -; ALL: psignd +; X86-LABEL: test12: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psignd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test12: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psignd %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -1306,8 +3340,36 @@ entry: declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test11(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test11 -; ALL: psignw +; X86-LABEL: test11: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, 
{{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psignw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test11: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psignw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1323,8 +3385,36 @@ entry: declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test10(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test10 -; ALL: psignb +; X86-LABEL: test10: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: psignb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test10: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: psignb %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -1340,8 +3430,36 @@ entry: declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone define i64 @test9(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test9 -; ALL: pshufb +; X86-LABEL: test9: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pshufb {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test9: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pshufb %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -1357,8 +3475,36 @@ entry: declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone define i64 @test8(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test8 -; ALL: pmulhrsw +; X86-LABEL: test8: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; 
X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pmulhrsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test8: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pmulhrsw %mm0, %mm1 +; X64-NEXT: movq %mm1, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1374,8 +3520,36 @@ entry: declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone define i64 @test7(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test7 -; ALL: pmaddubsw +; X86-LABEL: test7: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: pmaddubsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test7: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: pmaddubsw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <8 x i8> %1 = bitcast <1 x i64> %a to <8 x i8> @@ -1391,8 +3565,36 @@ entry: declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone define i64 @test6(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test6 -; ALL: phsubsw +; X86-LABEL: test6: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: phsubsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test6: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: phsubsw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1408,8 +3610,36 @@ entry: declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test5(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test5 -; ALL: phsubd +; X86-LABEL: test5: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp 
+; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: phsubd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test5: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: phsubd %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -1425,8 +3655,36 @@ entry: declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone define i64 @test4(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test4 -; ALL: phsubw +; X86-LABEL: test4: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: phsubw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test4: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: phsubw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1442,8 +3700,36 @@ entry: declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone define i64 @test3(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test3 -; ALL: phaddsw +; X86-LABEL: test3: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: phaddsw {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test3: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: phaddsw %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <4 x i16> %1 = bitcast <1 x i64> %a to <4 x i16> @@ -1459,8 +3745,36 @@ entry: declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone define i64 @test2(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp { -; ALL-LABEL: @test2 -; ALL: phaddd +; X86-LABEL: test2: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; 
X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $24, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: phaddd {{[0-9]+}}(%esp), %mm0 +; X86-NEXT: movq %mm0, (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; X64-LABEL: test2: +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, %mm0 +; X64-NEXT: movq %rsi, %mm1 +; X64-NEXT: phaddd %mm1, %mm0 +; X64-NEXT: movq %mm0, %rax +; X64-NEXT: retq entry: %0 = bitcast <1 x i64> %b to <2 x i32> %1 = bitcast <1 x i64> %a to <2 x i32> @@ -1474,18 +3788,21 @@ entry: } define <4 x float> @test89(<4 x float> %a, x86_mmx %b) nounwind { -; ALL-LABEL: @test89 -; ALL: cvtpi2ps +; ALL-LABEL: test89: +; ALL: # %bb.0: +; ALL-NEXT: cvtpi2ps %mm0, %xmm0 +; ALL-NEXT: ret{{[l|q]}} %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, x86_mmx %b) ret <4 x float> %c } declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone -; ALL-LABEL: test90 define void @test90() { -; ALL-LABEL: @test90 -; ALL: emms +; ALL-LABEL: test90: +; ALL: # %bb.0: +; ALL-NEXT: emms +; ALL-NEXT: ret{{[l|q]}} call void @llvm.x86.mmx.emms() ret void } diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll index e8b3121ecfb52..253f990f8735e 100644 --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -3682,18 +3682,12 @@ define i1 @movmsk_v16i8(<16 x i8> %x, <16 x i8> %y) { ; KNL-LABEL: movmsk_v16i8: ; KNL: # %bb.0: ; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: kshiftrw $8, %k0, %k1 -; KNL-NEXT: kmovw %k1, %edx -; KNL-NEXT: kshiftrw $3, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: xorb %dl, %al -; KNL-NEXT: andb %cl, %al +; KNL-NEXT: vpextrb $15, %xmm0, %ecx +; KNL-NEXT: vpextrb $8, %xmm0, %edx +; KNL-NEXT: vpextrb $3, %xmm0, %eax +; KNL-NEXT: xorl %edx, %eax +; KNL-NEXT: andl %ecx, %eax ; KNL-NEXT: # kill: def $al killed $al killed $eax -; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: movmsk_v16i8: diff --git a/llvm/test/CodeGen/X86/patchable-prologue.ll b/llvm/test/CodeGen/X86/patchable-prologue.ll index 71a392845fdea..43761e3d1e1eb 100644 --- a/llvm/test/CodeGen/X86/patchable-prologue.ll +++ b/llvm/test/CodeGen/X86/patchable-prologue.ll @@ -193,3 +193,20 @@ do.body: ; preds = %do.body, %entry do.end: ; preds = %do.body ret void } + + +; Test that inline asm is properly hotpatched. We currently don't examine the +; asm instruction when printing it, so we always emit patching NOPs. 
+ +; 64: inline_asm: +; 64-NEXT: # %bb.0: +; 64-NEXT: xchgw %ax, %ax # encoding: [0x66,0x90] +; 64-NEXT: #APP +; 64-NEXT: int3 # encoding: [0xcc] +; 64-NEXT: #NO_APP + +define dso_local void @inline_asm() "patchable-function"="prologue-short-redirect" { +entry: + call void asm sideeffect "int3", "~{dirflag},~{fpsr},~{flags}"() + ret void +} diff --git a/llvm/test/CodeGen/X86/pr23664.ll b/llvm/test/CodeGen/X86/pr23664.ll index 453e5db2bed61..8179602b8c2a8 100644 --- a/llvm/test/CodeGen/X86/pr23664.ll +++ b/llvm/test/CodeGen/X86/pr23664.ll @@ -6,7 +6,7 @@ define i2 @f(i32 %arg) { ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: leal (%rdi,%rdi), %eax -; CHECK-NEXT: orb $1, %al +; CHECK-NEXT: incb %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %trunc = trunc i32 %arg to i1 diff --git a/llvm/test/CodeGen/X86/pr45995.ll b/llvm/test/CodeGen/X86/pr45995.ll index f9a9ab768f1a0..997ad6be84b9d 100644 --- a/llvm/test/CodeGen/X86/pr45995.ll +++ b/llvm/test/CodeGen/X86/pr45995.ll @@ -1,18 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -O3 --x86-asm-syntax=intel -mtriple=x86_64 -mattr=avx < %s | FileCheck %s -define void @extracter0([4 x <4 x i1>] %matrix) { +define void @extracter0([4 x <4 x i1>] %matrix) nounwind { ; CHECK-LABEL: extracter0: ; CHECK: # %bb.0: ; CHECK-NEXT: push rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: push r14 -; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: push rbx -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset rbx, -32 -; CHECK-NEXT: .cfi_offset r14, -24 -; CHECK-NEXT: .cfi_offset rbp, -16 ; CHECK-NEXT: vpslld xmm0, xmm0, 31 ; CHECK-NEXT: vmovmskps edi, xmm0 ; CHECK-NEXT: mov ebx, edi @@ -31,11 +25,8 @@ define void @extracter0([4 x <4 x i1>] %matrix) { ; CHECK-NEXT: movzx edi, bl ; CHECK-NEXT: call print_i1@PLT ; CHECK-NEXT: pop rbx -; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: pop r14 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: pop rbp -; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: ret %1 = extractvalue [4 x <4 x i1>] %matrix, 0 %2 = extractelement <4 x i1> %1, i64 0 @@ -49,29 +40,16 @@ define void @extracter0([4 x <4 x i1>] %matrix) { ret void } -define void @extracter1([4 x <4 x i1>] %matrix) { +define void @extracter1([4 x <4 x i1>] %matrix) nounwind { ; CHECK-LABEL: extracter1: ; CHECK: # %bb.0: ; CHECK-NEXT: push rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: push r15 -; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: push r14 -; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: push r13 -; CHECK-NEXT: .cfi_def_cfa_offset 40 ; CHECK-NEXT: push r12 -; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: push rbx -; CHECK-NEXT: .cfi_def_cfa_offset 56 ; CHECK-NEXT: push rax -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset rbx, -56 -; CHECK-NEXT: .cfi_offset r12, -48 -; CHECK-NEXT: .cfi_offset r13, -40 -; CHECK-NEXT: .cfi_offset r14, -32 -; CHECK-NEXT: .cfi_offset r15, -24 -; CHECK-NEXT: .cfi_offset rbp, -16 ; CHECK-NEXT: vpslld xmm1, xmm1, 31 ; CHECK-NEXT: vmovmskps ebx, xmm1 ; CHECK-NEXT: mov eax, ebx @@ -109,19 +87,12 @@ define void @extracter1([4 x <4 x i1>] %matrix) { ; CHECK-NEXT: movzx edi, byte ptr [rsp + 7] # 1-byte Folded Reload ; CHECK-NEXT: call print_i1@PLT ; CHECK-NEXT: add rsp, 8 -; CHECK-NEXT: .cfi_def_cfa_offset 56 ; CHECK-NEXT: pop rbx -; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: pop r12 -; CHECK-NEXT: .cfi_def_cfa_offset 40 ; CHECK-NEXT: pop r13 -; CHECK-NEXT: 
.cfi_def_cfa_offset 32 ; CHECK-NEXT: pop r14 -; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: pop r15 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: pop rbp -; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: ret %1 = extractvalue [4 x <4 x i1>] %matrix, 0 %2 = extractelement <4 x i1> %1, i64 0 diff --git a/llvm/test/CodeGen/X86/pr63439.ll b/llvm/test/CodeGen/X86/pr63439.ll index 155da0c629127..7018940faa81f 100644 --- a/llvm/test/CodeGen/X86/pr63439.ll +++ b/llvm/test/CodeGen/X86/pr63439.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s define i16 @mulhs(i16 %a0, i16 %a1) { ; CHECK-LABEL: mulhs: ; CHECK: # %bb.0: -; CHECK-NEXT: movswl %si, %ecx -; CHECK-NEXT: movswl %di, %eax +; CHECK-NEXT: movswl %di, %ecx +; CHECK-NEXT: movswl %si, %eax ; CHECK-NEXT: imull %ecx, %eax ; CHECK-NEXT: shrl $16, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax @@ -23,23 +23,14 @@ define i16 @mulhs(i16 %a0, i16 %a1) { } define i16 @mulhu(i16 %a0, i16 %a1) { -; SSE-LABEL: mulhu: -; SSE: # %bb.0: -; SSE-NEXT: movzwl %si, %ecx -; SSE-NEXT: movzwl %di, %eax -; SSE-NEXT: imull %ecx, %eax -; SSE-NEXT: shrl $16, %eax -; SSE-NEXT: # kill: def $ax killed $ax killed $eax -; SSE-NEXT: retq -; -; AVX-LABEL: mulhu: -; AVX: # %bb.0: -; AVX-NEXT: vmovd %edi, %xmm0 -; AVX-NEXT: vmovd %esi, %xmm1 -; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: # kill: def $ax killed $ax killed $eax -; AVX-NEXT: retq +; CHECK-LABEL: mulhu: +; CHECK: # %bb.0: +; CHECK-NEXT: movzwl %di, %ecx +; CHECK-NEXT: movzwl %si, %eax +; CHECK-NEXT: imull %ecx, %eax +; CHECK-NEXT: shrl $16, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: retq %x0 = zext i16 %a0 to i32 %x1 = zext i16 %a1 to i32 %v0 = insertelement <1 x i32> , i32 %x0, i32 0 diff --git a/llvm/test/CodeGen/X86/pr64439.ll b/llvm/test/CodeGen/X86/pr64439.ll index 7aa52fc49a9fc..6e3d007dd78c9 100644 --- a/llvm/test/CodeGen/X86/pr64439.ll +++ b/llvm/test/CodeGen/X86/pr64439.ll @@ -4,10 +4,9 @@ define void @f(ptr %0, <32 x i1> %1, i32 %2) nounwind { ; CHECK-LABEL: f: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0 -; CHECK-NEXT: vpmovb2m %ymm0, %k0 -; CHECK-NEXT: kshiftrd $3, %k0, %k1 -; CHECK-NEXT: kmovd %k1, %eax +; CHECK-NEXT: vpsllw $7, %ymm0, %ymm1 +; CHECK-NEXT: vpmovb2m %ymm1, %k0 +; CHECK-NEXT: vpextrb $3, %xmm0, %eax ; CHECK-NEXT: vpbroadcastb %esi, %ymm0 ; CHECK-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1 ; CHECK-NEXT: vpmovm2b %k0, %ymm0 diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-128.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-128.ll index 14a0626ff0ea7..aea76f694a0fc 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-128.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-128.ll @@ -2,18 +2,18 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2-SLOW -; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2-FAST -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2-FAST +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL,AVX512BWVL-ONLY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL,AVX512BWVL-ONLY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL,AVX512VBMI,AVX512VBMI-FAST +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL,AVX512VBMI,AVX512VBMI-SLOW ; PR31551 ; Pairs of shufflevector:trunc functions with functional equivalence. 
@@ -652,3 +652,900 @@ define void @trunc_v2i64_to_v2i8(ptr %L, ptr %S) nounwind { store <2 x i8> %strided.vec, ptr %S ret void } + +; PR88030 - Select sub-elements and truncate + +define <16 x i8> @evenelts_v32i16_shuffle_v16i16_to_v16i8(<32 x i16> %n2) nounwind { +; SSE2-LABEL: evenelts_v32i16_shuffle_v16i16_to_v16i8: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0] +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: pand %xmm4, %xmm2 +; SSE2-NEXT: packuswb %xmm3, %xmm2 +; SSE2-NEXT: pand %xmm4, %xmm1 +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: packuswb %xmm1, %xmm0 +; SSE2-NEXT: packuswb %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE42-LABEL: evenelts_v32i16_shuffle_v16i16_to_v16i8: +; SSE42: # %bb.0: +; SSE42-NEXT: movq {{.*#+}} xmm4 = [0,0,0,0,0,4,8,12,0,0,0,0,0,0,0,0] +; SSE42-NEXT: pshufb %xmm4, %xmm3 +; SSE42-NEXT: pshufb %xmm4, %xmm2 +; SSE42-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; SSE42-NEXT: movd {{.*#+}} xmm3 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0] +; SSE42-NEXT: pshufb %xmm3, %xmm1 +; SSE42-NEXT: pshufb %xmm3, %xmm0 +; SSE42-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] +; SSE42-NEXT: retq +; +; AVX1-LABEL: evenelts_v32i16_shuffle_v16i16_to_v16i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12] +; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vmovd {{.*#+}} xmm3 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: evenelts_v32i16_shuffle_v16i16_to_v16i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12] +; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX2-NEXT: vmovd {{.*#+}} xmm3 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: evenelts_v32i16_shuffle_v16i16_to_v16i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %n0 = bitcast <32 x i16> %n2 to <64 x i8> + %p = shufflevector <64 x i8> %n0, <64 x i8> poison, <16 x i32> + ret <16 x i8> %p +} + +define <16 x i8> @oddelts_v32i16_shuffle_v16i16_to_v16i8(<32 x i16> %n2) nounwind { +; SSE2-LABEL: oddelts_v32i16_shuffle_v16i16_to_v16i8: +; SSE2: # %bb.0: +; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[3,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,5,6,7] +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] +; SSE2-NEXT: pand %xmm4, %xmm3 +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,1,2,0] +; SSE2-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 
= xmm2[3,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,5,6,7] +; SSE2-NEXT: pand %xmm4, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,2,4,5,6,7] +; SSE2-NEXT: packuswb %xmm3, %xmm2 +; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[3,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,5,6,7] +; SSE2-NEXT: pand %xmm4, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,2,0] +; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7] +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] +; SSE2-NEXT: packuswb %xmm1, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3] +; SSE2-NEXT: retq +; +; SSE42-LABEL: oddelts_v32i16_shuffle_v16i16_to_v16i8: +; SSE42: # %bb.0: +; SSE42-NEXT: movq {{.*#+}} xmm4 = [0,0,0,0,2,6,10,14,0,0,0,0,0,0,0,0] +; SSE42-NEXT: pshufb %xmm4, %xmm3 +; SSE42-NEXT: pshufb %xmm4, %xmm2 +; SSE42-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; SSE42-NEXT: movd {{.*#+}} xmm3 = [2,6,10,14,0,0,0,0,0,0,0,0,0,0,0,0] +; SSE42-NEXT: pshufb %xmm3, %xmm1 +; SSE42-NEXT: pshufb %xmm3, %xmm0 +; SSE42-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] +; SSE42-NEXT: retq +; +; AVX1-LABEL: oddelts_v32i16_shuffle_v16i16_to_v16i8: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14] +; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vmovd {{.*#+}} xmm3 = [2,6,10,14,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: oddelts_v32i16_shuffle_v16i16_to_v16i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14] +; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX2-NEXT: vmovd {{.*#+}} xmm3 = [2,6,10,14,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: oddelts_v32i16_shuffle_v16i16_to_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14] +; AVX512F-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX512F-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX512F-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] 
+; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: oddelts_v32i16_shuffle_v16i16_to_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14] +; AVX512VL-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX512VL-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX512VL-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovdb %ymm0, %xmm0 +; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: oddelts_v32i16_shuffle_v16i16_to_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14] +; AVX512BW-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX512BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX512BW-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-ONLY-LABEL: oddelts_v32i16_shuffle_v16i16_to_v16i8: +; AVX512BWVL-ONLY: # %bb.0: +; AVX512BWVL-ONLY-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512BWVL-ONLY-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512BWVL-ONLY-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2,6,10,14,2,6,10,14,2,6,10,14,2,6,10,14] +; AVX512BWVL-ONLY-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX512BWVL-ONLY-NEXT: vpshufb %xmm3, %xmm1, %xmm1 +; AVX512BWVL-ONLY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX512BWVL-ONLY-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX512BWVL-ONLY-NEXT: vpmovdb %ymm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX512BWVL-ONLY-NEXT: vzeroupper +; AVX512BWVL-ONLY-NEXT: retq +; +; AVX512VBMI-LABEL: oddelts_v32i16_shuffle_v16i16_to_v16i8: +; AVX512VBMI: # %bb.0: +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [2,6,10,14,18,22,26,30,34,38,42,46,50,54,58,62] +; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512VBMI-NEXT: vpermt2b %ymm2, %ymm1, %ymm0 +; AVX512VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI-NEXT: vzeroupper +; AVX512VBMI-NEXT: retq + %n0 = bitcast <32 x i16> %n2 to <64 x i8> + %p = shufflevector <64 x i8> %n0, <64 x i8> poison, <16 x i32> + ret <16 x i8> %p +} + +define <16 x i8> @evenelts_v32i16_trunc_v16i16_to_v16i8(<32 x i16> %n2) nounwind { +; SSE2-LABEL: evenelts_v32i16_trunc_v16i16_to_v16i8: +; SSE2: # %bb.0: +; SSE2-NEXT: pushq %rbp +; SSE2-NEXT: pushq %r14 +; SSE2-NEXT: pushq %rbx +; SSE2-NEXT: pextrw $2, %xmm0, %eax +; SSE2-NEXT: pextrw $4, %xmm0, %ecx +; SSE2-NEXT: pextrw $6, %xmm0, %edx +; SSE2-NEXT: pextrw $2, %xmm1, %esi +; SSE2-NEXT: pextrw $4, %xmm1, %edi +; SSE2-NEXT: pextrw $6, %xmm1, %r8d +; SSE2-NEXT: pextrw $2, %xmm2, %r9d +; SSE2-NEXT: pextrw $4, %xmm2, %r10d +; SSE2-NEXT: pextrw $6, %xmm2, %r11d +; SSE2-NEXT: pextrw $2, %xmm3, %ebx +; SSE2-NEXT: pextrw $4, %xmm3, %ebp +; SSE2-NEXT: pextrw $6, %xmm3, %r14d +; SSE2-NEXT: movd %r14d, %xmm4 +; SSE2-NEXT: movd %ebp, %xmm5 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] +; SSE2-NEXT: movd %ebx, %xmm4 +; SSE2-NEXT: punpcklbw {{.*#+}} 
xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3] +; SSE2-NEXT: movd %r11d, %xmm4 +; SSE2-NEXT: movd %r10d, %xmm5 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] +; SSE2-NEXT: movd %r9d, %xmm4 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; SSE2-NEXT: movd %r8d, %xmm3 +; SSE2-NEXT: movd %edi, %xmm4 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; SSE2-NEXT: movd %esi, %xmm3 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] +; SSE2-NEXT: movd %edx, %xmm3 +; SSE2-NEXT: movd %ecx, %xmm4 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; SSE2-NEXT: movd %eax, %xmm3 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: popq %rbx +; SSE2-NEXT: popq %r14 +; SSE2-NEXT: popq %rbp +; SSE2-NEXT: retq +; +; SSE42-LABEL: evenelts_v32i16_trunc_v16i16_to_v16i8: +; SSE42: # %bb.0: +; SSE42-NEXT: pushq %rbp +; SSE42-NEXT: pushq %r14 +; SSE42-NEXT: pushq %rbx +; SSE42-NEXT: pextrw $6, %xmm3, %eax +; SSE42-NEXT: pextrw $4, %xmm3, %ecx +; SSE42-NEXT: pextrw $2, %xmm3, %edx +; SSE42-NEXT: movd %xmm3, %esi +; SSE42-NEXT: pextrw $6, %xmm2, %edi +; SSE42-NEXT: pextrw $4, %xmm2, %r8d +; SSE42-NEXT: pextrw $2, %xmm2, %r9d +; SSE42-NEXT: movd %xmm2, %r10d +; SSE42-NEXT: pextrw $6, %xmm1, %r11d +; SSE42-NEXT: pextrw $4, %xmm1, %ebx +; SSE42-NEXT: pextrw $2, %xmm1, %ebp +; SSE42-NEXT: movd %xmm1, %r14d +; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE42-NEXT: pinsrb $4, %r14d, %xmm0 +; SSE42-NEXT: pinsrb $5, %ebp, %xmm0 +; SSE42-NEXT: pinsrb $6, %ebx, %xmm0 +; SSE42-NEXT: pinsrb $7, %r11d, %xmm0 +; SSE42-NEXT: pinsrb $8, %r10d, %xmm0 +; SSE42-NEXT: pinsrb $9, %r9d, %xmm0 +; SSE42-NEXT: pinsrb $10, %r8d, %xmm0 +; SSE42-NEXT: pinsrb $11, %edi, %xmm0 +; SSE42-NEXT: pinsrb $12, %esi, %xmm0 +; SSE42-NEXT: pinsrb $13, %edx, %xmm0 +; SSE42-NEXT: pinsrb $14, %ecx, %xmm0 +; SSE42-NEXT: pinsrb $15, %eax, %xmm0 +; SSE42-NEXT: popq %rbx +; SSE42-NEXT: popq %r14 +; SSE42-NEXT: popq %rbp +; SSE42-NEXT: retq +; +; AVX1-LABEL: evenelts_v32i16_trunc_v16i16_to_v16i8: +; AVX1: # %bb.0: +; AVX1-NEXT: pushq %rbp +; AVX1-NEXT: pushq %r14 +; AVX1-NEXT: pushq %rbx +; AVX1-NEXT: vextractf128 $1, %ymm1, 
%xmm2 +; AVX1-NEXT: vpextrw $6, %xmm2, %eax +; AVX1-NEXT: vpextrw $4, %xmm2, %ecx +; AVX1-NEXT: vpextrw $2, %xmm2, %edx +; AVX1-NEXT: vmovd %xmm2, %esi +; AVX1-NEXT: vpextrw $6, %xmm1, %edi +; AVX1-NEXT: vpextrw $4, %xmm1, %r8d +; AVX1-NEXT: vpextrw $2, %xmm1, %r9d +; AVX1-NEXT: vmovd %xmm1, %r10d +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrw $6, %xmm1, %r11d +; AVX1-NEXT: vpextrw $4, %xmm1, %ebx +; AVX1-NEXT: vpextrw $2, %xmm1, %ebp +; AVX1-NEXT: vmovd %xmm1, %r14d +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $5, %ebp, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX1-NEXT: popq %rbx +; AVX1-NEXT: popq %r14 +; AVX1-NEXT: popq %rbp +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: evenelts_v32i16_trunc_v16i16_to_v16i8: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrw $6, %xmm2, %eax +; AVX2-NEXT: vpextrw $4, %xmm2, %ecx +; AVX2-NEXT: vpextrw $2, %xmm2, %edx +; AVX2-NEXT: vmovd %xmm2, %esi +; AVX2-NEXT: vpextrw $6, %xmm1, %edi +; AVX2-NEXT: vpextrw $4, %xmm1, %r8d +; AVX2-NEXT: vpextrw $2, %xmm1, %r9d +; AVX2-NEXT: vmovd %xmm1, %r10d +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpextrw $6, %xmm1, %r11d +; AVX2-NEXT: vpextrw $4, %xmm1, %ebx +; AVX2-NEXT: vpextrw $2, %xmm1, %ebp +; AVX2-NEXT: vmovd %xmm1, %r14d +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $5, %ebp, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: evenelts_v32i16_trunc_v16i16_to_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: pushq %rbp +; AVX512F-NEXT: pushq %rbx +; AVX512F-NEXT: vpmovdb %zmm0, %xmm1 +; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; AVX512F-NEXT: vpextrw $6, %xmm2, %eax +; AVX512F-NEXT: vpextrw $4, %xmm2, %ecx +; AVX512F-NEXT: vpextrw $2, %xmm2, %edx +; AVX512F-NEXT: vmovd %xmm2, %esi +; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm2 +; AVX512F-NEXT: vpextrw $6, %xmm2, %edi +; AVX512F-NEXT: vpextrw $4, %xmm2, %r8d +; AVX512F-NEXT: vpextrw $2, %xmm2, %r9d +; AVX512F-NEXT: vmovd %xmm2, %r10d +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpextrw $6, %xmm0, %r11d +; AVX512F-NEXT: vpextrw $4, %xmm0, %ebx +; AVX512F-NEXT: vpextrw $2, %xmm0, %ebp +; AVX512F-NEXT: vpinsrb $5, %ebp, %xmm1, %xmm0 +; AVX512F-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $7, %r11d, %xmm0, 
%xmm0 +; AVX512F-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: popq %rbx +; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: evenelts_v32i16_trunc_v16i16_to_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: pushq %rbp +; AVX512VL-NEXT: pushq %r14 +; AVX512VL-NEXT: pushq %rbx +; AVX512VL-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; AVX512VL-NEXT: vpextrw $6, %xmm1, %eax +; AVX512VL-NEXT: vpextrw $4, %xmm1, %ecx +; AVX512VL-NEXT: vpextrw $2, %xmm1, %edx +; AVX512VL-NEXT: vmovd %xmm1, %esi +; AVX512VL-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; AVX512VL-NEXT: vpextrw $6, %xmm1, %edi +; AVX512VL-NEXT: vpextrw $4, %xmm1, %r8d +; AVX512VL-NEXT: vpextrw $2, %xmm1, %r9d +; AVX512VL-NEXT: vmovd %xmm1, %r10d +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512VL-NEXT: vpextrw $6, %xmm1, %r11d +; AVX512VL-NEXT: vpextrw $4, %xmm1, %ebx +; AVX512VL-NEXT: vpextrw $2, %xmm1, %ebp +; AVX512VL-NEXT: vmovd %xmm1, %r14d +; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $5, %ebp, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512VL-NEXT: popq %rbx +; AVX512VL-NEXT: popq %r14 +; AVX512VL-NEXT: popq %rbp +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: evenelts_v32i16_trunc_v16i16_to_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: pushq %rbp +; AVX512BW-NEXT: pushq %rbx +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm1 +; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm2 +; AVX512BW-NEXT: vpextrw $6, %xmm2, %eax +; AVX512BW-NEXT: vpextrw $4, %xmm2, %ecx +; AVX512BW-NEXT: vpextrw $2, %xmm2, %edx +; AVX512BW-NEXT: vmovd %xmm2, %esi +; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 +; AVX512BW-NEXT: vpextrw $6, %xmm2, %edi +; AVX512BW-NEXT: vpextrw $4, %xmm2, %r8d +; AVX512BW-NEXT: vpextrw $2, %xmm2, %r9d +; AVX512BW-NEXT: vmovd %xmm2, %r10d +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512BW-NEXT: vpextrw $6, %xmm0, %r11d +; AVX512BW-NEXT: vpextrw $4, %xmm0, %ebx +; AVX512BW-NEXT: vpextrw $2, %xmm0, %ebp +; AVX512BW-NEXT: vpinsrb $5, %ebp, %xmm1, %xmm0 +; AVX512BW-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512BW-NEXT: popq %rbx +; AVX512BW-NEXT: popq %rbp +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-ONLY-LABEL: 
evenelts_v32i16_trunc_v16i16_to_v16i8: +; AVX512BWVL-ONLY: # %bb.0: +; AVX512BWVL-ONLY-NEXT: pushq %rbp +; AVX512BWVL-ONLY-NEXT: pushq %r14 +; AVX512BWVL-ONLY-NEXT: pushq %rbx +; AVX512BWVL-ONLY-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; AVX512BWVL-ONLY-NEXT: vpextrw $6, %xmm1, %eax +; AVX512BWVL-ONLY-NEXT: vpextrw $4, %xmm1, %ecx +; AVX512BWVL-ONLY-NEXT: vpextrw $2, %xmm1, %edx +; AVX512BWVL-ONLY-NEXT: vmovd %xmm1, %esi +; AVX512BWVL-ONLY-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; AVX512BWVL-ONLY-NEXT: vpextrw $6, %xmm1, %edi +; AVX512BWVL-ONLY-NEXT: vpextrw $4, %xmm1, %r8d +; AVX512BWVL-ONLY-NEXT: vpextrw $2, %xmm1, %r9d +; AVX512BWVL-ONLY-NEXT: vmovd %xmm1, %r10d +; AVX512BWVL-ONLY-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BWVL-ONLY-NEXT: vpextrw $6, %xmm1, %r11d +; AVX512BWVL-ONLY-NEXT: vpextrw $4, %xmm1, %ebx +; AVX512BWVL-ONLY-NEXT: vpextrw $2, %xmm1, %ebp +; AVX512BWVL-ONLY-NEXT: vmovd %xmm1, %r14d +; AVX512BWVL-ONLY-NEXT: vpmovdb %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $5, %ebp, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: popq %rbx +; AVX512BWVL-ONLY-NEXT: popq %r14 +; AVX512BWVL-ONLY-NEXT: popq %rbp +; AVX512BWVL-ONLY-NEXT: vzeroupper +; AVX512BWVL-ONLY-NEXT: retq +; +; AVX512VBMI-FAST-LABEL: evenelts_v32i16_trunc_v16i16_to_v16i8: +; AVX512VBMI-FAST: # %bb.0: +; AVX512VBMI-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,79] +; AVX512VBMI-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VBMI-FAST-NEXT: vpermi2b %zmm2, %zmm0, %zmm1 +; AVX512VBMI-FAST-NEXT: vextracti32x4 $3, %zmm0, %xmm0 +; AVX512VBMI-FAST-NEXT: vpextrw $6, %xmm0, %eax +; AVX512VBMI-FAST-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0 +; AVX512VBMI-FAST-NEXT: vzeroupper +; AVX512VBMI-FAST-NEXT: retq +; +; AVX512VBMI-SLOW-LABEL: evenelts_v32i16_trunc_v16i16_to_v16i8: +; AVX512VBMI-SLOW: # %bb.0: +; AVX512VBMI-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [0,4,8,12,16,20,24,28,32,36,40,44,48,77,78,79] +; AVX512VBMI-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VBMI-SLOW-NEXT: vpermi2b %zmm2, %zmm0, %zmm1 +; AVX512VBMI-SLOW-NEXT: vextracti32x4 $3, %zmm0, %xmm0 +; AVX512VBMI-SLOW-NEXT: vpextrw $6, %xmm0, %eax +; AVX512VBMI-SLOW-NEXT: vpextrw $4, %xmm0, %ecx +; AVX512VBMI-SLOW-NEXT: vpextrw $2, %xmm0, %edx +; AVX512VBMI-SLOW-NEXT: vpinsrb $13, %edx, %xmm1, %xmm0 +; AVX512VBMI-SLOW-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX512VBMI-SLOW-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512VBMI-SLOW-NEXT: vzeroupper +; AVX512VBMI-SLOW-NEXT: retq + %n0 = shufflevector <32 x i16> %n2, <32 x i16> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30> + %n1 = trunc <16 x i16> %n0 to <16 x i8> + ret <16 x i8> %n1 +} + +define <16 x i8> @oddelts_v32i16_trunc_v16i16_to_v16i8(<32 x i16> %n2) nounwind { +; SSE2-LABEL: oddelts_v32i16_trunc_v16i16_to_v16i8: +; SSE2: # %bb.0: +; SSE2-NEXT: pextrw $7, %xmm3, %eax +; SSE2-NEXT: movd %eax, %xmm4 +; SSE2-NEXT: pextrw $5, %xmm3, %eax +; SSE2-NEXT: movd %eax, %xmm5 +; SSE2-NEXT: pextrw $3, %xmm3, %eax +; SSE2-NEXT:
movd %eax, %xmm6 +; SSE2-NEXT: pextrw $1, %xmm3, %eax +; SSE2-NEXT: movd %eax, %xmm3 +; SSE2-NEXT: pextrw $7, %xmm2, %eax +; SSE2-NEXT: movd %eax, %xmm7 +; SSE2-NEXT: pextrw $5, %xmm2, %eax +; SSE2-NEXT: movd %eax, %xmm8 +; SSE2-NEXT: pextrw $3, %xmm2, %eax +; SSE2-NEXT: movd %eax, %xmm9 +; SSE2-NEXT: pextrw $1, %xmm2, %eax +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: pextrw $7, %xmm1, %eax +; SSE2-NEXT: movd %eax, %xmm10 +; SSE2-NEXT: pextrw $5, %xmm1, %eax +; SSE2-NEXT: movd %eax, %xmm11 +; SSE2-NEXT: pextrw $3, %xmm1, %eax +; SSE2-NEXT: movd %eax, %xmm12 +; SSE2-NEXT: pextrw $1, %xmm1, %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: pextrw $7, %xmm0, %eax +; SSE2-NEXT: movd %eax, %xmm13 +; SSE2-NEXT: pextrw $5, %xmm0, %eax +; SSE2-NEXT: movd %eax, %xmm14 +; SSE2-NEXT: pextrw $3, %xmm0, %eax +; SSE2-NEXT: movd %eax, %xmm15 +; SSE2-NEXT: pextrw $1, %xmm0, %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3],xmm3[4],xmm6[4],xmm3[5],xmm6[5],xmm3[6],xmm6[6],xmm3[7],xmm6[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3],xmm8[4],xmm7[4],xmm8[5],xmm7[5],xmm8[6],xmm7[6],xmm8[7],xmm7[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm9[0],xmm2[1],xmm9[1],xmm2[2],xmm9[2],xmm2[3],xmm9[3],xmm2[4],xmm9[4],xmm2[5],xmm9[5],xmm2[6],xmm9[6],xmm2[7],xmm9[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm11 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3],xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm12[0],xmm1[1],xmm12[1],xmm1[2],xmm12[2],xmm1[3],xmm12[3],xmm1[4],xmm12[4],xmm1[5],xmm12[5],xmm1[6],xmm12[6],xmm1[7],xmm12[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm11[0],xmm1[1],xmm11[1],xmm1[2],xmm11[2],xmm1[3],xmm11[3] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm14 = xmm14[0],xmm13[0],xmm14[1],xmm13[1],xmm14[2],xmm13[2],xmm14[3],xmm13[3],xmm14[4],xmm13[4],xmm14[5],xmm13[5],xmm14[6],xmm13[6],xmm14[7],xmm13[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm15[0],xmm0[1],xmm15[1],xmm0[2],xmm15[2],xmm0[3],xmm15[3],xmm0[4],xmm15[4],xmm0[5],xmm15[5],xmm0[6],xmm15[6],xmm0[7],xmm15[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm14[0],xmm0[1],xmm14[1],xmm0[2],xmm14[2],xmm0[3],xmm14[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: retq +; +; SSE42-LABEL: oddelts_v32i16_trunc_v16i16_to_v16i8: +; SSE42: # %bb.0: +; SSE42-NEXT: pushq %rbp +; SSE42-NEXT: pushq %r14 +; SSE42-NEXT: pushq %rbx +; SSE42-NEXT: pextrw $7, %xmm3, %eax +; SSE42-NEXT: pextrw $5, %xmm3, %ecx +; SSE42-NEXT: pextrw $3, %xmm3, %edx +; SSE42-NEXT: pextrw $1, %xmm3, %esi +; SSE42-NEXT: pextrw $7, %xmm2, %edi +; SSE42-NEXT: pextrw $5, %xmm2, %r8d +; SSE42-NEXT: pextrw $3, %xmm2, %r9d +; SSE42-NEXT: pextrw $1, %xmm2, %r10d +; SSE42-NEXT: pextrw $7, %xmm1, %r11d +; SSE42-NEXT: pextrw $5, %xmm1, %ebx +; SSE42-NEXT: pextrw $3, %xmm1, %ebp +; SSE42-NEXT: pextrw $1, %xmm1, 
%r14d +; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u] +; SSE42-NEXT: pinsrb $4, %r14d, %xmm0 +; SSE42-NEXT: pinsrb $5, %ebp, %xmm0 +; SSE42-NEXT: pinsrb $6, %ebx, %xmm0 +; SSE42-NEXT: pinsrb $7, %r11d, %xmm0 +; SSE42-NEXT: pinsrb $8, %r10d, %xmm0 +; SSE42-NEXT: pinsrb $9, %r9d, %xmm0 +; SSE42-NEXT: pinsrb $10, %r8d, %xmm0 +; SSE42-NEXT: pinsrb $11, %edi, %xmm0 +; SSE42-NEXT: pinsrb $12, %esi, %xmm0 +; SSE42-NEXT: pinsrb $13, %edx, %xmm0 +; SSE42-NEXT: pinsrb $14, %ecx, %xmm0 +; SSE42-NEXT: pinsrb $15, %eax, %xmm0 +; SSE42-NEXT: popq %rbx +; SSE42-NEXT: popq %r14 +; SSE42-NEXT: popq %rbp +; SSE42-NEXT: retq +; +; AVX1-LABEL: oddelts_v32i16_trunc_v16i16_to_v16i8: +; AVX1: # %bb.0: +; AVX1-NEXT: pushq %rbp +; AVX1-NEXT: pushq %r14 +; AVX1-NEXT: pushq %rbx +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrw $7, %xmm2, %eax +; AVX1-NEXT: vpextrw $5, %xmm2, %ecx +; AVX1-NEXT: vpextrw $3, %xmm2, %edx +; AVX1-NEXT: vpextrw $1, %xmm2, %esi +; AVX1-NEXT: vpextrw $7, %xmm1, %edi +; AVX1-NEXT: vpextrw $5, %xmm1, %r8d +; AVX1-NEXT: vpextrw $3, %xmm1, %r9d +; AVX1-NEXT: vpextrw $1, %xmm1, %r10d +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpextrw $7, %xmm1, %r11d +; AVX1-NEXT: vpextrw $5, %xmm1, %ebx +; AVX1-NEXT: vpextrw $3, %xmm1, %ebp +; AVX1-NEXT: vpextrw $1, %xmm1, %r14d +; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX1-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $5, %ebp, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX1-NEXT: popq %rbx +; AVX1-NEXT: popq %r14 +; AVX1-NEXT: popq %rbp +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: oddelts_v32i16_trunc_v16i16_to_v16i8: +; AVX2: # %bb.0: +; AVX2-NEXT: pushq %rbp +; AVX2-NEXT: pushq %r14 +; AVX2-NEXT: pushq %rbx +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrw $7, %xmm2, %eax +; AVX2-NEXT: vpextrw $5, %xmm2, %ecx +; AVX2-NEXT: vpextrw $3, %xmm2, %edx +; AVX2-NEXT: vpextrw $1, %xmm2, %esi +; AVX2-NEXT: vpextrw $7, %xmm1, %edi +; AVX2-NEXT: vpextrw $5, %xmm1, %r8d +; AVX2-NEXT: vpextrw $3, %xmm1, %r9d +; AVX2-NEXT: vpextrw $1, %xmm1, %r10d +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpextrw $7, %xmm1, %r11d +; AVX2-NEXT: vpextrw $5, %xmm1, %ebx +; AVX2-NEXT: vpextrw $3, %xmm1, %ebp +; AVX2-NEXT: vpextrw $1, %xmm1, %r14d +; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $5, %ebp, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX2-NEXT: popq %rbx +; AVX2-NEXT: popq %r14 +; AVX2-NEXT: popq %rbp +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: 
oddelts_v32i16_trunc_v16i16_to_v16i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: pushq %rbp +; AVX512F-NEXT: pushq %r14 +; AVX512F-NEXT: pushq %rbx +; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; AVX512F-NEXT: vpextrw $7, %xmm1, %eax +; AVX512F-NEXT: vpextrw $5, %xmm1, %ecx +; AVX512F-NEXT: vpextrw $3, %xmm1, %edx +; AVX512F-NEXT: vpextrw $1, %xmm1, %esi +; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; AVX512F-NEXT: vpextrw $7, %xmm1, %edi +; AVX512F-NEXT: vpextrw $5, %xmm1, %r8d +; AVX512F-NEXT: vpextrw $3, %xmm1, %r9d +; AVX512F-NEXT: vpextrw $1, %xmm1, %r10d +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpextrw $7, %xmm1, %r11d +; AVX512F-NEXT: vpextrw $5, %xmm1, %ebx +; AVX512F-NEXT: vpextrw $3, %xmm1, %ebp +; AVX512F-NEXT: vpextrw $1, %xmm1, %r14d +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $5, %ebp, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: popq %rbx +; AVX512F-NEXT: popq %r14 +; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: oddelts_v32i16_trunc_v16i16_to_v16i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: pushq %rbp +; AVX512VL-NEXT: pushq %r14 +; AVX512VL-NEXT: pushq %rbx +; AVX512VL-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; AVX512VL-NEXT: vpextrw $7, %xmm1, %eax +; AVX512VL-NEXT: vpextrw $5, %xmm1, %ecx +; AVX512VL-NEXT: vpextrw $3, %xmm1, %edx +; AVX512VL-NEXT: vpextrw $1, %xmm1, %esi +; AVX512VL-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; AVX512VL-NEXT: vpextrw $7, %xmm1, %edi +; AVX512VL-NEXT: vpextrw $5, %xmm1, %r8d +; AVX512VL-NEXT: vpextrw $3, %xmm1, %r9d +; AVX512VL-NEXT: vpextrw $1, %xmm1, %r10d +; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512VL-NEXT: vpextrw $7, %xmm1, %r11d +; AVX512VL-NEXT: vpextrw $5, %xmm1, %ebx +; AVX512VL-NEXT: vpextrw $3, %xmm1, %ebp +; AVX512VL-NEXT: vpextrw $1, %xmm1, %r14d +; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $5, %ebp, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX512VL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512VL-NEXT: popq %rbx +; AVX512VL-NEXT: popq %r14 +; AVX512VL-NEXT: popq %rbp +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: oddelts_v32i16_trunc_v16i16_to_v16i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: pushq %rbp +; AVX512BW-NEXT: pushq %r14 +; AVX512BW-NEXT: pushq %rbx +; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; AVX512BW-NEXT: vpextrw $7, %xmm1, %eax +; AVX512BW-NEXT: vpextrw $5, %xmm1, %ecx +; AVX512BW-NEXT: vpextrw $3, %xmm1, 
%edx +; AVX512BW-NEXT: vpextrw $1, %xmm1, %esi +; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; AVX512BW-NEXT: vpextrw $7, %xmm1, %edi +; AVX512BW-NEXT: vpextrw $5, %xmm1, %r8d +; AVX512BW-NEXT: vpextrw $3, %xmm1, %r9d +; AVX512BW-NEXT: vpextrw $1, %xmm1, %r10d +; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BW-NEXT: vpextrw $7, %xmm1, %r11d +; AVX512BW-NEXT: vpextrw $5, %xmm1, %ebx +; AVX512BW-NEXT: vpextrw $3, %xmm1, %ebp +; AVX512BW-NEXT: vpextrw $1, %xmm1, %r14d +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $5, %ebp, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512BW-NEXT: popq %rbx +; AVX512BW-NEXT: popq %r14 +; AVX512BW-NEXT: popq %rbp +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-ONLY-LABEL: oddelts_v32i16_trunc_v16i16_to_v16i8: +; AVX512BWVL-ONLY: # %bb.0: +; AVX512BWVL-ONLY-NEXT: pushq %rbp +; AVX512BWVL-ONLY-NEXT: pushq %r14 +; AVX512BWVL-ONLY-NEXT: pushq %rbx +; AVX512BWVL-ONLY-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; AVX512BWVL-ONLY-NEXT: vpextrw $7, %xmm1, %eax +; AVX512BWVL-ONLY-NEXT: vpextrw $5, %xmm1, %ecx +; AVX512BWVL-ONLY-NEXT: vpextrw $3, %xmm1, %edx +; AVX512BWVL-ONLY-NEXT: vpextrw $1, %xmm1, %esi +; AVX512BWVL-ONLY-NEXT: vextracti32x4 $2, %zmm0, %xmm1 +; AVX512BWVL-ONLY-NEXT: vpextrw $7, %xmm1, %edi +; AVX512BWVL-ONLY-NEXT: vpextrw $5, %xmm1, %r8d +; AVX512BWVL-ONLY-NEXT: vpextrw $3, %xmm1, %r9d +; AVX512BWVL-ONLY-NEXT: vpextrw $1, %xmm1, %r10d +; AVX512BWVL-ONLY-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512BWVL-ONLY-NEXT: vpextrw $7, %xmm1, %r11d +; AVX512BWVL-ONLY-NEXT: vpextrw $5, %xmm1, %ebx +; AVX512BWVL-ONLY-NEXT: vpextrw $3, %xmm1, %ebp +; AVX512BWVL-ONLY-NEXT: vpextrw $1, %xmm1, %r14d +; AVX512BWVL-ONLY-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BWVL-ONLY-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $5, %ebp, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $6, %ebx, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $7, %r11d, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $9, %r9d, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $10, %r8d, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $12, %esi, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512BWVL-ONLY-NEXT: popq %rbx +; AVX512BWVL-ONLY-NEXT: popq %r14 +; AVX512BWVL-ONLY-NEXT: popq %rbp +; AVX512BWVL-ONLY-NEXT: vzeroupper +; AVX512BWVL-ONLY-NEXT: retq +; +; AVX512VBMI-FAST-LABEL: oddelts_v32i16_trunc_v16i16_to_v16i8: +; AVX512VBMI-FAST: # %bb.0: +; AVX512VBMI-FAST-NEXT: vmovdqa {{.*#+}} xmm1 = [2,6,10,14,18,22,26,30,34,38,42,46,50,54,58,62] +; AVX512VBMI-FAST-NEXT: vpermb %zmm0, %zmm1, %zmm0 +; AVX512VBMI-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512VBMI-FAST-NEXT: vzeroupper +; 
AVX512VBMI-FAST-NEXT: retq +; +; AVX512VBMI-SLOW-LABEL: oddelts_v32i16_trunc_v16i16_to_v16i8: +; AVX512VBMI-SLOW: # %bb.0: +; AVX512VBMI-SLOW-NEXT: vmovdqa {{.*#+}} xmm1 = [2,6,10,14,18,22,26,30,34,38,42,46,50,u,u,u] +; AVX512VBMI-SLOW-NEXT: vpermb %zmm0, %zmm1, %zmm1 +; AVX512VBMI-SLOW-NEXT: vextracti32x4 $3, %zmm0, %xmm0 +; AVX512VBMI-SLOW-NEXT: vpextrw $7, %xmm0, %eax +; AVX512VBMI-SLOW-NEXT: vpextrw $5, %xmm0, %ecx +; AVX512VBMI-SLOW-NEXT: vpextrw $3, %xmm0, %edx +; AVX512VBMI-SLOW-NEXT: vpinsrb $13, %edx, %xmm1, %xmm0 +; AVX512VBMI-SLOW-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; AVX512VBMI-SLOW-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512VBMI-SLOW-NEXT: vzeroupper +; AVX512VBMI-SLOW-NEXT: retq + %n0 = shufflevector <32 x i16> %n2, <32 x i16> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31> + %n1 = trunc <16 x i16> %n0 to <16 x i8> + ret <16 x i8> %n1 +} diff --git a/llvm/test/CodeGen/X86/strict-fadd-combines.ll b/llvm/test/CodeGen/X86/strict-fadd-combines.ll index e0c61ac8d395e..14944fab7d00d 100644 --- a/llvm/test/CodeGen/X86/strict-fadd-combines.ll +++ b/llvm/test/CodeGen/X86/strict-fadd-combines.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64 -define float @fneg_strict_fadd_to_strict_fsub(float %x, float %y) nounwind { +define float @fneg_strict_fadd_to_strict_fsub(float %x, float %y) nounwind strictfp { ; X86-LABEL: fneg_strict_fadd_to_strict_fsub: ; X86: # %bb.0: ; X86-NEXT: pushl %eax @@ -10,6 +10,7 @@ define float @fneg_strict_fadd_to_strict_fsub(float %x, float %y) nounwind { ; X86-NEXT: subss {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movss %xmm0, (%esp) ; X86-NEXT: flds (%esp) +; X86-NEXT: wait ; X86-NEXT: popl %eax ; X86-NEXT: retl ; @@ -22,7 +23,7 @@ define float @fneg_strict_fadd_to_strict_fsub(float %x, float %y) nounwind { ret float %add } -define float @fneg_strict_fadd_to_strict_fsub_2(float %x, float %y) nounwind { +define float @fneg_strict_fadd_to_strict_fsub_2(float %x, float %y) nounwind strictfp { ; X86-LABEL: fneg_strict_fadd_to_strict_fsub_2: ; X86: # %bb.0: ; X86-NEXT: pushl %eax @@ -30,6 +31,7 @@ define float @fneg_strict_fadd_to_strict_fsub_2(float %x, float %y) nounwind { ; X86-NEXT: subss {{[0-9]+}}(%esp), %xmm0 ; X86-NEXT: movss %xmm0, (%esp) ; X86-NEXT: flds (%esp) +; X86-NEXT: wait ; X86-NEXT: popl %eax ; X86-NEXT: retl ; @@ -42,7 +44,7 @@ define float @fneg_strict_fadd_to_strict_fsub_2(float %x, float %y) nounwind { ret float %add } -define double @fneg_strict_fadd_to_strict_fsub_d(double %x, double %y) nounwind { +define double @fneg_strict_fadd_to_strict_fsub_d(double %x, double %y) nounwind strictfp { ; X86-LABEL: fneg_strict_fadd_to_strict_fsub_d: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp @@ -53,6 +55,7 @@ define double @fneg_strict_fadd_to_strict_fsub_d(double %x, double %y) nounwind ; X86-NEXT: subsd 16(%ebp), %xmm0 ; X86-NEXT: movsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) +; X86-NEXT: wait ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl @@ -66,7 +69,7 @@ define double @fneg_strict_fadd_to_strict_fsub_d(double %x, double %y) nounwind ret double %add } -define double @fneg_strict_fadd_to_strict_fsub_2d(double %x, double %y) nounwind { +define double @fneg_strict_fadd_to_strict_fsub_2d(double %x, double %y) nounwind strictfp { ; X86-LABEL: fneg_strict_fadd_to_strict_fsub_2d: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp @@ -77,6 +80,7 @@ define double @fneg_strict_fadd_to_strict_fsub_2d(double %x, double %y)
nounwind ; X86-NEXT: subsd 16(%ebp), %xmm0 ; X86-NEXT: movsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) +; X86-NEXT: wait ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/strict-fsub-combines.ll b/llvm/test/CodeGen/X86/strict-fsub-combines.ll index 8cb591a980e11..774ea02ccd87a 100644 --- a/llvm/test/CodeGen/X86/strict-fsub-combines.ll +++ b/llvm/test/CodeGen/X86/strict-fsub-combines.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64 ; FIXME: Missing fsub(x,fneg(y)) -> fadd(x,y) fold -define float @fneg_strict_fsub_to_strict_fadd(float %x, float %y) nounwind { +define float @fneg_strict_fsub_to_strict_fadd(float %x, float %y) nounwind strictfp { ; X86-LABEL: fneg_strict_fsub_to_strict_fadd: ; X86: # %bb.0: ; X86-NEXT: pushl %eax @@ -13,6 +13,7 @@ define float @fneg_strict_fsub_to_strict_fadd(float %x, float %y) nounwind { ; X86-NEXT: subss %xmm1, %xmm0 ; X86-NEXT: movss %xmm0, (%esp) ; X86-NEXT: flds (%esp) +; X86-NEXT: wait ; X86-NEXT: popl %eax ; X86-NEXT: retl ; @@ -27,7 +28,7 @@ define float @fneg_strict_fsub_to_strict_fadd(float %x, float %y) nounwind { } ; FIXME: Missing fsub(x,fneg(y)) -> fadd(x,y) fold -define double @fneg_strict_fsub_to_strict_fadd_d(double %x, double %y) nounwind { +define double @fneg_strict_fsub_to_strict_fadd_d(double %x, double %y) nounwind strictfp { ; X86-LABEL: fneg_strict_fsub_to_strict_fadd_d: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp @@ -40,6 +41,7 @@ define double @fneg_strict_fsub_to_strict_fadd_d(double %x, double %y) nounwind ; X86-NEXT: subsd %xmm1, %xmm0 ; X86-NEXT: movsd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) +; X86-NEXT: wait ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl @@ -55,7 +57,7 @@ define double @fneg_strict_fsub_to_strict_fadd_d(double %x, double %y) nounwind } ; FIXME: Missing fneg(fsub(x,y)) -> fsub(y,x) fold -define float @strict_fsub_fneg_to_strict_fsub(float %x, float %y) nounwind { +define float @strict_fsub_fneg_to_strict_fsub(float %x, float %y) nounwind strictfp { ; X86-LABEL: strict_fsub_fneg_to_strict_fsub: ; X86: # %bb.0: ; X86-NEXT: pushl %eax @@ -64,6 +66,7 @@ define float @strict_fsub_fneg_to_strict_fsub(float %x, float %y) nounwind { ; X86-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-NEXT: movss %xmm0, (%esp) ; X86-NEXT: flds (%esp) +; X86-NEXT: wait ; X86-NEXT: popl %eax ; X86-NEXT: retl ; @@ -78,7 +81,7 @@ define float @strict_fsub_fneg_to_strict_fsub(float %x, float %y) nounwind { } ; FIXME: Missing fneg(fsub(x,y)) -> fsub(y,x) fold -define double @strict_fsub_fneg_to_strict_fsub_d(double %x, double %y) nounwind { +define double @strict_fsub_fneg_to_strict_fsub_d(double %x, double %y) nounwind strictfp { ; X86-LABEL: strict_fsub_fneg_to_strict_fsub_d: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp @@ -90,6 +93,7 @@ define double @strict_fsub_fneg_to_strict_fsub_d(double %x, double %y) nounwind ; X86-NEXT: xorpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-NEXT: movlpd %xmm0, (%esp) ; X86-NEXT: fldl (%esp) +; X86-NEXT: wait ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/uint_to_fp.ll b/llvm/test/CodeGen/X86/uint_to_fp.ll index d8e0b61ed199f..8c8cbb151974d 100644 --- a/llvm/test/CodeGen/X86/uint_to_fp.ll +++ b/llvm/test/CodeGen/X86/uint_to_fp.ll @@ -25,3 +25,57 @@ entry: store float %1, ptr %y ret void } + +define float @test_without_nneg(i32 %x) nounwind { +; X86-LABEL: test_without_nneg: +; X86: ## %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: movss 
{{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: orpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: cvtsd2ss %xmm0, %xmm0 +; X86-NEXT: movss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl +; +; X64-LABEL: test_without_nneg: +; X64: ## %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: cvtsi2ss %rax, %xmm0 +; X64-NEXT: retq + %r = uitofp i32 %x to float + ret float %r +} + +define float @test_with_nneg(i32 %x) nounwind { +; X86-LABEL: test_with_nneg: +; X86: ## %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: movss %xmm0, (%esp) +; X86-NEXT: flds (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: retl +; +; X64-LABEL: test_with_nneg: +; X64: ## %bb.0: +; X64-NEXT: cvtsi2ss %edi, %xmm0 +; X64-NEXT: retq + %r = uitofp nneg i32 %x to float + ret float %r +} + +define <4 x float> @test_with_nneg_vec(<4 x i32> %x) nounwind { +; X86-LABEL: test_with_nneg_vec: +; X86: ## %bb.0: +; X86-NEXT: cvtdq2ps %xmm0, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test_with_nneg_vec: +; X64: ## %bb.0: +; X64-NEXT: cvtdq2ps %xmm0, %xmm0 +; X64-NEXT: retq + %r = uitofp nneg <4 x i32> %x to <4 x float> + ret <4 x float> %r +} diff --git a/llvm/test/CodeGen/X86/vec_cast.ll b/llvm/test/CodeGen/X86/vec_cast.ll index 0a6bc2f59b685..e0089354cc953 100644 --- a/llvm/test/CodeGen/X86/vec_cast.ll +++ b/llvm/test/CodeGen/X86/vec_cast.ll @@ -156,7 +156,7 @@ define <3 x i16> @h(<3 x i32> %a) nounwind { ; CHECK-WIN-LABEL: h: ; CHECK-WIN: # %bb.0: ; CHECK-WIN-NEXT: movdqa (%rcx), %xmm0 -; CHECK-WIN-NEXT: movd %xmm0, %eax +; CHECK-WIN-NEXT: movl (%rcx), %eax ; CHECK-WIN-NEXT: pextrw $2, %xmm0, %edx ; CHECK-WIN-NEXT: pextrw $4, %xmm0, %ecx ; CHECK-WIN-NEXT: # kill: def $ax killed $ax killed $eax diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll index 563cf01650134..a360cf8ca83d0 100644 --- a/llvm/test/CodeGen/X86/vector-half-conversions.ll +++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll @@ -5025,16 +5025,14 @@ define <4 x i32> @fptosi_4f16_to_4i32(<4 x half> %a) nounwind { ; F16C-LABEL: fptosi_4f16_to_4i32: ; F16C: # %bb.0: ; F16C-NEXT: vcvtph2ps %xmm0, %ymm0 -; F16C-NEXT: vcvttps2dq %ymm0, %ymm0 -; F16C-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; F16C-NEXT: vcvttps2dq %xmm0, %xmm0 ; F16C-NEXT: vzeroupper ; F16C-NEXT: retq ; ; AVX512-LABEL: fptosi_4f16_to_4i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0 -; AVX512-NEXT: vcvttps2dq %ymm0, %ymm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %cvt = fptosi <4 x half> %a to <4 x i32> diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll index 68b180ef52565..f054c7edfff16 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i16-stride-4.ll @@ -1589,104 +1589,81 @@ define void @store_i16_stride4_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; ; AVX512-LABEL: store_i16_stride4_vf32: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa (%rcx), %xmm0 -; AVX512-NEXT: vmovdqa 16(%rcx), %xmm2 -; AVX512-NEXT: vmovdqa 32(%rcx), %xmm9 -; AVX512-NEXT: vmovdqa 48(%rcx), %xmm5 -; AVX512-NEXT: vmovdqa (%rdx), %xmm1 -; AVX512-NEXT: vmovdqa 16(%rdx), %xmm3 -; AVX512-NEXT: vmovdqa 32(%rdx), %xmm10 -; AVX512-NEXT: vmovdqa 48(%rdx), %xmm6 -; 
AVX512-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm7 = xmm4[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm7, %ymm4 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 -; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm13 -; AVX512-NEXT: vmovdqa (%rsi), %xmm2 -; AVX512-NEXT: vmovdqa 16(%rsi), %xmm4 -; AVX512-NEXT: vmovdqa 32(%rsi), %xmm11 -; AVX512-NEXT: vmovdqa 48(%rsi), %xmm7 -; AVX512-NEXT: vmovdqa (%rdi), %xmm3 -; AVX512-NEXT: vmovdqa 16(%rdi), %xmm14 -; AVX512-NEXT: vmovdqa 32(%rdi), %xmm12 -; AVX512-NEXT: vmovdqa 48(%rdi), %xmm8 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm14[4],xmm4[4],xmm14[5],xmm4[5],xmm14[6],xmm4[6],xmm14[7],xmm4[7] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm15[0],zero,xmm15[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[2,2,3,3] -; AVX512-NEXT: vinserti32x4 $1, %xmm15, %ymm16, %ymm15 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm14[0],xmm4[0],xmm14[1],xmm4[1],xmm14[2],xmm4[2],xmm14[3],xmm4[3] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm14 = xmm4[0],zero,xmm4[1],zero +; AVX512-NEXT: vmovdqa (%rsi), %xmm0 +; AVX512-NEXT: vmovdqa 16(%rsi), %xmm1 +; AVX512-NEXT: vmovdqa 32(%rsi), %xmm10 +; AVX512-NEXT: vmovdqa 48(%rsi), %xmm6 +; AVX512-NEXT: vmovdqa (%rdi), %xmm2 +; AVX512-NEXT: vmovdqa 16(%rdi), %xmm3 +; AVX512-NEXT: vmovdqa 32(%rdi), %xmm11 +; AVX512-NEXT: vmovdqa 48(%rdi), %xmm7 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm14, %ymm4 -; AVX512-NEXT: vinserti64x4 $1, %ymm15, %zmm4, %zmm4 +; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero +; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] +; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1 +; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1 +; AVX512-NEXT: vmovdqa (%rcx), %xmm3 +; AVX512-NEXT: vmovdqa 16(%rcx), %xmm5 +; AVX512-NEXT: vmovdqa 32(%rcx), %xmm12 +; AVX512-NEXT: vmovdqa 48(%rcx), %xmm8 +; AVX512-NEXT: vmovdqa (%rdx), %xmm4 +; AVX512-NEXT: vmovdqa 16(%rdx), %xmm13 +; AVX512-NEXT: vmovdqa 32(%rdx), %xmm14 +; AVX512-NEXT: vmovdqa 48(%rdx), %xmm9 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm13[4],xmm5[4],xmm13[5],xmm5[5],xmm13[6],xmm5[6],xmm13[7],xmm5[7] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm13 = xmm13[0],xmm5[0],xmm13[1],xmm5[1],xmm13[2],xmm5[2],xmm13[3],xmm5[3] +; AVX512-NEXT: vpmovsxbd {{.*#+}} zmm5 = [0,0,1,1,2,2,3,3,16,16,17,17,18,18,19,19] +; AVX512-NEXT: vpermt2d %zmm15, %zmm5, %zmm13 ; AVX512-NEXT: movw $-21846, %ax # imm = 0xAAAA ; AVX512-NEXT: kmovw %eax, %k1 -; AVX512-NEXT: vmovdqa32 %zmm13, %zmm4 {%k1} -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm13 = xmm10[4],xmm9[4],xmm10[5],xmm9[5],xmm10[6],xmm9[6],xmm10[7],xmm9[7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm14 = xmm13[0,0,1,1] +; AVX512-NEXT: vmovdqa32 %zmm13, %zmm1 {%k1} +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm13 = xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7] +; 
AVX512-NEXT: vpmovzxdq {{.*#+}} xmm15 = xmm13[0],zero,xmm13[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm13, %ymm14, %ymm13 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm9 = xmm10[0],xmm9[0],xmm10[1],xmm9[1],xmm10[2],xmm9[2],xmm10[3],xmm9[3] -; AVX512-NEXT: vpshufd {{.*#+}} xmm10 = xmm9[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm9 = xmm9[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm9, %ymm10, %ymm9 -; AVX512-NEXT: vinserti64x4 $1, %ymm13, %zmm9, %zmm9 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm10 = xmm12[4],xmm11[4],xmm12[5],xmm11[5],xmm12[6],xmm11[6],xmm12[7],xmm11[7] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm13 = xmm10[0],zero,xmm10[1],zero +; AVX512-NEXT: vinserti128 $1, %xmm13, %ymm15, %ymm13 +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm10 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm11 = xmm10[0],zero,xmm10[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm10 = xmm10[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm10, %ymm13, %ymm10 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm11 = xmm12[0],xmm11[0],xmm12[1],xmm11[1],xmm12[2],xmm11[2],xmm12[3],xmm11[3] +; AVX512-NEXT: vinserti128 $1, %xmm10, %ymm11, %ymm10 +; AVX512-NEXT: vinserti64x4 $1, %ymm13, %zmm10, %zmm10 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm11 = xmm14[4],xmm12[4],xmm14[5],xmm12[5],xmm14[6],xmm12[6],xmm14[7],xmm12[7] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm12 = xmm14[0],xmm12[0],xmm14[1],xmm12[1],xmm14[2],xmm12[2],xmm14[3],xmm12[3] +; AVX512-NEXT: vpermt2d %zmm11, %zmm5, %zmm12 +; AVX512-NEXT: vmovdqa32 %zmm12, %zmm10 {%k1} +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm11 = xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7] ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm12 = xmm11[0],zero,xmm11[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm11 = xmm11[2,2,3,3] ; AVX512-NEXT: vinserti128 $1, %xmm11, %ymm12, %ymm11 -; AVX512-NEXT: vinserti64x4 $1, %ymm10, %zmm11, %zmm10 -; AVX512-NEXT: vmovdqa32 %zmm9, %zmm10 {%k1} -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm9 = xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm11 = xmm9[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm9 = xmm9[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm9, %ymm11, %ymm9 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] -; AVX512-NEXT: vpshufd {{.*#+}} xmm6 = xmm5[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm5 -; AVX512-NEXT: vinserti64x4 $1, %ymm9, %zmm5, %zmm5 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm8[4],xmm7[4],xmm8[5],xmm7[5],xmm8[6],xmm7[6],xmm8[7],xmm7[7] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm9 = xmm6[0],zero,xmm6[1],zero +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm7 = xmm6[0],zero,xmm6[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm6, %ymm9, %ymm6 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3] +; AVX512-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6 +; AVX512-NEXT: vinserti64x4 $1, %ymm11, %zmm6, %zmm6 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm7 = xmm9[4],xmm8[4],xmm9[5],xmm8[5],xmm9[6],xmm8[6],xmm9[7],xmm8[7] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm9[0],xmm8[0],xmm9[1],xmm8[1],xmm9[2],xmm8[2],xmm9[3],xmm8[3] +; AVX512-NEXT: vpermt2d %zmm7, %zmm5, %zmm8 +; AVX512-NEXT: vmovdqa32 %zmm8, %zmm6 {%k1} +; 
AVX512-NEXT: vpunpckhwd {{.*#+}} xmm7 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm8 = xmm7[0],zero,xmm7[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm7 = xmm7[2,2,3,3] ; AVX512-NEXT: vinserti128 $1, %xmm7, %ymm8, %ymm7 -; AVX512-NEXT: vinserti64x4 $1, %ymm6, %zmm7, %zmm6 -; AVX512-NEXT: vmovdqa32 %zmm5, %zmm6 {%k1} -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm7 = xmm5[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm5 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm5, %zmm0, %zmm0 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm1[0],zero,xmm1[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm5, %ymm1 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 -; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; AVX512-NEXT: vmovdqa64 %zmm1, (%r8) +; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 +; AVX512-NEXT: vinserti64x4 $1, %ymm7, %zmm0, %zmm0 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; AVX512-NEXT: vpermt2d %zmm2, %zmm5, %zmm3 +; AVX512-NEXT: vmovdqa32 %zmm3, %zmm0 {%k1} +; AVX512-NEXT: vmovdqa64 %zmm0, (%r8) ; AVX512-NEXT: vmovdqa64 %zmm6, 192(%r8) ; AVX512-NEXT: vmovdqa64 %zmm10, 128(%r8) -; AVX512-NEXT: vmovdqa64 %zmm4, 64(%r8) +; AVX512-NEXT: vmovdqa64 %zmm1, 64(%r8) ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq ; @@ -1772,100 +1749,81 @@ define void @store_i16_stride4_vf32(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; ; AVX512DQ-LABEL: store_i16_stride4_vf32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovdqa (%rsi), %xmm1 -; AVX512DQ-NEXT: vmovdqa 16(%rsi), %xmm0 -; AVX512DQ-NEXT: vmovdqa 32(%rsi), %xmm7 -; AVX512DQ-NEXT: vmovdqa 48(%rsi), %xmm5 +; AVX512DQ-NEXT: vmovdqa (%rsi), %xmm0 +; AVX512DQ-NEXT: vmovdqa 16(%rsi), %xmm1 +; AVX512DQ-NEXT: vmovdqa 32(%rsi), %xmm10 +; AVX512DQ-NEXT: vmovdqa 48(%rsi), %xmm6 ; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm2 ; AVX512DQ-NEXT: vmovdqa 16(%rdi), %xmm3 -; AVX512DQ-NEXT: vmovdqa 32(%rdi), %xmm10 -; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm6 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm8 = xmm4[0],zero,xmm4[1],zero +; AVX512DQ-NEXT: vmovdqa 32(%rdi), %xmm11 +; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm7 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] +; AVX512DQ-NEXT: 
vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm8, %ymm4 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 +; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm1, %zmm1 ; AVX512DQ-NEXT: vmovdqa (%rcx), %xmm3 -; AVX512DQ-NEXT: vmovdqa 16(%rcx), %xmm13 -; AVX512DQ-NEXT: vmovdqa 32(%rcx), %xmm11 +; AVX512DQ-NEXT: vmovdqa 16(%rcx), %xmm5 +; AVX512DQ-NEXT: vmovdqa 32(%rcx), %xmm12 ; AVX512DQ-NEXT: vmovdqa 48(%rcx), %xmm8 ; AVX512DQ-NEXT: vmovdqa (%rdx), %xmm4 -; AVX512DQ-NEXT: vmovdqa 16(%rdx), %xmm14 -; AVX512DQ-NEXT: vmovdqa 32(%rdx), %xmm12 +; AVX512DQ-NEXT: vmovdqa 16(%rdx), %xmm13 +; AVX512DQ-NEXT: vmovdqa 32(%rdx), %xmm14 ; AVX512DQ-NEXT: vmovdqa 48(%rdx), %xmm9 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm14[4],xmm13[4],xmm14[5],xmm13[5],xmm14[6],xmm13[6],xmm14[7],xmm13[7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm16 = xmm15[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[2,2,3,3] -; AVX512DQ-NEXT: vinserti32x4 $1, %xmm15, %ymm16, %ymm15 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm13 = xmm14[0],xmm13[0],xmm14[1],xmm13[1],xmm14[2],xmm13[2],xmm14[3],xmm13[3] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm14 = xmm13[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm13, %ymm14, %ymm13 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm13[4],xmm5[4],xmm13[5],xmm5[5],xmm13[6],xmm5[6],xmm13[7],xmm5[7] +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm13 = xmm13[0],xmm5[0],xmm13[1],xmm5[1],xmm13[2],xmm5[2],xmm13[3],xmm5[3] +; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} zmm5 = [0,0,1,1,2,2,3,3,16,16,17,17,18,18,19,19] +; AVX512DQ-NEXT: vpermt2d %zmm15, %zmm5, %zmm13 ; AVX512DQ-NEXT: movw $-21846, %ax # imm = 0xAAAA ; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: vinserti32x8 $1, %ymm15, %zmm13, %zmm0 {%k1} -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm13 = xmm10[4],xmm7[4],xmm10[5],xmm7[5],xmm10[6],xmm7[6],xmm10[7],xmm7[7] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm14 = xmm13[0],zero,xmm13[1],zero +; AVX512DQ-NEXT: vmovdqa32 %zmm13, %zmm1 {%k1} +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm13 = xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm15 = xmm13[0],zero,xmm13[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm13, %ymm14, %ymm13 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm10[0],xmm7[0],xmm10[1],xmm7[1],xmm10[2],xmm7[2],xmm10[3],xmm7[3] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm10 = xmm7[0],zero,xmm7[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm7 = xmm7[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm7, %ymm10, %ymm7 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm13, %zmm7, %zmm7 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm10 = xmm12[4],xmm11[4],xmm12[5],xmm11[5],xmm12[6],xmm11[6],xmm12[7],xmm11[7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm13 = 
xmm10[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm10 = xmm10[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm10, %ymm13, %ymm10 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm11 = xmm12[0],xmm11[0],xmm12[1],xmm11[1],xmm12[2],xmm11[2],xmm12[3],xmm11[3] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm12 = xmm11[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm11 = xmm11[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm11, %ymm12, %ymm11 -; AVX512DQ-NEXT: vinserti32x8 $1, %ymm10, %zmm11, %zmm7 {%k1} -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm10 = xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] +; AVX512DQ-NEXT: vinserti128 $1, %xmm13, %ymm15, %ymm13 +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm10 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3] ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm11 = xmm10[0],zero,xmm10[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm10 = xmm10[2,2,3,3] ; AVX512DQ-NEXT: vinserti128 $1, %xmm10, %ymm11, %ymm10 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm5[0],zero,xmm5[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm5 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm10, %zmm5, %zmm5 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm9[4],xmm8[4],xmm9[5],xmm8[5],xmm9[6],xmm8[6],xmm9[7],xmm8[7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm10 = xmm6[0,0,1,1] +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm13, %zmm10, %zmm10 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm11 = xmm14[4],xmm12[4],xmm14[5],xmm12[5],xmm14[6],xmm12[6],xmm14[7],xmm12[7] +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm12 = xmm14[0],xmm12[0],xmm14[1],xmm12[1],xmm14[2],xmm12[2],xmm14[3],xmm12[3] +; AVX512DQ-NEXT: vpermt2d %zmm11, %zmm5, %zmm12 +; AVX512DQ-NEXT: vmovdqa32 %zmm12, %zmm10 {%k1} +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm11 = xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm12 = xmm11[0],zero,xmm11[1],zero +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm11 = xmm11[2,2,3,3] +; AVX512DQ-NEXT: vinserti128 $1, %xmm11, %ymm12, %ymm11 +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm7 = xmm6[0],zero,xmm6[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm6, %ymm10, %ymm6 +; AVX512DQ-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm11, %zmm6, %zmm6 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm7 = xmm9[4],xmm8[4],xmm9[5],xmm8[5],xmm9[6],xmm8[6],xmm9[7],xmm8[7] ; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm9[0],xmm8[0],xmm9[1],xmm8[1],xmm9[2],xmm8[2],xmm9[3],xmm8[3] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm9 = xmm8[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm8 = xmm8[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm8 -; AVX512DQ-NEXT: vinserti32x8 $1, %ymm6, %zmm8, %zmm5 {%k1} -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm8 = xmm6[0],zero,xmm6[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm6 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 
$1, %xmm1, %ymm2, %ymm1 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1 +; AVX512DQ-NEXT: vpermt2d %zmm7, %zmm5, %zmm8 +; AVX512DQ-NEXT: vmovdqa32 %zmm8, %zmm6 {%k1} +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm7 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm8 = xmm7[0],zero,xmm7[1],zero +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm7 = xmm7[2,2,3,3] +; AVX512DQ-NEXT: vinserti128 $1, %xmm7, %ymm8, %ymm7 +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm7, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm6, %ymm2 ; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 -; AVX512DQ-NEXT: vinserti32x8 $1, %ymm2, %zmm3, %zmm1 {%k1} -; AVX512DQ-NEXT: vmovdqa64 %zmm1, (%r8) -; AVX512DQ-NEXT: vmovdqa64 %zmm5, 192(%r8) -; AVX512DQ-NEXT: vmovdqa64 %zmm7, 128(%r8) -; AVX512DQ-NEXT: vmovdqa64 %zmm0, 64(%r8) +; AVX512DQ-NEXT: vpermt2d %zmm2, %zmm5, %zmm3 +; AVX512DQ-NEXT: vmovdqa32 %zmm3, %zmm0 {%k1} +; AVX512DQ-NEXT: vmovdqa64 %zmm0, (%r8) +; AVX512DQ-NEXT: vmovdqa64 %zmm6, 192(%r8) +; AVX512DQ-NEXT: vmovdqa64 %zmm10, 128(%r8) +; AVX512DQ-NEXT: vmovdqa64 %zmm1, 64(%r8) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; @@ -3112,201 +3070,155 @@ define void @store_i16_stride4_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; ; AVX512-LABEL: store_i16_stride4_vf64: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa64 (%rcx), %xmm20 -; AVX512-NEXT: vmovdqa 16(%rcx), %xmm2 -; AVX512-NEXT: vmovdqa 32(%rcx), %xmm10 -; AVX512-NEXT: vmovdqa 48(%rcx), %xmm5 -; AVX512-NEXT: vmovdqa (%rdx), %xmm1 -; AVX512-NEXT: vmovdqa 16(%rdx), %xmm3 -; AVX512-NEXT: vmovdqa 32(%rdx), %xmm11 -; AVX512-NEXT: vmovdqa 48(%rdx), %xmm6 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm7 = xmm4[0,0,1,1] +; AVX512-NEXT: vmovdqa64 (%rsi), %xmm19 +; AVX512-NEXT: vmovdqa 16(%rsi), %xmm0 +; AVX512-NEXT: vmovdqa 32(%rsi), %xmm11 +; AVX512-NEXT: vmovdqa 48(%rsi), %xmm6 +; AVX512-NEXT: vmovdqa64 (%rdi), %xmm20 +; AVX512-NEXT: vmovdqa 16(%rdi), %xmm3 +; AVX512-NEXT: vmovdqa 32(%rdi), %xmm12 +; AVX512-NEXT: vmovdqa 48(%rdi), %xmm7 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm7, %ymm4 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 -; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm9 -; AVX512-NEXT: vmovdqa (%rsi), %xmm2 -; AVX512-NEXT: vmovdqa 16(%rsi), 
%xmm3 -; AVX512-NEXT: vmovdqa 32(%rsi), %xmm12 -; AVX512-NEXT: vmovdqa 48(%rsi), %xmm7 -; AVX512-NEXT: vmovdqa (%rdi), %xmm4 -; AVX512-NEXT: vmovdqa 16(%rdi), %xmm14 -; AVX512-NEXT: vmovdqa 32(%rdi), %xmm13 -; AVX512-NEXT: vmovdqa 48(%rdi), %xmm8 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm14[4],xmm3[4],xmm14[5],xmm3[5],xmm14[6],xmm3[6],xmm14[7],xmm3[7] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm15[0],zero,xmm15[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[2,2,3,3] -; AVX512-NEXT: vinserti32x4 $1, %xmm15, %ymm16, %ymm15 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm14[0],xmm3[0],xmm14[1],xmm3[1],xmm14[2],xmm3[2],xmm14[3],xmm3[3] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm14 = xmm3[0],zero,xmm3[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm14, %ymm3 -; AVX512-NEXT: vinserti64x4 $1, %ymm15, %zmm3, %zmm17 +; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero +; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] +; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0 +; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm18 +; AVX512-NEXT: vmovdqa (%rcx), %xmm3 +; AVX512-NEXT: vmovdqa 16(%rcx), %xmm5 +; AVX512-NEXT: vmovdqa 32(%rcx), %xmm13 +; AVX512-NEXT: vmovdqa 48(%rcx), %xmm9 +; AVX512-NEXT: vmovdqa (%rdx), %xmm4 +; AVX512-NEXT: vmovdqa 16(%rdx), %xmm8 +; AVX512-NEXT: vmovdqa 32(%rdx), %xmm14 +; AVX512-NEXT: vmovdqa 48(%rdx), %xmm10 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm8[4],xmm5[4],xmm8[5],xmm5[5],xmm8[6],xmm5[6],xmm8[7],xmm5[7] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm5[0],xmm8[1],xmm5[1],xmm8[2],xmm5[2],xmm8[3],xmm5[3] +; AVX512-NEXT: vpmovsxbd {{.*#+}} zmm5 = [0,0,1,1,2,2,3,3,16,16,17,17,18,18,19,19] +; AVX512-NEXT: vpermt2d %zmm15, %zmm5, %zmm8 ; AVX512-NEXT: movw $-21846, %ax # imm = 0xAAAA ; AVX512-NEXT: kmovw %eax, %k1 -; AVX512-NEXT: vmovdqa32 %zmm9, %zmm17 {%k1} -; AVX512-NEXT: vmovdqa 96(%rcx), %xmm9 -; AVX512-NEXT: vmovdqa 96(%rdx), %xmm14 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm14[4],xmm9[4],xmm14[5],xmm9[5],xmm14[6],xmm9[6],xmm14[7],xmm9[7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm16 = xmm15[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[2,2,3,3] -; AVX512-NEXT: vinserti32x4 $1, %xmm15, %ymm16, %ymm15 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm9 = xmm14[0],xmm9[0],xmm14[1],xmm9[1],xmm14[2],xmm9[2],xmm14[3],xmm9[3] -; AVX512-NEXT: vpshufd {{.*#+}} xmm14 = xmm9[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm9 = xmm9[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm9, %ymm14, %ymm9 -; AVX512-NEXT: vinserti64x4 $1, %ymm15, %zmm9, %zmm14 -; AVX512-NEXT: vmovdqa 96(%rsi), %xmm9 +; AVX512-NEXT: vmovdqa32 %zmm8, %zmm18 {%k1} +; AVX512-NEXT: vmovdqa 96(%rsi), %xmm8 ; AVX512-NEXT: vmovdqa 96(%rdi), %xmm15 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm15[4],xmm9[4],xmm15[5],xmm9[5],xmm15[6],xmm9[6],xmm15[7],xmm9[7] +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7] ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm0[0],zero,xmm0[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] ; AVX512-NEXT: vinserti32x4 $1, %xmm0, %ymm16, %ymm0 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm9 = xmm15[0],xmm9[0],xmm15[1],xmm9[1],xmm15[2],xmm9[2],xmm15[3],xmm9[3] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm15 = xmm9[0],zero,xmm9[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm9 = xmm9[2,2,3,3] -; 
AVX512-NEXT: vinserti128 $1, %xmm9, %ymm15, %ymm9 -; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm9, %zmm18 -; AVX512-NEXT: vmovdqa32 %zmm14, %zmm18 {%k1} -; AVX512-NEXT: vmovdqa 112(%rcx), %xmm0 -; AVX512-NEXT: vmovdqa 112(%rdx), %xmm14 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm14[4],xmm0[4],xmm14[5],xmm0[5],xmm14[6],xmm0[6],xmm14[7],xmm0[7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm16 = xmm15[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[2,2,3,3] -; AVX512-NEXT: vinserti32x4 $1, %xmm15, %ymm16, %ymm15 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm14[0],xmm0[0],xmm14[1],xmm0[1],xmm14[2],xmm0[2],xmm14[3],xmm0[3] -; AVX512-NEXT: vpshufd {{.*#+}} xmm14 = xmm0[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm14, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm15, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa 112(%rsi), %xmm14 -; AVX512-NEXT: vmovdqa 112(%rdi), %xmm15 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm15[4],xmm14[4],xmm15[5],xmm14[5],xmm15[6],xmm14[6],xmm15[7],xmm14[7] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm3[0],zero,xmm3[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3] -; AVX512-NEXT: vinserti32x4 $1, %xmm3, %ymm16, %ymm3 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm14 = xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm15 = xmm14[0],zero,xmm14[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm14 = xmm14[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm14, %ymm15, %ymm14 -; AVX512-NEXT: vinserti64x4 $1, %ymm3, %zmm14, %zmm19 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm19 {%k1} -; AVX512-NEXT: vmovdqa 64(%rcx), %xmm0 -; AVX512-NEXT: vmovdqa 64(%rdx), %xmm3 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm16 = xmm15[0,0,1,1] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm15 = xmm8[0],zero,xmm8[1],zero +; AVX512-NEXT: vpshufd {{.*#+}} xmm8 = xmm8[2,2,3,3] +; AVX512-NEXT: vinserti128 $1, %xmm8, %ymm15, %ymm8 +; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm8 +; AVX512-NEXT: vmovdqa 96(%rcx), %xmm0 +; AVX512-NEXT: vmovdqa 96(%rdx), %xmm15 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3] +; AVX512-NEXT: vpermt2d %zmm1, %zmm5, %zmm0 +; AVX512-NEXT: vmovdqa32 %zmm0, %zmm8 {%k1} +; AVX512-NEXT: vmovdqa 112(%rsi), %xmm0 +; AVX512-NEXT: vmovdqa 112(%rdi), %xmm1 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm15[0],zero,xmm15[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[2,2,3,3] ; AVX512-NEXT: vinserti32x4 $1, %xmm15, %ymm16, %ymm15 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[0,0,1,1] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm15, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa 64(%rsi), %xmm3 -; AVX512-NEXT: vmovdqa 64(%rdi), %xmm15 -; 
AVX512-NEXT: vpunpckhwd {{.*#+}} xmm9 = xmm15[4],xmm3[4],xmm15[5],xmm3[5],xmm15[6],xmm3[6],xmm15[7],xmm3[7] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm9[0],zero,xmm9[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm9 = xmm9[2,2,3,3] -; AVX512-NEXT: vinserti32x4 $1, %xmm9, %ymm16, %ymm9 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm15[0],xmm3[0],xmm15[1],xmm3[1],xmm15[2],xmm3[2],xmm15[3],xmm3[3] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm15 = xmm3[0],zero,xmm3[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm15, %ymm3 -; AVX512-NEXT: vinserti64x4 $1, %ymm9, %zmm3, %zmm15 +; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: vinserti64x4 $1, %ymm15, %zmm0, %zmm15 +; AVX512-NEXT: vmovdqa 112(%rcx), %xmm0 +; AVX512-NEXT: vmovdqa 112(%rdx), %xmm1 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512-NEXT: vpermt2d %zmm2, %zmm5, %zmm0 ; AVX512-NEXT: vmovdqa32 %zmm0, %zmm15 {%k1} -; AVX512-NEXT: vmovdqa 80(%rcx), %xmm0 -; AVX512-NEXT: vmovdqa 80(%rdx), %xmm3 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm9 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm16 = xmm9[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm9 = xmm9[2,2,3,3] -; AVX512-NEXT: vinserti32x4 $1, %xmm9, %ymm16, %ymm9 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[0,0,1,1] +; AVX512-NEXT: vmovdqa 64(%rsi), %xmm0 +; AVX512-NEXT: vmovdqa 64(%rdi), %xmm1 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm2[0],zero,xmm2[1],zero +; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,2,3,3] +; AVX512-NEXT: vinserti32x4 $1, %xmm2, %ymm16, %ymm2 +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0 -; AVX512-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa 80(%rsi), %xmm3 -; AVX512-NEXT: vmovdqa 80(%rdi), %xmm9 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm14 = xmm9[4],xmm3[4],xmm9[5],xmm3[5],xmm9[6],xmm3[6],xmm9[7],xmm3[7] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm14[0],zero,xmm14[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm14 = xmm14[2,2,3,3] -; AVX512-NEXT: vinserti32x4 $1, %xmm14, %ymm16, %ymm14 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm9[0],xmm3[0],xmm9[1],xmm3[1],xmm9[2],xmm3[2],xmm9[3],xmm3[3] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm9 = xmm3[0],zero,xmm3[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm9, %ymm3 -; AVX512-NEXT: vinserti64x4 $1, %ymm14, %zmm3, %zmm16 +; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm16 +; AVX512-NEXT: vmovdqa 64(%rcx), %xmm0 +; AVX512-NEXT: vmovdqa 64(%rdx), %xmm1 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512-NEXT: vpermt2d %zmm2, %zmm5, %zmm0 ; AVX512-NEXT: vmovdqa32 %zmm0, %zmm16 {%k1} -; AVX512-NEXT: vpunpckhwd 
{{.*#+}} xmm0 = xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3] -; AVX512-NEXT: vpshufd {{.*#+}} xmm9 = xmm3[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm9, %ymm3 -; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm13[4],xmm12[4],xmm13[5],xmm12[5],xmm13[6],xmm12[6],xmm13[7],xmm12[7] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm9 = xmm3[0],zero,xmm3[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm9, %ymm3 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm9 = xmm13[0],xmm12[0],xmm13[1],xmm12[1],xmm13[2],xmm12[2],xmm13[3],xmm12[3] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm10 = xmm9[0],zero,xmm9[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm9 = xmm9[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm9, %ymm10, %ymm9 -; AVX512-NEXT: vinserti64x4 $1, %ymm3, %zmm9, %zmm3 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm3 {%k1} -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm9 = xmm0[0,0,1,1] +; AVX512-NEXT: vmovdqa 80(%rsi), %xmm0 +; AVX512-NEXT: vmovdqa 80(%rdi), %xmm1 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm17 = xmm2[0],zero,xmm2[1],zero +; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,2,3,3] +; AVX512-NEXT: vinserti32x4 $1, %xmm2, %ymm17, %ymm2 +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm9, %ymm0 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] -; AVX512-NEXT: vpshufd {{.*#+}} xmm6 = xmm5[0,0,1,1] -; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm5 -; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm5, %zmm0 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm8[4],xmm7[4],xmm8[5],xmm7[5],xmm8[6],xmm7[6],xmm8[7],xmm7[7] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm5[0],zero,xmm5[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm5 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm8[0],xmm7[0],xmm8[1],xmm7[1],xmm8[2],xmm7[2],xmm8[3],xmm7[3] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm7 = xmm6[0],zero,xmm6[1],zero -; AVX512-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6 -; AVX512-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm5 {%k1} -; AVX512-NEXT: vmovdqa64 %xmm20, %xmm7 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm7[4],xmm1[5],xmm7[5],xmm1[6],xmm7[6],xmm1[7],xmm7[7] -; AVX512-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[0,0,1,1] +; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm17 +; AVX512-NEXT: vmovdqa 80(%rcx), %xmm0 +; AVX512-NEXT: vmovdqa 80(%rdx), %xmm1 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512-NEXT: 
vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512-NEXT: vpermt2d %zmm2, %zmm5, %zmm0 +; AVX512-NEXT: vmovdqa32 %zmm0, %zmm17 {%k1} +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm12[4],xmm11[4],xmm12[5],xmm11[5],xmm12[6],xmm11[6],xmm12[7],xmm11[7] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm6, %ymm0 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] -; AVX512-NEXT: vpshufd {{.*#+}} xmm6 = xmm1[0,0,1,1] +; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm12[0],xmm11[0],xmm12[1],xmm11[1],xmm12[2],xmm11[2],xmm12[3],xmm11[3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm6, %ymm1 +; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 -; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm1[0],zero,xmm1[1],zero +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm14[4],xmm13[4],xmm14[5],xmm13[5],xmm14[6],xmm13[6],xmm14[7],xmm13[7] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm14[0],xmm13[0],xmm14[1],xmm13[1],xmm14[2],xmm13[2],xmm14[3],xmm13[3] +; AVX512-NEXT: vpermt2d %zmm1, %zmm5, %zmm2 +; AVX512-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm6, %ymm1 -; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] -; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm2[0],zero,xmm2[1],zero +; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm2[0],zero,xmm2[1],zero ; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,2,3,3] -; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 +; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm6, %ymm2 ; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1} -; AVX512-NEXT: vmovdqa64 %zmm1, (%r8) -; AVX512-NEXT: vmovdqa64 %zmm5, 192(%r8) -; AVX512-NEXT: vmovdqa64 %zmm3, 128(%r8) -; AVX512-NEXT: vmovdqa64 %zmm16, 320(%r8) -; AVX512-NEXT: vmovdqa64 %zmm15, 256(%r8) -; AVX512-NEXT: vmovdqa64 %zmm19, 448(%r8) -; AVX512-NEXT: vmovdqa64 %zmm18, 384(%r8) -; AVX512-NEXT: vmovdqa64 %zmm17, 64(%r8) +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm10[4],xmm9[4],xmm10[5],xmm9[5],xmm10[6],xmm9[6],xmm10[7],xmm9[7] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm10[0],xmm9[0],xmm10[1],xmm9[1],xmm10[2],xmm9[2],xmm10[3],xmm9[3] +; AVX512-NEXT: vpermt2d %zmm2, %zmm5, %zmm6 +; AVX512-NEXT: vmovdqa32 %zmm6, %zmm1 {%k1} +; AVX512-NEXT: vmovdqa64 %xmm19, %xmm7 +; AVX512-NEXT: vmovdqa64 %xmm20, %xmm9 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm9[4],xmm7[4],xmm9[5],xmm7[5],xmm9[6],xmm7[6],xmm9[7],xmm7[7] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm2[0],zero,xmm2[1],zero +; AVX512-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,2,3,3] +; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm6, %ymm2 +; AVX512-NEXT: vpunpcklwd {{.*#+}} 
xmm6 = xmm9[0],xmm7[0],xmm9[1],xmm7[1],xmm9[2],xmm7[2],xmm9[3],xmm7[3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm7 = xmm6[0],zero,xmm6[1],zero +; AVX512-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,2,3,3] +; AVX512-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6 +; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm6, %zmm2 +; AVX512-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] +; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; AVX512-NEXT: vpermt2d %zmm6, %zmm5, %zmm3 +; AVX512-NEXT: vmovdqa32 %zmm3, %zmm2 {%k1} +; AVX512-NEXT: vmovdqa64 %zmm2, (%r8) +; AVX512-NEXT: vmovdqa64 %zmm1, 192(%r8) +; AVX512-NEXT: vmovdqa64 %zmm0, 128(%r8) +; AVX512-NEXT: vmovdqa64 %zmm17, 320(%r8) +; AVX512-NEXT: vmovdqa64 %zmm16, 256(%r8) +; AVX512-NEXT: vmovdqa64 %zmm15, 448(%r8) +; AVX512-NEXT: vmovdqa64 %zmm8, 384(%r8) +; AVX512-NEXT: vmovdqa64 %zmm18, 64(%r8) ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq ; @@ -3466,192 +3378,155 @@ define void @store_i16_stride4_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.ve ; ; AVX512DQ-LABEL: store_i16_stride4_vf64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovdqa (%rsi), %xmm1 +; AVX512DQ-NEXT: vmovdqa64 (%rsi), %xmm19 ; AVX512DQ-NEXT: vmovdqa 16(%rsi), %xmm0 -; AVX512DQ-NEXT: vmovdqa 32(%rsi), %xmm10 -; AVX512DQ-NEXT: vmovdqa 48(%rsi), %xmm5 -; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm2 +; AVX512DQ-NEXT: vmovdqa 32(%rsi), %xmm11 +; AVX512DQ-NEXT: vmovdqa 48(%rsi), %xmm6 +; AVX512DQ-NEXT: vmovdqa64 (%rdi), %xmm20 ; AVX512DQ-NEXT: vmovdqa 16(%rdi), %xmm3 -; AVX512DQ-NEXT: vmovdqa 32(%rdi), %xmm11 +; AVX512DQ-NEXT: vmovdqa 32(%rdi), %xmm12 ; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm7 ; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm4[0],zero,xmm4[1],zero +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm5 = xmm4[0],zero,xmm4[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm6, %ymm4 +; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 ; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm17 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm18 ; AVX512DQ-NEXT: vmovdqa (%rcx), %xmm3 -; AVX512DQ-NEXT: vmovdqa 16(%rcx), %xmm6 -; AVX512DQ-NEXT: vmovdqa 32(%rcx), %xmm12 -; AVX512DQ-NEXT: vmovdqa 48(%rcx), %xmm8 +; AVX512DQ-NEXT: vmovdqa 16(%rcx), %xmm5 +; AVX512DQ-NEXT: vmovdqa 32(%rcx), %xmm13 +; AVX512DQ-NEXT: vmovdqa 48(%rcx), %xmm9 ; AVX512DQ-NEXT: vmovdqa (%rdx), %xmm4 -; AVX512DQ-NEXT: vmovdqa 16(%rdx), %xmm13 +; AVX512DQ-NEXT: vmovdqa 16(%rdx), %xmm8 ; AVX512DQ-NEXT: vmovdqa 32(%rdx), %xmm14 -; AVX512DQ-NEXT: vmovdqa 48(%rdx), %xmm9 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm13[4],xmm6[4],xmm13[5],xmm6[5],xmm13[6],xmm6[6],xmm13[7],xmm6[7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm16 = xmm15[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[2,2,3,3] -; AVX512DQ-NEXT: vinserti32x4 $1, %xmm15, %ymm16, %ymm15 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm13[0],xmm6[0],xmm13[1],xmm6[1],xmm13[2],xmm6[2],xmm13[3],xmm6[3] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm13 = xmm6[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,2,3,3] -; AVX512DQ-NEXT: 
vinserti128 $1, %xmm6, %ymm13, %ymm6 +; AVX512DQ-NEXT: vmovdqa 48(%rdx), %xmm10 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm8[4],xmm5[4],xmm8[5],xmm5[5],xmm8[6],xmm5[6],xmm8[7],xmm5[7] +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm5[0],xmm8[1],xmm5[1],xmm8[2],xmm5[2],xmm8[3],xmm5[3] +; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} zmm5 = [0,0,1,1,2,2,3,3,16,16,17,17,18,18,19,19] +; AVX512DQ-NEXT: vpermt2d %zmm15, %zmm5, %zmm8 ; AVX512DQ-NEXT: movw $-21846, %ax # imm = 0xAAAA ; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: vinserti32x8 $1, %ymm15, %zmm6, %zmm17 {%k1} -; AVX512DQ-NEXT: vmovdqa 96(%rsi), %xmm6 -; AVX512DQ-NEXT: vmovdqa 96(%rdi), %xmm13 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm13[4],xmm6[4],xmm13[5],xmm6[5],xmm13[6],xmm6[6],xmm13[7],xmm6[7] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm15[0],zero,xmm15[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[2,2,3,3] -; AVX512DQ-NEXT: vinserti32x4 $1, %xmm15, %ymm16, %ymm15 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm13[0],xmm6[0],xmm13[1],xmm6[1],xmm13[2],xmm6[2],xmm13[3],xmm6[3] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm13 = xmm6[0],zero,xmm6[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm6, %ymm13, %ymm6 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm15, %zmm6, %zmm18 -; AVX512DQ-NEXT: vmovdqa 96(%rcx), %xmm13 -; AVX512DQ-NEXT: vmovdqa 96(%rdx), %xmm15 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm15[4],xmm13[4],xmm15[5],xmm13[5],xmm15[6],xmm13[6],xmm15[7],xmm13[7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm16 = xmm0[0,0,1,1] +; AVX512DQ-NEXT: vmovdqa32 %zmm8, %zmm18 {%k1} +; AVX512DQ-NEXT: vmovdqa 96(%rsi), %xmm8 +; AVX512DQ-NEXT: vmovdqa 96(%rdi), %xmm15 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm0[0],zero,xmm0[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] ; AVX512DQ-NEXT: vinserti32x4 $1, %xmm0, %ymm16, %ymm0 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm13 = xmm15[0],xmm13[0],xmm15[1],xmm13[1],xmm15[2],xmm13[2],xmm15[3],xmm13[3] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm15 = xmm13[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm13, %ymm15, %ymm13 -; AVX512DQ-NEXT: vinserti32x8 $1, %ymm0, %zmm13, %zmm18 {%k1} +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm15 = xmm8[0],zero,xmm8[1],zero +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm8 = xmm8[2,2,3,3] +; AVX512DQ-NEXT: vinserti128 $1, %xmm8, %ymm15, %ymm8 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm8 +; AVX512DQ-NEXT: vmovdqa 96(%rcx), %xmm0 +; AVX512DQ-NEXT: vmovdqa 96(%rdx), %xmm15 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3] +; AVX512DQ-NEXT: vpermt2d %zmm1, %zmm5, %zmm0 +; AVX512DQ-NEXT: vmovdqa32 %zmm0, %zmm8 {%k1} ; AVX512DQ-NEXT: vmovdqa 112(%rsi), %xmm0 -; AVX512DQ-NEXT: vmovdqa 112(%rdi), %xmm13 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm13[4],xmm0[4],xmm13[5],xmm0[5],xmm13[6],xmm0[6],xmm13[7],xmm0[7] +; AVX512DQ-NEXT: vmovdqa 112(%rdi), %xmm1 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm15[0],zero,xmm15[1],zero ; 
AVX512DQ-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[2,2,3,3] ; AVX512DQ-NEXT: vinserti32x4 $1, %xmm15, %ymm16, %ymm15 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm13[0],xmm0[0],xmm13[1],xmm0[1],xmm13[2],xmm0[2],xmm13[3],xmm0[3] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm13 = xmm0[0],zero,xmm0[1],zero +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm13, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm15, %zmm0, %zmm19 +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm15, %zmm0, %zmm15 ; AVX512DQ-NEXT: vmovdqa 112(%rcx), %xmm0 -; AVX512DQ-NEXT: vmovdqa 112(%rdx), %xmm15 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm16 = xmm6[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,2,3,3] -; AVX512DQ-NEXT: vinserti32x4 $1, %xmm6, %ymm16, %ymm6 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm15 = xmm0[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm15, %ymm0 -; AVX512DQ-NEXT: vinserti32x8 $1, %ymm6, %zmm0, %zmm19 {%k1} +; AVX512DQ-NEXT: vmovdqa 112(%rdx), %xmm1 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512DQ-NEXT: vpermt2d %zmm2, %zmm5, %zmm0 +; AVX512DQ-NEXT: vmovdqa32 %zmm0, %zmm15 {%k1} ; AVX512DQ-NEXT: vmovdqa 64(%rsi), %xmm0 -; AVX512DQ-NEXT: vmovdqa 64(%rdi), %xmm6 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm15[0],zero,xmm15[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[2,2,3,3] -; AVX512DQ-NEXT: vinserti32x4 $1, %xmm15, %ymm16, %ymm15 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero +; AVX512DQ-NEXT: vmovdqa 64(%rdi), %xmm1 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm2[0],zero,xmm2[1],zero +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,2,3,3] +; AVX512DQ-NEXT: vinserti32x4 $1, %xmm2, %ymm16, %ymm2 +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm6, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm15, %zmm0, %zmm15 +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm16 ; AVX512DQ-NEXT: vmovdqa 64(%rcx), %xmm0 -; AVX512DQ-NEXT: vmovdqa 64(%rdx), %xmm6 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm13 = xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm16 = xmm13[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[2,2,3,3] -; AVX512DQ-NEXT: vinserti32x4 $1, %xmm13, %ymm16, %ymm13 -; AVX512DQ-NEXT: 
vpunpcklwd {{.*#+}} xmm0 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm6, %ymm0 -; AVX512DQ-NEXT: vinserti32x8 $1, %ymm13, %zmm0, %zmm15 {%k1} +; AVX512DQ-NEXT: vmovdqa 64(%rdx), %xmm1 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512DQ-NEXT: vpermt2d %zmm2, %zmm5, %zmm0 +; AVX512DQ-NEXT: vmovdqa32 %zmm0, %zmm16 {%k1} ; AVX512DQ-NEXT: vmovdqa 80(%rsi), %xmm0 -; AVX512DQ-NEXT: vmovdqa 80(%rdi), %xmm6 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm13 = xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm16 = xmm13[0],zero,xmm13[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[2,2,3,3] -; AVX512DQ-NEXT: vinserti32x4 $1, %xmm13, %ymm16, %ymm13 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero +; AVX512DQ-NEXT: vmovdqa 80(%rdi), %xmm1 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm17 = xmm2[0],zero,xmm2[1],zero +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,2,3,3] +; AVX512DQ-NEXT: vinserti32x4 $1, %xmm2, %ymm17, %ymm2 +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm6, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm13, %zmm0, %zmm16 +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm17 ; AVX512DQ-NEXT: vmovdqa 80(%rcx), %xmm0 -; AVX512DQ-NEXT: vmovdqa 80(%rdx), %xmm6 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm13 = xmm6[4],xmm0[4],xmm6[5],xmm0[5],xmm6[6],xmm0[6],xmm6[7],xmm0[7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm20 = xmm13[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[2,2,3,3] -; AVX512DQ-NEXT: vinserti32x4 $1, %xmm13, %ymm20, %ymm13 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm6, %ymm0 -; AVX512DQ-NEXT: vinserti32x8 $1, %ymm13, %zmm0, %zmm16 {%k1} -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm6, %ymm0 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm10 = xmm6[0],zero,xmm6[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm6, %ymm10, %ymm6 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm6, %zmm10 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm14[4],xmm12[4],xmm14[5],xmm12[5],xmm14[6],xmm12[6],xmm14[7],xmm12[7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[0,0,1,1] 
+; AVX512DQ-NEXT: vmovdqa 80(%rdx), %xmm1 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX512DQ-NEXT: vpermt2d %zmm2, %zmm5, %zmm0 +; AVX512DQ-NEXT: vmovdqa32 %zmm0, %zmm17 {%k1} +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm12[4],xmm11[4],xmm12[5],xmm11[5],xmm12[6],xmm11[6],xmm12[7],xmm11[7] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm6, %ymm0 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm14[0],xmm12[0],xmm14[1],xmm12[1],xmm14[2],xmm12[2],xmm14[3],xmm12[3] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm11 = xmm6[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm6, %ymm11, %ymm6 -; AVX512DQ-NEXT: vinserti32x8 $1, %ymm0, %zmm6, %zmm10 {%k1} -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm7[4],xmm5[4],xmm7[5],xmm5[5],xmm7[6],xmm5[6],xmm7[7],xmm5[7] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm6, %ymm0 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm7[0],xmm5[0],xmm7[1],xmm5[1],xmm7[2],xmm5[2],xmm7[3],xmm5[3] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm5[0],zero,xmm5[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm5 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm5, %zmm0 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm9[4],xmm8[4],xmm9[5],xmm8[5],xmm9[6],xmm8[6],xmm9[7],xmm8[7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm5[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm5 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm9[0],xmm8[0],xmm9[1],xmm8[1],xmm9[2],xmm8[2],xmm9[3],xmm8[3] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm7 = xmm6[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6 -; AVX512DQ-NEXT: vinserti32x8 $1, %ymm5, %zmm6, %zmm0 {%k1} -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm5[0],zero,xmm5[1],zero -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm5 -; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm12[0],xmm11[0],xmm12[1],xmm11[1],xmm12[2],xmm11[2],xmm12[3],xmm11[3] ; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] ; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1 -; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm5 = xmm2[0,0,1,1] +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm14[4],xmm13[4],xmm14[5],xmm13[5],xmm14[6],xmm13[6],xmm14[7],xmm13[7] +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm14[0],xmm13[0],xmm14[1],xmm13[1],xmm14[2],xmm13[2],xmm14[3],xmm13[3] +; AVX512DQ-NEXT: vpermt2d %zmm1, %zmm5, %zmm2 +; 
AVX512DQ-NEXT: vmovdqa32 %zmm2, %zmm0 {%k1} +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] +; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm2[0],zero,xmm2[1],zero +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,2,3,3] +; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm6, %ymm2 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm10[4],xmm9[4],xmm10[5],xmm9[5],xmm10[6],xmm9[6],xmm10[7],xmm9[7] +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm10[0],xmm9[0],xmm10[1],xmm9[1],xmm10[2],xmm9[2],xmm10[3],xmm9[3] +; AVX512DQ-NEXT: vpermt2d %zmm2, %zmm5, %zmm6 +; AVX512DQ-NEXT: vmovdqa32 %zmm6, %zmm1 {%k1} +; AVX512DQ-NEXT: vmovdqa64 %xmm19, %xmm7 +; AVX512DQ-NEXT: vmovdqa64 %xmm20, %xmm9 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm9[4],xmm7[4],xmm9[5],xmm7[5],xmm9[6],xmm7[6],xmm9[7],xmm7[7] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm6 = xmm2[0],zero,xmm2[1],zero ; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm5, %ymm2 +; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm6, %ymm2 +; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm9[0],xmm7[0],xmm9[1],xmm7[1],xmm9[2],xmm7[2],xmm9[3],xmm7[3] +; AVX512DQ-NEXT: vpmovzxdq {{.*#+}} xmm7 = xmm6[0],zero,xmm6[1],zero +; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[2,2,3,3] +; AVX512DQ-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm6, %zmm2 +; AVX512DQ-NEXT: vpunpckhwd {{.*#+}} xmm6 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] ; AVX512DQ-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[0,0,1,1] -; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3] -; AVX512DQ-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 -; AVX512DQ-NEXT: vinserti32x8 $1, %ymm2, %zmm3, %zmm1 {%k1} -; AVX512DQ-NEXT: vmovdqa64 %zmm1, (%r8) -; AVX512DQ-NEXT: vmovdqa64 %zmm0, 192(%r8) -; AVX512DQ-NEXT: vmovdqa64 %zmm10, 128(%r8) -; AVX512DQ-NEXT: vmovdqa64 %zmm16, 320(%r8) -; AVX512DQ-NEXT: vmovdqa64 %zmm15, 256(%r8) -; AVX512DQ-NEXT: vmovdqa64 %zmm19, 448(%r8) -; AVX512DQ-NEXT: vmovdqa64 %zmm18, 384(%r8) -; AVX512DQ-NEXT: vmovdqa64 %zmm17, 64(%r8) +; AVX512DQ-NEXT: vpermt2d %zmm6, %zmm5, %zmm3 +; AVX512DQ-NEXT: vmovdqa32 %zmm3, %zmm2 {%k1} +; AVX512DQ-NEXT: vmovdqa64 %zmm2, (%r8) +; AVX512DQ-NEXT: vmovdqa64 %zmm1, 192(%r8) +; AVX512DQ-NEXT: vmovdqa64 %zmm0, 128(%r8) +; AVX512DQ-NEXT: vmovdqa64 %zmm17, 320(%r8) +; AVX512DQ-NEXT: vmovdqa64 %zmm16, 256(%r8) +; AVX512DQ-NEXT: vmovdqa64 %zmm15, 448(%r8) +; AVX512DQ-NEXT: vmovdqa64 %zmm8, 384(%r8) +; AVX512DQ-NEXT: vmovdqa64 %zmm18, 64(%r8) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll index d8fd21f4877f0..a343df8428fb4 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-8.ll @@ -1151,24 +1151,25 @@ define void @store_i8_stride8_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0] ; 
AVX512BW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm1 ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} zmm1 = zmm0[0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u,u,u] +; AVX512BW-NEXT: vpshufb {{.*#+}} zmm3 = zmm0[0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u,u,u] ; AVX512BW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] ; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u,u,u,u,u] ; AVX512BW-NEXT: movl $287445282, %ecx # imm = 0x11221122 ; AVX512BW-NEXT: kmovd %ecx, %k1 -; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} -; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[2,3,0,1,2,3,0,1] -; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u] -; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3,0,1,2,3] -; AVX512BW-NEXT: vpshufb {{.*#+}} zmm2 = zmm2[u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,u,u,u,u,54,62,u,u,u,u,u,u,55,63] +; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm3 {%k1} +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[4,5,6,7,4,5,6,7] +; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,u,u,u,u,54,62,u,u,u,u,u,u,55,63] +; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[2,3,0,1,2,3,0,1] +; AVX512BW-NEXT: vpshufb {{.*#+}} zmm1 = zmm1[u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u] ; AVX512BW-NEXT: movl $1149781128, %ecx # imm = 0x44884488 ; AVX512BW-NEXT: kmovd %ecx, %k1 -; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1} +; AVX512BW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: movw $-21846, %cx # imm = 0xAAAA ; AVX512BW-NEXT: kmovd %ecx, %k1 -; AVX512BW-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} -; AVX512BW-NEXT: vmovdqa64 %zmm1, (%rax) +; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm3 {%k1} +; AVX512BW-NEXT: vmovdqa64 %zmm3, (%rax) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -1224,24 +1225,25 @@ define void @store_i8_stride8_vf8(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vecp ; AVX512DQ-BW-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0] ; AVX512DQ-BW-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 ; AVX512DQ-BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm1 ; AVX512DQ-BW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} zmm1 = zmm0[0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u,u,u] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} zmm3 = zmm0[0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u,u,u] ; AVX512DQ-BW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] ; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} zmm0 = 
zmm0[u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u,u,u,u,u] ; AVX512DQ-BW-NEXT: movl $287445282, %ecx # imm = 0x11221122 ; AVX512DQ-BW-NEXT: kmovd %ecx, %k1 -; AVX512DQ-BW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} -; AVX512DQ-BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[2,3,0,1,2,3,0,1] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u] -; AVX512DQ-BW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3,0,1,2,3] -; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} zmm2 = zmm2[u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,u,u,u,u,54,62,u,u,u,u,u,u,55,63] +; AVX512DQ-BW-NEXT: vmovdqu16 %zmm0, %zmm3 {%k1} +; AVX512DQ-BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[4,5,6,7,4,5,6,7] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,u,u,u,u,54,62,u,u,u,u,u,u,55,63] +; AVX512DQ-BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm2[2,3,0,1,2,3,0,1] +; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} zmm1 = zmm1[u,u,u,u,u,u,0,8,u,u,u,u,u,u,1,9,u,u,u,u,18,26,u,u,u,u,u,u,19,27,u,u,u,u,u,u,u,u,36,44,u,u,u,u,u,u,37,45,u,u,u,u,54,62,u,u,u,u,u,u,55,63,u,u] ; AVX512DQ-BW-NEXT: movl $1149781128, %ecx # imm = 0x44884488 ; AVX512DQ-BW-NEXT: kmovd %ecx, %k1 -; AVX512DQ-BW-NEXT: vmovdqu16 %zmm0, %zmm2 {%k1} +; AVX512DQ-BW-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} ; AVX512DQ-BW-NEXT: movw $-21846, %cx # imm = 0xAAAA ; AVX512DQ-BW-NEXT: kmovd %ecx, %k1 -; AVX512DQ-BW-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1} -; AVX512DQ-BW-NEXT: vmovdqa64 %zmm1, (%rax) +; AVX512DQ-BW-NEXT: vmovdqa32 %zmm0, %zmm3 {%k1} +; AVX512DQ-BW-NEXT: vmovdqa64 %zmm3, (%rax) ; AVX512DQ-BW-NEXT: vzeroupper ; AVX512DQ-BW-NEXT: retq ; @@ -8870,234 +8872,172 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512BW-FCP: # %bb.0: ; AVX512BW-FCP-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512BW-FCP-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX512BW-FCP-NEXT: vmovdqa (%r10), %xmm0 -; AVX512BW-FCP-NEXT: vmovdqa 16(%r10), %xmm14 -; AVX512BW-FCP-NEXT: vmovdqa64 32(%r10), %xmm18 -; AVX512BW-FCP-NEXT: vmovdqa64 48(%r10), %xmm17 -; AVX512BW-FCP-NEXT: vmovdqa (%rax), %xmm1 -; AVX512BW-FCP-NEXT: vmovdqa 16(%rax), %xmm15 -; AVX512BW-FCP-NEXT: vmovdqa64 32(%rax), %xmm19 -; AVX512BW-FCP-NEXT: vmovdqa64 48(%rax), %xmm20 -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm2, %ymm2, %ymm2 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm3 -; AVX512BW-FCP-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,0,1,0,1,0,1,8,9,10,11,4,5,2,3,0,1,4,5,0,1,4,5,8,9,10,11,4,5,6,7,0,1,2,3,8,9,8,9,8,9,8,9,12,13,10,11,0,1,2,3,8,9,12,13,8,9,12,13,12,13,14,15] -; AVX512BW-FCP-NEXT: vpshufb %zmm2, %zmm3, %zmm6 -; AVX512BW-FCP-NEXT: vmovdqa (%r9), %xmm3 -; AVX512BW-FCP-NEXT: vmovdqa64 48(%r9), %xmm21 -; AVX512BW-FCP-NEXT: vmovdqa (%r8), %xmm4 -; AVX512BW-FCP-NEXT: vmovdqa64 48(%r8), %xmm22 -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15] -; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm5, %ymm5, %ymm5 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm5, 
%zmm5, %zmm7 -; AVX512BW-FCP-NEXT: vmovdqa64 {{.*#+}} zmm5 = [0,1,2,3,0,1,2,3,8,9,10,11,2,3,6,7,4,5,2,3,4,5,2,3,8,9,10,11,6,7,6,7,0,1,2,3,8,9,10,11,8,9,10,11,10,11,14,15,0,1,2,3,12,13,10,11,12,13,10,11,14,15,14,15] -; AVX512BW-FCP-NEXT: vpshufb %zmm5, %zmm7, %zmm16 -; AVX512BW-FCP-NEXT: movl $-2004318072, %eax # imm = 0x88888888 -; AVX512BW-FCP-NEXT: kmovd %eax, %k1 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm6, %zmm16 {%k1} -; AVX512BW-FCP-NEXT: vmovdqa (%rcx), %xmm6 -; AVX512BW-FCP-NEXT: vmovdqa64 48(%rcx), %xmm23 +; AVX512BW-FCP-NEXT: vmovdqa (%rsi), %xmm0 +; AVX512BW-FCP-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512BW-FCP-NEXT: vmovdqa64 32(%rsi), %xmm20 +; AVX512BW-FCP-NEXT: vmovdqa64 48(%rsi), %xmm17 +; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %xmm2 +; AVX512BW-FCP-NEXT: vmovdqa64 32(%rdi), %xmm21 +; AVX512BW-FCP-NEXT: vmovdqa64 48(%rdi), %xmm18 +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] +; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm3 +; AVX512BW-FCP-NEXT: vpmovsxwq {{.*#+}} ymm4 = [2312,2826,3340,3854] +; AVX512BW-FCP-NEXT: vpshufb %ymm4, %ymm3, %ymm3 +; AVX512BW-FCP-NEXT: vpmovsxwq {{.*#+}} xmm5 = [1284,1798] +; AVX512BW-FCP-NEXT: vpshufb %xmm5, %xmm0, %xmm6 +; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 +; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm1 +; AVX512BW-FCP-NEXT: vmovdqa (%rcx), %xmm3 +; AVX512BW-FCP-NEXT: vmovdqa64 32(%rcx), %xmm22 +; AVX512BW-FCP-NEXT: vmovdqa64 48(%rcx), %xmm19 ; AVX512BW-FCP-NEXT: vmovdqa (%rdx), %xmm7 -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm8 = xmm7[8],xmm6[8],xmm7[9],xmm6[9],xmm7[10],xmm6[10],xmm7[11],xmm6[11],xmm7[12],xmm6[12],xmm7[13],xmm6[13],xmm7[14],xmm6[14],xmm7[15],xmm6[15] -; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm8, %ymm8, %ymm8 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm8, %zmm8, %zmm8 -; AVX512BW-FCP-NEXT: vmovdqa64 {{.*#+}} zmm9 = [0,1,0,1,4,5,2,3,4,5,2,3,12,13,14,15,0,1,4,5,4,5,6,7,4,5,6,7,12,13,14,15,8,9,8,9,4,5,6,7,12,13,10,11,12,13,10,11,8,9,12,13,4,5,6,7,12,13,14,15,12,13,14,15] -; AVX512BW-FCP-NEXT: vpshufb %zmm9, %zmm8, %zmm24 -; AVX512BW-FCP-NEXT: vmovdqa (%rsi), %xmm10 -; AVX512BW-FCP-NEXT: vmovdqa64 48(%rsi), %xmm25 -; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %xmm11 -; AVX512BW-FCP-NEXT: vmovdqa64 48(%rdi), %xmm28 -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm8 = xmm11[8],xmm10[8],xmm11[9],xmm10[9],xmm11[10],xmm10[10],xmm11[11],xmm10[11],xmm11[12],xmm10[12],xmm11[13],xmm10[13],xmm11[14],xmm10[14],xmm11[15],xmm10[15] -; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm8, %ymm8, %ymm13 -; AVX512BW-FCP-NEXT: vpmovsxwq {{.*#+}} ymm12 = [2312,2826,3340,3854] -; AVX512BW-FCP-NEXT: vpshufb %ymm12, %ymm13, %ymm26 -; AVX512BW-FCP-NEXT: vpmovsxwq {{.*#+}} xmm13 = [1284,1798] -; AVX512BW-FCP-NEXT: vpshufb %xmm13, %xmm8, %xmm27 -; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm27, %ymm8, %ymm8 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm26, %zmm8, %zmm8 -; AVX512BW-FCP-NEXT: movl $572662306, %eax # imm = 0x22222222 -; AVX512BW-FCP-NEXT: kmovd %eax, %k2 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm24, %zmm8 {%k2} -; AVX512BW-FCP-NEXT: movw $-21846, %ax # imm = 0xAAAA -; AVX512BW-FCP-NEXT: kmovd %eax, %k3 -; AVX512BW-FCP-NEXT: vmovdqa32 %zmm16, %zmm8 {%k3} -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} 
xmm16 = xmm20[0],xmm17[0],xmm20[1],xmm17[1],xmm20[2],xmm17[2],xmm20[3],xmm17[3],xmm20[4],xmm17[4],xmm20[5],xmm17[5],xmm20[6],xmm17[6],xmm20[7],xmm17[7] -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm16, %ymm16, %ymm16 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm16, %zmm16, %zmm16 -; AVX512BW-FCP-NEXT: vpshufb %zmm2, %zmm16, %zmm16 -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm24 = xmm22[0],xmm21[0],xmm22[1],xmm21[1],xmm22[2],xmm21[2],xmm22[3],xmm21[3],xmm22[4],xmm21[4],xmm22[5],xmm21[5],xmm22[6],xmm21[6],xmm22[7],xmm21[7] -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm24, %ymm24, %ymm24 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm24, %zmm24, %zmm24 -; AVX512BW-FCP-NEXT: vpshufb %zmm5, %zmm24, %zmm24 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm16, %zmm24 {%k1} -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm16 = xmm28[0],xmm25[0],xmm28[1],xmm25[1],xmm28[2],xmm25[2],xmm28[3],xmm25[3],xmm28[4],xmm25[4],xmm28[5],xmm25[5],xmm28[6],xmm25[6],xmm28[7],xmm25[7] -; AVX512BW-FCP-NEXT: vpshufb %xmm13, %xmm16, %xmm26 -; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm27 = xmm16[0],zero,zero,zero,xmm16[1],zero,zero,zero -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm26, %ymm27, %ymm26 -; AVX512BW-FCP-NEXT: vmovdqa64 48(%rdx), %xmm30 -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm16, %ymm16, %ymm16 -; AVX512BW-FCP-NEXT: vpshufb %ymm12, %ymm16, %ymm16 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm16, %zmm26, %zmm16 -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm26 = xmm30[0],xmm23[0],xmm30[1],xmm23[1],xmm30[2],xmm23[2],xmm30[3],xmm23[3],xmm30[4],xmm23[4],xmm30[5],xmm23[5],xmm30[6],xmm23[6],xmm30[7],xmm23[7] -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm26, %ymm26, %ymm26 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm26, %zmm26, %zmm26 -; AVX512BW-FCP-NEXT: vpshufb %zmm9, %zmm26, %zmm26 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm26, %zmm16 {%k2} -; AVX512BW-FCP-NEXT: vmovdqa64 32(%r9), %xmm26 -; AVX512BW-FCP-NEXT: vmovdqa32 %zmm24, %zmm16 {%k3} -; AVX512BW-FCP-NEXT: vmovdqa64 32(%r8), %xmm27 -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm17 = xmm20[8],xmm17[8],xmm20[9],xmm17[9],xmm20[10],xmm17[10],xmm20[11],xmm17[11],xmm20[12],xmm17[12],xmm20[13],xmm17[13],xmm20[14],xmm17[14],xmm20[15],xmm17[15] -; AVX512BW-FCP-NEXT: vmovdqa64 32(%rcx), %xmm24 -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm20 = xmm22[8],xmm21[8],xmm22[9],xmm21[9],xmm22[10],xmm21[10],xmm22[11],xmm21[11],xmm22[12],xmm21[12],xmm22[13],xmm21[13],xmm22[14],xmm21[14],xmm22[15],xmm21[15] -; AVX512BW-FCP-NEXT: vmovdqa64 32(%rsi), %xmm21 +; AVX512BW-FCP-NEXT: vmovdqa64 32(%rdx), %xmm23 +; AVX512BW-FCP-NEXT: vmovdqa64 48(%rdx), %xmm24 +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm7[8],xmm3[8],xmm7[9],xmm3[9],xmm7[10],xmm3[10],xmm7[11],xmm3[11],xmm7[12],xmm3[12],xmm7[13],xmm3[13],xmm7[14],xmm3[14],xmm7[15],xmm3[15] +; AVX512BW-FCP-NEXT: vpmovsxbw {{.*#+}} zmm8 = [0,0,2,1,2,1,6,7,0,2,2,3,2,3,6,7,4,4,2,3,6,5,6,5,4,6,2,3,6,7,6,7] +; AVX512BW-FCP-NEXT: movl $572662306, %r11d # imm = 0x22222222 +; AVX512BW-FCP-NEXT: kmovd %r11d, %k1 +; AVX512BW-FCP-NEXT: vpermw %zmm6, %zmm8, %zmm1 {%k1} +; AVX512BW-FCP-NEXT: vmovdqa (%r10), %xmm6 +; AVX512BW-FCP-NEXT: vmovdqa64 48(%r10), %xmm25 +; AVX512BW-FCP-NEXT: vmovdqa (%rax), %xmm9 +; AVX512BW-FCP-NEXT: vmovdqa64 48(%rax), %xmm26 +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm15 = xmm9[8],xmm6[8],xmm9[9],xmm6[9],xmm9[10],xmm6[10],xmm9[11],xmm6[11],xmm9[12],xmm6[12],xmm9[13],xmm6[13],xmm9[14],xmm6[14],xmm9[15],xmm6[15] +; AVX512BW-FCP-NEXT: vmovdqa (%r9), %xmm10 +; AVX512BW-FCP-NEXT: vmovdqa64 48(%r9), %xmm27 +; AVX512BW-FCP-NEXT: vmovdqa (%r8), %xmm11 +; 
AVX512BW-FCP-NEXT: vmovdqa64 48(%r8), %xmm28 +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm12 = xmm11[8],xmm10[8],xmm11[9],xmm10[9],xmm11[10],xmm10[10],xmm11[11],xmm10[11],xmm11[12],xmm10[12],xmm11[13],xmm10[13],xmm11[14],xmm10[14],xmm11[15],xmm10[15] +; AVX512BW-FCP-NEXT: vpmovsxbw {{.*#+}} zmm13 = [0,1,0,1,4,5,1,3,2,1,2,1,4,5,3,3,0,1,4,5,4,5,5,7,0,1,6,5,6,5,7,7] +; AVX512BW-FCP-NEXT: vpermw %zmm12, %zmm13, %zmm12 +; AVX512BW-FCP-NEXT: vpmovsxbw {{.*#+}} zmm14 = [0,0,0,0,4,5,2,1,0,2,0,2,4,5,2,3,0,1,4,4,4,4,6,5,0,1,4,6,4,6,6,7] +; AVX512BW-FCP-NEXT: movl $-2004318072, %r11d # imm = 0x88888888 +; AVX512BW-FCP-NEXT: kmovd %r11d, %k2 +; AVX512BW-FCP-NEXT: vpermw %zmm15, %zmm14, %zmm12 {%k2} +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm15 = xmm18[0],xmm17[0],xmm18[1],xmm17[1],xmm18[2],xmm17[2],xmm18[3],xmm17[3],xmm18[4],xmm17[4],xmm18[5],xmm17[5],xmm18[6],xmm17[6],xmm18[7],xmm17[7] +; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm15, %ymm15, %ymm16 +; AVX512BW-FCP-NEXT: vpshufb %ymm4, %ymm16, %ymm16 +; AVX512BW-FCP-NEXT: vpshufb %xmm5, %xmm15, %xmm29 +; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm15 = xmm15[0],zero,zero,zero,xmm15[1],zero,zero,zero +; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm29, %ymm15, %ymm15 +; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm16, %zmm15, %zmm15 +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm16 = xmm24[0],xmm19[0],xmm24[1],xmm19[1],xmm24[2],xmm19[2],xmm24[3],xmm19[3],xmm24[4],xmm19[4],xmm24[5],xmm19[5],xmm24[6],xmm19[6],xmm24[7],xmm19[7] +; AVX512BW-FCP-NEXT: vpermw %zmm16, %zmm8, %zmm15 {%k1} +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm29 = xmm26[0],xmm25[0],xmm26[1],xmm25[1],xmm26[2],xmm25[2],xmm26[3],xmm25[3],xmm26[4],xmm25[4],xmm26[5],xmm25[5],xmm26[6],xmm25[6],xmm26[7],xmm25[7] +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm16 = xmm28[0],xmm27[0],xmm28[1],xmm27[1],xmm28[2],xmm27[2],xmm28[3],xmm27[3],xmm28[4],xmm27[4],xmm28[5],xmm27[5],xmm28[6],xmm27[6],xmm28[7],xmm27[7] +; AVX512BW-FCP-NEXT: vpermw %zmm16, %zmm13, %zmm16 +; AVX512BW-FCP-NEXT: vpermw %zmm29, %zmm14, %zmm16 {%k2} +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm17 = xmm18[8],xmm17[8],xmm18[9],xmm17[9],xmm18[10],xmm17[10],xmm18[11],xmm17[11],xmm18[12],xmm17[12],xmm18[13],xmm17[13],xmm18[14],xmm17[14],xmm18[15],xmm17[15] +; AVX512BW-FCP-NEXT: vpshufb %xmm5, %xmm17, %xmm18 +; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm29 = xmm17[0],zero,zero,zero,xmm17[1],zero,zero,zero +; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm18, %ymm29, %ymm18 +; AVX512BW-FCP-NEXT: vmovdqa64 32(%r10), %xmm29 ; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm17, %ymm17, %ymm17 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm17, %zmm17, %zmm17 -; AVX512BW-FCP-NEXT: vpshufb %zmm2, %zmm17, %zmm17 -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm20, %ymm20, %ymm20 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm20, %zmm20, %zmm20 -; AVX512BW-FCP-NEXT: vpshufb %zmm5, %zmm20, %zmm20 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm17, %zmm20 {%k1} -; AVX512BW-FCP-NEXT: vmovdqa64 32(%rdi), %xmm29 -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm17 = xmm30[8],xmm23[8],xmm30[9],xmm23[9],xmm30[10],xmm23[10],xmm30[11],xmm23[11],xmm30[12],xmm23[12],xmm30[13],xmm23[13],xmm30[14],xmm23[14],xmm30[15],xmm23[15] -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm17, %ymm17, %ymm17 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm17, %zmm17, %zmm17 -; AVX512BW-FCP-NEXT: vpshufb %zmm9, %zmm17, %zmm22 -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm17 = xmm28[8],xmm25[8],xmm28[9],xmm25[9],xmm28[10],xmm25[10],xmm28[11],xmm25[11],xmm28[12],xmm25[12],xmm28[13],xmm25[13],xmm28[14],xmm25[14],xmm28[15],xmm25[15] -; AVX512BW-FCP-NEXT: 
vinserti32x4 $1, %xmm17, %ymm17, %ymm23 -; AVX512BW-FCP-NEXT: vpshufb %ymm12, %ymm23, %ymm23 -; AVX512BW-FCP-NEXT: vpshufb %xmm13, %xmm17, %xmm25 -; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm17 = xmm17[0],zero,zero,zero,xmm17[1],zero,zero,zero -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm25, %ymm17, %ymm17 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm23, %zmm17, %zmm17 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm22, %zmm17 {%k2} -; AVX512BW-FCP-NEXT: vmovdqa32 %zmm20, %zmm17 {%k3} -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm20 = xmm19[0],xmm18[0],xmm19[1],xmm18[1],xmm19[2],xmm18[2],xmm19[3],xmm18[3],xmm19[4],xmm18[4],xmm19[5],xmm18[5],xmm19[6],xmm18[6],xmm19[7],xmm18[7] -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm20, %ymm20, %ymm20 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm20, %zmm20, %zmm20 -; AVX512BW-FCP-NEXT: vpshufb %zmm2, %zmm20, %zmm20 -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm22 = xmm27[0],xmm26[0],xmm27[1],xmm26[1],xmm27[2],xmm26[2],xmm27[3],xmm26[3],xmm27[4],xmm26[4],xmm27[5],xmm26[5],xmm27[6],xmm26[6],xmm27[7],xmm26[7] -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm22, %ymm22, %ymm22 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm22, %zmm22, %zmm22 -; AVX512BW-FCP-NEXT: vpshufb %zmm5, %zmm22, %zmm22 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm20, %zmm22 {%k1} -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm20 = xmm29[0],xmm21[0],xmm29[1],xmm21[1],xmm29[2],xmm21[2],xmm29[3],xmm21[3],xmm29[4],xmm21[4],xmm29[5],xmm21[5],xmm29[6],xmm21[6],xmm29[7],xmm21[7] -; AVX512BW-FCP-NEXT: vpshufb %xmm13, %xmm20, %xmm23 +; AVX512BW-FCP-NEXT: vpshufb %ymm4, %ymm17, %ymm17 +; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm17, %zmm18, %zmm17 +; AVX512BW-FCP-NEXT: vmovdqa64 32(%rax), %xmm30 +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm18 = xmm24[8],xmm19[8],xmm24[9],xmm19[9],xmm24[10],xmm19[10],xmm24[11],xmm19[11],xmm24[12],xmm19[12],xmm24[13],xmm19[13],xmm24[14],xmm19[14],xmm24[15],xmm19[15] +; AVX512BW-FCP-NEXT: vmovdqa64 32(%r9), %xmm31 +; AVX512BW-FCP-NEXT: vpermw %zmm18, %zmm8, %zmm17 {%k1} +; AVX512BW-FCP-NEXT: vmovdqa 32(%r8), %xmm0 +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm19 = xmm26[8],xmm25[8],xmm26[9],xmm25[9],xmm26[10],xmm25[10],xmm26[11],xmm25[11],xmm26[12],xmm25[12],xmm26[13],xmm25[13],xmm26[14],xmm25[14],xmm26[15],xmm25[15] +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm18 = xmm28[8],xmm27[8],xmm28[9],xmm27[9],xmm28[10],xmm27[10],xmm28[11],xmm27[11],xmm28[12],xmm27[12],xmm28[13],xmm27[13],xmm28[14],xmm27[14],xmm28[15],xmm27[15] +; AVX512BW-FCP-NEXT: vpermw %zmm18, %zmm13, %zmm18 +; AVX512BW-FCP-NEXT: vpermw %zmm19, %zmm14, %zmm18 {%k2} +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm19 = xmm21[0],xmm20[0],xmm21[1],xmm20[1],xmm21[2],xmm20[2],xmm21[3],xmm20[3],xmm21[4],xmm20[4],xmm21[5],xmm20[5],xmm21[6],xmm20[6],xmm21[7],xmm20[7] +; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm19, %ymm19, %ymm24 +; AVX512BW-FCP-NEXT: vpshufb %ymm4, %ymm24, %ymm24 +; AVX512BW-FCP-NEXT: vpshufb %xmm5, %xmm19, %xmm25 +; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm19 = xmm19[0],zero,zero,zero,xmm19[1],zero,zero,zero +; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm25, %ymm19, %ymm19 +; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm24, %zmm19, %zmm19 +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm24 = xmm23[0],xmm22[0],xmm23[1],xmm22[1],xmm23[2],xmm22[2],xmm23[3],xmm22[3],xmm23[4],xmm22[4],xmm23[5],xmm22[5],xmm23[6],xmm22[6],xmm23[7],xmm22[7] +; AVX512BW-FCP-NEXT: vpermw %zmm24, %zmm8, %zmm19 {%k1} +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm25 = 
xmm30[0],xmm29[0],xmm30[1],xmm29[1],xmm30[2],xmm29[2],xmm30[3],xmm29[3],xmm30[4],xmm29[4],xmm30[5],xmm29[5],xmm30[6],xmm29[6],xmm30[7],xmm29[7] +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm24 = xmm0[0],xmm31[0],xmm0[1],xmm31[1],xmm0[2],xmm31[2],xmm0[3],xmm31[3],xmm0[4],xmm31[4],xmm0[5],xmm31[5],xmm0[6],xmm31[6],xmm0[7],xmm31[7] +; AVX512BW-FCP-NEXT: vpermw %zmm24, %zmm13, %zmm24 +; AVX512BW-FCP-NEXT: vpermw %zmm25, %zmm14, %zmm24 {%k2} +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm20 = xmm21[8],xmm20[8],xmm21[9],xmm20[9],xmm21[10],xmm20[10],xmm21[11],xmm20[11],xmm21[12],xmm20[12],xmm21[13],xmm20[13],xmm21[14],xmm20[14],xmm21[15],xmm20[15] +; AVX512BW-FCP-NEXT: vpshufb %xmm5, %xmm20, %xmm21 ; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm25 = xmm20[0],zero,zero,zero,xmm20[1],zero,zero,zero -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm23, %ymm25, %ymm23 -; AVX512BW-FCP-NEXT: vmovdqa64 32(%rdx), %xmm28 +; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm21, %ymm25, %ymm21 ; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm20, %ymm20, %ymm20 -; AVX512BW-FCP-NEXT: vpshufb %ymm12, %ymm20, %ymm20 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm20, %zmm23, %zmm20 -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm23 = xmm28[0],xmm24[0],xmm28[1],xmm24[1],xmm28[2],xmm24[2],xmm28[3],xmm24[3],xmm28[4],xmm24[4],xmm28[5],xmm24[5],xmm28[6],xmm24[6],xmm28[7],xmm24[7] -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm23, %ymm23, %ymm23 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm23, %zmm23, %zmm23 -; AVX512BW-FCP-NEXT: vpshufb %zmm9, %zmm23, %zmm23 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm23, %zmm20 {%k2} -; AVX512BW-FCP-NEXT: vmovdqa64 16(%r9), %xmm23 -; AVX512BW-FCP-NEXT: vmovdqa32 %zmm22, %zmm20 {%k3} -; AVX512BW-FCP-NEXT: vmovdqa64 16(%r8), %xmm25 -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm18 = xmm19[8],xmm18[8],xmm19[9],xmm18[9],xmm19[10],xmm18[10],xmm19[11],xmm18[11],xmm19[12],xmm18[12],xmm19[13],xmm18[13],xmm19[14],xmm18[14],xmm19[15],xmm18[15] -; AVX512BW-FCP-NEXT: vmovdqa64 16(%rcx), %xmm19 -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm26 = xmm27[8],xmm26[8],xmm27[9],xmm26[9],xmm27[10],xmm26[10],xmm27[11],xmm26[11],xmm27[12],xmm26[12],xmm27[13],xmm26[13],xmm27[14],xmm26[14],xmm27[15],xmm26[15] -; AVX512BW-FCP-NEXT: vmovdqa64 16(%rsi), %xmm22 -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm18, %ymm18, %ymm18 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm18, %zmm18, %zmm18 -; AVX512BW-FCP-NEXT: vpshufb %zmm2, %zmm18, %zmm18 -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm26, %ymm26, %ymm26 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm26, %zmm26, %zmm26 -; AVX512BW-FCP-NEXT: vpshufb %zmm5, %zmm26, %zmm27 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm18, %zmm27 {%k1} -; AVX512BW-FCP-NEXT: vmovdqa64 16(%rdi), %xmm26 -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm18 = xmm28[8],xmm24[8],xmm28[9],xmm24[9],xmm28[10],xmm24[10],xmm28[11],xmm24[11],xmm28[12],xmm24[12],xmm28[13],xmm24[13],xmm28[14],xmm24[14],xmm28[15],xmm24[15] -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm18, %ymm18, %ymm18 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm18, %zmm18, %zmm18 -; AVX512BW-FCP-NEXT: vpshufb %zmm9, %zmm18, %zmm24 -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm18 = xmm29[8],xmm21[8],xmm29[9],xmm21[9],xmm29[10],xmm21[10],xmm29[11],xmm21[11],xmm29[12],xmm21[12],xmm29[13],xmm21[13],xmm29[14],xmm21[14],xmm29[15],xmm21[15] -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm18, %ymm18, %ymm21 -; AVX512BW-FCP-NEXT: vpshufb %ymm12, %ymm21, %ymm21 -; AVX512BW-FCP-NEXT: vpshufb %xmm13, %xmm18, %xmm28 -; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm18 = xmm18[0],zero,zero,zero,xmm18[1],zero,zero,zero -; AVX512BW-FCP-NEXT: 
vinserti32x4 $1, %xmm28, %ymm18, %ymm18 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm21, %zmm18, %zmm18 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm24, %zmm18 {%k2} -; AVX512BW-FCP-NEXT: vmovdqa32 %zmm27, %zmm18 {%k3} -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm21 = xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3],xmm15[4],xmm14[4],xmm15[5],xmm14[5],xmm15[6],xmm14[6],xmm15[7],xmm14[7] -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm21, %ymm21, %ymm21 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm21, %zmm21, %zmm21 -; AVX512BW-FCP-NEXT: vpshufb %zmm2, %zmm21, %zmm21 -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm24 = xmm25[0],xmm23[0],xmm25[1],xmm23[1],xmm25[2],xmm23[2],xmm25[3],xmm23[3],xmm25[4],xmm23[4],xmm25[5],xmm23[5],xmm25[6],xmm23[6],xmm25[7],xmm23[7] -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm24, %ymm24, %ymm24 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm24, %zmm24, %zmm24 -; AVX512BW-FCP-NEXT: vpshufb %zmm5, %zmm24, %zmm24 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm21, %zmm24 {%k1} -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm21 = xmm26[0],xmm22[0],xmm26[1],xmm22[1],xmm26[2],xmm22[2],xmm26[3],xmm22[3],xmm26[4],xmm22[4],xmm26[5],xmm22[5],xmm26[6],xmm22[6],xmm26[7],xmm22[7] -; AVX512BW-FCP-NEXT: vpshufb %xmm13, %xmm21, %xmm27 -; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm28 = xmm21[0],zero,zero,zero,xmm21[1],zero,zero,zero -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm27, %ymm28, %ymm27 -; AVX512BW-FCP-NEXT: vmovdqa64 16(%rdx), %xmm28 -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm21, %ymm21, %ymm21 -; AVX512BW-FCP-NEXT: vpshufb %ymm12, %ymm21, %ymm21 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm21, %zmm27, %zmm21 -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm27 = xmm28[0],xmm19[0],xmm28[1],xmm19[1],xmm28[2],xmm19[2],xmm28[3],xmm19[3],xmm28[4],xmm19[4],xmm28[5],xmm19[5],xmm28[6],xmm19[6],xmm28[7],xmm19[7] -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm27, %ymm27, %ymm27 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm27, %zmm27, %zmm27 -; AVX512BW-FCP-NEXT: vpshufb %zmm9, %zmm27, %zmm27 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm27, %zmm21 {%k2} -; AVX512BW-FCP-NEXT: vmovdqa32 %zmm24, %zmm21 {%k3} -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm14 = xmm15[8],xmm14[8],xmm15[9],xmm14[9],xmm15[10],xmm14[10],xmm15[11],xmm14[11],xmm15[12],xmm14[12],xmm15[13],xmm14[13],xmm15[14],xmm14[14],xmm15[15],xmm14[15] -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm15 = xmm25[8],xmm23[8],xmm25[9],xmm23[9],xmm25[10],xmm23[10],xmm25[11],xmm23[11],xmm25[12],xmm23[12],xmm25[13],xmm23[13],xmm25[14],xmm23[14],xmm25[15],xmm23[15] -; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm14, %ymm14, %ymm14 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm14, %zmm14, %zmm14 -; AVX512BW-FCP-NEXT: vpshufb %zmm2, %zmm14, %zmm14 -; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm15, %ymm15, %ymm15 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm15, %zmm15, %zmm15 -; AVX512BW-FCP-NEXT: vpshufb %zmm5, %zmm15, %zmm15 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm14, %zmm15 {%k1} -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm14 = xmm28[8],xmm19[8],xmm28[9],xmm19[9],xmm28[10],xmm19[10],xmm28[11],xmm19[11],xmm28[12],xmm19[12],xmm28[13],xmm19[13],xmm28[14],xmm19[14],xmm28[15],xmm19[15] -; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm19 = xmm26[8],xmm22[8],xmm26[9],xmm22[9],xmm26[10],xmm22[10],xmm26[11],xmm22[11],xmm26[12],xmm22[12],xmm26[13],xmm22[13],xmm26[14],xmm22[14],xmm26[15],xmm22[15] -; AVX512BW-FCP-NEXT: vpshufb %xmm13, %xmm19, %xmm22 -; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm23 = xmm19[0],zero,zero,zero,xmm19[1],zero,zero,zero -; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm22, %ymm23, %ymm22 -; 
AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm19, %ymm19, %ymm19 -; AVX512BW-FCP-NEXT: vpshufb %ymm12, %ymm19, %ymm19 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm19, %zmm22, %zmm19 -; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm14, %ymm14, %ymm14 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm14, %zmm14, %zmm14 -; AVX512BW-FCP-NEXT: vpshufb %zmm9, %zmm14, %zmm14 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm14, %zmm19 {%k2} -; AVX512BW-FCP-NEXT: vmovdqa32 %zmm15, %zmm19 {%k3} -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512BW-FCP-NEXT: vpshufb %ymm4, %ymm20, %ymm20 +; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm20, %zmm21, %zmm20 +; AVX512BW-FCP-NEXT: vmovdqa64 16(%rsi), %xmm25 +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm21 = xmm23[8],xmm22[8],xmm23[9],xmm22[9],xmm23[10],xmm22[10],xmm23[11],xmm22[11],xmm23[12],xmm22[12],xmm23[13],xmm22[13],xmm23[14],xmm22[14],xmm23[15],xmm22[15] +; AVX512BW-FCP-NEXT: vmovdqa64 16(%rdi), %xmm23 +; AVX512BW-FCP-NEXT: vpermw %zmm21, %zmm8, %zmm20 {%k1} +; AVX512BW-FCP-NEXT: vmovdqa64 16(%rcx), %xmm26 +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm22 = xmm30[8],xmm29[8],xmm30[9],xmm29[9],xmm30[10],xmm29[10],xmm30[11],xmm29[11],xmm30[12],xmm29[12],xmm30[13],xmm29[13],xmm30[14],xmm29[14],xmm30[15],xmm29[15] +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm31[8],xmm0[9],xmm31[9],xmm0[10],xmm31[10],xmm0[11],xmm31[11],xmm0[12],xmm31[12],xmm0[13],xmm31[13],xmm0[14],xmm31[14],xmm0[15],xmm31[15] +; AVX512BW-FCP-NEXT: vpermw %zmm0, %zmm13, %zmm21 +; AVX512BW-FCP-NEXT: vpermw %zmm22, %zmm14, %zmm21 {%k2} +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm23[0],xmm25[0],xmm23[1],xmm25[1],xmm23[2],xmm25[2],xmm23[3],xmm25[3],xmm23[4],xmm25[4],xmm23[5],xmm25[5],xmm23[6],xmm25[6],xmm23[7],xmm25[7] +; AVX512BW-FCP-NEXT: vpshufb %xmm5, %xmm0, %xmm22 +; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm27 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm22, %ymm27, %ymm22 +; AVX512BW-FCP-NEXT: vmovdqa64 16(%rdx), %xmm27 ; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512BW-FCP-NEXT: vpshufb %zmm2, %zmm0, %zmm0 -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] -; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1 -; AVX512BW-FCP-NEXT: vpshufb %zmm5, %zmm1, %zmm1 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3],xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7] +; AVX512BW-FCP-NEXT: vpshufb %ymm4, %ymm0, %ymm0 +; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm0, %zmm22, %zmm22 +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm27[0],xmm26[0],xmm27[1],xmm26[1],xmm27[2],xmm26[2],xmm27[3],xmm26[3],xmm27[4],xmm26[4],xmm27[5],xmm26[5],xmm27[6],xmm26[6],xmm27[7],xmm26[7] +; AVX512BW-FCP-NEXT: vpermw %zmm0, %zmm8, %zmm22 {%k1} +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm23[8],xmm25[8],xmm23[9],xmm25[9],xmm23[10],xmm25[10],xmm23[11],xmm25[11],xmm23[12],xmm25[12],xmm23[13],xmm25[13],xmm23[14],xmm25[14],xmm23[15],xmm25[15] +; AVX512BW-FCP-NEXT: vpshufb %xmm5, %xmm0, %xmm23 +; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm25 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm23, %ymm25, %ymm23 ; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512BW-FCP-NEXT: vpshufb %zmm9, %zmm0, %zmm0 -; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3],xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7] -; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm2, %ymm2, %ymm3 -; AVX512BW-FCP-NEXT: vpshufb %ymm12, %ymm3, %ymm3 -; AVX512BW-FCP-NEXT: vpshufb %xmm13, %xmm2, %xmm4 +; AVX512BW-FCP-NEXT: vpshufb %ymm4, %ymm0, %ymm0 +; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm0, %zmm23, %zmm0 +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm23 = xmm27[8],xmm26[8],xmm27[9],xmm26[9],xmm27[10],xmm26[10],xmm27[11],xmm26[11],xmm27[12],xmm26[12],xmm27[13],xmm26[13],xmm27[14],xmm26[14],xmm27[15],xmm26[15] +; AVX512BW-FCP-NEXT: vpermw %zmm23, %zmm8, %zmm0 {%k1} +; AVX512BW-FCP-NEXT: vpunpcklbw {{[-0-9]+}}(%r{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload +; AVX512BW-FCP-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3],xmm2[4],mem[4],xmm2[5],mem[5],xmm2[6],mem[6],xmm2[7],mem[7] +; AVX512BW-FCP-NEXT: vinserti32x4 $1, %xmm2, %ymm2, %ymm23 +; AVX512BW-FCP-NEXT: vpshufb %ymm4, %ymm23, %ymm4 +; AVX512BW-FCP-NEXT: vmovdqa64 16(%r10), %xmm23 +; AVX512BW-FCP-NEXT: vpshufb %xmm5, %xmm2, %xmm5 ; AVX512BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2 -; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512BW-FCP-NEXT: vmovdqu16 %zmm0, %zmm2 {%k2} -; AVX512BW-FCP-NEXT: vmovdqa32 %zmm1, %zmm2 {%k3} +; AVX512BW-FCP-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 +; AVX512BW-FCP-NEXT: vmovdqa 16(%rax), %xmm5 +; AVX512BW-FCP-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512BW-FCP-NEXT: vmovdqa 16(%r9), %xmm4 +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7] +; AVX512BW-FCP-NEXT: vmovdqa 16(%r8), %xmm7 +; AVX512BW-FCP-NEXT: vpermw %zmm3, %zmm8, %zmm2 {%k1} +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm5[0],xmm23[0],xmm5[1],xmm23[1],xmm5[2],xmm23[2],xmm5[3],xmm23[3],xmm5[4],xmm23[4],xmm5[5],xmm23[5],xmm5[6],xmm23[6],xmm5[7],xmm23[7] +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm8 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3],xmm7[4],xmm4[4],xmm7[5],xmm4[5],xmm7[6],xmm4[6],xmm7[7],xmm4[7] +; AVX512BW-FCP-NEXT: vpermw %zmm8, %zmm13, %zmm8 +; AVX512BW-FCP-NEXT: vpermw %zmm3, %zmm14, %zmm8 {%k2} +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm5[8],xmm23[8],xmm5[9],xmm23[9],xmm5[10],xmm23[10],xmm5[11],xmm23[11],xmm5[12],xmm23[12],xmm5[13],xmm23[13],xmm5[14],xmm23[14],xmm5[15],xmm23[15] +; AVX512BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm7[8],xmm4[8],xmm7[9],xmm4[9],xmm7[10],xmm4[10],xmm7[11],xmm4[11],xmm7[12],xmm4[12],xmm7[13],xmm4[13],xmm7[14],xmm4[14],xmm7[15],xmm4[15] +; AVX512BW-FCP-NEXT: vpermw %zmm4, %zmm13, %zmm4 +; AVX512BW-FCP-NEXT: vpermw %zmm3, %zmm14, %zmm4 {%k2} +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm9[0],xmm6[0],xmm9[1],xmm6[1],xmm9[2],xmm6[2],xmm9[3],xmm6[3],xmm9[4],xmm6[4],xmm9[5],xmm6[5],xmm9[6],xmm6[6],xmm9[7],xmm6[7] +; AVX512BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm5 = 
xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3],xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7] +; AVX512BW-FCP-NEXT: vpermw %zmm5, %zmm13, %zmm5 +; AVX512BW-FCP-NEXT: vpermw %zmm3, %zmm14, %zmm5 {%k2} +; AVX512BW-FCP-NEXT: movw $-21846, %ax # imm = 0xAAAA +; AVX512BW-FCP-NEXT: kmovd %eax, %k1 +; AVX512BW-FCP-NEXT: vmovdqa32 %zmm12, %zmm1 {%k1} +; AVX512BW-FCP-NEXT: vmovdqa32 %zmm16, %zmm15 {%k1} +; AVX512BW-FCP-NEXT: vmovdqa32 %zmm18, %zmm17 {%k1} +; AVX512BW-FCP-NEXT: vmovdqa32 %zmm24, %zmm19 {%k1} +; AVX512BW-FCP-NEXT: vmovdqa32 %zmm21, %zmm20 {%k1} +; AVX512BW-FCP-NEXT: vmovdqa32 %zmm8, %zmm22 {%k1} +; AVX512BW-FCP-NEXT: vmovdqa32 %zmm4, %zmm0 {%k1} +; AVX512BW-FCP-NEXT: vmovdqa32 %zmm5, %zmm2 {%k1} ; AVX512BW-FCP-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512BW-FCP-NEXT: vmovdqa64 %zmm2, (%rax) -; AVX512BW-FCP-NEXT: vmovdqa64 %zmm19, 192(%rax) -; AVX512BW-FCP-NEXT: vmovdqa64 %zmm21, 128(%rax) -; AVX512BW-FCP-NEXT: vmovdqa64 %zmm18, 320(%rax) -; AVX512BW-FCP-NEXT: vmovdqa64 %zmm20, 256(%rax) +; AVX512BW-FCP-NEXT: vmovdqa64 %zmm0, 192(%rax) +; AVX512BW-FCP-NEXT: vmovdqa64 %zmm22, 128(%rax) +; AVX512BW-FCP-NEXT: vmovdqa64 %zmm20, 320(%rax) +; AVX512BW-FCP-NEXT: vmovdqa64 %zmm19, 256(%rax) ; AVX512BW-FCP-NEXT: vmovdqa64 %zmm17, 448(%rax) -; AVX512BW-FCP-NEXT: vmovdqa64 %zmm16, 384(%rax) -; AVX512BW-FCP-NEXT: vmovdqa64 %zmm8, 64(%rax) +; AVX512BW-FCP-NEXT: vmovdqa64 %zmm15, 384(%rax) +; AVX512BW-FCP-NEXT: vmovdqa64 %zmm1, 64(%rax) ; AVX512BW-FCP-NEXT: vzeroupper ; AVX512BW-FCP-NEXT: retq ; @@ -9336,234 +9276,172 @@ define void @store_i8_stride8_vf64(ptr %in.vecptr0, ptr %in.vecptr1, ptr %in.vec ; AVX512DQ-BW-FCP: # %bb.0: ; AVX512DQ-BW-FCP-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512DQ-BW-FCP-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; AVX512DQ-BW-FCP-NEXT: vmovdqa (%r10), %xmm0 -; AVX512DQ-BW-FCP-NEXT: vmovdqa 16(%r10), %xmm14 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%r10), %xmm18 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%r10), %xmm17 -; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rax), %xmm1 -; AVX512DQ-BW-FCP-NEXT: vmovdqa 16(%rax), %xmm15 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%rax), %xmm19 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%rax), %xmm20 -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm2, %ymm2, %ymm2 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm2, %zmm2, %zmm3 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,0,1,0,1,0,1,8,9,10,11,4,5,2,3,0,1,4,5,0,1,4,5,8,9,10,11,4,5,6,7,0,1,2,3,8,9,8,9,8,9,8,9,12,13,10,11,0,1,2,3,8,9,12,13,8,9,12,13,12,13,14,15] -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm2, %zmm3, %zmm6 -; AVX512DQ-BW-FCP-NEXT: vmovdqa (%r9), %xmm3 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%r9), %xmm21 -; AVX512DQ-BW-FCP-NEXT: vmovdqa (%r8), %xmm4 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%r8), %xmm22 -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm3[8],xmm4[9],xmm3[9],xmm4[10],xmm3[10],xmm4[11],xmm3[11],xmm4[12],xmm3[12],xmm4[13],xmm3[13],xmm4[14],xmm3[14],xmm4[15],xmm3[15] -; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm5, %ymm5, %ymm5 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm5, %zmm5, %zmm7 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 {{.*#+}} zmm5 = [0,1,2,3,0,1,2,3,8,9,10,11,2,3,6,7,4,5,2,3,4,5,2,3,8,9,10,11,6,7,6,7,0,1,2,3,8,9,10,11,8,9,10,11,10,11,14,15,0,1,2,3,12,13,10,11,12,13,10,11,14,15,14,15] -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm5, %zmm7, %zmm16 -; 
AVX512DQ-BW-FCP-NEXT: movl $-2004318072, %eax # imm = 0x88888888 -; AVX512DQ-BW-FCP-NEXT: kmovd %eax, %k1 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm6, %zmm16 {%k1} -; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rcx), %xmm6 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%rcx), %xmm23 +; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rsi), %xmm0 +; AVX512DQ-BW-FCP-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%rsi), %xmm20 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%rsi), %xmm17 +; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %xmm2 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%rdi), %xmm21 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%rdi), %xmm18 +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] +; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm3 +; AVX512DQ-BW-FCP-NEXT: vpmovsxwq {{.*#+}} ymm4 = [2312,2826,3340,3854] +; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm4, %ymm3, %ymm3 +; AVX512DQ-BW-FCP-NEXT: vpmovsxwq {{.*#+}} xmm5 = [1284,1798] +; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm5, %xmm0, %xmm6 +; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0 +; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm1 +; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rcx), %xmm3 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%rcx), %xmm22 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%rcx), %xmm19 ; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdx), %xmm7 -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm8 = xmm7[8],xmm6[8],xmm7[9],xmm6[9],xmm7[10],xmm6[10],xmm7[11],xmm6[11],xmm7[12],xmm6[12],xmm7[13],xmm6[13],xmm7[14],xmm6[14],xmm7[15],xmm6[15] -; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm8, %ymm8, %ymm8 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm8, %zmm8, %zmm8 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 {{.*#+}} zmm9 = [0,1,0,1,4,5,2,3,4,5,2,3,12,13,14,15,0,1,4,5,4,5,6,7,4,5,6,7,12,13,14,15,8,9,8,9,4,5,6,7,12,13,10,11,12,13,10,11,8,9,12,13,4,5,6,7,12,13,14,15,12,13,14,15] -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm9, %zmm8, %zmm24 -; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rsi), %xmm10 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%rsi), %xmm25 -; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %xmm11 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%rdi), %xmm28 -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm8 = xmm11[8],xmm10[8],xmm11[9],xmm10[9],xmm11[10],xmm10[10],xmm11[11],xmm10[11],xmm11[12],xmm10[12],xmm11[13],xmm10[13],xmm11[14],xmm10[14],xmm11[15],xmm10[15] -; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm8, %ymm8, %ymm13 -; AVX512DQ-BW-FCP-NEXT: vpmovsxwq {{.*#+}} ymm12 = [2312,2826,3340,3854] -; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm12, %ymm13, %ymm26 -; AVX512DQ-BW-FCP-NEXT: vpmovsxwq {{.*#+}} xmm13 = [1284,1798] -; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm13, %xmm8, %xmm27 -; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm27, %ymm8, %ymm8 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm26, %zmm8, %zmm8 -; AVX512DQ-BW-FCP-NEXT: movl $572662306, %eax # imm = 0x22222222 -; AVX512DQ-BW-FCP-NEXT: kmovd %eax, %k2 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm24, %zmm8 {%k2} -; AVX512DQ-BW-FCP-NEXT: movw $-21846, %ax # imm = 0xAAAA -; AVX512DQ-BW-FCP-NEXT: kmovd %eax, %k3 -; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm16, %zmm8 {%k3} -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm16 = 
xmm20[0],xmm17[0],xmm20[1],xmm17[1],xmm20[2],xmm17[2],xmm20[3],xmm17[3],xmm20[4],xmm17[4],xmm20[5],xmm17[5],xmm20[6],xmm17[6],xmm20[7],xmm17[7] -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm16, %ymm16, %ymm16 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm16, %zmm16, %zmm16 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm2, %zmm16, %zmm16 -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm24 = xmm22[0],xmm21[0],xmm22[1],xmm21[1],xmm22[2],xmm21[2],xmm22[3],xmm21[3],xmm22[4],xmm21[4],xmm22[5],xmm21[5],xmm22[6],xmm21[6],xmm22[7],xmm21[7] -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm24, %ymm24, %ymm24 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm24, %zmm24, %zmm24 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm5, %zmm24, %zmm24 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm16, %zmm24 {%k1} -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm16 = xmm28[0],xmm25[0],xmm28[1],xmm25[1],xmm28[2],xmm25[2],xmm28[3],xmm25[3],xmm28[4],xmm25[4],xmm28[5],xmm25[5],xmm28[6],xmm25[6],xmm28[7],xmm25[7] -; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm13, %xmm16, %xmm26 -; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm27 = xmm16[0],zero,zero,zero,xmm16[1],zero,zero,zero -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm26, %ymm27, %ymm26 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%rdx), %xmm30 -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm16, %ymm16, %ymm16 -; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm12, %ymm16, %ymm16 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm16, %zmm26, %zmm16 -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm26 = xmm30[0],xmm23[0],xmm30[1],xmm23[1],xmm30[2],xmm23[2],xmm30[3],xmm23[3],xmm30[4],xmm23[4],xmm30[5],xmm23[5],xmm30[6],xmm23[6],xmm30[7],xmm23[7] -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm26, %ymm26, %ymm26 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm26, %zmm26, %zmm26 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm9, %zmm26, %zmm26 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm26, %zmm16 {%k2} -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%r9), %xmm26 -; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm24, %zmm16 {%k3} -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%r8), %xmm27 -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm17 = xmm20[8],xmm17[8],xmm20[9],xmm17[9],xmm20[10],xmm17[10],xmm20[11],xmm17[11],xmm20[12],xmm17[12],xmm20[13],xmm17[13],xmm20[14],xmm17[14],xmm20[15],xmm17[15] -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%rcx), %xmm24 -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm20 = xmm22[8],xmm21[8],xmm22[9],xmm21[9],xmm22[10],xmm21[10],xmm22[11],xmm21[11],xmm22[12],xmm21[12],xmm22[13],xmm21[13],xmm22[14],xmm21[14],xmm22[15],xmm21[15] -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%rsi), %xmm21 -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm17, %ymm17, %ymm17 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm17, %zmm17, %zmm17 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm2, %zmm17, %zmm17 -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm20, %ymm20, %ymm20 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm20, %zmm20, %zmm20 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm5, %zmm20, %zmm20 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm17, %zmm20 {%k1} -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%rdi), %xmm29 -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm17 = xmm30[8],xmm23[8],xmm30[9],xmm23[9],xmm30[10],xmm23[10],xmm30[11],xmm23[11],xmm30[12],xmm23[12],xmm30[13],xmm23[13],xmm30[14],xmm23[14],xmm30[15],xmm23[15] +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%rdx), %xmm23 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%rdx), %xmm24 +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm7[8],xmm3[8],xmm7[9],xmm3[9],xmm7[10],xmm3[10],xmm7[11],xmm3[11],xmm7[12],xmm3[12],xmm7[13],xmm3[13],xmm7[14],xmm3[14],xmm7[15],xmm3[15] +; AVX512DQ-BW-FCP-NEXT: 
vpmovsxbw {{.*#+}} zmm8 = [0,0,2,1,2,1,6,7,0,2,2,3,2,3,6,7,4,4,2,3,6,5,6,5,4,6,2,3,6,7,6,7] +; AVX512DQ-BW-FCP-NEXT: movl $572662306, %r11d # imm = 0x22222222 +; AVX512DQ-BW-FCP-NEXT: kmovd %r11d, %k1 +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm6, %zmm8, %zmm1 {%k1} +; AVX512DQ-BW-FCP-NEXT: vmovdqa (%r10), %xmm6 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%r10), %xmm25 +; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rax), %xmm9 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%rax), %xmm26 +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm15 = xmm9[8],xmm6[8],xmm9[9],xmm6[9],xmm9[10],xmm6[10],xmm9[11],xmm6[11],xmm9[12],xmm6[12],xmm9[13],xmm6[13],xmm9[14],xmm6[14],xmm9[15],xmm6[15] +; AVX512DQ-BW-FCP-NEXT: vmovdqa (%r9), %xmm10 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%r9), %xmm27 +; AVX512DQ-BW-FCP-NEXT: vmovdqa (%r8), %xmm11 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 48(%r8), %xmm28 +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm12 = xmm11[8],xmm10[8],xmm11[9],xmm10[9],xmm11[10],xmm10[10],xmm11[11],xmm10[11],xmm11[12],xmm10[12],xmm11[13],xmm10[13],xmm11[14],xmm10[14],xmm11[15],xmm10[15] +; AVX512DQ-BW-FCP-NEXT: vpmovsxbw {{.*#+}} zmm13 = [0,1,0,1,4,5,1,3,2,1,2,1,4,5,3,3,0,1,4,5,4,5,5,7,0,1,6,5,6,5,7,7] +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm12, %zmm13, %zmm12 +; AVX512DQ-BW-FCP-NEXT: vpmovsxbw {{.*#+}} zmm14 = [0,0,0,0,4,5,2,1,0,2,0,2,4,5,2,3,0,1,4,4,4,4,6,5,0,1,4,6,4,6,6,7] +; AVX512DQ-BW-FCP-NEXT: movl $-2004318072, %r11d # imm = 0x88888888 +; AVX512DQ-BW-FCP-NEXT: kmovd %r11d, %k2 +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm15, %zmm14, %zmm12 {%k2} +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm15 = xmm18[0],xmm17[0],xmm18[1],xmm17[1],xmm18[2],xmm17[2],xmm18[3],xmm17[3],xmm18[4],xmm17[4],xmm18[5],xmm17[5],xmm18[6],xmm17[6],xmm18[7],xmm17[7] +; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm15, %ymm15, %ymm16 +; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm4, %ymm16, %ymm16 +; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm5, %xmm15, %xmm29 +; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm15 = xmm15[0],zero,zero,zero,xmm15[1],zero,zero,zero +; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm29, %ymm15, %ymm15 +; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm16, %zmm15, %zmm15 +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm16 = xmm24[0],xmm19[0],xmm24[1],xmm19[1],xmm24[2],xmm19[2],xmm24[3],xmm19[3],xmm24[4],xmm19[4],xmm24[5],xmm19[5],xmm24[6],xmm19[6],xmm24[7],xmm19[7] +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm16, %zmm8, %zmm15 {%k1} +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm29 = xmm26[0],xmm25[0],xmm26[1],xmm25[1],xmm26[2],xmm25[2],xmm26[3],xmm25[3],xmm26[4],xmm25[4],xmm26[5],xmm25[5],xmm26[6],xmm25[6],xmm26[7],xmm25[7] +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm16 = xmm28[0],xmm27[0],xmm28[1],xmm27[1],xmm28[2],xmm27[2],xmm28[3],xmm27[3],xmm28[4],xmm27[4],xmm28[5],xmm27[5],xmm28[6],xmm27[6],xmm28[7],xmm27[7] +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm16, %zmm13, %zmm16 +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm29, %zmm14, %zmm16 {%k2} +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm17 = xmm18[8],xmm17[8],xmm18[9],xmm17[9],xmm18[10],xmm17[10],xmm18[11],xmm17[11],xmm18[12],xmm17[12],xmm18[13],xmm17[13],xmm18[14],xmm17[14],xmm18[15],xmm17[15] +; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm5, %xmm17, %xmm18 +; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm29 = xmm17[0],zero,zero,zero,xmm17[1],zero,zero,zero +; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm18, %ymm29, %ymm18 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%r10), %xmm29 ; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm17, %ymm17, %ymm17 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm17, %zmm17, %zmm17 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm9, %zmm17, %zmm22 -; 
AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm17 = xmm28[8],xmm25[8],xmm28[9],xmm25[9],xmm28[10],xmm25[10],xmm28[11],xmm25[11],xmm28[12],xmm25[12],xmm28[13],xmm25[13],xmm28[14],xmm25[14],xmm28[15],xmm25[15] -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm17, %ymm17, %ymm23 -; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm12, %ymm23, %ymm23 -; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm13, %xmm17, %xmm25 -; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm17 = xmm17[0],zero,zero,zero,xmm17[1],zero,zero,zero -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm25, %ymm17, %ymm17 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm23, %zmm17, %zmm17 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm22, %zmm17 {%k2} -; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm20, %zmm17 {%k3} -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm20 = xmm19[0],xmm18[0],xmm19[1],xmm18[1],xmm19[2],xmm18[2],xmm19[3],xmm18[3],xmm19[4],xmm18[4],xmm19[5],xmm18[5],xmm19[6],xmm18[6],xmm19[7],xmm18[7] -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm20, %ymm20, %ymm20 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm20, %zmm20, %zmm20 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm2, %zmm20, %zmm20 -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm22 = xmm27[0],xmm26[0],xmm27[1],xmm26[1],xmm27[2],xmm26[2],xmm27[3],xmm26[3],xmm27[4],xmm26[4],xmm27[5],xmm26[5],xmm27[6],xmm26[6],xmm27[7],xmm26[7] -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm22, %ymm22, %ymm22 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm22, %zmm22, %zmm22 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm5, %zmm22, %zmm22 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm20, %zmm22 {%k1} -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm20 = xmm29[0],xmm21[0],xmm29[1],xmm21[1],xmm29[2],xmm21[2],xmm29[3],xmm21[3],xmm29[4],xmm21[4],xmm29[5],xmm21[5],xmm29[6],xmm21[6],xmm29[7],xmm21[7] -; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm13, %xmm20, %xmm23 +; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm4, %ymm17, %ymm17 +; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm17, %zmm18, %zmm17 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%rax), %xmm30 +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm18 = xmm24[8],xmm19[8],xmm24[9],xmm19[9],xmm24[10],xmm19[10],xmm24[11],xmm19[11],xmm24[12],xmm19[12],xmm24[13],xmm19[13],xmm24[14],xmm19[14],xmm24[15],xmm19[15] +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%r9), %xmm31 +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm18, %zmm8, %zmm17 {%k1} +; AVX512DQ-BW-FCP-NEXT: vmovdqa 32(%r8), %xmm0 +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm19 = xmm26[8],xmm25[8],xmm26[9],xmm25[9],xmm26[10],xmm25[10],xmm26[11],xmm25[11],xmm26[12],xmm25[12],xmm26[13],xmm25[13],xmm26[14],xmm25[14],xmm26[15],xmm25[15] +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm18 = xmm28[8],xmm27[8],xmm28[9],xmm27[9],xmm28[10],xmm27[10],xmm28[11],xmm27[11],xmm28[12],xmm27[12],xmm28[13],xmm27[13],xmm28[14],xmm27[14],xmm28[15],xmm27[15] +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm18, %zmm13, %zmm18 +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm19, %zmm14, %zmm18 {%k2} +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm19 = xmm21[0],xmm20[0],xmm21[1],xmm20[1],xmm21[2],xmm20[2],xmm21[3],xmm20[3],xmm21[4],xmm20[4],xmm21[5],xmm20[5],xmm21[6],xmm20[6],xmm21[7],xmm20[7] +; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm19, %ymm19, %ymm24 +; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm4, %ymm24, %ymm24 +; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm5, %xmm19, %xmm25 +; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm19 = xmm19[0],zero,zero,zero,xmm19[1],zero,zero,zero +; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm25, %ymm19, %ymm19 +; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm24, %zmm19, %zmm19 +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm24 = 
xmm23[0],xmm22[0],xmm23[1],xmm22[1],xmm23[2],xmm22[2],xmm23[3],xmm22[3],xmm23[4],xmm22[4],xmm23[5],xmm22[5],xmm23[6],xmm22[6],xmm23[7],xmm22[7] +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm24, %zmm8, %zmm19 {%k1} +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm25 = xmm30[0],xmm29[0],xmm30[1],xmm29[1],xmm30[2],xmm29[2],xmm30[3],xmm29[3],xmm30[4],xmm29[4],xmm30[5],xmm29[5],xmm30[6],xmm29[6],xmm30[7],xmm29[7] +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm24 = xmm0[0],xmm31[0],xmm0[1],xmm31[1],xmm0[2],xmm31[2],xmm0[3],xmm31[3],xmm0[4],xmm31[4],xmm0[5],xmm31[5],xmm0[6],xmm31[6],xmm0[7],xmm31[7] +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm24, %zmm13, %zmm24 +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm25, %zmm14, %zmm24 {%k2} +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm20 = xmm21[8],xmm20[8],xmm21[9],xmm20[9],xmm21[10],xmm20[10],xmm21[11],xmm20[11],xmm21[12],xmm20[12],xmm21[13],xmm20[13],xmm21[14],xmm20[14],xmm21[15],xmm20[15] +; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm5, %xmm20, %xmm21 ; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm25 = xmm20[0],zero,zero,zero,xmm20[1],zero,zero,zero -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm23, %ymm25, %ymm23 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 32(%rdx), %xmm28 +; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm21, %ymm25, %ymm21 ; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm20, %ymm20, %ymm20 -; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm12, %ymm20, %ymm20 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm20, %zmm23, %zmm20 -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm23 = xmm28[0],xmm24[0],xmm28[1],xmm24[1],xmm28[2],xmm24[2],xmm28[3],xmm24[3],xmm28[4],xmm24[4],xmm28[5],xmm24[5],xmm28[6],xmm24[6],xmm28[7],xmm24[7] -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm23, %ymm23, %ymm23 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm23, %zmm23, %zmm23 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm9, %zmm23, %zmm23 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm23, %zmm20 {%k2} -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 16(%r9), %xmm23 -; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm22, %zmm20 {%k3} -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 16(%r8), %xmm25 -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm18 = xmm19[8],xmm18[8],xmm19[9],xmm18[9],xmm19[10],xmm18[10],xmm19[11],xmm18[11],xmm19[12],xmm18[12],xmm19[13],xmm18[13],xmm19[14],xmm18[14],xmm19[15],xmm18[15] -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 16(%rcx), %xmm19 -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm26 = xmm27[8],xmm26[8],xmm27[9],xmm26[9],xmm27[10],xmm26[10],xmm27[11],xmm26[11],xmm27[12],xmm26[12],xmm27[13],xmm26[13],xmm27[14],xmm26[14],xmm27[15],xmm26[15] -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 16(%rsi), %xmm22 -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm18, %ymm18, %ymm18 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm18, %zmm18, %zmm18 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm2, %zmm18, %zmm18 -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm26, %ymm26, %ymm26 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm26, %zmm26, %zmm26 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm5, %zmm26, %zmm27 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm18, %zmm27 {%k1} -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 16(%rdi), %xmm26 -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm18 = xmm28[8],xmm24[8],xmm28[9],xmm24[9],xmm28[10],xmm24[10],xmm28[11],xmm24[11],xmm28[12],xmm24[12],xmm28[13],xmm24[13],xmm28[14],xmm24[14],xmm28[15],xmm24[15] -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm18, %ymm18, %ymm18 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm18, %zmm18, %zmm18 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm9, %zmm18, %zmm24 -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm18 = 
xmm29[8],xmm21[8],xmm29[9],xmm21[9],xmm29[10],xmm21[10],xmm29[11],xmm21[11],xmm29[12],xmm21[12],xmm29[13],xmm21[13],xmm29[14],xmm21[14],xmm29[15],xmm21[15] -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm18, %ymm18, %ymm21 -; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm12, %ymm21, %ymm21 -; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm13, %xmm18, %xmm28 -; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm18 = xmm18[0],zero,zero,zero,xmm18[1],zero,zero,zero -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm28, %ymm18, %ymm18 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm21, %zmm18, %zmm18 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm24, %zmm18 {%k2} -; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm27, %zmm18 {%k3} -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm21 = xmm15[0],xmm14[0],xmm15[1],xmm14[1],xmm15[2],xmm14[2],xmm15[3],xmm14[3],xmm15[4],xmm14[4],xmm15[5],xmm14[5],xmm15[6],xmm14[6],xmm15[7],xmm14[7] -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm21, %ymm21, %ymm21 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm21, %zmm21, %zmm21 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm2, %zmm21, %zmm21 -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm24 = xmm25[0],xmm23[0],xmm25[1],xmm23[1],xmm25[2],xmm23[2],xmm25[3],xmm23[3],xmm25[4],xmm23[4],xmm25[5],xmm23[5],xmm25[6],xmm23[6],xmm25[7],xmm23[7] -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm24, %ymm24, %ymm24 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm24, %zmm24, %zmm24 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm5, %zmm24, %zmm24 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm21, %zmm24 {%k1} -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm21 = xmm26[0],xmm22[0],xmm26[1],xmm22[1],xmm26[2],xmm22[2],xmm26[3],xmm22[3],xmm26[4],xmm22[4],xmm26[5],xmm22[5],xmm26[6],xmm22[6],xmm26[7],xmm22[7] -; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm13, %xmm21, %xmm27 -; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm28 = xmm21[0],zero,zero,zero,xmm21[1],zero,zero,zero -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm27, %ymm28, %ymm27 -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 16(%rdx), %xmm28 -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm21, %ymm21, %ymm21 -; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm12, %ymm21, %ymm21 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm21, %zmm27, %zmm21 -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm27 = xmm28[0],xmm19[0],xmm28[1],xmm19[1],xmm28[2],xmm19[2],xmm28[3],xmm19[3],xmm28[4],xmm19[4],xmm28[5],xmm19[5],xmm28[6],xmm19[6],xmm28[7],xmm19[7] -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm27, %ymm27, %ymm27 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm27, %zmm27, %zmm27 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm9, %zmm27, %zmm27 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm27, %zmm21 {%k2} -; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm24, %zmm21 {%k3} -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm14 = xmm15[8],xmm14[8],xmm15[9],xmm14[9],xmm15[10],xmm14[10],xmm15[11],xmm14[11],xmm15[12],xmm14[12],xmm15[13],xmm14[13],xmm15[14],xmm14[14],xmm15[15],xmm14[15] -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm15 = xmm25[8],xmm23[8],xmm25[9],xmm23[9],xmm25[10],xmm23[10],xmm25[11],xmm23[11],xmm25[12],xmm23[12],xmm25[13],xmm23[13],xmm25[14],xmm23[14],xmm25[15],xmm23[15] -; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm14, %ymm14, %ymm14 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm14, %zmm14, %zmm14 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm2, %zmm14, %zmm14 -; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm15, %ymm15, %ymm15 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm15, %zmm15, %zmm15 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm5, %zmm15, %zmm15 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm14, %zmm15 {%k1} -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm14 = 
xmm28[8],xmm19[8],xmm28[9],xmm19[9],xmm28[10],xmm19[10],xmm28[11],xmm19[11],xmm28[12],xmm19[12],xmm28[13],xmm19[13],xmm28[14],xmm19[14],xmm28[15],xmm19[15] -; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm19 = xmm26[8],xmm22[8],xmm26[9],xmm22[9],xmm26[10],xmm22[10],xmm26[11],xmm22[11],xmm26[12],xmm22[12],xmm26[13],xmm22[13],xmm26[14],xmm22[14],xmm26[15],xmm22[15] -; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm13, %xmm19, %xmm22 -; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm23 = xmm19[0],zero,zero,zero,xmm19[1],zero,zero,zero -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm22, %ymm23, %ymm22 -; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm19, %ymm19, %ymm19 -; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm12, %ymm19, %ymm19 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm19, %zmm22, %zmm19 -; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm14, %ymm14, %ymm14 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm14, %zmm14, %zmm14 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm9, %zmm14, %zmm14 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm14, %zmm19 {%k2} -; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm15, %zmm19 {%k3} -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm4, %ymm20, %ymm20 +; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm20, %zmm21, %zmm20 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 16(%rsi), %xmm25 +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm21 = xmm23[8],xmm22[8],xmm23[9],xmm22[9],xmm23[10],xmm22[10],xmm23[11],xmm22[11],xmm23[12],xmm22[12],xmm23[13],xmm22[13],xmm23[14],xmm22[14],xmm23[15],xmm22[15] +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 16(%rdi), %xmm23 +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm21, %zmm8, %zmm20 {%k1} +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 16(%rcx), %xmm26 +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm22 = xmm30[8],xmm29[8],xmm30[9],xmm29[9],xmm30[10],xmm29[10],xmm30[11],xmm29[11],xmm30[12],xmm29[12],xmm30[13],xmm29[13],xmm30[14],xmm29[14],xmm30[15],xmm29[15] +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm31[8],xmm0[9],xmm31[9],xmm0[10],xmm31[10],xmm0[11],xmm31[11],xmm0[12],xmm31[12],xmm0[13],xmm31[13],xmm0[14],xmm31[14],xmm0[15],xmm31[15] +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm0, %zmm13, %zmm21 +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm22, %zmm14, %zmm21 {%k2} +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm23[0],xmm25[0],xmm23[1],xmm25[1],xmm23[2],xmm25[2],xmm23[3],xmm25[3],xmm23[4],xmm25[4],xmm23[5],xmm25[5],xmm23[6],xmm25[6],xmm23[7],xmm25[7] +; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm5, %xmm0, %xmm22 +; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm27 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm22, %ymm27, %ymm22 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 16(%rdx), %xmm27 ; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm2, %zmm0, %zmm0 -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] -; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm5, %zmm1, %zmm1 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1} -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm7[0],xmm6[0],xmm7[1],xmm6[1],xmm7[2],xmm6[2],xmm7[3],xmm6[3],xmm7[4],xmm6[4],xmm7[5],xmm6[5],xmm7[6],xmm6[6],xmm7[7],xmm6[7] +; 
AVX512DQ-BW-FCP-NEXT: vpshufb %ymm4, %ymm0, %ymm0 +; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm0, %zmm22, %zmm22 +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm27[0],xmm26[0],xmm27[1],xmm26[1],xmm27[2],xmm26[2],xmm27[3],xmm26[3],xmm27[4],xmm26[4],xmm27[5],xmm26[5],xmm27[6],xmm26[6],xmm27[7],xmm26[7] +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm0, %zmm8, %zmm22 {%k1} +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm23[8],xmm25[8],xmm23[9],xmm25[9],xmm23[10],xmm25[10],xmm23[11],xmm25[11],xmm23[12],xmm25[12],xmm23[13],xmm25[13],xmm23[14],xmm25[14],xmm23[15],xmm25[15] +; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm5, %xmm0, %xmm23 +; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm25 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm23, %ymm25, %ymm23 ; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 -; AVX512DQ-BW-FCP-NEXT: vpshufb %zmm9, %zmm0, %zmm0 -; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3],xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7] -; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm2, %ymm2, %ymm3 -; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm12, %ymm3, %ymm3 -; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm13, %xmm2, %xmm4 +; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm4, %ymm0, %ymm0 +; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm0, %zmm23, %zmm0 +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm23 = xmm27[8],xmm26[8],xmm27[9],xmm26[9],xmm27[10],xmm26[10],xmm27[11],xmm26[11],xmm27[12],xmm26[12],xmm27[13],xmm26[13],xmm27[14],xmm26[14],xmm27[15],xmm26[15] +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm23, %zmm8, %zmm0 {%k1} +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{[-0-9]+}}(%r{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload +; AVX512DQ-BW-FCP-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3],xmm2[4],mem[4],xmm2[5],mem[5],xmm2[6],mem[6],xmm2[7],mem[7] +; AVX512DQ-BW-FCP-NEXT: vinserti32x4 $1, %xmm2, %ymm2, %ymm23 +; AVX512DQ-BW-FCP-NEXT: vpshufb %ymm4, %ymm23, %ymm4 +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 16(%r10), %xmm23 +; AVX512DQ-BW-FCP-NEXT: vpshufb %xmm5, %xmm2, %xmm5 ; AVX512DQ-BW-FCP-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2 -; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512DQ-BW-FCP-NEXT: vmovdqu16 %zmm0, %zmm2 {%k2} -; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm1, %zmm2 {%k3} +; AVX512DQ-BW-FCP-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 +; AVX512DQ-BW-FCP-NEXT: vmovdqa 16(%rax), %xmm5 +; AVX512DQ-BW-FCP-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512DQ-BW-FCP-NEXT: vmovdqa 16(%r9), %xmm4 +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7] +; AVX512DQ-BW-FCP-NEXT: vmovdqa 16(%r8), %xmm7 +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm3, %zmm8, %zmm2 {%k1} +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm5[0],xmm23[0],xmm5[1],xmm23[1],xmm5[2],xmm23[2],xmm5[3],xmm23[3],xmm5[4],xmm23[4],xmm5[5],xmm23[5],xmm5[6],xmm23[6],xmm5[7],xmm23[7] +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm8 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3],xmm7[4],xmm4[4],xmm7[5],xmm4[5],xmm7[6],xmm4[6],xmm7[7],xmm4[7] +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm8, %zmm13, %zmm8 +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm3, %zmm14, %zmm8 {%k2} +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} 
xmm3 = xmm5[8],xmm23[8],xmm5[9],xmm23[9],xmm5[10],xmm23[10],xmm5[11],xmm23[11],xmm5[12],xmm23[12],xmm5[13],xmm23[13],xmm5[14],xmm23[14],xmm5[15],xmm23[15] +; AVX512DQ-BW-FCP-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm7[8],xmm4[8],xmm7[9],xmm4[9],xmm7[10],xmm4[10],xmm7[11],xmm4[11],xmm7[12],xmm4[12],xmm7[13],xmm4[13],xmm7[14],xmm4[14],xmm7[15],xmm4[15] +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm4, %zmm13, %zmm4 +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm3, %zmm14, %zmm4 {%k2} +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm9[0],xmm6[0],xmm9[1],xmm6[1],xmm9[2],xmm6[2],xmm9[3],xmm6[3],xmm9[4],xmm6[4],xmm9[5],xmm6[5],xmm9[6],xmm6[6],xmm9[7],xmm6[7] +; AVX512DQ-BW-FCP-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm11[0],xmm10[0],xmm11[1],xmm10[1],xmm11[2],xmm10[2],xmm11[3],xmm10[3],xmm11[4],xmm10[4],xmm11[5],xmm10[5],xmm11[6],xmm10[6],xmm11[7],xmm10[7] +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm5, %zmm13, %zmm5 +; AVX512DQ-BW-FCP-NEXT: vpermw %zmm3, %zmm14, %zmm5 {%k2} +; AVX512DQ-BW-FCP-NEXT: movw $-21846, %ax # imm = 0xAAAA +; AVX512DQ-BW-FCP-NEXT: kmovd %eax, %k1 +; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm12, %zmm1 {%k1} +; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm16, %zmm15 {%k1} +; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm18, %zmm17 {%k1} +; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm24, %zmm19 {%k1} +; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm21, %zmm20 {%k1} +; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm8, %zmm22 {%k1} +; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm4, %zmm0 {%k1} +; AVX512DQ-BW-FCP-NEXT: vmovdqa32 %zmm5, %zmm2 {%k1} ; AVX512DQ-BW-FCP-NEXT: movq {{[0-9]+}}(%rsp), %rax ; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm2, (%rax) -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm19, 192(%rax) -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm21, 128(%rax) -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm18, 320(%rax) -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm20, 256(%rax) +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm0, 192(%rax) +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm22, 128(%rax) +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm20, 320(%rax) +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm19, 256(%rax) ; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm17, 448(%rax) -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm16, 384(%rax) -; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm8, 64(%rax) +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm15, 384(%rax) +; AVX512DQ-BW-FCP-NEXT: vmovdqa64 %zmm1, 64(%rax) ; AVX512DQ-BW-FCP-NEXT: vzeroupper ; AVX512DQ-BW-FCP-NEXT: retq %in.vec0 = load <64 x i8>, ptr %in.vecptr0, align 64 diff --git a/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll b/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll index b15b76d1690c4..b154a4ff162d5 100644 --- a/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll +++ b/llvm/test/DebugInfo/roundtrip-non-instruction-debug-info.ll @@ -18,6 +18,14 @@ ; RUN: opt --passes=verify -S --try-experimental-debuginfo-iterators --write-experimental-debuginfo=true < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,NEWDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg +;; Test that the preserving flag overrides the write flag. 
+; RUN: opt --passes=verify -S --preserve-input-debuginfo-format=true --write-experimental-debuginfo=true < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK,OLDDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg
+
+; RUN: opt --passes=verify -S --write-experimental-debuginfo=true < %s \
+; RUN: | opt --passes=verify -S --preserve-input-debuginfo-format=true --write-experimental-debuginfo=false \
+; RUN: | FileCheck %s --check-prefixes=CHECK,NEWDBG --implicit-check-not=llvm.dbg --implicit-check-not=#dbg
+
 ; CHECK: @f(i32 %[[VAL_A:[0-9a-zA-Z]+]])
 ; CHECK-NEXT: entry:
 ; OLDDBG-NEXT: call void @llvm.dbg.value(metadata i32 %[[VAL_A]], metadata ![[VAR_A:[0-9]+]], metadata !DIExpression()), !dbg ![[LOC_1:[0-9]+]]
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out-no-ps.ll b/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out-no-ps.ll
index 8d96ab0212885..f75042b1a1580 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out-no-ps.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out-no-ps.ll
@@ -1,7 +1,5 @@
-; RUN: opt < %s -passes='require<profile-summary>,hwasan' -S \
-; RUN: -hwasan-selective-instrumentation=0 | FileCheck %s --check-prefix=FULL
-; RUN: opt < %s -passes='require<profile-summary>,hwasan' -S \
-; RUN: -hwasan-selective-instrumentation=1 | FileCheck %s --check-prefix=SELSAN
+; RUN: opt < %s -passes='require<profile-summary>,hwasan' -S | FileCheck %s --check-prefix=FULL
+; RUN: opt < %s -passes='require<profile-summary>,hwasan' -S -hwasan-percentile-cutoff-hot=990000 | FileCheck %s --check-prefix=SELSAN

 ; FULL: @not_sanitized
 ; FULL-NEXT: %x = alloca i8, i64 4
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll b/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll
index 28e43a99883e5..f2661a02da7a0 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll
@@ -1,23 +1,19 @@
-; RUN: opt < %s -passes='require<profile-summary>,hwasan' -S -hwasan-selective-instrumentation=1 \
-; RUN: | FileCheck %s --check-prefix=DEFAULT
-; RUN: opt < %s -passes='require<profile-summary>,hwasan' -S -hwasan-selective-instrumentation=1 \
-; RUN: -hwasan-percentile-cutoff-hot=700000 | FileCheck %s --check-prefix=HOT_RATE
-; RUN: opt < %s -passes='require<profile-summary>,hwasan' -S -hwasan-selective-instrumentation=1 \
-; RUN: -hwasan-random-skip-rate=0.0 | FileCheck %s --check-prefix=RANDOM_RATE_0
-; RUN: opt < %s -passes='require<profile-summary>,hwasan' -S -hwasan-selective-instrumentation=1 \
-; RUN: -hwasan-random-skip-rate=1.0 | FileCheck %s --check-prefix=RANDOM_RATE_1
-
-; DEFAULT: @sanitized
-; DEFAULT-NEXT: %x = alloca i8, i64 4
-
-; HOT_RATE: @sanitized
-; HOT_RATE-NEXT: @__hwasan_tls
-
-; RANDOM_RATE_0: @sanitized
-; RANDOM_RATE_0-NEXT: @__hwasan_tls
-
-; RANDOM_RATE_1: @sanitized
-; RANDOM_RATE_1-NEXT: %x = alloca i8, i64 4
+; RUN: opt < %s -passes='require<profile-summary>,hwasan' -S -hwasan-percentile-cutoff-hot=700000 | FileCheck %s --check-prefix=HOT70
+; RUN: opt < %s -passes='require<profile-summary>,hwasan' -S -hwasan-percentile-cutoff-hot=990000 | FileCheck %s --check-prefix=HOT99
+; RUN: opt < %s -passes='require<profile-summary>,hwasan' -S -hwasan-random-rate=1.0 | FileCheck %s --check-prefix=ALL
+; RUN: opt < %s -passes='require<profile-summary>,hwasan' -S -hwasan-random-rate=0.0 | FileCheck %s --check-prefix=NONE
+
+; HOT70: @sanitized
+; HOT70-NEXT: @__hwasan_tls
+
+; HOT99: @sanitized
+; HOT99-NEXT: %x = alloca i8, i64 4
+
+; ALL: @sanitized
+; ALL-NEXT: @__hwasan_tls
+
+; NONE: @sanitized
+; NONE-NEXT: %x = alloca i8, i64 4

 declare void @use(ptr)
diff --git a/llvm/test/Instrumentation/MemorySanitizer/overflow.ll
b/llvm/test/Instrumentation/MemorySanitizer/overflow.ll new file mode 100644 index 0000000000000..b1304faec3df0 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/overflow.ll @@ -0,0 +1,163 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define {i64, i1} @test_sadd_with_overflow(i64 %a, i64 %b) #0 { +; CHECK-LABEL: define { i64, i1 } @test_sadd_with_overflow( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { i64, i1 } [[RES]] +; + %res = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + ret { i64, i1 } %res +} + +define {i64, i1} @test_uadd_with_overflow(i64 %a, i64 %b) #0 { +; CHECK-LABEL: define { i64, i1 } @test_uadd_with_overflow( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { i64, i1 } [[RES]] +; + %res = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + ret { i64, i1 } %res +} + +define {i64, i1} @test_smul_with_overflow(i64 %a, i64 %b) #0 { +; CHECK-LABEL: define { i64, i1 } @test_smul_with_overflow( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: 
unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { i64, i1 } [[RES]] +; + %res = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %a, i64 %b) + ret { i64, i1 } %res +} +define {i64, i1} @test_umul_with_overflow(i64 %a, i64 %b) #0 { +; CHECK-LABEL: define { i64, i1 } @test_umul_with_overflow( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { i64, i1 } [[RES]] +; + %res = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %a, i64 %b) + ret { i64, i1 } %res +} +define {i64, i1} @test_ssub_with_overflow(i64 %a, i64 %b) #0 { +; CHECK-LABEL: define { i64, i1 } @test_ssub_with_overflow( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { i64, i1 } [[RES]] +; + %res = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + ret { i64, i1 } %res +} +define {i64, i1} @test_usub_with_overflow(i64 %a, i64 %b) #0 { +; CHECK-LABEL: define { i64, i1 } @test_usub_with_overflow( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]] +; CHECK: 3: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 4: +; CHECK-NEXT: [[RES:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: store { i64, i1 } zeroinitializer, ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret { i64, i1 } [[RES]] +; + %res = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + ret { i64, i1 } %res +} + +define {<4 x i32>, <4 x i1>} @test_sadd_with_overflow_vec(<4 x i32> %a, <4 x i32> %b) #0 { +; CHECK-LABEL: define { <4 x i32>, <4 x i1> } @test_sadd_with_overflow_vec( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 +; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] +; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]] +; CHECK: 5: +; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]] +; CHECK-NEXT: unreachable +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> [[A]], <4 x i32> [[B]]) +; CHECK-NEXT: store { <4 x i32>, <4 x i1> } zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret { <4 x i32>, <4 x i1> } [[RES]] +; + %res = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> %a, <4 x i32> %b) + ret { <4 x i32>, <4 x i1> } %res +} + +attributes #0 = { sanitize_memory } +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1000} +;. diff --git a/llvm/test/Instrumentation/MemorySanitizer/saturating.ll b/llvm/test/Instrumentation/MemorySanitizer/saturating.ll new file mode 100644 index 0000000000000..dcd8a080144ba --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/saturating.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i64 @test_sadd_sat(i64 %a, i64 %b) #0 { +; CHECK-LABEL: define i64 @test_sadd_sat( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.sadd.sat.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.sadd.sat(i64 %a, i64 %b) + ret i64 %res +} + +define i64 @test_uadd_sat(i64 %a, i64 %b) #0 { +; CHECK-LABEL: define i64 @test_uadd_sat( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr 
@__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.uadd.sat(i64 %a, i64 %b) + ret i64 %res +} + +define i64 @test_ssub_sat(i64 %a, i64 %b) #0 { +; CHECK-LABEL: define i64 @test_ssub_sat( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.ssub.sat.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.ssub.sat(i64 %a, i64 %b) + ret i64 %res +} + +define i64 @test_usub_sat(i64 %a, i64 %b) #0 { +; CHECK-LABEL: define i64 @test_usub_sat( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.usub.sat(i64 %a, i64 %b) + ret i64 %res +} + +define i64 @test_sshl_sat(i64 %a, i64 %b) #0 { +; CHECK-LABEL: define i64 @test_sshl_sat( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.sshl.sat.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.sshl.sat(i64 %a, i64 %b) + ret i64 %res +} + +define i64 @test_ushl_sat(i64 %a, i64 %b) #0 { +; CHECK-LABEL: define i64 @test_ushl_sat( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.ushl.sat.i64(i64 [[A]], i64 [[B]]) +; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret i64 [[RES]] +; + %res = call i64 @llvm.ushl.sat(i64 %a, i64 %b) + ret i64 %res +} + +define <4 x i32> @test_sadd_sat_vec(<4 x i32> %a, <4 x i32> %b) #0 { +; CHECK-LABEL: define <4 x i32> @test_sadd_sat_vec( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[A]], 
<4 x i32> [[B]]) +; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b) + ret <4 x i32> %res +} + + +attributes #0 = { sanitize_memory } diff --git a/llvm/test/MC/AArch64/SVE/pfalse-diagnostics.s b/llvm/test/MC/AArch64/SVE/pfalse-diagnostics.s index f4d95c5910d89..e44453b4c3265 100644 --- a/llvm/test/MC/AArch64/SVE/pfalse-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/pfalse-diagnostics.s @@ -17,6 +17,6 @@ pfalse pn16.b // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: pfalse pn5.d -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Expected predicate-as-counter register name with .B suffix +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register // CHECK-NEXT: pfalse pn5.d // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/Disassembler/M68k/arithmetic.txt b/llvm/test/MC/Disassembler/M68k/arithmetic.txt index 007a789b3bf0c..8142024940c69 100644 --- a/llvm/test/MC/Disassembler/M68k/arithmetic.txt +++ b/llvm/test/MC/Disassembler/M68k/arithmetic.txt @@ -89,6 +89,12 @@ # CHECK: negx.l %a2 0x40 0x8a +# CHECK: not.l %d5 +0x46 0x85 + +# CHECK: not.b %d1 +0x46 0x01 + # CHECK: or.w (18,%a4,%a0), %d3 0x86 0x74 0x88 0x12 diff --git a/llvm/test/MC/M68k/Arith/Classes/MxNOT.s b/llvm/test/MC/M68k/Arith/Classes/MxNOT.s new file mode 100644 index 0000000000000..93b473334d7b1 --- /dev/null +++ b/llvm/test/MC/M68k/Arith/Classes/MxNOT.s @@ -0,0 +1,11 @@ +; RUN: llvm-mc -triple=m68k -show-encoding %s | FileCheck %s + +; CHECK: not.b %d0 +; CHECK-SAME: encoding: [0x46,0x00] +not.b %d0 +; CHECK: not.w %d0 +; CHECK-SAME: encoding: [0x46,0x40] +not.w %d0 +; CHECK: not.l %d0 +; CHECK-SAME: encoding: [0x46,0x80] +not.l %d0 diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index 09daeee2c1b38..a8f493f781ec3 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -394,7 +394,7 @@ # CHECK: attribute 5, "rv32i2p1_zicfilp0p4" .attribute arch, "rv32i_zicfiss0p4" -# CHECK: .attribute 5, "rv32i2p1_zicfiss0p4_zicsr2p0_zimop0p1" +# CHECK: .attribute 5, "rv32i2p1_zicfiss0p4_zicsr2p0_zimop1p0" .attribute arch, "rv64i_xsfvfwmaccqqq" # CHECK: attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl32b1p0_xsfvfwmaccqqq1p0" diff --git a/llvm/test/MC/RISCV/compressed-zicfiss.s b/llvm/test/MC/RISCV/compressed-zicfiss.s index 50ea2e24083e9..2ebf9d3af3be8 100644 --- a/llvm/test/MC/RISCV/compressed-zicfiss.s +++ b/llvm/test/MC/RISCV/compressed-zicfiss.s @@ -1,12 +1,12 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zicfiss,+experimental-zcmop -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zicfiss,+zcmop -riscv-no-aliases -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+experimental-zicfiss,+experimental-zcmop < %s \ -# RUN: | llvm-objdump --mattr=+experimental-zicfiss,+experimental-zcmop -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+experimental-zicfiss,+zcmop < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zicfiss,+zcmop -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zicfiss,+experimental-zcmop -riscv-no-aliases -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zicfiss,+zcmop -riscv-no-aliases -show-encoding \ # RUN: | FileCheck 
-check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+experimental-zicfiss,+experimental-zcmop < %s \ -# RUN: | llvm-objdump --mattr=+experimental-zicfiss,+experimental-zcmop -M no-aliases -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+experimental-zicfiss,+zcmop < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zicfiss,+zcmop -M no-aliases -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s # # RUN: not llvm-mc -triple riscv32 -riscv-no-aliases -show-encoding < %s 2>&1 \ diff --git a/llvm/test/MC/RISCV/rv32zcmop-invalid.s b/llvm/test/MC/RISCV/rv32zcmop-invalid.s index 1641c8ddd00ba..71d72d59b0209 100644 --- a/llvm/test/MC/RISCV/rv32zcmop-invalid.s +++ b/llvm/test/MC/RISCV/rv32zcmop-invalid.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -triple riscv32 -mattr=+experimental-zcmop < %s 2>&1 | FileCheck %s +# RUN: not llvm-mc -triple riscv32 -mattr=+zcmop < %s 2>&1 | FileCheck %s cmop.0 # CHECK: :[[@LINE]]:1: error: unrecognized instruction mnemonic diff --git a/llvm/test/MC/RISCV/rv32zimop-invalid.s b/llvm/test/MC/RISCV/rv32zimop-invalid.s index e6c3adc4cd309..e4672016bbf76 100644 --- a/llvm/test/MC/RISCV/rv32zimop-invalid.s +++ b/llvm/test/MC/RISCV/rv32zimop-invalid.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc -triple riscv32 -mattr=+experimental-zimop < %s 2>&1 | FileCheck %s +# RUN: not llvm-mc -triple riscv32 -mattr=+zimop < %s 2>&1 | FileCheck %s # Too few operands mop.r.0 t0 # CHECK: :[[@LINE]]:1: error: too few operands for instruction diff --git a/llvm/test/MC/RISCV/rvzcmop-valid.s b/llvm/test/MC/RISCV/rvzcmop-valid.s index c26bb2959fede..c6bb4a1580825 100644 --- a/llvm/test/MC/RISCV/rvzcmop-valid.s +++ b/llvm/test/MC/RISCV/rvzcmop-valid.s @@ -1,12 +1,12 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zcmop -show-encoding \ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zcmop -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zcmop -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zcmop -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+experimental-zcmop < %s \ -# RUN: | llvm-objdump --mattr=+experimental-zcmop -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zcmop < %s \ +# RUN: | llvm-objdump --mattr=+zcmop -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+experimental-zcmop < %s \ -# RUN: | llvm-objdump --mattr=+experimental-zcmop -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zcmop < %s \ +# RUN: | llvm-objdump --mattr=+zcmop -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s # CHECK-ASM-AND-OBJ: cmop.1 diff --git a/llvm/test/MC/RISCV/rvzimop-valid.s b/llvm/test/MC/RISCV/rvzimop-valid.s index 1552936629902..deb6d41f04453 100644 --- a/llvm/test/MC/RISCV/rvzimop-valid.s +++ b/llvm/test/MC/RISCV/rvzimop-valid.s @@ -1,12 +1,12 @@ -# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zimop -show-encoding \ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+zimop -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zimop -show-encoding \ +# RUN: llvm-mc %s -triple=riscv64 -mattr=+zimop -show-encoding \ # RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+experimental-zimop < %s 
\ -# RUN: | llvm-objdump --mattr=+experimental-zimop -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zimop < %s \ +# RUN: | llvm-objdump --mattr=+zimop -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+experimental-zimop < %s \ -# RUN: | llvm-objdump --mattr=+experimental-zimop -d -r - \ +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zimop < %s \ +# RUN: | llvm-objdump --mattr=+zimop -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s # CHECK-ASM-AND-OBJ: mop.r.0 a2, a1 diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 4ab5567f62876..493350d7bd630 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -984,10 +984,10 @@ static const X86FoldTableEntry Table1[] = { {X86::RORX32ri_EVEX, X86::RORX32mi_EVEX, 0}, {X86::RORX64ri, X86::RORX64mi, 0}, {X86::RORX64ri_EVEX, X86::RORX64mi_EVEX, 0}, - {X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16}, - {X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16}, - {X86::ROUNDSDr, X86::ROUNDSDm, 0}, - {X86::ROUNDSSr, X86::ROUNDSSm, 0}, + {X86::ROUNDPDri, X86::ROUNDPDmi, TB_ALIGN_16}, + {X86::ROUNDPSri, X86::ROUNDPSmi, TB_ALIGN_16}, + {X86::ROUNDSDri, X86::ROUNDSDmi, 0}, + {X86::ROUNDSSri, X86::ROUNDSSmi, 0}, {X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16}, {X86::RSQRTSSr, X86::RSQRTSSm, 0}, {X86::SAR16r1_ND, X86::SAR16m1_ND, 0}, @@ -1791,10 +1791,10 @@ static const X86FoldTableEntry Table1[] = { {X86::VRNDSCALEPSZ128rri, X86::VRNDSCALEPSZ128rmi, 0}, {X86::VRNDSCALEPSZ256rri, X86::VRNDSCALEPSZ256rmi, 0}, {X86::VRNDSCALEPSZrri, X86::VRNDSCALEPSZrmi, 0}, - {X86::VROUNDPDYr, X86::VROUNDPDYm, 0}, - {X86::VROUNDPDr, X86::VROUNDPDm, 0}, - {X86::VROUNDPSYr, X86::VROUNDPSYm, 0}, - {X86::VROUNDPSr, X86::VROUNDPSm, 0}, + {X86::VROUNDPDYri, X86::VROUNDPDYmi, 0}, + {X86::VROUNDPDri, X86::VROUNDPDmi, 0}, + {X86::VROUNDPSYri, X86::VROUNDPSYmi, 0}, + {X86::VROUNDPSri, X86::VROUNDPSmi, 0}, {X86::VRSQRT14PDZ128r, X86::VRSQRT14PDZ128m, 0}, {X86::VRSQRT14PDZ256r, X86::VRSQRT14PDZ256m, 0}, {X86::VRSQRT14PDZr, X86::VRSQRT14PDZm, 0}, @@ -2234,8 +2234,8 @@ static const X86FoldTableEntry Table2[] = { {X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16}, {X86::PXORrr, X86::PXORrm, TB_ALIGN_16}, {X86::RCPSSr_Int, X86::RCPSSm_Int, TB_NO_REVERSE}, - {X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, TB_NO_REVERSE}, - {X86::ROUNDSSr_Int, X86::ROUNDSSm_Int, TB_NO_REVERSE}, + {X86::ROUNDSDri_Int, X86::ROUNDSDmi_Int, TB_NO_REVERSE}, + {X86::ROUNDSSri_Int, X86::ROUNDSSmi_Int, TB_NO_REVERSE}, {X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, TB_NO_REVERSE}, {X86::SBB16rr, X86::SBB16rm, 0}, {X86::SBB16rr_ND, X86::SBB16rm_ND, 0}, @@ -3778,10 +3778,10 @@ static const X86FoldTableEntry Table2[] = { {X86::VRNDSCALESHZr_Int, X86::VRNDSCALESHZm_Int, TB_NO_REVERSE}, {X86::VRNDSCALESSZr, X86::VRNDSCALESSZm, 0}, {X86::VRNDSCALESSZr_Int, X86::VRNDSCALESSZm_Int, TB_NO_REVERSE}, - {X86::VROUNDSDr, X86::VROUNDSDm, 0}, - {X86::VROUNDSDr_Int, X86::VROUNDSDm_Int, TB_NO_REVERSE}, - {X86::VROUNDSSr, X86::VROUNDSSm, 0}, - {X86::VROUNDSSr_Int, X86::VROUNDSSm_Int, TB_NO_REVERSE}, + {X86::VROUNDSDri, X86::VROUNDSDmi, 0}, + {X86::VROUNDSDri_Int, X86::VROUNDSDmi_Int, TB_NO_REVERSE}, + {X86::VROUNDSSri, X86::VROUNDSSmi, 0}, + {X86::VROUNDSSri_Int, X86::VROUNDSSmi_Int, TB_NO_REVERSE}, {X86::VRSQRT14PDZ128rkz, X86::VRSQRT14PDZ128mkz, 0}, {X86::VRSQRT14PDZ256rkz, X86::VRSQRT14PDZ256mkz, 0}, {X86::VRSQRT14PDZrkz, X86::VRSQRT14PDZmkz, 0}, diff --git 
a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll new file mode 100644 index 0000000000000..1411890e01dc5 --- /dev/null +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomicrmw-fp-vector.ll @@ -0,0 +1,1204 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx900 %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx90a %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand -mcpu=gfx940 %s | FileCheck %s + +;--------------------------------------------------------------------- +; atomicrmw fadd +;--------------------------------------------------------------------- + +define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) { +; CHECK-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x half> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <2 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x half> [[NEWLOADED]] +; + %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2 + ret <2 x half> %res +} + +define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) { +; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], 
align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x bfloat> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <2 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]] +; + %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2 + ret <2 x bfloat> %res +} + +define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) { +; CHECK-LABEL: define <2 x half> @test_atomicrmw_fadd_v2f16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x half> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x half> [[NEW]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[LOADED]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x half> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x half> [[TMP5]] +; + %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4 + ret <2 x half> %res +} + +define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align4(ptr addrspace(1) %ptr, <2 x bfloat> %value) { +; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: 
[[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x bfloat> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x bfloat> [[NEW]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x bfloat> [[TMP5]] +; + %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4 + ret <2 x bfloat> %res +} + +define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) { +; CHECK-LABEL: define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x half> [[NEWLOADED]] +; + %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2 + ret <4 x half> %res +} + +define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) { +; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; 
CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]] +; + %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2 + ret <4 x bfloat> %res +} + +define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) { +; CHECK-LABEL: define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1 +; CHECK-NEXT: 
[[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x half> [[NEWLOADED]] +; + %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4 + ret <4 x half> %res +} + +define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) { +; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]] +; + %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4 + ret <4 x bfloat> %res +} + +define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) { +; CHECK-LABEL: define <4 x half> @test_atomicrmw_fadd_v4f16_global_agent_align8( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x half> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[NEW]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LOADED]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 +; CHECK-NEXT: 
[[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <4 x half> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x half> [[TMP5]] +; + %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 8 + ret <4 x half> %res +} + +define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align8(ptr addrspace(1) %ptr, <4 x bfloat> %value) { +; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fadd_v4bf16_global_agent_align8( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd <4 x bfloat> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x bfloat> [[NEW]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[LOADED]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <4 x bfloat> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x bfloat> [[TMP5]] +; + %res = atomicrmw fadd ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 8 + ret <4 x bfloat> %res +} + +define <2 x float> @test_atomicrmw_fadd_v2f32_global_agent_align8(ptr addrspace(1) %ptr, <2 x float> %value) { +; CHECK-LABEL: define <2 x float> @test_atomicrmw_fadd_v2f32_global_agent_align8( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x float> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fadd <2 x float> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[NEW]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LOADED]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <2 x float> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x float> [[TMP5]] +; + %res = atomicrmw fadd ptr addrspace(1) %ptr, <2 x float> %value syncscope("agent") seq_cst, align 8 + ret <2 x float> %res +} + +;--------------------------------------------------------------------- +; atomicrmw fsub +;--------------------------------------------------------------------- + +define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) { +; CHECK-LABEL: define 
<2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x half> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <2 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x half> [[NEWLOADED]] +; + %res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2 + ret <2 x half> %res +} + +define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) { +; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x bfloat> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <2 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4 
+; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]] +; + %res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2 + ret <2 x bfloat> %res +} + +define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) { +; CHECK-LABEL: define <2 x half> @test_atomicrmw_fsub_v2f16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x half> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x half> [[NEW]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[LOADED]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x half> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x half> [[TMP5]] +; + %res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4 + ret <2 x half> %res +} + +define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align4(ptr addrspace(1) %ptr, <2 x bfloat> %value) { +; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x bfloat> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x bfloat> [[NEW]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP3]], i32 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x bfloat> [[TMP5]] +; + %res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4 + ret <2 x bfloat> %res +} + +define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align2(ptr 
addrspace(1) %ptr, <4 x half> %value) { +; CHECK-LABEL: define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x half> [[NEWLOADED]] +; + %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2 + ret <4 x half> %res +} + +define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) { +; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: 
[[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]] +; + %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2 + ret <4 x bfloat> %res +} + +define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) { +; CHECK-LABEL: define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x half> [[NEW]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } [[TMP7]], i1 [[TMP5]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x half> [[NEWLOADED]] +; + %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4 + ret <4 x half> %res +} + +define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) { +; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: 
atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x bfloat> [[NEW]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP4]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP7:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP7]], i1 [[TMP5]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP8]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]] +; + %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4 + ret <4 x bfloat> %res +} + +define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) { +; CHECK-LABEL: define <4 x half> @test_atomicrmw_fsub_v4f16_global_agent_align8( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x half> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[NEW]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[LOADED]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <4 x half> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x half> [[TMP5]] +; + %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 8 + ret <4 x half> %res +} + +define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align8(ptr addrspace(1) %ptr, <4 x bfloat> %value) { +; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fsub_v4bf16_global_agent_align8( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ 
[[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub <4 x bfloat> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x bfloat> [[NEW]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[LOADED]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <4 x bfloat> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x bfloat> [[TMP5]] +; + %res = atomicrmw fsub ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 8 + ret <4 x bfloat> %res +} + +define <2 x float> @test_atomicrmw_fsub_v2f32_global_agent_align8(ptr addrspace(1) %ptr, <2 x float> %value) { +; CHECK-LABEL: define <2 x float> @test_atomicrmw_fsub_v2f32_global_agent_align8( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x float> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[NEW:%.*]] = fsub <2 x float> [[LOADED]], [[VALUE]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x float> [[NEW]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[LOADED]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP3]], i64 [[TMP2]] syncscope("agent") seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0 +; CHECK-NEXT: [[TMP5]] = bitcast i64 [[NEWLOADED]] to <2 x float> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x float> [[TMP5]] +; + %res = atomicrmw fsub ptr addrspace(1) %ptr, <2 x float> %value syncscope("agent") seq_cst, align 8 + ret <2 x float> %res +} + +;--------------------------------------------------------------------- +; atomicrmw fmin +;--------------------------------------------------------------------- + +define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) { +; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP4:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]]) +; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; 
CHECK-NEXT: store <2 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x half> [[NEWLOADED]] +; + %res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2 + ret <2 x half> %res +} + +define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) { +; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP4:%.*]] = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]]) +; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <2 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]] +; + %res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2 + ret <2 x bfloat> %res +} + +define <2 x half> @test_atomicrmw_fmin_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) { +; CHECK-LABEL: define <2 x half> 
@test_atomicrmw_fmin_v2f16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x half> @llvm.minnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x half> [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x half> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x half> [[TMP6]] +; + %res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4 + ret <2 x half> %res +} + +define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align4(ptr addrspace(1) %ptr, <2 x bfloat> %value) { +; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x bfloat> [[TMP6]] +; + %res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4 + ret <2 x bfloat> %res +} + +define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) { +; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]]) +; CHECK-NEXT: [[TMP5:%.*]] = 
addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x half> [[NEWLOADED]] +; + %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2 + ret <4 x half> %res +} + +define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) { +; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]]) +; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]] +; + %res = atomicrmw 
fmin ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2 + ret <4 x bfloat> %res +} + +define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) { +; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]]) +; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x half> [[NEWLOADED]] +; + %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4 + ret <4 x half> %res +} + +define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) { +; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]]) +; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) 
[[TMP2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]] +; + %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4 + ret <4 x bfloat> %res +} + +define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) { +; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmin_v4f16_global_agent_align8( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.minnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <4 x half> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x half> [[TMP6]] +; + %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 8 + ret <4 x half> %res +} + +define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align8(ptr addrspace(1) %ptr, <4 x bfloat> %value) { +; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmin_v4bf16_global_agent_align8( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x bfloat> [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } 
[[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <4 x bfloat> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x bfloat> [[TMP6]] +; + %res = atomicrmw fmin ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 8 + ret <4 x bfloat> %res +} + +define <2 x float> @test_atomicrmw_fmin_v2f32_global_agent_align8(ptr addrspace(1) %ptr, <2 x float> %value) { +; CHECK-LABEL: define <2 x float> @test_atomicrmw_fmin_v2f32_global_agent_align8( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x float> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.minnum.v2f32(<2 x float> [[LOADED]], <2 x float> [[VALUE]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <2 x float> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x float> [[TMP6]] +; + %res = atomicrmw fmin ptr addrspace(1) %ptr, <2 x float> %value syncscope("agent") seq_cst, align 8 + ret <2 x float> %res +} + +;--------------------------------------------------------------------- +; atomicrmw fmax +;--------------------------------------------------------------------- + +define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align2(ptr addrspace(1) %ptr, <2 x half> %value) { +; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x half>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x half>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP4:%.*]] = call <2 x half> @llvm.maxnum.v2f16(<2 x half> [[LOADED]], <2 x half> [[VALUE]]) +; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <2 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <2 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x half>, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr 
addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x half>, i1 } poison, <2 x half> [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x half>, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x half>, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x half> [[NEWLOADED]] +; + %res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 2 + ret <2 x half> %res +} + +define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align2(ptr addrspace(1) %ptr, <2 x bfloat> %value) { +; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <2 x bfloat>, align 4, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP4:%.*]] = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]]) +; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <2 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <2 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x bfloat>, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <2 x bfloat>, i1 } poison, <2 x bfloat> [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <2 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <2 x bfloat>, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x bfloat> [[NEWLOADED]] +; + %res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 2 + ret <2 x bfloat> %res +} + +define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align4(ptr addrspace(1) %ptr, <2 x half> %value) { +; CHECK-LABEL: define <2 x half> @test_atomicrmw_fmax_v2f16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x half>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x half> @llvm.maxnum.v2f16(<2 x half> [[LOADED]], <2 x 
half> [[VALUE]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x half> [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x half> [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x half> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x half> [[TMP6]] +; + %res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x half> %value syncscope("agent") seq_cst, align 4 + ret <2 x half> %res +} + +define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align4(ptr addrspace(1) %ptr, <2 x bfloat> %value) { +; CHECK-LABEL: define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x bfloat>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat> [[LOADED]], <2 x bfloat> [[VALUE]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x bfloat> [[TMP2]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x bfloat> [[LOADED]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[TMP4]], i32 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 4 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i32 [[NEWLOADED]] to <2 x bfloat> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x bfloat> [[TMP6]] +; + %res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x bfloat> %value syncscope("agent") seq_cst, align 4 + ret <2 x bfloat> %res +} + +define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align2(ptr addrspace(1) %ptr, <4 x half> %value) { +; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]]) +; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr 
addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x half> [[NEWLOADED]] +; + %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 2 + ret <4 x half> %res +} + +define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align2(ptr addrspace(1) %ptr, <4 x bfloat> %value) { +; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align2( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 2 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]]) +; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } poison, <4 x bfloat> [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]] +; + %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 2 + ret <4 x bfloat> %res +} + +define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align4(ptr addrspace(1) %ptr, <4 x half> %value) { +; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = 
alloca <4 x half>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]]) +; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x half> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x half> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x half>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x half>, i1 } poison, <4 x half> [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x half>, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x half>, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x half>, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x half> [[NEWLOADED]] +; + %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 4 + ret <4 x half> %res +} + +define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align4(ptr addrspace(1) %ptr, <4 x bfloat> %value) { +; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align4( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x bfloat>, align 8, addrspace(5) +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]]) +; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: store <4 x bfloat> [[LOADED]], ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: store <4 x bfloat> [[TMP4]], ptr addrspace(5) [[TMP2]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 8, ptr [[TMP5]], ptr addrspace(5) [[TMP1]], ptr addrspace(5) [[TMP2]], i32 5, i32 5) +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP2]]) +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x bfloat>, ptr addrspace(5) [[TMP1]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[TMP1]]) +; CHECK-NEXT: [[TMP8:%.*]] = insertvalue { <4 x bfloat>, i1 } 
poison, <4 x bfloat> [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { <4 x bfloat>, i1 } [[TMP8]], i1 [[TMP6]], 1 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 1 +; CHECK-NEXT: [[NEWLOADED]] = extractvalue { <4 x bfloat>, i1 } [[TMP9]], 0 +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x bfloat> [[NEWLOADED]] +; + %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 4 + ret <4 x bfloat> %res +} + +define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align8(ptr addrspace(1) %ptr, <4 x half> %value) { +; CHECK-LABEL: define <4 x half> @test_atomicrmw_fmax_v4f16_global_agent_align8( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x half> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x half> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x half> @llvm.maxnum.v4f16(<4 x half> [[LOADED]], <4 x half> [[VALUE]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <4 x half> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x half> [[TMP6]] +; + %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x half> %value syncscope("agent") seq_cst, align 8 + ret <4 x half> %res +} + +define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align8(ptr addrspace(1) %ptr, <4 x bfloat> %value) { +; CHECK-LABEL: define <4 x bfloat> @test_atomicrmw_fmax_v4bf16_global_agent_align8( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <4 x bfloat> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <4 x bfloat> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat> [[LOADED]], <4 x bfloat> [[VALUE]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x bfloat> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x bfloat> [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <4 x bfloat> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <4 x bfloat> [[TMP6]] +; + %res = atomicrmw fmax ptr addrspace(1) %ptr, <4 x bfloat> %value syncscope("agent") seq_cst, align 8 + ret <4 x bfloat> %res +} + +define <2 x float> @test_atomicrmw_fmax_v2f32_global_agent_align8(ptr addrspace(1) %ptr, <2 x float> %value) { +; 
CHECK-LABEL: define <2 x float> @test_atomicrmw_fmax_v2f32_global_agent_align8( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]] +; CHECK: atomicrmw.start: +; CHECK-NEXT: [[LOADED:%.*]] = phi <2 x float> [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[ATOMICRMW_START]] ] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[LOADED]], <2 x float> [[VALUE]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x float> [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x float> [[LOADED]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i64 [[TMP4]], i64 [[TMP3]] syncscope("agent") seq_cst seq_cst, align 8 +; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1 +; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0 +; CHECK-NEXT: [[TMP6]] = bitcast i64 [[NEWLOADED]] to <2 x float> +; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] +; CHECK: atomicrmw.end: +; CHECK-NEXT: ret <2 x float> [[TMP6]] +; + %res = atomicrmw fmax ptr addrspace(1) %ptr, <2 x float> %value syncscope("agent") seq_cst, align 8 + ret <2 x float> %res +} diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll index 4df647cf3bb5b..7828629f9fc01 100644 --- a/llvm/test/Transforms/Attributor/nofpclass.ll +++ b/llvm/test/Transforms/Attributor/nofpclass.ll @@ -100,14 +100,24 @@ define <2 x double> @returned_strange_constant_vector_elt() { ret <2 x double> } -; Test a vector element that's an undef/poison +; Test a vector element that's undef define <3 x double> @returned_undef_constant_vector_elt() { -; CHECK-LABEL: define nofpclass(nan inf pzero sub norm) <3 x double> @returned_undef_constant_vector_elt() { +; CHECK-LABEL: define nofpclass(nan inf sub norm) <3 x double> @returned_undef_constant_vector_elt() { ; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret <3 x double> +; CHECK-NEXT: ret <3 x double> ; call void @unknown() - ret <3 x double> + ret <3 x double> +} + +; Test a vector element that's poison +define <3 x double> @returned_poison_constant_vector_elt() { +; CHECK-LABEL: define nofpclass(nan inf sub norm) <3 x double> @returned_poison_constant_vector_elt() { +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret <3 x double> +; + call void @unknown() + ret <3 x double> } define <2 x double> @returned_qnan_zero_vector() { @@ -1513,6 +1523,25 @@ define <4 x float> @insertelement_constant_chain() { ret <4 x float> %ins.3 } +define <4 x float> @insertelement_non_constant_chain(i32 %idx) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan inf nzero sub) <4 x float> @insertelement_non_constant_chain +; CHECK-SAME: (i32 [[IDX:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[INS_0:%.*]] = insertelement <4 x float> poison, float 1.000000e+00, i32 0 +; CHECK-NEXT: [[INS_1:%.*]] = insertelement <4 x float> [[INS_0]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[INS_2:%.*]] = insertelement <4 x float> [[INS_1]], float -9.000000e+00, i32 2 +; CHECK-NEXT: [[INS_4:%.*]] = insertelement <4 x float> [[INS_2]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[INS_3:%.*]] = insertelement <4 x float> [[INS_2]], float 4.000000e+00, i32 [[IDX]] +; CHECK-NEXT: ret <4 x float> [[INS_3]] +; + %ins.0 = insertelement <4 x float> poison, float 1.0, i32 0 + 
%ins.1 = insertelement <4 x float> %ins.0, float 0.0, i32 1 + %ins.2 = insertelement <4 x float> %ins.1, float -9.0, i32 2 + %ins.3 = insertelement <4 x float> %ins.2, float 3.0, i32 3 + %ins.4 = insertelement <4 x float> %ins.2, float 4.0, i32 %idx + ret <4 x float> %ins.4 +} + define @insertelement_scalable_constant_chain() { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define @insertelement_scalable_constant_chain @@ -1582,7 +1611,7 @@ define float @insertelement_extractelement_unknown(<4 x float> nofpclass(zero) % define <4 x float> @insertelement_index_oob_chain() { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define <4 x float> @insertelement_index_oob_chain +; CHECK-LABEL: define nofpclass(nan ninf nzero sub norm) <4 x float> @insertelement_index_oob_chain ; CHECK-SAME: () #[[ATTR3]] { ; CHECK-NEXT: [[INSERT:%.*]] = insertelement <4 x float> zeroinitializer, float 0x7FF0000000000000, i32 4 ; CHECK-NEXT: ret <4 x float> [[INSERT]] @@ -1771,6 +1800,32 @@ define float @shufflevector_extractelt3(<2 x float> %arg0, <2 x float> nofpclass ret float %extract } +define float @shufflevector_constantdatavector_demanded0() { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan inf zero sub nnorm) float @shufflevector_constantdatavector_demanded0 +; CHECK-SAME: () #[[ATTR3]] { +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x float> , <3 x float> poison, <2 x i32> +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x float> [[SHUFFLE]], i32 0 +; CHECK-NEXT: ret float [[EXTRACT]] +; + %shuffle = shufflevector <3 x float> , <3 x float> poison, <2 x i32> + %extract = extractelement <2 x float> %shuffle, i32 0 + ret float %extract +} + +define float @shufflevector_constantdatavector_demanded1() { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan inf nzero sub norm) float @shufflevector_constantdatavector_demanded1 +; CHECK-SAME: () #[[ATTR3]] { +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x float> , <3 x float> poison, <2 x i32> +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <2 x float> [[SHUFFLE]], i32 1 +; CHECK-NEXT: ret float [[EXTRACT]] +; + %shuffle = shufflevector <3 x float> , <3 x float> poison, <2 x i32> + %extract = extractelement <2 x float> %shuffle, i32 1 + ret float %extract +} + define i32 @fptosi(float nofpclass(inf nan) %arg) { ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CHECK-LABEL: define i32 @fptosi @@ -2587,6 +2642,33 @@ bb: ret float %implement.pow } +define [4 x float] @constant_aggregate_zero() { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan inf nzero sub norm) [4 x float] @constant_aggregate_zero +; CHECK-SAME: () #[[ATTR3]] { +; CHECK-NEXT: ret [4 x float] zeroinitializer +; + ret [4 x float] zeroinitializer +} + +define @scalable_splat_pnorm() { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define @scalable_splat_pnorm +; CHECK-SAME: () #[[ATTR3]] { +; CHECK-NEXT: ret shufflevector ( insertelement ( poison, float 1.000000e+00, i64 0), poison, zeroinitializer) +; + ret splat (float 1.0) +} + +define @scalable_splat_zero() { +; CHECK: Function Attrs: mustprogress nofree 
norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define noundef nofpclass(nan inf nzero sub norm) @scalable_splat_zero +; CHECK-SAME: () #[[ATTR3]] { +; CHECK-NEXT: ret zeroinitializer +; + ret zeroinitializer +} + declare i64 @_Z13get_global_idj(i32 noundef) attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" } diff --git a/llvm/test/Transforms/HipStdPar/unsupported-thread-local-indirect-use.ll b/llvm/test/Transforms/HipStdPar/unsupported-thread-local-indirect-use.ll index 40014853d8ac5..960828c76f789 100644 --- a/llvm/test/Transforms/HipStdPar/unsupported-thread-local-indirect-use.ll +++ b/llvm/test/Transforms/HipStdPar/unsupported-thread-local-indirect-use.ll @@ -1,5 +1,6 @@ ; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \ ; RUN: %s 2>&1 | FileCheck %s +; XFAIL: * @tls = hidden thread_local addrspace(1) global i32 0, align 4 diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index c8d348df5f427..63b11d0c0bc08 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -3038,3 +3038,63 @@ define i32 @icmp_slt_0_or_icmp_add_1_sge_100_i32_fail(i32 %x) { %D = or i32 %C, %B ret i32 %D } + +define i1 @logical_and_icmps1(i32 %a, i1 %other_cond) { +; CHECK-LABEL: @logical_and_icmps1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i32 [[A:%.*]], 10086 +; CHECK-NEXT: [[RET2:%.*]] = select i1 [[RET1:%.*]], i1 [[CMP3]], i1 false +; CHECK-NEXT: ret i1 [[RET2]] +; +entry: + %cmp1 = icmp sgt i32 %a, -1 + %logical_and = select i1 %other_cond, i1 %cmp1, i1 false + %cmp2 = icmp slt i32 %a, 10086 + %ret = select i1 %logical_and, i1 %cmp2, i1 false + ret i1 %ret +} + +define i1 @logical_and_icmps2(i32 %a, i1 %other_cond) { +; CHECK-LABEL: @logical_and_icmps2( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 false +; +entry: + %cmp1 = icmp slt i32 %a, -1 + %logical_and = select i1 %other_cond, i1 %cmp1, i1 false + %cmp2 = icmp eq i32 %a, 10086 + %ret = select i1 %logical_and, i1 %cmp2, i1 false + ret i1 %ret +} + +define <4 x i1> @logical_and_icmps_vec1(<4 x i32> %a, <4 x i1> %other_cond) { +; CHECK-LABEL: @logical_and_icmps_vec1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP3:%.*]] = icmp ult <4 x i32> [[A:%.*]], <i32 10086, i32 10086, i32 10086, i32 10086> +; CHECK-NEXT: [[RET2:%.*]] = select <4 x i1> [[RET1:%.*]], <4 x i1> [[CMP3]], <4 x i1> zeroinitializer +; CHECK-NEXT: ret <4 x i1> [[RET2]] +; +entry: + %cmp1 = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> + %logical_and = select <4 x i1> %other_cond, <4 x i1> %cmp1, <4 x i1> zeroinitializer + %cmp2 = icmp slt <4 x i32> %a, <i32 10086, i32 10086, i32 10086, i32 10086> + %ret = select <4 x i1> %logical_and, <4 x i1> %cmp2, <4 x i1> zeroinitializer + ret <4 x i1> %ret +} + +define i1 @logical_and_icmps_fail1(i32 %a, i32 %b, i1 %other_cond) { +; CHECK-LABEL: @logical_and_icmps_fail1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[A:%.*]], -1 +; CHECK-NEXT: [[LOGICAL_AND:%.*]] = select i1 [[OTHER_COND:%.*]], i1 [[CMP1]], i1 false +; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[A]], [[B:%.*]] +; CHECK-NEXT: [[RET:%.*]] = select i1 [[LOGICAL_AND]], i1 [[CMP2]], i1 false +; CHECK-NEXT: ret i1 [[RET]] +; +entry: + %cmp1 = icmp sgt i32 %a, -1 + %logical_and = select i1 %other_cond, i1 %cmp1, i1 false + %cmp2 = icmp slt i32 %a, %b + %ret = select i1 %logical_and, i1 %cmp2, i1 false + ret i1 %ret +} diff --git a/llvm/test/Transforms/InstCombine/freeze.ll b/llvm/test/Transforms/InstCombine/freeze.ll index e8105b6287d0c..668f3033ed4b7 100644 ---
a/llvm/test/Transforms/InstCombine/freeze.ll +++ b/llvm/test/Transforms/InstCombine/freeze.ll @@ -1127,6 +1127,17 @@ define i32 @freeze_zext_nneg(i8 %x) { ret i32 %fr } +define float @freeze_uitofp_nneg(i8 %x) { +; CHECK-LABEL: @freeze_uitofp_nneg( +; CHECK-NEXT: [[X_FR:%.*]] = freeze i8 [[X:%.*]] +; CHECK-NEXT: [[UITOFP:%.*]] = uitofp i8 [[X_FR]] to float +; CHECK-NEXT: ret float [[UITOFP]] +; + %uitofp = uitofp nneg i8 %x to float + %fr = freeze float %uitofp + ret float %fr +} + define i32 @propagate_drop_flags_or(i32 %arg) { ; CHECK-LABEL: @propagate_drop_flags_or( ; CHECK-NEXT: [[ARG_FR:%.*]] = freeze i32 [[ARG:%.*]] diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll index b99ed20d7d431..6b4e5a5372c52 100644 --- a/llvm/test/Transforms/InstCombine/icmp-add.ll +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll @@ -9,10 +9,8 @@ declare void @use(i32) define i1 @cvt_icmp_0_zext_plus_zext_eq_i16(i16 %arg, i16 %arg1) { ; CHECK-LABEL: @cvt_icmp_0_zext_plus_zext_eq_i16( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I:%.*]] = zext i16 [[ARG:%.*]] to i32 -; CHECK-NEXT: [[I2:%.*]] = zext i16 [[ARG1:%.*]] to i32 -; CHECK-NEXT: [[I3:%.*]] = sub nsw i32 0, [[I]] -; CHECK-NEXT: [[I4:%.*]] = icmp eq i32 [[I2]], [[I3]] +; CHECK-NEXT: [[TMP0:%.*]] = or i16 [[ARG1:%.*]], [[ARG:%.*]] +; CHECK-NEXT: [[I4:%.*]] = icmp eq i16 [[TMP0]], 0 ; CHECK-NEXT: ret i1 [[I4]] ; bb: @@ -27,10 +25,8 @@ bb: define i1 @cvt_icmp_0_zext_plus_zext_eq_i8(i8 %arg, i8 %arg1) { ; CHECK-LABEL: @cvt_icmp_0_zext_plus_zext_eq_i8( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I:%.*]] = zext i8 [[ARG:%.*]] to i32 -; CHECK-NEXT: [[I2:%.*]] = zext i8 [[ARG1:%.*]] to i32 -; CHECK-NEXT: [[I3:%.*]] = sub nsw i32 0, [[I]] -; CHECK-NEXT: [[I4:%.*]] = icmp eq i32 [[I2]], [[I3]] +; CHECK-NEXT: [[TMP0:%.*]] = or i8 [[ARG1:%.*]], [[ARG:%.*]] +; CHECK-NEXT: [[I4:%.*]] = icmp eq i8 [[TMP0]], 0 ; CHECK-NEXT: ret i1 [[I4]] ; bb: @@ -3003,4 +2999,28 @@ define i1 @icmp_dec_notnonzero(i8 %x) { ret i1 %c } +define i1 @icmp_addnuw_nonzero(i8 %x, i8 %y) { +; CHECK-LABEL: @icmp_addnuw_nonzero( +; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[TMP1]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %i = add nuw i8 %x, %y + %c = icmp eq i8 %i, 0 + ret i1 %c +} + +define i1 @icmp_addnuw_nonzero_fail_multiuse(i32 %x, i32 %y) { +; CHECK-LABEL: @icmp_addnuw_nonzero_fail_multiuse( +; CHECK-NEXT: [[I:%.*]] = add nuw i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[I]], 0 +; CHECK-NEXT: call void @use(i32 [[I]]) +; CHECK-NEXT: ret i1 [[C]] +; + %i = add nuw i32 %x, %y + %c = icmp eq i32 %i, 0 + call void @use(i32 %i) + ret i1 %c +} + declare void @llvm.assume(i1) diff --git a/llvm/test/Transforms/InstCombine/icmp-or.ll b/llvm/test/Transforms/InstCombine/icmp-or.ll index 922845c1e7e2d..1f9db5e5db9aa 100644 --- a/llvm/test/Transforms/InstCombine/icmp-or.ll +++ b/llvm/test/Transforms/InstCombine/icmp-or.ll @@ -951,3 +951,52 @@ define i1 @icmp_or_xor_with_sub_3_6(i64 %x1, i64 %y1, i64 %x2, i64 %y2, i64 %x3, %cmp = icmp eq i64 %or1, 0 ret i1 %cmp } + + +define i1 @or_disjoint_with_constants(i8 %x) { +; CHECK-LABEL: @or_disjoint_with_constants( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP1:%.*]], 18 +; CHECK-NEXT: ret i1 [[CMP]] +; + %or = or disjoint i8 %x, 1 + %cmp = icmp eq i8 %or, 19 + ret i1 %cmp +} + + +define i1 @or_disjoint_with_constants2(i8 %x) { +; CHECK-LABEL: @or_disjoint_with_constants2( +; CHECK-NEXT: [[OR:%.*]] = or disjoint i8 [[TMP1:%.*]], 5 +; CHECK-NEXT: [[CMP:%.*]] = 
icmp ne i8 [[TMP1]], 66 +; CHECK-NEXT: call void @use(i8 [[OR]]) +; CHECK-NEXT: ret i1 [[CMP]] +; + %or = or disjoint i8 %x, 5 + %cmp = icmp ne i8 %or, 71 + call void @use(i8 %or) + ret i1 %cmp +} + + +define i1 @or_disjoint_with_constants_fail_missing_const1(i8 %x, i8 %y) { +; CHECK-LABEL: @or_disjoint_with_constants_fail_missing_const1( +; CHECK-NEXT: [[OR:%.*]] = or disjoint i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[OR]], 19 +; CHECK-NEXT: ret i1 [[CMP]] +; + %or = or disjoint i8 %x, %y + %cmp = icmp eq i8 %or, 19 + ret i1 %cmp +} + +define i1 @or_disjoint_with_constants_fail_missing_const2(i8 %x, i8 %y) { +; CHECK-LABEL: @or_disjoint_with_constants_fail_missing_const2( +; CHECK-NEXT: [[OR:%.*]] = or disjoint i8 [[X:%.*]], 19 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[OR]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %or = or disjoint i8 %x, 19 + %cmp = icmp eq i8 %or, %y + ret i1 %cmp +} + diff --git a/llvm/test/Transforms/InstCombine/implies.ll b/llvm/test/Transforms/InstCombine/implies.ll new file mode 100644 index 0000000000000..c02d84d3f8371 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/implies.ll @@ -0,0 +1,424 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define i1 @or_implies_sle(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @or_implies_sle( +; CHECK-NEXT: [[OR:%.*]] = or i8 [[X:%.*]], 23 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[OR]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %or = or i8 %x, 23 + %cond = icmp sle i8 %or, %y + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @or_implies_sle_fail(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @or_implies_sle_fail( +; CHECK-NEXT: [[OR:%.*]] = or i8 [[X:%.*]], -34 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[OR]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %or = or i8 %x, -34 + %cond = icmp sle i8 %or, %y + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @or_disjoint_implies_ule(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @or_disjoint_implies_ule( +; CHECK-NEXT: [[X2:%.*]] = or disjoint i8 [[X:%.*]], 24 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x1 = or disjoint i8 %x, 23 + %x2 = or disjoint i8 %x, 24 + + %cond = icmp ule i8 %x2, %y + br i1 %cond, label %T, label %F +T: + %r = icmp ule i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @or_disjoint_implies_ule_fail(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @or_disjoint_implies_ule_fail( +; CHECK-NEXT: [[X2:%.*]] = or disjoint i8 [[X:%.*]], 24 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[X1:%.*]] = or disjoint i8 [[X]], 28 +; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X1]], [[Y]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x1 = or disjoint i8 %x, 28 + %x2 = or disjoint i8 %x, 24 + + %cond = icmp
ule i8 %x2, %y + br i1 %cond, label %T, label %F +T: + %r = icmp ule i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @or_prove_disjoint_implies_ule(i8 %xx, i8 %y, i1 %other) { +; CHECK-LABEL: @or_prove_disjoint_implies_ule( +; CHECK-NEXT: [[X:%.*]] = and i8 [[XX:%.*]], -16 +; CHECK-NEXT: [[X2:%.*]] = or disjoint i8 [[X]], 10 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x = and i8 %xx, -16 + %x1 = or i8 %x, 7 + %x2 = or i8 %x, 10 + + %cond = icmp ule i8 %x2, %y + br i1 %cond, label %T, label %F +T: + %r = icmp ule i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_or_disjoint_implies_sle(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @src_or_disjoint_implies_sle( +; CHECK-NEXT: [[X2:%.*]] = or disjoint i8 [[X:%.*]], 24 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x1 = or disjoint i8 %x, 23 + %x2 = or disjoint i8 %x, 24 + + %cond = icmp sle i8 %x2, %y + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_or_disjoint_implies_sle_fail(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @src_or_disjoint_implies_sle_fail( +; CHECK-NEXT: [[X2:%.*]] = or disjoint i8 [[X:%.*]], 24 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp slt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[X1:%.*]] = or disjoint i8 [[X]], 23 +; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X1]], [[Y]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x1 = or disjoint i8 %x, 23 + %x2 = or disjoint i8 %x, 24 + + %cond = icmp sle i8 %y, %x2 + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_addnsw_implies_sle(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @src_addnsw_implies_sle( +; CHECK-NEXT: [[X2:%.*]] = add nsw i8 [[X:%.*]], 24 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x1 = add nsw i8 %x, 23 + %x2 = add nsw i8 %x, 24 + + %cond = icmp sle i8 %x2, %y + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_addnsw_implies_sle_fail(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @src_addnsw_implies_sle_fail( +; CHECK-NEXT: [[X2:%.*]] = add nsw i8 [[X:%.*]], 23 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[X1:%.*]] = add nsw i8 [[X]], 24 +; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X1]], [[Y]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x1 = add nsw i8 %x, 24 + %x2 = add nsw i8 %x, 23 + + %cond = icmp sle i8 %x2, %y + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_and_implies_ult(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_and_implies_ult( +; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +;
CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %cond = icmp ult i8 %x, %z + br i1 %cond, label %T, label %F +T: + %and = and i8 %z, %x + %r = icmp ult i8 %and, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_and_implies_ult_fail(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_and_implies_ult_fail( +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[Z]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %cond = icmp ule i8 %x, %z + br i1 %cond, label %T, label %F +T: + %and = and i8 %x, %z + %r = icmp ult i8 %and, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_and_implies_slt_fail(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_and_implies_slt_fail( +; CHECK-NEXT: [[COND:%.*]] = icmp slt i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[AND]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %cond = icmp slt i8 %x, %z + br i1 %cond, label %T, label %F +T: + %and = and i8 %x, %y + %r = icmp slt i8 %and, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_or_implies_ule(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_or_implies_ule( +; CHECK-NEXT: [[OR:%.*]] = or i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[OR]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %or = or i8 %y, %x + %cond = icmp uge i8 %z, %or + br i1 %cond, label %T, label %F +T: + %r = icmp ule i8 %x, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_or_implies_false_ugt_todo(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_or_implies_false_ugt_todo( +; CHECK-NEXT: [[OR:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = icmp ugt i8 [[OR]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; CHECK: F: +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; + %or = or i8 %x, %y + %cond = icmp ugt i8 %or, %z + br i1 %cond, label %T, label %F +T: + ret i1 %other +F: + %r = icmp ugt i8 %x, %z + ret i1 %r + +} + +define i1 @src_udiv_implies_ult(i8 %x, i8 %z, i1 %other) { +; CHECK-LABEL: @src_udiv_implies_ult( +; CHECK-NEXT: [[COND:%.*]] = icmp ugt i8 [[Z:%.*]], [[X:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %cond = icmp ugt i8 %z, %x + br i1 %cond, label %T, label %F +T: + %div = udiv i8 %x, 3 + %r = icmp ult i8 %div, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_udiv_implies_ult2(i8 %x, i8 %z, i1 %other) { +; CHECK-LABEL: @src_udiv_implies_ult2( +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[Z:%.*]], [[X:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; CHECK: F: +; CHECK-NEXT: ret i1 true +; + %cond = icmp ule i8 %z, %x + br i1 %cond, label %T, label %F +T: + ret i1 %other +F: + %div = udiv i8 %x, 3 + %r = icmp ult i8 %div, %z + ret i1 %r +} + +define i1 @src_smin_implies_sle(i8
%x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_smin_implies_sle( +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %cond = icmp sle i8 %x, %z + br i1 %cond, label %T, label %F +T: + %um = call i8 @llvm.smin.i8(i8 %x, i8 %y) + %r = icmp sle i8 %um, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_umin_implies_ule(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_umin_implies_ule( +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %cond = icmp ule i8 %x, %z + br i1 %cond, label %T, label %F +T: + %um = call i8 @llvm.umin.i8(i8 %x, i8 %y) + %r = icmp ule i8 %um, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_umax_implies_ule(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_umax_implies_ule( +; CHECK-NEXT: [[UM:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[UM]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %um = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %cond = icmp ule i8 %um, %z + br i1 %cond, label %T, label %F +T: + %r = icmp ule i8 %x, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_smax_implies_sle(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_smax_implies_sle( +; CHECK-NEXT: [[UM:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[UM]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %um = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %cond = icmp sle i8 %um, %z + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x, %z + ret i1 %r +F: + ret i1 %other +} diff --git a/llvm/test/Transforms/InstCombine/insertelement.ll b/llvm/test/Transforms/InstCombine/insertelement.ll index 976c495465ce4..c8df2db6e70ca 100644 --- a/llvm/test/Transforms/InstCombine/insertelement.ll +++ b/llvm/test/Transforms/InstCombine/insertelement.ll @@ -17,11 +17,56 @@ define <4 x i32> @insert_unknown_idx(<4 x i32> %x, i32 %idx) { ; CHECK-LABEL: @insert_unknown_idx( ; CHECK-NEXT: [[V1:%.*]] = and <4 x i32> [[X:%.*]], ; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 6, i32 [[IDX:%.*]] -; CHECK-NEXT: [[V3:%.*]] = and <4 x i32> [[V2]], -; CHECK-NEXT: ret <4 x i32> [[V3]] +; CHECK-NEXT: ret <4 x i32> [[V2]] ; %v1 = and <4 x i32> %x, %v2 = insertelement <4 x i32> %v1, i32 6, i32 %idx %v3 = and <4 x i32> %v2, ret <4 x i32> %v3 } + +define <2 x i8> @insert_known_any_idx(<2 x i8> %xx, i8 %yy, i32 %idx) { +; CHECK-LABEL: @insert_known_any_idx( +; CHECK-NEXT: ret <2 x i8> +; + %x = or <2 x i8> %xx, + %y = or i8 %yy, 16 + + %ins = insertelement <2 x i8> %x, i8 %y, i32 %idx + %r = and <2 x i8> %ins, + ret <2 x i8> %r +} + +define <2 x i8> @insert_known_any_idx_fail1(<2 x i8> %xx, i8 %yy, i32 %idx) { +; CHECK-LABEL: @insert_known_any_idx_fail1( +; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y:%.*]] = or i8 [[YY:%.*]], 16 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 [[IDX:%.*]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> 
[[INS]], +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %x = or <2 x i8> %xx, + %y = or i8 %yy, 16 + + %ins = insertelement <2 x i8> %x, i8 %y, i32 %idx + %r = and <2 x i8> %ins, + ret <2 x i8> %r +} + + +define <2 x i8> @insert_known_any_idx_fail2(<2 x i8> %xx, i8 %yy, i32 %idx) { +; CHECK-LABEL: @insert_known_any_idx_fail2( +; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], +; CHECK-NEXT: [[Y:%.*]] = or i8 [[YY:%.*]], 15 +; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 [[IDX:%.*]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[INS]], +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %x = or <2 x i8> %xx, + %y = or i8 %yy, 15 + + %ins = insertelement <2 x i8> %x, i8 %y, i32 %idx + %r = and <2 x i8> %ins, + ret <2 x i8> %r +} + diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll index 5305c78f69123..769f7661fc8dc 100644 --- a/llvm/test/Transforms/InstCombine/known-bits.ll +++ b/llvm/test/Transforms/InstCombine/known-bits.ll @@ -124,7 +124,6 @@ exit: ret i8 %or2 } - define i8 @test_cond_and_bothways(i8 %x) { ; CHECK-LABEL: @test_cond_and_bothways( ; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 91 @@ -181,8 +180,6 @@ exit: ret i8 %or2 } - - define i8 @test_cond_and_commuted(i8 %x, i1 %c1, i1 %c2) { ; CHECK-LABEL: @test_cond_and_commuted( ; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 3 @@ -343,7 +340,7 @@ exit: ret i8 %or2 } -define i32 @test_icmp_trunc1(i32 %x){ +define i32 @test_icmp_trunc1(i32 %x) { ; CHECK-LABEL: @test_icmp_trunc1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Y:%.*]] = trunc i32 [[X:%.*]] to i16 @@ -365,7 +362,7 @@ else: ret i32 0 } -define i32 @test_icmp_trunc_assume(i32 %x){ +define i32 @test_icmp_trunc_assume(i32 %x) { ; CHECK-LABEL: @test_icmp_trunc_assume( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Y:%.*]] = trunc i32 [[X:%.*]] to i16 @@ -532,7 +529,106 @@ if.else: ret i1 %other } +define i8 @and_eq_bits_must_be_set(i8 %x, i8 %y) { +; CHECK-LABEL: @and_eq_bits_must_be_set( +; CHECK-NEXT: [[XY:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 123 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i8 1 +; + %xy = and i8 %x, %y + %cmp = icmp eq i8 %xy, 123 + call void @llvm.assume(i1 %cmp) + %r = and i8 %x, 1 + ret i8 %r +} + +define i8 @and_eq_bits_must_be_set2(i8 %x, i8 %y) { +; CHECK-LABEL: @and_eq_bits_must_be_set2( +; CHECK-NEXT: [[XY:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 123 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i8 11 +; + %xy = and i8 %x, %y + %cmp = icmp eq i8 %xy, 123 + call void @llvm.assume(i1 %cmp) + %r = and i8 %y, 11 + ret i8 %r +} + +define i8 @and_eq_bits_must_be_set2_partial_fail(i8 %x, i8 %y) { +; CHECK-LABEL: @and_eq_bits_must_be_set2_partial_fail( +; CHECK-NEXT: [[XY:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 123 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[Y]], 111 +; CHECK-NEXT: ret i8 [[R]] +; + %xy = and i8 %x, %y + %cmp = icmp eq i8 %xy, 123 + call void @llvm.assume(i1 %cmp) + %r = and i8 %y, 111 + ret i8 %r +} + +define i8 @or_eq_bits_must_be_unset(i8 %x, i8 %y) { +; CHECK-LABEL: @or_eq_bits_must_be_unset( +; CHECK-NEXT: [[XY:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 124 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i8 0 +; + %xy = or i8 %x, %y + %cmp = icmp eq i8 %xy, 124 + call void @llvm.assume(i1 %cmp) + %r = and i8 %x, 3 + ret 
i8 %r +} + +define i8 @or_eq_bits_must_be_unset2(i8 %x, i8 %y) { +; CHECK-LABEL: @or_eq_bits_must_be_unset2( +; CHECK-NEXT: [[XY:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 124 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i8 0 +; + %xy = or i8 %x, %y + %cmp = icmp eq i8 %xy, 124 + call void @llvm.assume(i1 %cmp) + %r = and i8 %y, 1 + ret i8 %r +} +define i8 @or_eq_bits_must_be_unset2_partial_fail(i8 %x, i8 %y) { +; CHECK-LABEL: @or_eq_bits_must_be_unset2_partial_fail( +; CHECK-NEXT: [[XY:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 124 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[Y]], 4 +; CHECK-NEXT: ret i8 [[R]] +; + %xy = or i8 %x, %y + %cmp = icmp eq i8 %xy, 124 + call void @llvm.assume(i1 %cmp) + %r = and i8 %y, 7 + ret i8 %r +} + +define i8 @or_ne_bits_must_be_unset2_fail(i8 %x, i8 %y) { +; CHECK-LABEL: @or_ne_bits_must_be_unset2_fail( +; CHECK-NEXT: [[XY:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[XY]], 124 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[R]] +; + %xy = or i8 %x, %y + %cmp = icmp ne i8 %xy, 124 + call void @llvm.assume(i1 %cmp) + %r = and i8 %x, 3 + ret i8 %r +} declare void @use(i1) declare void @sink(i8) diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll index f0ea09c088474..c850b87bb2dd4 100644 --- a/llvm/test/Transforms/InstCombine/logical-select.ll +++ b/llvm/test/Transforms/InstCombine/logical-select.ll @@ -1303,3 +1303,221 @@ define i1 @logical_or_and_with_common_not_op_variant5(i1 %a) { %or = select i1 %a, i1 true, i1 %and ret i1 %or } + +define i1 @reduce_logical_and1(i1 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @reduce_logical_and1( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[B:%.*]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[C:%.*]], [[B]] +; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[CMP1]], [[CMP]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[A:%.*]], i1 [[TMP0]], i1 false +; CHECK-NEXT: ret i1 [[AND2]] +; +bb: + %cmp = icmp slt i32 %b, 6 + %cmp1 = icmp sgt i32 %c, %b + %and1 = select i1 %a, i1 %cmp1, i1 false + %and2 = select i1 %and1, i1 %cmp, i1 false + ret i1 %and2 +} + +define i1 @reduce_logical_and2(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @reduce_logical_and2( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[C:%.*]], true +; CHECK-NEXT: [[B:%.*]] = and i1 [[TMP0]], [[B1:%.*]] +; CHECK-NEXT: [[AND3:%.*]] = select i1 [[AND2:%.*]], i1 [[B]], i1 false +; CHECK-NEXT: ret i1 [[AND3]] +; +bb: + %or = xor i1 %c, %b + %and1 = select i1 %a, i1 %or, i1 false + %and2 = select i1 %and1, i1 %b, i1 false + ret i1 %and2 +} + +define i1 @reduce_logical_and3(i1 %a, i32 %b, i32 noundef %c) { +; CHECK-LABEL: @reduce_logical_and3( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[B:%.*]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[C:%.*]], [[B]] +; CHECK-NEXT: [[TMP0:%.*]] = and i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[A:%.*]], i1 [[TMP0]], i1 false +; CHECK-NEXT: ret i1 [[AND2]] +; +bb: + %cmp = icmp slt i32 %b, 6 + %cmp1 = icmp sgt i32 %c, %b + %and1 = select i1 %a, i1 %cmp, i1 false + %and2 = select i1 %and1, i1 %cmp1, i1 false + ret i1 %and2 +} + +define i1 @reduce_logical_or1(i1 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @reduce_logical_or1( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 
[[B:%.*]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[C:%.*]], [[B]] +; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[CMP1]], [[CMP]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[TMP0]] +; CHECK-NEXT: ret i1 [[AND2]] +; +bb: + %cmp = icmp slt i32 %b, 6 + %cmp1 = icmp sgt i32 %c, %b + %and1 = select i1 %a, i1 true, i1 %cmp1 + %and2 = select i1 %and1, i1 true, i1 %cmp + ret i1 %and2 +} + +define i1 @reduce_logical_or2(i1 %a, i1 %b, i1 %c) { +; CHECK-LABEL: @reduce_logical_or2( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[B:%.*]] = or i1 [[C:%.*]], [[B1:%.*]] +; CHECK-NEXT: [[AND3:%.*]] = select i1 [[AND2:%.*]], i1 true, i1 [[B]] +; CHECK-NEXT: ret i1 [[AND3]] +; +bb: + %or = xor i1 %c, %b + %and1 = select i1 %a, i1 true, i1 %or + %and2 = select i1 %and1, i1 true, i1 %b + ret i1 %and2 +} + +define i1 @reduce_logical_or3(i1 %a, i32 %b, i32 noundef %c) { +; CHECK-LABEL: @reduce_logical_or3( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[B:%.*]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[C:%.*]], [[B]] +; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[TMP0]] +; CHECK-NEXT: ret i1 [[AND2]] +; +bb: + %cmp = icmp slt i32 %b, 6 + %cmp1 = icmp sgt i32 %c, %b + %and1 = select i1 %a, i1 true, i1 %cmp + %and2 = select i1 %and1, i1 true, i1 %cmp1 + ret i1 %and2 +} + +define i1 @reduce_logical_and_fail1(i1 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @reduce_logical_and_fail1( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[B:%.*]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[C:%.*]], [[B]] +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[A:%.*]], i1 [[CMP]], i1 false +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[AND1]], i1 [[CMP1]], i1 false +; CHECK-NEXT: ret i1 [[AND2]] +; +bb: + %cmp = icmp slt i32 %b, 6 + %cmp1 = icmp sgt i32 %c, %b + %and1 = select i1 %a, i1 %cmp, i1 false + %and2 = select i1 %and1, i1 %cmp1, i1 false + ret i1 %and2 +} + +define i1 @reduce_logical_and_fail2(i1 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @reduce_logical_and_fail2( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[B:%.*]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[C:%.*]], 7 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[A:%.*]], i1 [[CMP]], i1 false +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[AND1]], i1 [[CMP1]], i1 false +; CHECK-NEXT: ret i1 [[AND2]] +; +bb: + %cmp = icmp slt i32 %b, 6 + %cmp1 = icmp sgt i32 %c, 7 + %and1 = select i1 %a, i1 %cmp, i1 false + %and2 = select i1 %and1, i1 %cmp1, i1 false + ret i1 %and2 +} + +define i1 @reduce_logical_or_fail1(i1 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @reduce_logical_or_fail1( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[B:%.*]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[C:%.*]], [[B]] +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[CMP]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[AND1]], i1 true, i1 [[CMP1]] +; CHECK-NEXT: ret i1 [[AND2]] +; +bb: + %cmp = icmp slt i32 %b, 6 + %cmp1 = icmp sgt i32 %c, %b + %and1 = select i1 %a, i1 true, i1 %cmp + %and2 = select i1 %and1, i1 true, i1 %cmp1 + ret i1 %and2 +} + +define i1 @reduce_logical_or_fail2(i1 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @reduce_logical_or_fail2( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[B:%.*]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[C:%.*]], 7 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[CMP]] +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[AND1]], i1 true, i1 [[CMP1]] +; CHECK-NEXT: ret i1 [[AND2]] +; +bb: + %cmp = icmp slt i32 
%b, 6 + %cmp1 = icmp sgt i32 %c, 7 + %and1 = select i1 %a, i1 true, i1 %cmp + %and2 = select i1 %and1, i1 true, i1 %cmp1 + ret i1 %and2 +} + +define i1 @reduce_logical_and_multiuse(i1 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @reduce_logical_and_multiuse( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[B:%.*]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[C:%.*]], [[B]] +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[A:%.*]], i1 [[CMP1]], i1 false +; CHECK-NEXT: call void @use1(i1 [[AND1]]) +; CHECK-NEXT: [[AND2:%.*]] = select i1 [[AND1]], i1 [[CMP]], i1 false +; CHECK-NEXT: ret i1 [[AND2]] +; +bb: + %cmp = icmp slt i32 %b, 6 + %cmp1 = icmp sgt i32 %c, %b + %and1 = select i1 %a, i1 %cmp1, i1 false + call void @use1(i1 %and1) + %and2 = select i1 %and1, i1 %cmp, i1 false + ret i1 %and2 +} + +define i1 @reduce_bitwise_and1(i1 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @reduce_bitwise_and1( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[B:%.*]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[C:%.*]], [[B]] +; CHECK-NEXT: [[AND1:%.*]] = or i1 [[CMP1]], [[A:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i1 [[AND1]], [[CMP]] +; CHECK-NEXT: ret i1 [[AND2]] +; +bb: + %cmp = icmp slt i32 %b, 6 + %cmp1 = icmp sgt i32 %c, %b + %and1 = or i1 %a, %cmp1 + %and2 = select i1 %and1, i1 %cmp, i1 false + ret i1 %and2 +} + +define i1 @reduce_bitwise_and2(i1 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @reduce_bitwise_and2( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[B:%.*]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[C:%.*]], [[B]] +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[A:%.*]], i1 [[CMP1]], i1 false +; CHECK-NEXT: [[AND2:%.*]] = or i1 [[AND1]], [[CMP]] +; CHECK-NEXT: ret i1 [[AND2]] +; +bb: + %cmp = icmp slt i32 %b, 6 + %cmp1 = icmp sgt i32 %c, %b + %and1 = select i1 %a, i1 %cmp1, i1 false + %and2 = or i1 %and1, %cmp + ret i1 %and2 +} diff --git a/llvm/test/Transforms/InstCombine/shl-demand.ll b/llvm/test/Transforms/InstCombine/shl-demand.ll index 85752890b4b80..26175ebbe1535 100644 --- a/llvm/test/Transforms/InstCombine/shl-demand.ll +++ b/llvm/test/Transforms/InstCombine/shl-demand.ll @@ -1,6 +1,124 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +; If we only want bits that already match the signbit then we don't need to shift. 
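+; A hedged sketch of the idea (illustrative only, mirroring the first test
+; below): srem by a small power of two leaves the high bits of the result as
+; copies of the sign bit, so shifting one of those copies up to bit 31 and
+; masking it is the same as masking bit 31 of the srem result directly:
+;
+;   %srem = srem i32 %a0, 2            ; ...SSSSSSSD
+;   %shl  = shl i32 %srem, 30          ; SD00...0
+;   %mask = and i32 %shl, -2147483648  ; only bit 31 (a sign copy) is demanded
+; =>
+;   %mask = and i32 %srem, -2147483648
+;
+; Alive2 proof: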
+; https://alive2.llvm.org/ce/z/WJBPVt +define i32 @src_srem_shl_demand_max_signbit(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_max_signbit( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 2 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SREM]], -2147483648 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 2 ; srem = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSD + %shl = shl i32 %srem, 30 ; shl = SD000000000000000000000000000000 + %mask = and i32 %shl, -2147483648 ; mask = 10000000000000000000000000000000 + ret i32 %mask +} + +define i32 @src_srem_shl_demand_min_signbit(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_min_signbit( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 1073741823 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SREM]], -2147483648 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 1073741823 ; srem = SSDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD + %shl = shl i32 %srem, 1 ; shl = SDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD0 + %mask = and i32 %shl, -2147483648 ; mask = 10000000000000000000000000000000 + ret i32 %mask +} + +define i32 @src_srem_shl_demand_max_mask(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_max_mask( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 2 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SREM]], -4 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 2 ; srem = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSD + %shl = shl i32 %srem, 1 ; shl = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSD0 + %mask = and i32 %shl, -4 ; mask = 11111111111111111111111111111100 + ret i32 %mask +} + +; Negative test - mask demands non-signbit from shift source +define i32 @src_srem_shl_demand_max_signbit_mask_hit_first_demand(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_max_signbit_mask_hit_first_demand( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 4 +; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[SREM]], 29 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SHL]], -1073741824 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 4 ; srem = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSDD + %shl = shl i32 %srem, 29 ; shl = SDD00000000000000000000000000000 + %mask = and i32 %shl, -1073741824 ; mask = 11000000000000000000000000000000 + ret i32 %mask +} + +define i32 @src_srem_shl_demand_min_signbit_mask_hit_last_demand(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_min_signbit_mask_hit_last_demand( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 536870912 +; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[SREM]], 1 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SHL]], -1073741822 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 536870912 ; srem = SSSDDDDDDDDDDDDDDDDDDDDDDDDDDDDD + %shl = shl i32 %srem, 1 ; shl = SSDDDDDDDDDDDDDDDDDDDDDDDDDDDDD0 + %mask = and i32 %shl, -1073741822 ; mask = 11000000000000000000000000000010 + ret i32 %mask +} + +define i32 @src_srem_shl_demand_eliminate_signbit(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_eliminate_signbit( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 1073741824 +; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[SREM]], 1 +; CHECK-NEXT: [[MASK:%.*]] = and i32 [[SHL]], 2 +; CHECK-NEXT: ret i32 [[MASK]] +; + %srem = srem i32 %a0, 1073741824 ; srem = SSDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD + %shl = shl i32 %srem, 1 ; shl = DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD0 + %mask = and i32 %shl, 2 ; mask = 00000000000000000000000000000010 + ret i32 %mask +} + +define i32 @src_srem_shl_demand_max_mask_hit_demand(i32 %a0) { +; CHECK-LABEL: @src_srem_shl_demand_max_mask_hit_demand( +; CHECK-NEXT: [[SREM:%.*]] = srem i32 [[A0:%.*]], 4 +; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[SREM]], 1 +; CHECK-NEXT: [[MASK:%.*]] = 
and i32 [[SHL]], -4
+; CHECK-NEXT: ret i32 [[MASK]]
+;
+ %srem = srem i32 %a0, 4 ; srem = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSDD
+ %shl = shl i32 %srem, 1 ; shl = SSSSSSSSSSSSSSSSSSSSSSSSSSSSSDD0
+ %mask = and i32 %shl, -4 ; mask = 11111111111111111111111111111100
+ ret i32 %mask
+}
+
+define <2 x i32> @src_srem_shl_mask_vector(<2 x i32> %a0) {
+; CHECK-LABEL: @src_srem_shl_mask_vector(
+; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]],
+; CHECK-NEXT: [[SHL:%.*]] = shl nsw <2 x i32> [[SREM]],
+; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHL]],
+; CHECK-NEXT: ret <2 x i32> [[MASK]]
+;
+ %srem = srem <2 x i32> %a0,
+ %shl = shl <2 x i32> %srem,
+ %mask = and <2 x i32> %shl,
+ ret <2 x i32> %mask
+}
+
+define <2 x i32> @src_srem_shl_mask_vector_nonconstant(<2 x i32> %a0, <2 x i32> %a1) {
+; CHECK-LABEL: @src_srem_shl_mask_vector_nonconstant(
+; CHECK-NEXT: [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]],
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> [[SREM]], [[A1:%.*]]
+; CHECK-NEXT: [[MASK:%.*]] = and <2 x i32> [[SHL]],
+; CHECK-NEXT: ret <2 x i32> [[MASK]]
+;
+ %srem = srem <2 x i32> %a0,
+ %shl = shl <2 x i32> %srem, %a1
+ %mask = and <2 x i32> %shl,
+ ret <2 x i32> %mask
+}
+
define i16 @sext_shl_trunc_same_size(i16 %x, i32 %y) {
; CHECK-LABEL: @sext_shl_trunc_same_size(
; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[X:%.*]] to i32
diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
index 661c36038a67e..a4b74aa8cc7dc 100644
--- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -passes='instcombine' -S | FileCheck %s
define i8 @zext_or_icmp_icmp(i8 %a, i8 %b) {
; CHECK-LABEL: @zext_or_icmp_icmp(
@@ -180,11 +180,11 @@ define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) {
; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[SUB17]], 32
; CHECK-NEXT: [[CONV18:%.*]] = ashr exact i64 [[SEXT]], 32
; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[XOR]], [[CONV18]]
-; CHECK-NEXT: [[CONV19:%.*]] = zext i1 [[CMP]] to i16
-; CHECK-NEXT: [[OR21:%.*]] = or i16 [[CONV19]], [[INSERT]]
-; CHECK-NEXT: [[TOBOOL23_NOT:%.*]] = icmp eq i16 [[OR21]], 0
+; CHECK-NEXT: [[TRUNC44:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[INC:%.*]] = add i8 [[TRUNC44]], [[I162]]
+; CHECK-NEXT: [[TOBOOL23_NOT:%.*]] = xor i1 [[CMP]], true
; CHECK-NEXT: call void @llvm.assume(i1 [[TOBOOL23_NOT]])
-; CHECK-NEXT: ret i8 [[I162]]
+; CHECK-NEXT: ret i8 [[INC]]
;
%b = icmp eq i32 %t0, 0
%b2 = icmp eq i16 %insert, 0
diff --git a/llvm/test/Transforms/InstSimplify/implies.ll b/llvm/test/Transforms/InstSimplify/implies.ll
index b70dc90da655e..7e3cb656bce15 100644
--- a/llvm/test/Transforms/InstSimplify/implies.ll
+++ b/llvm/test/Transforms/InstSimplify/implies.ll
@@ -155,7 +155,13 @@ define i1 @test9(i32 %length.i, i32 %i) {
define i1 @test10(i32 %length.i, i32 %x.full) {
; CHECK-LABEL: @test10(
-; CHECK-NEXT: ret i1 true
+; CHECK-NEXT: [[X:%.*]] = and i32 [[X_FULL:%.*]], -65536
+; CHECK-NEXT: [[LARGE:%.*]] = or i32 [[X]], 100
+; CHECK-NEXT: [[SMALL:%.*]] = or i32 [[X]], 90
+; CHECK-NEXT: [[KNOWN:%.*]] = icmp ult i32 [[LARGE]], [[LENGTH_I:%.*]]
+; CHECK-NEXT: [[TO_PROVE:%.*]] = icmp ult i32 [[SMALL]], [[LENGTH_I]]
+; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[KNOWN]], [[TO_PROVE]]
+; CHECK-NEXT: ret i1 [[RES]]
;
%x = and i32 %x.full, 4294901760 ;; 4294901760 == 0xffff0000
%large = or i32
%x, 100 @@ -166,6 +172,19 @@ define i1 @test10(i32 %length.i, i32 %x.full) { ret i1 %res } +define i1 @test10_with_disjoint(i32 %length.i, i32 %x.full) { +; CHECK-LABEL: @test10_with_disjoint( +; CHECK-NEXT: ret i1 true +; + %x = and i32 %x.full, 4294901760 ;; 4294901760 == 0xffff0000 + %large = or disjoint i32 %x, 100 + %small = or disjoint i32 %x, 90 + %known = icmp ult i32 %large, %length.i + %to.prove = icmp ult i32 %small, %length.i + %res = icmp ule i1 %known, %to.prove + ret i1 %res +} + define i1 @test11(i32 %length.i, i32 %x) { ; CHECK-LABEL: @test11( ; CHECK-NEXT: [[LARGE:%.*]] = or i32 [[X:%.*]], 100 @@ -216,7 +235,13 @@ define i1 @test13(i32 %length.i, i32 %x) { define i1 @test14(i32 %length.i, i32 %x.full) { ; CHECK-LABEL: @test14( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[X:%.*]] = and i32 [[X_FULL:%.*]], -61681 +; CHECK-NEXT: [[LARGE:%.*]] = or i32 [[X]], 8224 +; CHECK-NEXT: [[SMALL:%.*]] = or i32 [[X]], 4112 +; CHECK-NEXT: [[KNOWN:%.*]] = icmp ult i32 [[LARGE]], [[LENGTH_I:%.*]] +; CHECK-NEXT: [[TO_PROVE:%.*]] = icmp ult i32 [[SMALL]], [[LENGTH_I]] +; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[KNOWN]], [[TO_PROVE]] +; CHECK-NEXT: ret i1 [[RES]] ; %x = and i32 %x.full, 4294905615 ;; 4294905615 == 0xffff0f0f %large = or i32 %x, 8224 ;; == 0x2020 @@ -227,6 +252,19 @@ define i1 @test14(i32 %length.i, i32 %x.full) { ret i1 %res } +define i1 @test14_with_disjoint(i32 %length.i, i32 %x.full) { +; CHECK-LABEL: @test14_with_disjoint( +; CHECK-NEXT: ret i1 true +; + %x = and i32 %x.full, 4294905615 ;; 4294905615 == 0xffff0f0f + %large = or disjoint i32 %x, 8224 ;; == 0x2020 + %small = or disjoint i32 %x, 4112 ;; == 0x1010 + %known = icmp ult i32 %large, %length.i + %to.prove = icmp ult i32 %small, %length.i + %res = icmp ule i1 %known, %to.prove + ret i1 %res +} + define i1 @test15(i32 %length.i, i32 %x) { ; CHECK-LABEL: @test15( ; CHECK-NEXT: [[LARGE:%.*]] = add nuw i32 [[X:%.*]], 100 diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll b/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll index daa64f2e2ea7c..27ab11446b571 100644 --- a/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll +++ b/llvm/test/Transforms/LoopIdiom/AArch64/byte-compare-index.ll @@ -2090,3 +2090,99 @@ while.end: ret i32 %res } +; The optimization should be disabled when noimplicitfloat is present. 
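+; (Hedged note: the rewritten byte-compare loop is implemented with SIMD vector
+; loads, and noimplicitfloat forbids introducing implicit FP/SIMD register use,
+; so every prefix below is expected to keep the scalar loop unchanged.)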
+define i32 @no_implicit_float(ptr %a, ptr %b, i32 %len, i32 %extra, i32 %n) noimplicitfloat { +; CHECK-LABEL: define i32 @no_implicit_float( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.body: +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; CHECK: while.end: +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; CHECK-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]] +; CHECK-NEXT: ret i32 [[RES]] +; +; LOOP-DEL-LABEL: define i32 @no_implicit_float( +; LOOP-DEL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR2:[0-9]+]] { +; LOOP-DEL-NEXT: entry: +; LOOP-DEL-NEXT: br label [[WHILE_COND:%.*]] +; LOOP-DEL: while.cond: +; LOOP-DEL-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; LOOP-DEL-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; LOOP-DEL-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; LOOP-DEL-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; LOOP-DEL: while.body: +; LOOP-DEL-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; LOOP-DEL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; LOOP-DEL-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; LOOP-DEL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; LOOP-DEL-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; LOOP-DEL-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; LOOP-DEL-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; LOOP-DEL: while.end: +; LOOP-DEL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; LOOP-DEL-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ] +; LOOP-DEL-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]] +; LOOP-DEL-NEXT: ret i32 [[RES]] +; +; NO-TRANSFORM-LABEL: define i32 @no_implicit_float( +; NO-TRANSFORM-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[LEN:%.*]], i32 [[EXTRA:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] { +; NO-TRANSFORM-NEXT: entry: +; NO-TRANSFORM-NEXT: br label [[WHILE_COND:%.*]] +; NO-TRANSFORM: while.cond: +; NO-TRANSFORM-NEXT: [[LEN_ADDR:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[WHILE_BODY:%.*]] ] +; NO-TRANSFORM-NEXT: [[INC]] = add i32 [[LEN_ADDR]], 1 +; NO-TRANSFORM-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[INC]], [[N]] +; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label 
[[WHILE_BODY]] +; NO-TRANSFORM: while.body: +; NO-TRANSFORM-NEXT: [[IDXPROM:%.*]] = zext i32 [[INC]] to i64 +; NO-TRANSFORM-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IDXPROM]] +; NO-TRANSFORM-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; NO-TRANSFORM-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IDXPROM]] +; NO-TRANSFORM-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; NO-TRANSFORM-NEXT: [[CMP_NOT2:%.*]] = icmp eq i8 [[TMP0]], [[TMP1]] +; NO-TRANSFORM-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_COND]], label [[WHILE_END]] +; NO-TRANSFORM: while.end: +; NO-TRANSFORM-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ], [ [[INC]], [[WHILE_COND]] ] +; NO-TRANSFORM-NEXT: [[EXTRA_PHI:%.*]] = phi i32 [ [[EXTRA]], [[WHILE_BODY]] ], [ [[EXTRA]], [[WHILE_COND]] ] +; NO-TRANSFORM-NEXT: [[RES:%.*]] = add i32 [[INC_LCSSA]], [[EXTRA_PHI]] +; NO-TRANSFORM-NEXT: ret i32 [[RES]] +; +entry: + br label %while.cond + +while.cond: + %len.addr = phi i32 [ %len, %entry ], [ %inc, %while.body ] + %inc = add i32 %len.addr, 1 + %cmp.not = icmp eq i32 %inc, %n + br i1 %cmp.not, label %while.end, label %while.body + +while.body: + %idxprom = zext i32 %inc to i64 + %arrayidx = getelementptr inbounds i8, ptr %a, i64 %idxprom + %0 = load i8, ptr %arrayidx + %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %idxprom + %1 = load i8, ptr %arrayidx2 + %cmp.not2 = icmp eq i8 %0, %1 + br i1 %cmp.not2, label %while.cond, label %while.end + +while.end: + %inc.lcssa = phi i32 [ %inc, %while.body ], [ %inc, %while.cond ] + %extra.phi = phi i32 [ %extra, %while.body ], [ %extra, %while.cond ] + %res = add i32 %inc.lcssa, %extra.phi + ret i32 %res +} + diff --git a/llvm/test/Transforms/LoopLoadElim/non-const-distance.ll b/llvm/test/Transforms/LoopLoadElim/non-const-distance.ll new file mode 100644 index 0000000000000..b97d4c23c73d1 --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/non-const-distance.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes=loop-load-elim -S %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +define void @non_const_distance(i64 %start, ptr %A, i1 %c) { +; CHECK-LABEL: define void @non_const_distance( +; CHECK-SAME: i64 [[START:%.*]], ptr [[A:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i64 1, i64 0 +; CHECK-NEXT: [[SEL_NOT:%.*]] = xor i64 [[SEL]], -1 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i32, ptr [[PTR_IV]], i64 [[SEL_NOT]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[ADD_PTR]], align 4 +; CHECK-NEXT: store i32 [[L]], ptr [[PTR_IV]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1000 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %sel = select i1 %c, i64 1, i64 0 + %sel.not = xor i64 %sel, -1 + br label %loop + +loop: + %iv = phi i64 [ %start, %entry ], [ %iv.next, %loop ] + %ptr.iv = phi ptr [ %A, %entry ], [ %ptr.iv.next, %loop ] + %add.ptr = getelementptr i32, ptr %ptr.iv, i64 %sel.not + %l = load i32, ptr %add.ptr, 
align 4 + store i32 %l, ptr %ptr.iv, align 4 + %iv.next = add i64 %iv, 1 + %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 4 + %ec = icmp eq i64 %iv, 1000 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll new file mode 100644 index 0000000000000..931ab4f77618f --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -0,0 +1,299 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -p loop-vectorize -mtriple=arm64-apple-macosx -S %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +define i32 @multi_exit_iv_uniform(i32 %a, i64 %N, ptr %dst) { +; CHECK-LABEL: define i32 @multi_exit_iv_uniform( +; CHECK-SAME: i32 [[A:%.*]], i64 [[N:%.*]], ptr [[DST:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 2147483648) +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[UMIN]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 4, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i32> [[BROADCAST_SPLAT]] to <2 x i64> +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP5]], i32 2 +; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP8]], align 8 +; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP10]] = add <2 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP11]] = add <2 x i32> [[VEC_PHI1]], +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i32> [[TMP11]], [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]]) +; CHECK-NEXT: br label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label 
[[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_LATCH]] ] +; CHECK-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: [[CONV7:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT: store i64 [[CONV7]], ptr [[ARRAYIDX_I]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], -1 +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i64 [[IV]], 2147483648 +; CHECK-NEXT: br i1 [[C_2]], label [[EXIT_2:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: exit.1: +; CHECK-NEXT: ret i32 10 +; CHECK: exit.2: +; CHECK-NEXT: [[IV_2_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_2_NEXT]], [[LOOP_LATCH]] ] +; CHECK-NEXT: ret i32 [[IV_2_NEXT_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %iv.2 = phi i32 [ 0, %entry ], [ %iv.2.next, %loop.latch ] + %c.1 = icmp eq i64 %iv, %N + br i1 %c.1, label %exit.1, label %loop.latch + +loop.latch: + %arrayidx.i = getelementptr i64, ptr %dst, i64 %iv + %conv7 = zext i32 %a to i64 + store i64 %conv7, ptr %arrayidx.i, align 8 + %iv.next = add i64 %iv, 1 + %iv.2.next = add i32 %iv.2, -1 + %c.2 = icmp eq i64 %iv, 2147483648 + br i1 %c.2, label %exit.2, label %loop.header + +exit.1: + ret i32 10 + +exit.2: + ret i32 %iv.2.next +} + +define i64 @pointer_induction_only(ptr %start, ptr %end) { +; CHECK-LABEL: define i64 @pointer_induction_only( +; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 +; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[START2]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 2 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i32>, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; CHECK-NEXT: 
[[TMP9]] = zext <2 x i32> [[WIDE_LOAD4]] to <2 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[TMP8]], <2 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[RECUR_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[IV]], align 1 +; CHECK-NEXT: [[RECUR_NEXT]] = zext i32 [[L]] to i64 +; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds i8, ptr [[IV]], i64 4 +; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[IV]], [[END]] +; CHECK-NEXT: br i1 [[C]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[RECUR_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[RECUR_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] + %recur = phi i64 [ 0, %entry ], [ %recur.next, %loop ] + %l = load i32, ptr %iv, align 1 + %recur.next = zext i32 %l to i64 + %iv.next = getelementptr inbounds i8, ptr %iv, i64 4 + %c = icmp eq ptr %iv, %end + br i1 %c, label %exit, label %loop + +exit: + ret i64 %recur +} + + +define i64 @int_and_pointer_iv(ptr %start, i32 %N) { +; CHECK-LABEL: define i64 @int_and_pointer_iv( +; CHECK-SAME: ptr [[START:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 4000 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP0]] +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to 
<4 x i64> +; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD3]] to <4 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[RECUR_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4 +; CHECK-NEXT: [[RECUR_NEXT]] = zext i32 [[L]] to i64 +; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[RECUR_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[RECUR_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop ] + %recur = phi i64 [ 0, %entry ], [ %recur.next, %loop ] + %l = load i32, ptr %ptr.iv, align 4 + %recur.next = zext i32 %l to i64 + %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 4 + %iv.next = add i32 %iv, 1 + %tobool.not = icmp eq i32 %iv.next, 1000 + br i1 %tobool.not, label %exit, label %loop + +exit: + ret i64 %recur +} + +define void @wide_truncated_iv(ptr %dst) { +; CHECK-LABEL: define void @wide_truncated_iv( +; CHECK-SAME: ptr [[DST:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i8> [[VEC_IND]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 8 +; 
CHECK-NEXT: store <8 x i8> [[VEC_IND]], ptr [[TMP4]], align 1 +; CHECK-NEXT: store <8 x i8> [[STEP_ADD]], ptr [[TMP5]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[STEP_ADD]], +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 192 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 192, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TRUNC_IV:%.*]] = trunc i64 [[IV]] to i8 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: store i8 [[TRUNC_IV]], ptr [[GEP]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[IV]], 200 +; CHECK-NEXT: br i1 [[C]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %trunc.iv = trunc i64 %iv to i8 + %gep = getelementptr i8, ptr %dst, i64 %iv + store i8 %trunc.iv, ptr %gep, align 1 + %iv.next = add i64 %iv, 1 + %c = icmp eq i64 %iv, 200 + br i1 %c, label %exit, label %loop + +exit: + ret void +} +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll index 37ac570aa06c6..9969f881063c3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll @@ -5,8 +5,8 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK-COST: Checking a loop in 'fixed_width' -; CHECK-COST: Found an estimated cost of 12 for VF 2 For instruction: store i32 2, ptr %arrayidx1, align 4 -; CHECK-COST: Found an estimated cost of 24 for VF 4 For instruction: store i32 2, ptr %arrayidx1, align 4 +; CHECK-COST: Found an estimated cost of 14 for VF 2 For instruction: store i32 2, ptr %arrayidx1, align 4 +; CHECK-COST: Found an estimated cost of 28 for VF 4 For instruction: store i32 2, ptr %arrayidx1, align 4 ; CHECK-COST: Selecting VF: 1. 
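+; (Hedged note: there is no native fixed-width masked store here, so the store
+; is presumably costed as a per-lane branch-and-store expansion; at 14 for VF 2
+; and 28 for VF 4, neither vector plan beats the scalar loop.)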
; We should decide this loop is not worth vectorising using fixed width vectors
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
index 1c26ee8479e57..2470bca1e17b9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
@@ -8,39 +8,41 @@ target triple = "aarch64-linux-gnu"
define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @select_const_i32_from_icmp
; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
+; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load
; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq [[VEC_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer)
+; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor [[VEC_ICMP]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
+; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne [[VEC_SEL]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[FIN_ICMP]])
-; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3
+; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[VEC_SEL]])
+; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]]
+; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 7, i32 3
; CHECK-VF4IC4-LABEL: @select_const_i32_from_icmp
; CHECK-VF4IC4: vector.body:
-; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ]
-; CHECK-VF4IC4: [[VEC_PHI2:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ]
-; CHECK-VF4IC4: [[VEC_PHI3:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ]
-; CHECK-VF4IC4: [[VEC_PHI4:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ]
+; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ]
+; CHECK-VF4IC4: [[VEC_PHI2:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ]
+; CHECK-VF4IC4: [[VEC_PHI3:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ]
+; CHECK-VF4IC4: [[VEC_PHI4:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ]
; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq {{.*}}, shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer)
; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq {{.*}}, shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer)
; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq {{.*}}, shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer)
; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq {{.*}}, shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL1]] = select [[VEC_ICMP1]], [[VEC_PHI1]], shufflevector ( insertelement ( poison, i32 7, i64 0), poison, zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL2]] = select [[VEC_ICMP2]], [[VEC_PHI2]], shufflevector ( insertelement ( poison, i32 7, i64 0), poison, zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL3]] = select [[VEC_ICMP3]], [[VEC_PHI3]], shufflevector ( insertelement ( poison, i32 7, i64 0), poison, zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL4]] = select [[VEC_ICMP4]], [[VEC_PHI4]], shufflevector ( insertelement ( poison, i32 7, i64 0), poison, zeroinitializer)
+; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor [[VEC_ICMP1]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
+; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor [[VEC_ICMP2]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
+; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor [[VEC_ICMP3]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
+; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor [[VEC_ICMP4]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
+; CHECK-VF4IC4-NEXT: [[VEC_SEL1]] = or [[VEC_PHI1]], [[NOT1]]
+; CHECK-VF4IC4-NEXT: [[VEC_SEL2]] = or [[VEC_PHI2]], [[NOT2]]
+; CHECK-VF4IC4-NEXT: [[VEC_SEL3]] = or [[VEC_PHI3]], [[NOT3]]
+; CHECK-VF4IC4-NEXT: [[VEC_SEL4]] = or [[VEC_PHI4]], [[NOT4]]
; CHECK-VF4IC4: middle.block:
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP5:%.*]] = icmp ne [[VEC_SEL1]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = select [[VEC_ICMP5]], [[VEC_SEL1]], [[VEC_SEL2]]
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP6:%.*]] = icmp ne [[VEC_SEL5]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = select [[VEC_ICMP6]], [[VEC_SEL5]], [[VEC_SEL3]]
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP7:%.*]] = icmp ne [[VEC_SEL6]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL7:%.*]] = select [[VEC_ICMP7]], [[VEC_SEL6]], [[VEC_SEL4]]
-; CHECK-VF4IC4-NEXT: [[FIN_ICMP:%.*]] = icmp ne [[VEC_SEL7]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[FIN_ICMP]])
-; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3
+; CHECK-VF4IC4-NEXT: [[OR1:%.*]] = or [[VEC_SEL2]], [[VEC_SEL1]]
+; CHECK-VF4IC4-NEXT: [[OR2:%.*]] = or [[VEC_SEL3]], [[OR1]]
+; CHECK-VF4IC4-NEXT: [[OR3:%.*]] = or [[VEC_SEL4]], [[OR2]]
+; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[OR3]])
+; CHECK-VF4IC4-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]]
+; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[FR]], i32 7, i32 3
entry:
br label %for.body
@@ -62,21 +64,18 @@ exit: ; preds = %for.body
define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @select_i32_from_icmp
; CHECK-VF4IC1: vector.ph:
-; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement poison, i32 %a, i64 0
-; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector [[TMP1]], poison, zeroinitializer
-; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = insertelement poison, i32 %b, i64 0
-; CHECK-VF4IC1-NEXT: [[SPLAT_OF_B:%.*]] = shufflevector [[TMP2]], poison, zeroinitializer
+; CHECK-VF4IC1-NOT: shufflevector
+; CHECK-VF4IC1-NOT: shufflevector
; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
+; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load
; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq [[VEC_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select [[VEC_ICMP]], [[VEC_PHI]], [[SPLAT_OF_B]]
+; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor [[VEC_ICMP]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
+; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_INS:%.*]] = insertelement poison, i32 %a, i64 0
-; CHECK-VF4IC1-NEXT: [[FIN_SPLAT:%.*]] = shufflevector [[FIN_INS]], poison, zeroinitializer
-; CHECK-VF4IC1-NEXT: [[FIN_CMP:%.*]] = icmp ne [[VEC_SEL]], [[FIN_SPLAT]]
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[FIN_CMP]])
-; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 %b, i32 %a
+; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[VEC_SEL]])
+; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]]
+; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 %b, i32 %a
; CHECK-VF4IC4-LABEL: @select_i32_from_icmp
; CHECK-VF4IC4: vector.body:
@@ -101,14 +100,15 @@ exit: ; preds = %for.body
define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @select_const_i32_from_fcmp
; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
+; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load
; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = fcmp fast ueq [[VEC_LOAD]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select [[VEC_ICMP]], [[VEC_PHI]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer)
+; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor [[VEC_ICMP]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)
+; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or [[VEC_PHI]], [[NOT]]
; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne [[VEC_SEL]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[FIN_ICMP]])
-; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2
+; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[VEC_SEL]])
+; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]]
+; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 1, i32 2
; CHECK-VF4IC4-LABEL: @select_const_i32_from_fcmp
; CHECK-VF4IC4: vector.body:
@@ -156,17 +156,17 @@ exit: ; preds = %for.body
define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 {
; CHECK-VF4IC1-LABEL: @pred_select_const_i32_from_icmp
; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
+; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi [
zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load ; CHECK-VF4IC1: [[MASK:%.*]] = icmp sgt [[VEC_LOAD]], shufflevector ( insertelement ( poison, i32 35, i64 0), poison, zeroinitializer) ; CHECK-VF4IC1: [[MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr {{%.*}}, i32 4, [[MASK]], poison) ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq [[MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) -; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = select [[VEC_ICMP]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), [[VEC_PHI]] -; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select [[MASK]], [[VEC_SEL_TMP]], [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = or [[VEC_PHI]], [[VEC_ICMP]] +; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select [[MASK]], [[VEC_SEL_TMP]], [[VEC_PHI]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne [[VEC_SEL]], zeroinitializer -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[FIN_ICMP]]) -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 0 +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[VEC_SEL]]) +; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 1, i32 0 ; CHECK-VF4IC4-LABEL: @pred_select_const_i32_from_icmp ; CHECK-VF4IC4: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll index 9dcc751db7cf0..c544d2a92e639 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-reductions.ll @@ -304,8 +304,6 @@ define i32 @cond_xor_reduction(ptr noalias %a, ptr noalias %cond, i64 %N) #0 { ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP14]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP16]], i32 4, [[TMP15]], poison) ; CHECK-NEXT: [[TMP17:%.*]] = xor [[VEC_PHI]], [[WIDE_MASKED_LOAD1]] -; CHECK-NEXT: [[TMP18:%.*]] = xor [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP18]], zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP15]], [[TMP17]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP20]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP22]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll index dd1495626eb98..d9cc630482fc8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-function-calls.ll @@ -2925,11 +2925,12 @@ define void @modf_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; ; ARMPL-SVE-LABEL: define void @modf_f64 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] +; ARMPL-SVE: [[TMP23:%.*]] = call @armpl_svmodf_f64_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-SVE-NOPRED-LABEL: define void @modf_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) 
#[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP5:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) +; ARMPL-SVE-NOPRED: [[TMP17:%.*]] = call @armpl_svmodf_f64_x( [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; ARMPL-SVE-NOPRED: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR64:[0-9]+]] ; entry: br label %for.body @@ -2970,11 +2971,12 @@ define void @modf_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; ; ARMPL-SVE-LABEL: define void @modf_f32 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] +; ARMPL-SVE: [[TMP23:%.*]] = call @armpl_svmodf_f32_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-SVE-NOPRED-LABEL: define void @modf_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: [[TMP5:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) +; ARMPL-SVE-NOPRED: [[TMP17:%.*]] = call @armpl_svmodf_f32_x( [[WIDE_LOAD:%.*]], ptr [[TMP16:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; ARMPL-SVE-NOPRED: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR65:[0-9]+]] ; entry: br label %for.body @@ -3023,7 +3025,7 @@ define void @nextafter_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @nextafter_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svnextafter_f64_x( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) #[[ATTR64:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @nextafter(double [[IN:%.*]], double [[IN]]) #[[ATTR66:[0-9]+]] ; entry: br label %for.body @@ -3068,7 +3070,7 @@ define void @nextafter_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @nextafter_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svnextafter_f32_x( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) #[[ATTR65:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @nextafterf(float [[IN:%.*]], float [[IN]]) #[[ATTR67:[0-9]+]] ; entry: br label %for.body @@ -3116,7 +3118,7 @@ define void @pow_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @pow_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svpow_f64_x( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @pow(double [[IN:%.*]], double [[IN]]) #[[ATTR66:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @pow(double [[IN:%.*]], double [[IN]]) #[[ATTR68:[0-9]+]] ; entry: br label 
%for.body @@ -3161,7 +3163,7 @@ define void @pow_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @pow_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svpow_f32_x( [[WIDE_LOAD:%.*]], [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @powf(float [[IN:%.*]], float [[IN]]) #[[ATTR67:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @powf(float [[IN:%.*]], float [[IN]]) #[[ATTR69:[0-9]+]] ; entry: br label %for.body @@ -3209,7 +3211,7 @@ define void @sin_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @sin_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svsin_f64_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @sin(double [[IN:%.*]]) #[[ATTR68:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @sin(double [[IN:%.*]]) #[[ATTR70:[0-9]+]] ; entry: br label %for.body @@ -3254,7 +3256,7 @@ define void @sin_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @sin_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svsin_f32_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @sinf(float [[IN:%.*]]) #[[ATTR69:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @sinf(float [[IN:%.*]]) #[[ATTR71:[0-9]+]] ; entry: br label %for.body @@ -3297,11 +3299,12 @@ define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; ; ARMPL-SVE-LABEL: define void @sincos_f64 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR6:[0-9]+]] +; ARMPL-SVE: call void @armpl_svsincos_f64_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-SVE-NOPRED-LABEL: define void @sincos_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; ARMPL-SVE-NOPRED: call void @armpl_svsincos_f64_x( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; ARMPL-SVE-NOPRED: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR72:[0-9]+]] ; entry: br label %for.body @@ -3341,11 +3344,12 @@ define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; ; ARMPL-SVE-LABEL: define void @sincos_f32 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR7:[0-9]+]] +; ARMPL-SVE: call void @armpl_svsincos_f32_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-SVE-NOPRED-LABEL: define void @sincos_f32 ; ARMPL-SVE-NOPRED-SAME: 
(ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: call void @armpl_vsincosq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; ARMPL-SVE-NOPRED: call void @armpl_svsincos_f32_x( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; ARMPL-SVE-NOPRED: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR73:[0-9]+]] ; entry: br label %for.body @@ -3388,11 +3392,12 @@ define void @sincospi_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; ; ARMPL-SVE-LABEL: define void @sincospi_f64 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR8:[0-9]+]] +; ARMPL-SVE: call void @armpl_svsincospi_f64_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-SVE-NOPRED-LABEL: define void @sincospi_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: call void @armpl_vsincospiq_f64(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; ARMPL-SVE-NOPRED: call void @armpl_svsincospi_f64_x( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; ARMPL-SVE-NOPRED: call void @sincospi(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR74:[0-9]+]] ; entry: br label %for.body @@ -3432,11 +3437,12 @@ define void @sincospi_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { ; ; ARMPL-SVE-LABEL: define void @sincospi_f32 ; ARMPL-SVE-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR9:[0-9]+]] +; ARMPL-SVE: call void @armpl_svsincospi_f32_x( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP23:%.*]], ptr [[TMP24:%.*]], [[ACTIVE_LANE_MASK:%.*]]) ; ; ARMPL-SVE-NOPRED-LABEL: define void @sincospi_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0]] { -; ARMPL-SVE-NOPRED: call void @armpl_vsincospiq_f32(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; ARMPL-SVE-NOPRED: call void @armpl_svsincospi_f32_x( [[WIDE_LOAD:%.*]], ptr [[TMP17:%.*]], ptr [[TMP18:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; ARMPL-SVE-NOPRED: call void @sincospif(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR75:[0-9]+]] ; entry: br label %for.body @@ -3484,7 +3490,7 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @sinh_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svsinh_f64_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @sinh(double [[IN:%.*]]) #[[ATTR70:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @sinh(double [[IN:%.*]]) #[[ATTR76:[0-9]+]] ; entry: br label %for.body @@ -3529,7 +3535,7 @@ define void @sinh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @sinh_f32 ; ARMPL-SVE-NOPRED-SAME: 
(ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svsinh_f32_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @sinhf(float [[IN:%.*]]) #[[ATTR71:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @sinhf(float [[IN:%.*]]) #[[ATTR77:[0-9]+]] ; entry: br label %for.body @@ -3577,7 +3583,7 @@ define void @sinpi_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @sinpi_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svsinpi_f64_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) #[[ATTR72:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @sinpi(double [[IN:%.*]]) #[[ATTR78:[0-9]+]] ; entry: br label %for.body @@ -3622,7 +3628,7 @@ define void @sinpi_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @sinpi_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svsinpi_f32_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) #[[ATTR73:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @sinpif(float [[IN:%.*]]) #[[ATTR79:[0-9]+]] ; entry: br label %for.body @@ -3670,7 +3676,7 @@ define void @sqrt_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @sqrt_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svsqrt_f64_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]]) #[[ATTR74:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @sqrt(double [[IN:%.*]]) #[[ATTR80:[0-9]+]] ; entry: br label %for.body @@ -3715,7 +3721,7 @@ define void @sqrt_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @sqrt_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svsqrt_f32_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]]) #[[ATTR75:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @sqrtf(float [[IN:%.*]]) #[[ATTR81:[0-9]+]] ; entry: br label %for.body @@ -3763,7 +3769,7 @@ define void @tan_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @tan_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svtan_f64_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @tan(double [[IN:%.*]]) #[[ATTR76:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @tan(double [[IN:%.*]]) #[[ATTR82:[0-9]+]] ; entry: br label %for.body @@ 
-3808,7 +3814,7 @@ define void @tan_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @tan_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svtan_f32_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @tanf(float [[IN:%.*]]) #[[ATTR77:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @tanf(float [[IN:%.*]]) #[[ATTR83:[0-9]+]] ; entry: br label %for.body @@ -3856,7 +3862,7 @@ define void @tanh_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @tanh_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svtanh_f64_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @tanh(double [[IN:%.*]]) #[[ATTR78:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @tanh(double [[IN:%.*]]) #[[ATTR84:[0-9]+]] ; entry: br label %for.body @@ -3901,7 +3907,7 @@ define void @tanh_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @tanh_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svtanh_f32_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @tanhf(float [[IN:%.*]]) #[[ATTR79:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @tanhf(float [[IN:%.*]]) #[[ATTR85:[0-9]+]] ; entry: br label %for.body @@ -3949,7 +3955,7 @@ define void @tgamma_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @tgamma_f64 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svtgamma_f64_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]]) #[[ATTR80:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call double @tgamma(double [[IN:%.*]]) #[[ATTR86:[0-9]+]] ; entry: br label %for.body @@ -3994,7 +4000,7 @@ define void @tgamma_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { ; ARMPL-SVE-NOPRED-LABEL: define void @tgamma_f32 ; ARMPL-SVE-NOPRED-SAME: (ptr noalias [[IN_PTR:%.*]], ptr noalias [[OUT_PTR:%.*]]) #[[ATTR0]] { ; ARMPL-SVE-NOPRED: [[TMP9:%.*]] = call @armpl_svtgamma_f32_x( [[WIDE_LOAD:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @tgammaf(float [[IN:%.*]]) #[[ATTR81:[0-9]+]] +; ARMPL-SVE-NOPRED: [[CALL:%.*]] = tail call float @tgammaf(float [[IN:%.*]]) #[[ATTR87:[0-9]+]] ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-force-tail-with-evl.ll new file mode 100644 index 0000000000000..2ce2a45a811ab --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-force-tail-with-evl.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: 
-force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -S < %s | FileCheck %s + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -S < %s | FileCheck %s + +define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll new file mode 100644 index 0000000000000..bd52c2a8f0645 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll @@ -0,0 +1,117 @@ +; REQUIRES: asserts + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -disable-output < %s 2>&1 | FileCheck %s + +define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; CHECK-LABEL: VPlan 'Initial VPlan for VF={2,4},UF>=1' { +; CHECK-NEXT: Live-in vp<%0> = VF * UF +; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in vp<%2> = backedge-taken count +; CHECK-NEXT: Live-in ir<%N> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8> +; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> +; CHECK-NEXT: EMIT vp<%4> = icmp ule ir<%iv>, vp<%2> +; CHECK-NEXT: Successor(s): pred.store +; CHECK-EMPTY: +; CHECK-NEXT: pred.store: { +; CHECK-NEXT: pred.store.entry: +; CHECK-NEXT: 
BRANCH-ON-MASK vp<%4> +; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: REPLICATE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%5> +; CHECK-NEXT: REPLICATE ir<%0> = load ir<%arrayidx> +; CHECK-NEXT: REPLICATE ir<%arrayidx2> = getelementptr inbounds ir<%c>, vp<%5> +; CHECK-NEXT: REPLICATE ir<%1> = load ir<%arrayidx2> +; CHECK-NEXT: REPLICATE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%5> +; CHECK-NEXT: REPLICATE ir<%add> = add nsw ir<%1>, ir<%0> +; CHECK-NEXT: REPLICATE store ir<%add>, ir<%arrayidx4> +; CHECK-NEXT: Successor(s): pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%6> = ir<%0> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%7> = ir<%1> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): for.body.2 +; CHECK-EMPTY: +; CHECK-NEXT: for.body.2: +; CHECK-NEXT: EMIT vp<%8> = add vp<%3>, vp<%0> +; CHECK-NEXT: EMIT branch-on-count vp<%8>, vp<%1> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +define void @safe_dep(ptr %p) { +; CHECK-LABEL: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<%0> = VF * UF +; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in ir<512> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6> +; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> +; CHECK-NEXT: CLONE ir<%a1> = getelementptr ir<%p>, vp<%3> +; CHECK-NEXT: vp<%4> = vector-pointer ir<%a1> +; CHECK-NEXT: WIDEN ir<%v> = load vp<%4> +; CHECK-NEXT: CLONE ir<%offset> = add vp<%3>, ir<100> +; CHECK-NEXT: CLONE ir<%a2> = getelementptr ir<%p>, ir<%offset> +; CHECK-NEXT: vp<%5> = vector-pointer ir<%a2> +; CHECK-NEXT: WIDEN store vp<%5>, ir<%v> +; CHECK-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0> +; CHECK-NEXT: EMIT branch-on-count vp<%6>, vp<%1> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; +entry: + br label %loop + +loop: + %iv = phi i64 [0, %entry], [%iv.next, %loop] + %a1 = getelementptr i64, ptr %p, i64 %iv + %v = load i64, ptr %a1, align 32 + %offset = add i64 %iv, 100 + %a2 = getelementptr i64, ptr %p, i64 %offset + store i64 %v, ptr %a2, align 32 + %iv.next = add i64 %iv, 1 + %cmp = icmp ne i64 %iv, 511 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index dcd78aa7f1e3d..7ca1b5395dd01 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -449,7 +449,6 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp ne 
[[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP9]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP11:%.*]] = udiv [[WIDE_LOAD]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] @@ -502,8 +501,6 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> ; FIXED-NEXT: [[TMP10:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], [[TMP8]] ; FIXED-NEXT: [[TMP11:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP9]] -; FIXED-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP6]], -; FIXED-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP7]], ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP10]], <4 x i64> [[WIDE_LOAD]] ; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP11]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 @@ -583,7 +580,6 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP9]], [[BROADCAST_SPLAT]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] @@ -636,8 +632,6 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> ; FIXED-NEXT: [[TMP10:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], [[TMP8]] ; FIXED-NEXT: [[TMP11:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP9]] -; FIXED-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP6]], -; FIXED-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP7]], ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP10]], <4 x i64> [[WIDE_LOAD]] ; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP11]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 @@ -714,7 +708,6 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP10:%.*]] = udiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP11:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP10]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] @@ -763,8 +756,6 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP7:%.*]] = icmp ne 
<4 x i64> [[WIDE_LOAD1]], ; FIXED-NEXT: [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], ; FIXED-NEXT: [[TMP9:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], -; FIXED-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD]] ; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP9]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 @@ -841,7 +832,6 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 42, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP10:%.*]] = sdiv [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 27, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP11:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP10]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] @@ -890,8 +880,6 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP7:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], ; FIXED-NEXT: [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], ; FIXED-NEXT: [[TMP9:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], -; FIXED-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], -; FIXED-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], ; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD]] ; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP9]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 @@ -969,7 +957,6 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i8 -128, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP9]], shufflevector ( insertelement ( poison, i8 -1, i64 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: store [[PREDPHI]], ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] @@ -1020,8 +1007,6 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP9:%.*]] = select <32 x i1> [[TMP7]], <32 x i8> , <32 x i8> ; FIXED-NEXT: [[TMP10:%.*]] = sdiv <32 x i8> [[WIDE_LOAD]], [[TMP8]] ; FIXED-NEXT: [[TMP11:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP9]] -; FIXED-NEXT: [[TMP12:%.*]] = xor <32 x i1> [[TMP6]], -; FIXED-NEXT: [[TMP13:%.*]] = xor <32 x i1> [[TMP7]], ; FIXED-NEXT: [[PREDPHI:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[TMP10]], <32 x i8> [[WIDE_LOAD]] ; FIXED-NEXT: [[PREDPHI2:%.*]] = select <32 x i1> [[TMP7]], <32 x i8> [[TMP11]], <32 x i8> [[WIDE_LOAD1]] ; FIXED-NEXT: store <32 x i8> [[PREDPHI]], ptr [[TMP4]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll 
b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index 57e1dc9051f4d..b876e9d2c1a5c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -mtriple riscv64-linux-gnu -mattr=+v,+d -passes=loop-vectorize < %s -S -o - | FileCheck %s -check-prefix=OUTLOOP ; RUN: opt -mtriple riscv64-linux-gnu -mattr=+v,+d -passes=loop-vectorize -prefer-inloop-reductions < %s -S -o - | FileCheck %s -check-prefix=INLOOP - +; RUN: opt -passes=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" target triple = "riscv64" +; FIXME: inloop reductions are not supported yet with predicated vectorization. + define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; OUTLOOP-LABEL: @add_i16_i32( ; OUTLOOP-NEXT: entry: @@ -115,6 +117,70 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] ; INLOOP-NEXT: ret i32 [[R_0_LCSSA]] ; +; IF-EVL-LABEL: @add_i16_i32( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; IF-EVL-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; IF-EVL: for.body.preheader: +; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4 +; IF-EVL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP4]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1 +; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 4 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[INDEX]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: [[TMP8:%.*]] = call @llvm.experimental.stepvector.nxv4i32() +; IF-EVL-NEXT: [[TMP9:%.*]] = add zeroinitializer, [[TMP8]] +; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP9]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT2]] +; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]] +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0 +; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call 
@llvm.masked.load.nxv4i16.p0(ptr [[TMP12]], i32 2, [[TMP10]], poison) +; IF-EVL-NEXT: [[TMP13:%.*]] = sext [[WIDE_MASKED_LOAD]] to +; IF-EVL-NEXT: [[TMP14]] = add [[VEC_PHI]], [[TMP13]] +; IF-EVL-NEXT: [[TMP15:%.*]] = select [[TMP10]], [[TMP14]], [[VEC_PHI]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP6]] +; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP15]]) +; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; IF-EVL-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]] +; IF-EVL-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; IF-EVL-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +; IF-EVL-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]] +; IF-EVL-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; IF-EVL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: for.cond.cleanup.loopexit: +; IF-EVL-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] +; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP]] +; IF-EVL: for.cond.cleanup: +; IF-EVL-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; IF-EVL-NEXT: ret i32 [[R_0_LCSSA]] +; entry: %cmp6 = icmp sgt i32 %n, 0 br i1 %cmp6, label %for.body, label %for.cond.cleanup diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll index 34a7987bb40ab..2b58acbfe9cc9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll @@ -12,25 +12,24 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; 
CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[Y]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 ; ; SCALABLE-LABEL: @select_icmp ; SCALABLE: vector.ph: @@ -42,25 +41,24 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { ; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp slt [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], [[BROADCAST_SPLAT2]] +; SCALABLE-NEXT: [[NOT:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[TMP9]] = or [[VEC_PHI]], [[NOT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne [[TMP9]], zeroinitializer -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[RDX_SELECT_CMP]]) -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[Y]], i32 0 +; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) +; SCALABLE-NEXT: 
[[FR:%.*]] = freeze i1 [[TMP13]] +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 ; entry: br label %for.body @@ -87,25 +85,24 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP5]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[Y]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 ; ; SCALABLE-LABEL: @select_fcmp ; SCALABLE: vector.ph: @@ -117,25 +114,24 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 ; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[X:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; 
SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast olt [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; SCALABLE-NEXT: [[TMP9]] = select [[TMP8]], [[VEC_PHI]], [[BROADCAST_SPLAT2]] +; SCALABLE-NEXT: [[NOT:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[TMP9]] = or [[VEC_PHI]], [[NOT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne [[TMP9]], zeroinitializer -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[RDX_SELECT_CMP]]) -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[Y]], i32 0 +; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP9]]) +; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 ; entry: br label %for.body @@ -163,20 +159,21 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> +; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 7, i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3 ; ; SCALABLE-LABEL: @select_const_i32_from_icmp ; SCALABLE: vector.ph: @@ -189,20 +186,21 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi [ shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), [[VECTOR_PH]] ], [ 
[[TMP9:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP4]]
 ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; SCALABLE-NEXT: [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
 ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; SCALABLE: middle.block:
-; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP9]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]])
-; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 7, i32 3
+; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
+; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
+; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3
 ;
 entry:
   br label %for.body
@@ -227,29 +225,24 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
-; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
-; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP5]], [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[B]], i32 [[A]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a
 ;
 ; SCALABLE-LABEL: @select_i32_from_icmp
 ; SCALABLE: vector.ph:
@@ -259,29 +252,24 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64
 ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]]
 ; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
 ; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4
-; SCALABLE-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A:%.*]], i64 0
-; SCALABLE-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i64 0
-; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
 ; SCALABLE: vector.body:
 ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP4]]
 ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; SCALABLE-NEXT: [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[BROADCAST_SPLAT]]
+; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
 ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; SCALABLE: middle.block:
-; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A]], i64 0
-; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP9]], [[DOTSPLAT]]
-; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]])
-; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[B]], i32 [[A]]
+; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
+; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
+; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a
 ;
 entry:
   br label %for.body
@@ -309,20 +297,21 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 {
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2, i32 2, i32 2, i32 2>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
-; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP5]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 1, i32 2
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2
 ;
 ; SCALABLE-LABEL: @select_const_i32_from_fcmp
 ; SCALABLE: vector.ph:
@@ -335,20 +324,21 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 {
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
 ; SCALABLE: vector.body:
 ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[TMP4]]
 ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0
 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4
 ; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast ueq <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
-; SCALABLE-NEXT: [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
+; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]]
 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
 ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; SCALABLE: middle.block:
-; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP9]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]])
-; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 1, i32 2
+; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
+; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]]
+; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2
 ;
 entry:
   br label %for.body
@@ -401,7 +391,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
@@ -411,16 +401,15 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[TMP4]], <4 x i32> poison)
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_MASKED_LOAD]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> [[VEC_PHI]]
-; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP9]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i1> [[VEC_PHI]], [[TMP8]]
+; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP9]], <4 x i1> [[VEC_PHI]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK: middle.block:
-; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[PREDPHI]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
-; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP12]], i32 1, i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[PREDPHI]])
+; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP12]]
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0
 ;
 ; SCALABLE-LABEL: @pred_select_const_i32_from_icmp
 ; SCALABLE: vector.ph:
@@ -433,7 +422,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]]
 ; SCALABLE: vector.body:
 ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
+; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[TMP4]]
 ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
@@ -443,16 +432,15 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
 ; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0
 ; SCALABLE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> poison)
 ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; SCALABLE-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]]
-; SCALABLE-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
-; SCALABLE-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32> [[VEC_PHI]]
+; SCALABLE-NEXT: [[TMP13:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP12]]
+; SCALABLE-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> [[VEC_PHI]]
 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
 ; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; SCALABLE: middle.block:
-; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[PREDPHI]], zeroinitializer
-; SCALABLE-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]])
-; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP18]], i32 1, i32 0
+; SCALABLE-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[PREDPHI]])
+; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP18]]
+; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index dcfa9bb105b62..1ce4cb928e808 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -346,7 +346,6 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
 ; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 10, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
 ; SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], i32 8, <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> poison)
-; SCALABLE-NEXT: [[TMP12:%.*]] = xor <vscale x 2 x i1> [[TMP11]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
 ; SCALABLE-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP11]], <vscale x 2 x i64> [[WIDE_MASKED_GATHER]], <vscale x 2 x i64> zeroinitializer
 ; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]]
 ; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
@@ -395,8 +394,6 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], <i64 10, i64 10, i64 10, i64 10>
 ; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison)
 ; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP3]], <4 x i64> poison)
-; FIXEDLEN-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP2]], <i1 true, i1 true, i1 true, i1 true>
-; FIXEDLEN-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
 ; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer
 ; FIXEDLEN-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP3]], <4 x i64> [[WIDE_MASKED_GATHER2]], <4 x i64> zeroinitializer
 ;
FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-gather-scatter.ll new file mode 100644 index 0000000000000..835ff37568817 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-gather-scatter.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP + +define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %index, i64 %n) { +; IF-EVL-LABEL: @gather_scatter( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] +; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2 +; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] +; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 +; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2 +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; IF-EVL-NEXT: [[TMP12:%.*]] = add [[TMP11]], zeroinitializer +; IF-EVL-NEXT: [[TMP13:%.*]] = mul [[TMP12]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] +; IF-EVL-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 +; IF-EVL-NEXT: [[TMP16:%.*]] = mul i64 1, [[TMP15]] +; IF-EVL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 +; IF-EVL-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[TMP17:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 
[[TMP17]], i32 2, i1 true) +; IF-EVL-NEXT: [[TMP19:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] +; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[INDEX:%.*]], [[VEC_IND]] +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv2i64.nxv2p0( align 8 [[TMP20]], [[TMP19]], i32 [[TMP18]]) +; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[IN:%.*]], [[WIDE_MASKED_GATHER]] +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.vp.gather.nxv2f32.nxv2p0( align 4 [[TMP21]], [[TMP19]], i32 [[TMP18]]) +; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], [[WIDE_MASKED_GATHER]] +; IF-EVL-NEXT: call void @llvm.vp.scatter.nxv2f32.nxv2p0( [[WIDE_MASKED_GATHER2]], align 4 [[TMP22]], [[TMP19]], i32 [[TMP18]]) +; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP18]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX1]], [[TMP10]] +; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[INDEX]], i64 [[INDVARS_IV]] +; IF-EVL-NEXT: [[TMP25:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8 +; IF-EVL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[TMP25]] +; IF-EVL-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +; IF-EVL-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[OUT]], i64 [[TMP25]] +; IF-EVL-NEXT: store float [[TMP26]], ptr [[ARRAYIDX7]], align 4 +; IF-EVL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: for.end: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @gather_scatter( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[INDEX:%.*]], i64 [[INDVARS_IV]] +; NO-VP-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8 +; NO-VP-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[IN:%.*]], i64 [[TMP0]] +; NO-VP-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +; NO-VP-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[TMP0]] +; NO-VP-NEXT: store float [[TMP1]], ptr [[ARRAYIDX7]], align 4 +; NO-VP-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]] +; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; NO-VP: for.end: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx3 = 
getelementptr inbounds i32, ptr %index, i64 %indvars.iv + %0 = load i64, ptr %arrayidx3, align 8 + %arrayidx5 = getelementptr inbounds float, ptr %in, i64 %0 + %1 = load float, ptr %arrayidx5, align 4 + %arrayidx7 = getelementptr inbounds float, ptr %out, i64 %0 + store float %1, ptr %arrayidx7, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll new file mode 100644 index 0000000000000..0b495bc680f0c --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll @@ -0,0 +1,175 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck --check-prefix=IF-EVL %s + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck --check-prefix=NO-VP %s + +; FIXME: interleaved accesses are not supported yet with predicated vectorization. +define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { +; IF-EVL-LABEL: @interleave( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: [[TMP17:%.*]] = sub i64 -1, [[N:%.*]] +; IF-EVL-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP31]], 8 +; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP17]], [[TMP2]] +; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 +; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8 +; IF-EVL-NEXT: [[TMP32:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP32]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 +; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; IF-EVL-NEXT: [[TMP12:%.*]] = add [[TMP11]], zeroinitializer +; IF-EVL-NEXT: [[TMP13:%.*]] = mul [[TMP12]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] +; IF-EVL-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4 +; IF-EVL-NEXT: [[TMP37:%.*]] = mul i64 1, [[TMP15]] +; IF-EVL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP37]], i64 0 +; IF-EVL-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: 
[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] +; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 +; IF-EVL-NEXT: [[TMP38:%.*]] = add i64 [[TMP19]], 0 +; IF-EVL-NEXT: [[TMP39:%.*]] = mul i64 [[TMP38]], 1 +; IF-EVL-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], [[TMP39]] +; IF-EVL-NEXT: [[TMP23:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] +; IF-EVL-NEXT: [[TMP24:%.*]] = icmp ule [[STEP_ADD]], [[BROADCAST_SPLAT]] +; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i32], ptr [[B:%.*]], [[VEC_IND]], i32 0 +; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], [[STEP_ADD]], i32 0 +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP25]], i32 4, [[TMP23]], poison) +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP26]], i32 4, [[TMP24]], poison) +; IF-EVL-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], [[VEC_IND]], i32 1 +; IF-EVL-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], [[STEP_ADD]], i32 1 +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP27]], i32 4, [[TMP23]], poison) +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP28]], i32 4, [[TMP24]], poison) +; IF-EVL-NEXT: [[TMP29:%.*]] = add nsw [[WIDE_MASKED_GATHER3]], [[WIDE_MASKED_GATHER]] +; IF-EVL-NEXT: [[TMP30:%.*]] = add nsw [[WIDE_MASKED_GATHER4]], [[WIDE_MASKED_GATHER2]] +; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] +; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] +; IF-EVL-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 +; IF-EVL-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 4 +; IF-EVL-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP35]] +; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP29]], ptr [[TMP33]], i32 4, [[TMP23]]) +; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP30]], ptr [[TMP36]], i32 4, [[TMP24]]) +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] +; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[IV]], i32 0 +; IF-EVL-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[IV]], i32 1 +; IF-EVL-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 
[[TMP22]], [[TMP21]] +; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: for.cond.cleanup: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @interleave( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16 +; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; NO-VP: vector.ph: +; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] +; NO-VP: vector.body: +; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; NO-VP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 +; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B:%.*]], i64 [[TMP10]], i32 0 +; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[TMP1]], i32 0 +; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 +; NO-VP-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4 +; NO-VP-NEXT: [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 +; NO-VP-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> +; NO-VP-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> +; NO-VP-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> +; NO-VP-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> +; NO-VP-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]] +; NO-VP-NEXT: [[TMP7:%.*]] = add nsw <8 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC2]] +; NO-VP-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] +; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] +; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 0 +; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 8 +; NO-VP-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP12]], align 4 +; NO-VP-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP11]], align 4 +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; NO-VP-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-VP: middle.block: +; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; NO-VP: scalar.ph: +; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[IV]], i32 0 +; NO-VP-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x 
i32], ptr [[B]], i64 [[IV]], i32 1 +; NO-VP-NEXT: [[TMP30:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; NO-VP-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +; NO-VP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; NO-VP-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; NO-VP: for.cond.cleanup: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds [2 x i32], ptr %b, i64 %iv, i32 0 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [2 x i32], ptr %b, i64 %iv, i32 1 + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 + +for.cond.cleanup: + ret void +} + +!0 = distinct !{!0, !1, !2} +!1 = !{!"llvm.loop.interleave.count", i32 2} +!2 = !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll new file mode 100644 index 0000000000000..d5ad99f5cff88 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck --check-prefix=IF-EVL %s + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck --check-prefix=NO-VP %s + +define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) { +; IF-EVL-LABEL: @iv32( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: [[TMP19:%.*]] = sub i32 -1, [[N:%.*]] +; IF-EVL-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 4 +; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP19]], [[TMP2]] +; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 4 +; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 4 +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP8]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP5]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: 
[[TMP11:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP11]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP13:%.*]] = add i32 [[EVL_BASED_IV]], 0 +; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[TMP13]] +; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP13]] +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP_LOAD]], ptr align 4 [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP12]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[TMP10]] +; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i32 [[IV_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] +; IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV1]] +; IF-EVL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV1]] +; IF-EVL-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX4]], align 4 +; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV1]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT1]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: for.cond.cleanup: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @iv32( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; NO-VP-NEXT: [[TMP10:%.*]] = mul i32 [[TMP0]], 4 +; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], [[TMP10]] +; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; NO-VP: vector.ph: +; NO-VP-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() +; NO-VP-NEXT: [[TMP11:%.*]] = mul i32 [[TMP1]], 4 +; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP11]] +; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; NO-VP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; NO-VP-NEXT: [[TMP12:%.*]] = mul i32 [[TMP2]], 4 +; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] +; NO-VP: vector.body: +; NO-VP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 +; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[TMP3]] +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]] +; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 +; 
NO-VP-NEXT: store [[WIDE_LOAD]], ptr [[TMP7]], align 4 +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP12]] +; NO-VP-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-VP: middle.block: +; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; NO-VP: scalar.ph: +; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV]] +; NO-VP-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]] +; NO-VP-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX4]], align 4 +; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; NO-VP: for.cond.cleanup: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i32 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 %iv + store i32 %0, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i32 %iv, 1 + %exitcond.not = icmp eq i32 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll new file mode 100644 index 0000000000000..203d0c977074e --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP + +define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { +; IF-EVL-LABEL: @masked_loadstore( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] +; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 +; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] +; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]] +; IF-EVL-NEXT: 
[[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[EVL_BASED_IV]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: [[TMP14:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; IF-EVL-NEXT: [[TMP15:%.*]] = add zeroinitializer, [[TMP14]] +; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP15]] +; IF-EVL-NEXT: [[TMP16:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT2]] +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP13]] +; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP19:%.*]] = icmp ne [[VP_OP_LOAD]], zeroinitializer +; IF-EVL-NEXT: [[TMP20:%.*]] = select [[TMP16]], [[TMP19]], zeroinitializer +; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP13]] +; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP21]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP22]], [[TMP20]], i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP23:%.*]] = add [[VP_OP_LOAD]], [[VP_OP_LOAD3]] +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP23]], ptr align 4 [[TMP22]], [[TMP20]], i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP24:%.*]] = zext i32 [[TMP12]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP24]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; IF-EVL-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_011]] +; IF-EVL-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP26]], 0 +; IF-EVL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; IF-EVL: if.then: +; 
IF-EVL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_011]] +; IF-EVL-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 +; IF-EVL-NEXT: [[ADD:%.*]] = add i32 [[TMP26]], [[TMP27]] +; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4 +; IF-EVL-NEXT: br label [[FOR_INC]] +; IF-EVL: for.inc: +; IF-EVL-NEXT: [[INC]] = add nuw nsw i64 [[I_011]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: exit: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @masked_loadstore( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[I_011]] +; NO-VP-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP0]], 0 +; NO-VP-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; NO-VP: if.then: +; NO-VP-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_011]] +; NO-VP-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 +; NO-VP-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]] +; NO-VP-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4 +; NO-VP-NEXT: br label [[FOR_INC]] +; NO-VP: for.inc: +; NO-VP-NEXT: [[INC]] = add nuw nsw i64 [[I_011]], 1 +; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N:%.*]] +; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]] +; NO-VP: exit: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %i.011 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.011 + %0 = load i32, ptr %arrayidx, align 4 + %cmp1 = icmp ne i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %i.011 + %1 = load i32, ptr %arrayidx3, align 4 + %add = add i32 %0, %1 + store i32 %add, ptr %arrayidx3, align 4 + br label %for.inc + +for.inc: + %inc = add nuw nsw i64 %i.011, 1 + %exitcond.not = icmp eq i64 %inc, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-no-masking.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-no-masking.ll new file mode 100644 index 0000000000000..1c49fba1370e9 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-no-masking.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s + +; No need to emit predicated vector code if the vector instructions with masking are not required. 
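For orientation: the EVL-based tail folding exercised by the IF-EVL checks in these tests replaces a mask computed from the induction variable with an explicit vector length (EVL) requested from @llvm.experimental.get.vector.length, which is then threaded through the @llvm.vp.* intrinsics. Below is a minimal hand-written sketch of that loop shape, assuming a plain i32 copy loop; the function and value names are illustrative, not taken from the tests.

  define void @evl_copy_sketch(ptr noalias %dst, ptr noalias %src, i64 %n) {
  entry:
    br label %loop

  loop:
    %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
    ; ask the target how many lanes to process this iteration (at most vscale x 4)
    %remaining = sub i64 %n, %iv
    %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %remaining, i32 4, i1 true)
    ; VP memory ops take an all-true mask plus the EVL; lanes at or past %evl stay inactive
    %src.gep = getelementptr inbounds i32, ptr %src, i64 %iv
    %v = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %src.gep, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 %evl)
    %dst.gep = getelementptr inbounds i32, ptr %dst, i64 %iv
    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %v, ptr align 4 %dst.gep, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 %evl)
    ; advance the induction variable by the number of lanes actually processed
    %evl.zext = zext i32 %evl to i64
    %iv.next = add nuw i64 %iv, %evl.zext
    %done = icmp uge i64 %iv.next, %n
    br i1 %done, label %exit, label %loop

  exit:
    ret void
  }

  declare i32 @llvm.experimental.get.vector.length.i64(i64, i32, i1)
  declare <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr, <vscale x 4 x i1>, i32)
  declare void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32>, ptr, <vscale x 4 x i1>, i32)

This mirrors the IF-EVL vector bodies checked throughout these files: a get.vector.length call, vp.load/vp.store guarded by an all-true mask plus the EVL, and an induction variable bumped by the zero-extended EVL. The @no_masking test that follows is the complementary case: the loop touches no memory, so there is no masked instruction for this machinery to predicate.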
+define i32 @no_masking() { +; CHECK-LABEL: @no_masking( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BODY:%.*]] +; CHECK: body: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[BODY]] ] +; CHECK-NEXT: [[INC]] = add i32 [[P]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[BODY]] +; CHECK: end: +; CHECK-NEXT: ret i32 0 +; +entry: + br label %body + +body: + %p = phi i32 [ 1, %entry ], [ %inc, %body ] + %inc = add i32 %p, 1 + %cmp = icmp eq i32 %inc, 0 + br i1 %cmp, label %end, label %body + +end: + ret i32 0 +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll new file mode 100644 index 0000000000000..f2222e0a1f936 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP + +; FIXME: reversed loads/stores are not supported yet with predicated vectorization. +define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %ptr2) { +; IF-EVL-LABEL: @reverse_load_store( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; IF-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] +; IF-EVL-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 +; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]] +; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: [[TMP8:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; IF-EVL-NEXT: [[TMP9:%.*]] = add zeroinitializer, [[TMP8]] +; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP9]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule [[VEC_IV]], shufflevector ( insertelement ( poison, i64 1023, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[TMP7]], -1 +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP11]] +; 
IF-EVL-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; IF-EVL-NEXT: [[TMP15:%.*]] = mul i64 0, [[TMP14]] +; IF-EVL-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP14]] +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP15]] +; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP16]] +; IF-EVL-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vector.reverse.nxv4i1( [[TMP10]]) +; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP18]], i32 4, [[REVERSE]], poison) +; IF-EVL-NEXT: [[REVERSE3:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[WIDE_MASKED_LOAD]]) +; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 +; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP21]] +; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP21]] +; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP22]] +; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP23]] +; IF-EVL-NEXT: [[REVERSE4:%.*]] = call @llvm.experimental.vector.reverse.nxv4i1( [[TMP10]]) +; IF-EVL-NEXT: [[REVERSE5:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[REVERSE3]]) +; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[REVERSE5]], ptr [[TMP25]], i32 4, [[REVERSE4]]) +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; IF-EVL-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[STARTVAL]], [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[ADD_PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[ADD]] = add i64 [[ADD_PHI]], -1 +; IF-EVL-NEXT: [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[ADD]] +; IF-EVL-NEXT: [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4 +; IF-EVL-NEXT: [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2]], i64 [[ADD]] +; IF-EVL-NEXT: store i32 [[TMP]], ptr [[GEPS]], align 4 +; IF-EVL-NEXT: [[INC]] = add i32 [[I]], 1 +; IF-EVL-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024 +; IF-EVL-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: loopend: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @reverse_load_store( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[ADD_PHI:%.*]] = phi i64 [ [[STARTVAL:%.*]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[ADD]] = add i64 [[ADD_PHI]], -1 +; NO-VP-NEXT: [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[ADD]] +; NO-VP-NEXT: [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4 +; NO-VP-NEXT: [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[ADD]] +; 
NO-VP-NEXT: store i32 [[TMP]], ptr [[GEPS]], align 4
+; NO-VP-NEXT: [[INC]] = add i32 [[I]], 1
+; NO-VP-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024
+; NO-VP-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND:%.*]]
+; NO-VP: loopend:
+; NO-VP-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %add.phi = phi i64 [ %startval, %entry ], [ %add, %for.body ]
+ %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %add = add i64 %add.phi, -1
+ %gepl = getelementptr inbounds i32, ptr %ptr, i64 %add
+ %tmp = load i32, ptr %gepl, align 4
+ %geps = getelementptr inbounds i32, ptr %ptr2, i64 %add
+ store i32 %tmp, ptr %geps, align 4
+ %inc = add i32 %i, 1
+ %exitcond = icmp ne i32 %inc, 1024
+ br i1 %exitcond, label %for.body, label %loopend
+
+loopend:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
new file mode 100644
index 0000000000000..c69bb17f698aa
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck --check-prefix=IF-EVL %s
+
+; RUN: opt -passes=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
+; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck --check-prefix=NO-VP %s
+
+define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
+; IF-EVL-LABEL: @foo(
+; IF-EVL-NEXT: entry:
+; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
+; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
+; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4
+; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
+; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
+; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
+; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
+; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
+; IF-EVL: vector.body:
+; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
+; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP13]]
+; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
+; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
+; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP13]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
+; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
+; IF-EVL-NEXT: [[TMP18:%.*]] = add nsw <vscale x 4 x i32> [[VP_OP_LOAD1]], [[VP_OP_LOAD]]
+; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP13]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP20]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
+; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL: middle.block:
+; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; IF-EVL: scalar.ph:
+; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
+; IF-EVL: for.body:
+; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
+; IF-EVL-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP23]]
+; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
+; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; IF-EVL: for.cond.cleanup:
+; IF-EVL-NEXT: ret void
+;
+; NO-VP-LABEL: @foo(
+; NO-VP-NEXT: entry:
+; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
+; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; NO-VP: vector.ph:
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
+; NO-VP: vector.body:
+; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
+; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP6]]
+; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
+; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP6]]
+; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
+; NO-VP-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP10]], align 4
+; NO-VP-NEXT: [[TMP11:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
+; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
+; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
+; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP11]], ptr [[TMP13]], align 4
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; NO-VP: middle.block:
+; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; NO-VP: scalar.ph:
+; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; NO-VP-NEXT: br label [[FOR_BODY:%.*]]
+; NO-VP: for.body:
+; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; NO-VP-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; NO-VP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
+; NO-VP-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; NO-VP-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP15]]
+; NO-VP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; NO-VP-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
+; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; NO-VP: for.cond.cleanup:
+; NO-VP-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv
+ %1 = load i32, ptr %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv
+ store i32 %add, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
new file mode 100644
index 0000000000000..72b881bd44c76
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll
@@ -0,0 +1,134 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefixes=IF-EVL,CHECK %s
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefixes=NO-VP,CHECK %s
+
+define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
+; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+; IF-EVL-EMPTY:
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+; IF-EVL-EMPTY:
+; IF-EVL-NEXT: vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]>
+; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[EVL_PHI]]>, ir<%N>
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
+; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>, ir<true>
+; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = load vp<[[PTR2]]>, ir<true>
+; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]>
+; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
+; IF-EVL-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]>, ir<true>
+; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
+; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
+; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; NO-VP-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+; NO-VP-EMPTY:
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+; NO-VP-EMPTY:
+; NO-VP-NEXT: vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; NO-VP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
+; NO-VP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
+; NO-VP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; NO-VP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>
+; NO-VP-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
+; NO-VP-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; NO-VP-NEXT: WIDEN ir<[[LD2:%.+]]> = load vp<[[PTR2]]>
+; NO-VP-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]>
+; NO-VP-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
+; NO-VP-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
+; NO-VP-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]>
+; NO-VP-NEXT: EMIT vp<[[IV_NEXT:%[0-9]+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
+; NO-VP-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv
+ %1 = load i32, ptr %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv
+ store i32 %add, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @safe_dep(ptr %p) {
+; CHECK: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
+; CHECK-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
+; CHECK-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
+; CHECK-NEXT: Live-in ir<512> = original trip-count
+; CHECK-EMPTY:
+; CHECK: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
+; CHECK-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
+; CHECK-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr ir<%p>, vp<[[ST]]>
+; CHECK-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
+; CHECK-NEXT: WIDEN ir<[[V:%.+]]> = load vp<[[PTR1]]>
+; CHECK-NEXT: CLONE ir<[[OFFSET:.+]]> = add vp<[[ST]]>, ir<100>
+; CHECK-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr ir<%p>, ir<[[OFFSET]]>
+; CHECK-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
+; CHECK-NEXT: WIDEN store vp<[[PTR2]]>, ir<[[V]]>
+; CHECK-NEXT: EMIT vp<[[IV_NEXT:%[0-9]+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
+; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [0, %entry], [%iv.next, %loop]
+ %a1 = getelementptr i64, ptr %p, i64 %iv
+ %v = load i64, ptr %a1, align 32
+ %offset = add i64 %iv, 100
+ %a2 = getelementptr i64, ptr %p, i64 %offset
+ store i64 %v, ptr %a2, align 32
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp ne i64 %iv, 511
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
index 5944d9036b0a9..fcc3864a7aebe 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll
@@ -579,7 +579,6 @@ define void @Bgep_inbounds_unconditionally_due_to_store(ptr noalias %B, ptr read
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]],
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr float, ptr %B, i64 [[TMP0]]
-; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP4]], i32 0
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
 ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD2]],
diff --git a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll
index 87f525fbce17a..09fad39062a5d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll
@@ -84,8 +84,6 @@ define double @sumIfVector(ptr nocapture readonly %arr) {
 ; SSE-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
 ; SSE-NEXT: [[TMP6:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]],
 ; SSE-NEXT: [[TMP7:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD2]],
-; SSE-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
-; SSE-NEXT: [[TMP9:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>
 ; SSE-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[VEC_PHI]], [[WIDE_LOAD]]
 ; SSE-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[VEC_PHI1]], [[WIDE_LOAD2]]
 ; SSE-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP6]], <2 x double> [[TMP10]], <2 x double> [[VEC_PHI]]
@@ -153,10 +151,6 @@ define double @sumIfVector(ptr nocapture readonly %arr) {
 ; AVX-NEXT: [[TMP13:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD4]],
 ; AVX-NEXT: [[TMP14:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD5]],
 ; AVX-NEXT: [[TMP15:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD6]],
-; AVX-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP12]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP13]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT: [[TMP18:%.*]] = xor <4 x i1> [[TMP14]], <i1 true, i1 true, i1 true, i1 true>
-; AVX-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP15]], <i1 true, i1 true, i1 true, i1 true>
 ; AVX-NEXT: [[TMP20:%.*]] = fadd fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]]
 ; AVX-NEXT: [[TMP21:%.*]] = fadd fast <4 x double> [[VEC_PHI1]], [[WIDE_LOAD4]]
 ; AVX-NEXT: [[TMP22:%.*]] = fadd fast <4 x double> [[VEC_PHI2]], [[WIDE_LOAD5]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
new file mode 100644
index 0000000000000..8ce87d0ef1710
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -0,0 +1,231 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -S %s | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+define i32 @iv_used_widened_and_truncated(ptr %dst, i64 %N) #0 {
+; CHECK-LABEL: define i32 @iv_used_widened_and_truncated(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <8 x i32> [[VEC_IND4]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT: [[STEP_ADD6:%.*]] = add <8 x i32> [[STEP_ADD5]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT: [[STEP_ADD7:%.*]] = add <8 x i32> [[STEP_ADD6]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD2]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VEC_IND4]], <8 x ptr> [[TMP1]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD5]], <8 x ptr> [[TMP2]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD6]], <8 x ptr> [[TMP3]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD7]], <8 x ptr> [[TMP4]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <8 x i32> [[STEP_ADD7]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: [[T:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: store i32 [[T]], ptr [[GEP]], align 8
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[C]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %gep = getelementptr { i32, [ 8 x i32 ]}, ptr %dst, i64 %iv
+ %t = trunc i64 %iv to i32
+ store i32 %t, ptr %gep, align 8
+ %iv.next = add i64 %iv, 1
+ %c = icmp eq i64 %iv, %N
+ br i1 %c, label %exit, label %loop
+
+exit:
+ ret i32 0
+}
+
+define void @multiple_truncated_ivs_with_wide_uses(i1 %c, ptr %A, ptr %B) {
+; CHECK-LABEL: define void @multiple_truncated_ivs_with_wide_uses(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 130
+; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B]], i64 260
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
+; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C]], <4 x i16> [[VEC_IND]], <4 x i16> <i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[C]], <4 x i16> [[STEP_ADD]], <4 x i16> <i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[A]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4
+; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP6]], align 2, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
+; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP7]], align 2, !alias.scope [[META4]], !noalias [[META7]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4
+; CHECK-NEXT: store <4 x i32> [[VEC_IND3]], ptr [[TMP10]], align 4, !alias.scope [[META7]]
+; CHECK-NEXT: store <4 x i32> [[STEP_ADD4]], ptr [[TMP11]], align 4, !alias.scope [[META7]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
+; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
+; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_16:%.*]] = trunc i64 [[IV]] to i16
+; CHECK-NEXT: [[IV_32:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i16 [[IV_16]], i16 10
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i16, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: store i16 [[SEL]], ptr [[GEP_A]], align 2
+; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]]
+; CHECK-NEXT: store i32 [[IV_32]], ptr [[GEP_B]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 64
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.16 = trunc i64 %iv to i16
+ %iv.32 = trunc i64 %iv to i32
+ %sel = select i1 %c, i16 %iv.16, i16 10
+ %gep.A = getelementptr i16, ptr %A, i64 %iv
+ store i16 %sel, ptr %gep.A
+ %gep.B = getelementptr i32, ptr %B, i64 %iv
+ store i32 %iv.32, ptr %gep.B
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 64
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @truncated_ivs_with_wide_and_scalar_uses(i1 %c, ptr %dst) {
+; CHECK-LABEL: define void @truncated_ivs_with_wide_and_scalar_uses(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[DST:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i16> [[VEC_IND]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 8
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[VEC_IND]], <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[STEP_ADD]], <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[TMP3]], i32 8
+; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP7]], align 2
+; CHECK-NEXT: store <8 x i16> [[TMP6]], ptr [[TMP8]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i16> [[STEP_ADD]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV_16:%.*]] = trunc i64 [[IV]] to i16
+; CHECK-NEXT: [[IV_32:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[DST]], i32 [[IV_32]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C]], i16 [[IV_16]], i16 10
+; CHECK-NEXT: store i16 [[SEL]], ptr [[GEP]], align 2
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 64
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.16 = trunc i64 %iv to i16
+ %iv.32 = trunc i64 %iv to i32
+ %gep = getelementptr i16, ptr %dst, i32 %iv.32
+ %sel = select i1 %c, i16 %iv.16, i16 10
+ store i16 %sel, ptr %gep
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv, 64
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="skylake-avx512" }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
+; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]}
+; CHECK: [[META6]] = distinct !{[[META6]], !"LVerDomain"}
+; CHECK: [[META7]] = !{[[META8:![0-9]+]]}
+; CHECK: [[META8]] = distinct !{[[META8]], [[META6]]}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 91b8e149487a8..8be359af9e6a8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -54,10 +54,6 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP15]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i1> [[TMP4]], -; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i1> [[TMP5]], -; CHECK-NEXT: [[TMP18:%.*]] = xor <4 x i1> [[TMP6]], -; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP7]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI10:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI11:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[WIDE_LOAD8]], <4 x i32> zeroinitializer @@ -214,10 +210,6 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP69]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP70]], align 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP71]], align 4 -; CHECK-NEXT: [[TMP72:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP73:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP74:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP75:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer @@ -398,10 +390,6 @@ define i32 @test_invariant_address(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP93:%.*]] = insertelement <4 x i32> [[TMP92]], i32 [[TMP89]], i32 1 ; CHECK-NEXT: [[TMP94:%.*]] = insertelement <4 x i32> [[TMP93]], i32 [[TMP90]], i32 2 ; CHECK-NEXT: [[TMP95:%.*]] = insertelement <4 x i32> [[TMP94]], i32 [[TMP91]], i32 3 -; CHECK-NEXT: [[TMP96:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP97:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP98:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP99:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP71]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP79]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP87]], <4 x i32> zeroinitializer @@ -689,10 +677,6 @@ define i32 @test_step_narrower_than_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE33]] ; CHECK: pred.load.continue33: ; CHECK-NEXT: [[TMP143:%.*]] = phi <4 x i32> [ [[TMP138]], [[PRED_LOAD_CONTINUE31]] ], [ [[TMP142]], [[PRED_LOAD_IF32]] ] -; CHECK-NEXT: [[TMP144:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP145:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP146:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP147:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP83]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI34:%.*]] = select <4 x i1> 
[[TMP47]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI35:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP123]], <4 x i32> zeroinitializer @@ -855,10 +839,6 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP70]], i32 4, <4 x i1> [[TMP48]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP71]], i32 4, <4 x i1> [[TMP56]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP72]], i32 4, <4 x i1> [[TMP64]], <4 x i32> poison) -; CHECK-NEXT: [[TMP73:%.*]] = xor <4 x i1> [[TMP40]], -; CHECK-NEXT: [[TMP74:%.*]] = xor <4 x i1> [[TMP48]], -; CHECK-NEXT: [[TMP75:%.*]] = xor <4 x i1> [[TMP56]], -; CHECK-NEXT: [[TMP76:%.*]] = xor <4 x i1> [[TMP64]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP40]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP56]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer @@ -1022,10 +1002,6 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP69]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP70]], align 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP71]], align 4 -; CHECK-NEXT: [[TMP72:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP73:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP74:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP75:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer @@ -1270,10 +1246,6 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP109:%.*]] = insertelement <4 x i32> [[TMP108]], i32 [[TMP105]], i32 1 ; CHECK-NEXT: [[TMP110:%.*]] = insertelement <4 x i32> [[TMP109]], i32 [[TMP106]], i32 2 ; CHECK-NEXT: [[TMP111:%.*]] = insertelement <4 x i32> [[TMP110]], i32 [[TMP107]], i32 3 -; CHECK-NEXT: [[TMP112:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP113:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP114:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP115:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP87]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP95]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer @@ -1430,10 +1402,6 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP69]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP70]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP71]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison) -; CHECK-NEXT: 
[[TMP72:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP73:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP74:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP75:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer @@ -1590,10 +1558,6 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP69]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP70]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP71]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison) -; CHECK-NEXT: [[TMP72:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP73:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP74:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP75:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer @@ -1750,10 +1714,6 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP69]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP70]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP71]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison) -; CHECK-NEXT: [[TMP72:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP73:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP74:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP75:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer @@ -1919,10 +1879,6 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP70]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP71]], align 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP72]], align 4 -; CHECK-NEXT: [[TMP73:%.*]] = xor <4 x i1> [[TMP40]], -; CHECK-NEXT: [[TMP74:%.*]] = xor <4 x i1> [[TMP48]], -; CHECK-NEXT: [[TMP75:%.*]] = xor <4 x i1> [[TMP56]], -; CHECK-NEXT: [[TMP76:%.*]] = xor <4 x i1> [[TMP64]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP40]], <4 x i32> [[WIDE_LOAD]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP48]], <4 x i32> [[WIDE_LOAD4]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP56]], <4 x 
i32> [[WIDE_LOAD5]], <4 x i32> zeroinitializer @@ -2087,10 +2043,6 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP69]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP70]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP71]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison) -; CHECK-NEXT: [[TMP72:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP73:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP74:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP75:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer @@ -2248,10 +2200,6 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP69]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP70]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP71]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison) -; CHECK-NEXT: [[TMP72:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP73:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP74:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP75:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer @@ -2419,10 +2367,6 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP69]], i32 4, <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP70]], i32 4, <4 x i1> [[TMP55]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP71]], i32 4, <4 x i1> [[TMP63]], <4 x i32> poison) -; CHECK-NEXT: [[TMP72:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP73:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP74:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP75:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_MASKED_LOAD4]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI8:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[WIDE_MASKED_LOAD5]], <4 x i32> zeroinitializer @@ -2624,10 +2568,6 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP109:%.*]] = insertelement <4 x i32> [[TMP108]], i32 [[TMP105]], i32 1 ; CHECK-NEXT: [[TMP110:%.*]] = 
insertelement <4 x i32> [[TMP109]], i32 [[TMP106]], i32 2 ; CHECK-NEXT: [[TMP111:%.*]] = insertelement <4 x i32> [[TMP110]], i32 [[TMP107]], i32 3 -; CHECK-NEXT: [[TMP112:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP113:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP114:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP115:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP87]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP95]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer @@ -2763,8 +2703,6 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP53:%.*]] = insertelement <4 x i32> [[TMP52]], i32 [[TMP49]], i32 1 ; CHECK-NEXT: [[TMP54:%.*]] = insertelement <4 x i32> [[TMP53]], i32 [[TMP50]], i32 2 ; CHECK-NEXT: [[TMP55:%.*]] = insertelement <4 x i32> [[TMP54]], i32 [[TMP51]], i32 3 -; CHECK-NEXT: [[TMP56:%.*]] = xor <4 x i1> [[TMP23]], -; CHECK-NEXT: [[TMP57:%.*]] = xor <4 x i1> [[TMP31]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP23]], <4 x i32> [[TMP47]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP31]], <4 x i32> [[TMP55]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP58]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] @@ -2952,10 +2890,6 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP109:%.*]] = insertelement <4 x i32> [[TMP108]], i32 [[TMP105]], i32 1 ; CHECK-NEXT: [[TMP110:%.*]] = insertelement <4 x i32> [[TMP109]], i32 [[TMP106]], i32 2 ; CHECK-NEXT: [[TMP111:%.*]] = insertelement <4 x i32> [[TMP110]], i32 [[TMP107]], i32 3 -; CHECK-NEXT: [[TMP112:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP113:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP114:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP115:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP87]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP95]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer @@ -3245,10 +3179,6 @@ define i32 @neg_test_non_unit_stride_off_by_four_bytes(i64 %len, ptr %test_base) ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE33]] ; CHECK: pred.load.continue33: ; CHECK-NEXT: [[TMP143:%.*]] = phi <4 x i32> [ [[TMP138]], [[PRED_LOAD_CONTINUE31]] ], [ [[TMP142]], [[PRED_LOAD_IF32]] ] -; CHECK-NEXT: [[TMP144:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP145:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP146:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP147:%.*]] = xor <4 x i1> [[TMP63]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP83]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI34:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI35:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP123]], <4 x i32> zeroinitializer @@ -3459,10 +3389,6 @@ define i32 @test_non_unit_stride_with_first_iteration_step_access(i64 %len, ptr ; CHECK-NEXT: [[TMP125:%.*]] = insertelement <4 x i32> [[TMP124]], i32 [[TMP121]], i32 1 ; CHECK-NEXT: [[TMP126:%.*]] = insertelement <4 x i32> [[TMP125]], i32 [[TMP122]], i32 2 ; CHECK-NEXT: [[TMP127:%.*]] = insertelement <4 x i32> [[TMP126]], i32 [[TMP123]], i32 3 -; 
CHECK-NEXT: [[TMP128:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP129:%.*]] = xor <4 x i1> [[TMP63]], -; CHECK-NEXT: [[TMP130:%.*]] = xor <4 x i1> [[TMP71]], -; CHECK-NEXT: [[TMP131:%.*]] = xor <4 x i1> [[TMP79]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP111]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP71]], <4 x i32> [[TMP119]], <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll new file mode 100644 index 0000000000000..1cf71360adf72 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll @@ -0,0 +1,191 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=x86_64 -mattr=+avx512f -S < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: -mtriple=x86_64 -mattr=+avx512f -S < %s 2>&1 | FileCheck --check-prefix=NO-VP %s + +define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; IF-EVL-LABEL: @foo( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], 15 +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16 +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[INDEX]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer +; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], +; IF-EVL-NEXT: [[TMP1:%.*]] = icmp ule <16 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]] +; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] +; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 +; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP3]], i32 4, <16 x i1> [[TMP1]], <16 x i32> poison) +; IF-EVL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]] +; IF-EVL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +; IF-EVL-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP5]], i32 4, <16 x i1> [[TMP1]], <16 x i32> poison) +; IF-EVL-NEXT: [[TMP6:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD3]], [[WIDE_MASKED_LOAD]] +; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
i32, ptr [[A:%.*]], i64 [[TMP0]] +; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; IF-EVL-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP6]], ptr [[TMP8]], i32 4, <16 x i1> [[TMP1]]) +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 +; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP10]] +; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: for.cond.cleanup: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @foo( +; NO-VP-NEXT: iter.check: +; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 +; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; NO-VP: vector.main.loop.iter.check: +; NO-VP-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 64 +; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; NO-VP: vector.ph: +; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 64 +; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] +; NO-VP: vector.body: +; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; NO-VP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 16 +; NO-VP-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 32 +; NO-VP-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 48 +; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]] +; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]] +; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 16 +; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 32 +; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 48 +; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 4 +; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i32>, ptr [[TMP9]], align 4 +; NO-VP-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i32>, ptr [[TMP10]], align 4 +; NO-VP-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x 
i32>, ptr [[TMP11]], align 4 +; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]] +; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP1]] +; NO-VP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP2]] +; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP3]] +; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 +; NO-VP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 16 +; NO-VP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 32 +; NO-VP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 48 +; NO-VP-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP16]], align 4 +; NO-VP-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP17]], align 4 +; NO-VP-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP18]], align 4 +; NO-VP-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i32>, ptr [[TMP19]], align 4 +; NO-VP-NEXT: [[TMP20:%.*]] = add nsw <16 x i32> [[WIDE_LOAD5]], [[WIDE_LOAD]] +; NO-VP-NEXT: [[TMP21:%.*]] = add nsw <16 x i32> [[WIDE_LOAD6]], [[WIDE_LOAD2]] +; NO-VP-NEXT: [[TMP22:%.*]] = add nsw <16 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD3]] +; NO-VP-NEXT: [[TMP23:%.*]] = add nsw <16 x i32> [[WIDE_LOAD8]], [[WIDE_LOAD4]] +; NO-VP-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] +; NO-VP-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] +; NO-VP-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]] +; NO-VP-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]] +; NO-VP-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 0 +; NO-VP-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 16 +; NO-VP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 32 +; NO-VP-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 48 +; NO-VP-NEXT: store <16 x i32> [[TMP20]], ptr [[TMP28]], align 4 +; NO-VP-NEXT: store <16 x i32> [[TMP21]], ptr [[TMP29]], align 4 +; NO-VP-NEXT: store <16 x i32> [[TMP22]], ptr [[TMP30]], align 4 +; NO-VP-NEXT: store <16 x i32> [[TMP23]], ptr [[TMP31]], align 4 +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 +; NO-VP-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-VP: middle.block: +; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; NO-VP: vec.epilog.iter.check: +; NO-VP-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] +; NO-VP-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 +; NO-VP-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; NO-VP: vec.epilog.ph: +; NO-VP-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; NO-VP-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[N]], 8 +; NO-VP-NEXT: [[N_VEC10:%.*]] = sub i64 [[N]], [[N_MOD_VF9]] +; NO-VP-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; NO-VP: vec.epilog.vector.body: +; NO-VP-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; NO-VP-NEXT: [[TMP33:%.*]] = add i64 [[INDEX12]], 0 +; NO-VP-NEXT: [[TMP34:%.*]] = 
getelementptr inbounds i32, ptr [[B]], i64 [[TMP33]] +; NO-VP-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 0 +; NO-VP-NEXT: [[WIDE_LOAD13:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4 +; NO-VP-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP33]] +; NO-VP-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 0 +; NO-VP-NEXT: [[WIDE_LOAD14:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4 +; NO-VP-NEXT: [[TMP38:%.*]] = add nsw <8 x i32> [[WIDE_LOAD14]], [[WIDE_LOAD13]] +; NO-VP-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP33]] +; NO-VP-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 0 +; NO-VP-NEXT: store <8 x i32> [[TMP38]], ptr [[TMP40]], align 4 +; NO-VP-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX12]], 8 +; NO-VP-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC10]] +; NO-VP-NEXT: br i1 [[TMP41]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; NO-VP: vec.epilog.middle.block: +; NO-VP-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[N_VEC10]] +; NO-VP-NEXT: br i1 [[CMP_N11]], label [[FOR_COND_CLEANUP]], label [[VEC_EPILOG_SCALAR_PH]] +; NO-VP: vec.epilog.scalar.ph: +; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; NO-VP-NEXT: [[TMP42:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; NO-VP-NEXT: [[TMP43:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; NO-VP-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP43]], [[TMP42]] +; NO-VP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; NO-VP-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; NO-VP: for.cond.cleanup: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll new file mode 100644 index 0000000000000..9b49d44141db3 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll @@ -0,0 +1,89 @@ +; REQUIRES: asserts + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=x86_64 
-mattr=+avx512f -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: -mtriple=x86_64 -mattr=+avx512f -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s + +define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; IF-EVL: VPlan 'Initial VPlan for VF={4},UF>=1' { +; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF +; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count +; IF-EVL-NEXT: Live-in vp<[[BETC:%[0-9]+]]> = backedge-taken count +; IF-EVL-NEXT: Live-in ir<%N> = original trip-count +; IF-EVL-EMPTY: +; IF-EVL: vector.ph: +; IF-EVL-NEXT: Successor(s): vector loop +; IF-EVL-EMPTY: +; IF-EVL-NEXT: vector loop: { +; IF-EVL-NEXT: vector.body: +; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> +; IF-EVL-NEXT: EMIT vp<[[VIV:%[0-9]+]]> = WIDEN-CANONICAL-INDUCTION vp<[[IV]]> +; IF-EVL-NEXT: EMIT vp<[[MASK:%[0-9]+]]> = icmp ule vp<[[VIV]]>, vp<[[BETC]]> +; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> +; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>, vp<[[MASK]]> +; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> +; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = load vp<[[PTR2]]>, vp<[[MASK]]> +; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]> +; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> +; IF-EVL-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[MASK]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]> +; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> +; IF-EVL-NEXT: No successors +; IF-EVL-NEXT: } + +; NO-VP: VPlan 'Initial VPlan for VF={4},UF>=1' { +; NO-VP-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF +; NO-VP-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count +; NO-VP-NEXT: Live-in ir<%N> = original trip-count +; NO-VP-EMPTY: +; NO-VP: vector.ph: +; NO-VP-NEXT: Successor(s): vector loop +; NO-VP-EMPTY: +; NO-VP-NEXT: vector loop: { +; NO-VP-NEXT: vector.body: +; NO-VP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; NO-VP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> +; NO-VP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> +; NO-VP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> +; NO-VP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]> +; NO-VP-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> +; NO-VP-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> +; NO-VP-NEXT: WIDEN ir<[[LD2:%.+]]> = load vp<[[PTR2]]> +; NO-VP-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]> +; NO-VP-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> +; NO-VP-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> +; NO-VP-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]> +; NO-VP-NEXT: EMIT vp<[[IV_NEXT:%[0-9]+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> +; NO-VP-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> +; NO-VP-NEXT: No successors +; NO-VP-NEXT: } + +entry: + br label %for.body + +for.body: + %iv = 
phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index 91355728133da..86eba22d35a3f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -44,7 +44,6 @@ define i32 @predicated_sdiv_masked_load(ptr %a, ptr %b, i32 %x, i1 %c) { ; CHECK: pred.sdiv.continue2: ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_SDIV_IF1]] ] ; CHECK-NEXT: [[TMP15:%.*]] = add nsw <2 x i32> [[TMP14]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true> ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP17]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -144,7 +143,6 @@ define i32 @predicated_sdiv_masked_load(ptr %a, ptr %b, i32 %x, i1 %c) { ; SINK-GATHER: pred.sdiv.continue14: ; SINK-GATHER-NEXT: [[TMP44:%.*]] = phi <8 x i32> [ [[TMP39]], [[PRED_SDIV_CONTINUE12]] ], [ [[TMP43]], [[PRED_SDIV_IF13]] ] ; SINK-GATHER-NEXT: [[TMP45:%.*]] = add nsw <8 x i32> [[TMP44]], [[WIDE_LOAD]] -; SINK-GATHER-NEXT: [[TMP46:%.*]] = xor <8 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> ; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[BROADCAST_SPLAT]], <8 x i32> [[TMP45]], <8 x i32> [[WIDE_LOAD]] ; SINK-GATHER-NEXT: [[TMP47]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]] ; SINK-GATHER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -254,7 +252,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK: pred.udiv.continue2: ; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF1]] ] ; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF1]] ] -; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true> ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -403,7 +400,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER: pred.udiv.continue14: ; SINK-GATHER-NEXT: [[TMP63:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE12]] ], [ [[TMP60]], [[PRED_UDIV_IF13]] ] ; SINK-GATHER-NEXT: [[TMP64:%.*]] = phi <8 x i32> [ [[TMP56]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP62]], [[PRED_UDIV_IF13]] ] -; SINK-GATHER-NEXT: [[TMP65:%.*]] = xor <8 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> ; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[BROADCAST_SPLAT]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]] ; SINK-GATHER-NEXT: [[TMP66]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]] ; SINK-GATHER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git
a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index 0b872709ec6c6..c721da7597b1c 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -19,20 +19,20 @@ define i32 @any_of_reduction_epilog(ptr %src, i64 %N) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP4]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 1, i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 1, i32 0 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: @@ -42,32 +42,33 @@ define i32 @any_of_reduction_epilog(ptr %src, i64 %N) { ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[BC_MERGE_RDX]], 0 ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] -; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_MERGE_RDX]], i64 0 -; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP9]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: 
[[VEC_PHI6:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX5]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP12]], align 1 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD7]], zeroinitializer -; CHECK-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> [[VEC_PHI6]] +; CHECK-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI6]], [[TMP13]] ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 4 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <4 x i32> [[TMP17]], zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) -; CHECK-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP16]], i32 1, i32 0 +; CHECK-NEXT: [[TMP17:%.*]] = freeze i1 [[TMP16]] +; CHECK-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP17]], i32 1, i32 0 ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -101,6 +102,104 @@ exit: ret i32 %select } +define i32 @any_of_reduction_epilog_arg_as_start_value(ptr %src, i64 %N, i32 %start) { +; CHECK-LABEL: define i32 @any_of_reduction_epilog_arg_as_start_value( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { +; CHECK-NEXT: iter.check: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 1, i32 [[START]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[BC_MERGE_RDX]], [[START]] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP9]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD7]], zeroinitializer +; CHECK-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI6]], [[TMP13]] +; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) +; CHECK-NEXT: [[TMP17:%.*]] = freeze i1 [[TMP16]] +; CHECK-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP17]], i32 1, i32 [[START]] +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[START]], [[ITER_CHECK]] ], [ [[START]], 
[[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i8 [[LOAD]], 0 +; CHECK-NEXT: [[SELECT]] = select i1 [[ICMP]], i32 1, i32 [[RED]] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[ICMP3:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[ICMP3]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ [[SELECT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SELECT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi i32 [ %start, %entry ], [ %select, %loop ] + %gep = getelementptr inbounds i8, ptr %src, i64 %iv + %load = load i8, ptr %gep, align 1 + %icmp = icmp eq i8 %load, 0 + %select = select i1 %icmp, i32 1, i32 %red + %iv.next = add i64 %iv, 1 + %icmp3 = icmp eq i64 %iv, %N + br i1 %icmp3, label %exit, label %loop + +exit: + ret i32 %select +} define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-LABEL: define i1 @any_of_reduction_i1_epilog( @@ -124,14 +223,15 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], <i1 true, i1 true, i1 true, i1 true> +; CHECK-NEXT: [[TMP3]] = or <4 x i1> [[VEC_PHI]], [[TMP2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4> ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) +; CHECK-NEXT: [[TMP6:%.*]] = freeze i1 [[TMP5]] ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i1 false, i1 false ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -144,10 +244,11 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ false, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], 
[[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i1 [[BC_MERGE_RDX]], false ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] ; CHECK-NEXT: [[IND_END5:%.*]] = trunc i64 [[N_VEC3]] to i32 -; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[BC_MERGE_RDX]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0 ; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -160,21 +261,22 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND11:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[VEC_IND11]], [[BROADCAST_SPLAT14]] -; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP8]], <4 x i1> [[VEC_PHI10]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true> +; CHECK-NEXT: [[TMP10]] = or <4 x i1> [[VEC_PHI10]], [[TMP9]] ; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX9]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <4 x i32> [[VEC_IND11]], <i32 4, i32 4, i32 4, i32 4> ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[RDX_SELECT_CMP16:%.*]] = icmp ne <4 x i1> [[TMP10]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP16]]) +; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) +; CHECK-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]] ; CHECK-NEXT: [[RDX_SELECT16:%.*]] = select i1 [[TMP13]], i1 false, i1 false ; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N8]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i1 [ false, [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i1 [ false, [[ITER_CHECK]] ], [ false, [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -185,7 +287,7 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; 
CHECK-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 ; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[CMP_2]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP_2]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i1 [ [[SEL]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i1 [[SEL_LCSSA]] @@ -219,4 +321,7 @@ exit: ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/global_alias.ll b/llvm/test/Transforms/LoopVectorize/global_alias.ll index 01affc1a689f2..336e462a4cf63 100644 --- a/llvm/test/Transforms/LoopVectorize/global_alias.ll +++ b/llvm/test/Transforms/LoopVectorize/global_alias.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes='default,loop-vectorize,dce,instcombine' -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s +; RUN: opt -passes='loop-vectorize,dce,instcombine' -force-vector-interleave=1 -force-vector-width=4 -S %s | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64" @@ -28,39 +28,23 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @noAlias01(i32 %a) nounwind { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %1 - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %arrayidx1 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %4 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i32 0, i32 2, i32 %i.05 + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %a + %arrayidx1 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %i.05 store i32 %add, ptr %arrayidx1, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx2, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx2, align 4 + ret i32 %1 } ; /// Different objects, positive induction with 
widening slide @@ -76,40 +60,24 @@ for.end: ; preds = %for.cond define i32 @noAlias02(i32 %a) { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 90 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %add = add nsw i32 %1, 10 - %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %add - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add1 = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %4 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %add = add nuw nsw i32 %i.05, 10 + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i32 0, i32 2, i32 %add + %0 = load i32, ptr %arrayidx, align 4 + %add1 = add nsw i32 %0, %a + %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %i.05 store i32 %add1, ptr %arrayidx2, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx3, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 90 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx3, align 4 + ret i32 %1 } ; /// Different objects, positive induction with shortening slide @@ -125,40 +93,24 @@ for.end: ; preds = %for.cond define i32 @noAlias03(i32 %a) { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %1 - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %add1 = add nsw i32 %4, 10 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i32 0, i32 2, i32 %i.05 + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %a + %add1 = add nuw nsw i32 %i.05, 10 %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %add1 store i32 %add, ptr %arrayidx2, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx3, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 
1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx3, align 4 + ret i32 %1 } ; /// Pointer access, positive stride, run-time check added @@ -177,42 +129,26 @@ for.end: ; preds = %for.cond define i32 @noAlias04(i32 %a) #0 { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load ptr, ptr @PB, align 4 - %2 = load i32, ptr %i, align 4 - %add.ptr = getelementptr inbounds i32, ptr %1, i32 %2 - %3 = load i32, ptr %add.ptr, align 4 - %4 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %3, %4 - %5 = load ptr, ptr @PA, align 4 - %6 = load i32, ptr %i, align 4 - %add.ptr1 = getelementptr inbounds i32, ptr %5, i32 %6 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = load ptr, ptr @PB, align 4 + %add.ptr = getelementptr inbounds i32, ptr %0, i32 %i.05 + %1 = load i32, ptr %add.ptr, align 4 + %add = add nsw i32 %1, %a + %2 = load ptr, ptr @PA, align 4 + %add.ptr1 = getelementptr inbounds i32, ptr %2, i32 %i.05 store i32 %add, ptr %add.ptr1, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %7 = load i32, ptr %i, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %8 = load ptr, ptr @PA, align 4 - %9 = load i32, ptr %a.addr, align 4 - %add.ptr2 = getelementptr inbounds i32, ptr %8, i32 %9 - %10 = load i32, ptr %add.ptr2, align 4 - ret i32 %10 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %3 = load ptr, ptr @PA, align 4 + %add.ptr2 = getelementptr inbounds i32, ptr %3, i32 %a + %4 = load i32, ptr %add.ptr2, align 4 + ret i32 %4 } ; /// Different objects, positive induction, multi-array @@ -228,47 +164,23 @@ for.end: ; preds = %for.cond define i32 @noAlias05(i32 %a) #0 { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %N = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 10, ptr %N, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %2 = load i32, ptr %N, align 4 - %arrayidx = getelementptr inbounds [100 x [100 x i32]], ptr getelementptr inbounds (%struct.anon.0, ptr @Bar, i32 0, i32 2), i32 0, i32 %2 - %arrayidx1 = getelementptr inbounds [100 x i32], ptr %arrayidx, i32 0, i32 %1 - %3 = load i32, ptr %arrayidx1, align 4 - %4 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %3, %4 - %5 = load i32, ptr %i, align 4 - %6 = load i32, ptr %N, align 4 - %arrayidx2 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %6 - %arrayidx3 = getelementptr inbounds [100 x i32], ptr %arrayidx2, i32 0, i32 %5 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx1 = getelementptr inbounds %struct.anon.0, ptr @Bar, i32 0, i32 2, i32 
10, i32 %i.07 + %0 = load i32, ptr %arrayidx1, align 4 + %add = add nsw i32 %0, %a + %arrayidx3 = getelementptr inbounds %struct.anon.0, ptr @Bar, i32 0, i32 0, i32 10, i32 %i.07 store i32 %add, ptr %arrayidx3, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %7 = load i32, ptr %i, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %8 = load i32, ptr %a.addr, align 4 - %9 = load i32, ptr %N, align 4 - %arrayidx4 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %9 - %arrayidx5 = getelementptr inbounds [100 x i32], ptr %arrayidx4, i32 0, i32 %8 - %10 = load i32, ptr %arrayidx5, align 4 - ret i32 %10 + %inc = add nuw nsw i32 %i.07, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx5 = getelementptr inbounds %struct.anon.0, ptr @Bar, i32 0, i32 0, i32 10, i32 %a + %1 = load i32, ptr %arrayidx5, align 4 + ret i32 %1 } ; /// Same objects, positive induction, multi-array, different sub-elements @@ -284,48 +196,23 @@ for.end: ; preds = %for.cond define i32 @noAlias06(i32 %a) #0 { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %N = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 10, ptr %N, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %2 = load i32, ptr %N, align 4 - %add = add nsw i32 %2, 1 - %arrayidx = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %add - %arrayidx1 = getelementptr inbounds [100 x i32], ptr %arrayidx, i32 0, i32 %1 - %3 = load i32, ptr %arrayidx1, align 4 - %4 = load i32, ptr %a.addr, align 4 - %add2 = add nsw i32 %3, %4 - %5 = load i32, ptr %i, align 4 - %6 = load i32, ptr %N, align 4 - %arrayidx3 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %6 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr %arrayidx3, i32 0, i32 %5 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx1 = getelementptr inbounds %struct.anon.0, ptr @Bar, i32 0, i32 0, i32 11, i32 %i.07 + %0 = load i32, ptr %arrayidx1, align 4 + %add2 = add nsw i32 %0, %a + %arrayidx4 = getelementptr inbounds %struct.anon.0, ptr @Bar, i32 0, i32 0, i32 10, i32 %i.07 store i32 %add2, ptr %arrayidx4, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %7 = load i32, ptr %i, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %8 = load i32, ptr %a.addr, align 4 - %9 = load i32, ptr %N, align 4 - %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %9 - %arrayidx6 = getelementptr inbounds [100 x i32], ptr %arrayidx5, i32 0, i32 %8 - %10 = load i32, ptr %arrayidx6, align 4 - ret i32 %10 + %inc = add nuw nsw i32 %i.07, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx6 = getelementptr inbounds %struct.anon.0, ptr @Bar, i32 0, i32 0, i32 10, i32 %a + %1 = load i32, ptr %arrayidx6, align 4 + ret i32 %1 } ; /// Different objects, negative induction, constant distance @@ -340,43 +227,24 @@ for.end: ; preds = %for.cond ; CHECK: ret define i32 @noAlias07(i32 %a) #0 { 
entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %sub = sub nsw i32 100, %1 - %sub1 = sub nsw i32 %sub, 1 - %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1 - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %sub2 = sub nsw i32 100, %4 - %sub3 = sub nsw i32 %sub2, 1 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub3 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %sub1 = sub nuw nsw i32 99, %i.05 + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i32 0, i32 2, i32 %sub1 + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %a + %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub1 store i32 %add, ptr %arrayidx4, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx5 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx5, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx5 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx5, align 4 + ret i32 %1 } ; /// Different objects, negative induction, shortening slide @@ -392,43 +260,25 @@ for.end: ; preds = %for.cond define i32 @noAlias08(i32 %a) #0 { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 90 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %sub = sub nsw i32 100, %1 - %sub1 = sub nsw i32 %sub, 10 - %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1 - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %sub2 = sub nsw i32 100, %4 - %sub3 = sub nsw i32 %sub2, 1 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %sub1 = sub nuw nsw i32 90, %i.05 + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i32 0, i32 2, i32 %sub1 + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %a + %sub3 = sub nuw nsw i32 99, %i.05 %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub3 store i32 %add, ptr %arrayidx4, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx5 = getelementptr 
inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx5, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 90 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx5 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx5, align 4 + ret i32 %1 } ; /// Different objects, negative induction, widening slide @@ -444,43 +294,25 @@ for.end: ; preds = %for.cond define i32 @noAlias09(i32 %a) #0 { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %sub = sub nsw i32 100, %1 - %sub1 = sub nsw i32 %sub, 1 - %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1 - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %sub2 = sub nsw i32 100, %4 - %sub3 = sub nsw i32 %sub2, 10 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %sub1 = sub nuw nsw i32 99, %i.05 + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i32 0, i32 2, i32 %sub1 + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %a + %sub3 = sub nsw i32 90, %i.05 %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub3 store i32 %add, ptr %arrayidx4, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx5 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx5, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx5 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx5, align 4 + ret i32 %1 } ; /// Pointer access, negative stride, run-time check added @@ -499,48 +331,31 @@ for.end: ; preds = %for.cond define i32 @noAlias10(i32 %a) #0 { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load ptr, ptr @PB, align 4 - %add.ptr = getelementptr inbounds i32, ptr %1, i32 100 - %2 = load i32, ptr %i, align 4 - %idx.neg = sub i32 0, %2 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = load ptr, ptr @PB, align 4 + %add.ptr = getelementptr inbounds i8, ptr %0, i32 400 + %idx.neg = sub nsw i32 0, %i.05 %add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i32 %idx.neg - %add.ptr2 = getelementptr inbounds i32, ptr %add.ptr1, i32 -1 - %3 = load i32, ptr %add.ptr2, align 4 - %4 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %3, %4 - %5 = load ptr, ptr @PA, align 4 
- %add.ptr3 = getelementptr inbounds i32, ptr %5, i32 100 - %6 = load i32, ptr %i, align 4 - %idx.neg4 = sub i32 0, %6 - %add.ptr5 = getelementptr inbounds i32, ptr %add.ptr3, i32 %idx.neg4 - %add.ptr6 = getelementptr inbounds i32, ptr %add.ptr5, i32 -1 + %add.ptr2 = getelementptr inbounds i8, ptr %add.ptr1, i32 -4 + %1 = load i32, ptr %add.ptr2, align 4 + %add = add nsw i32 %1, %a + %2 = load ptr, ptr @PA, align 4 + %add.ptr3 = getelementptr inbounds i8, ptr %2, i32 400 + %add.ptr5 = getelementptr inbounds i32, ptr %add.ptr3, i32 %idx.neg + %add.ptr6 = getelementptr inbounds i8, ptr %add.ptr5, i32 -4 store i32 %add, ptr %add.ptr6, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %7 = load i32, ptr %i, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %8 = load ptr, ptr @PA, align 4 - %9 = load i32, ptr %a.addr, align 4 - %add.ptr7 = getelementptr inbounds i32, ptr %8, i32 %9 - %10 = load i32, ptr %add.ptr7, align 4 - ret i32 %10 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %3 = load ptr, ptr @PA, align 4 + %add.ptr7 = getelementptr inbounds i32, ptr %3, i32 %a + %4 = load i32, ptr %add.ptr7, align 4 + ret i32 %4 } ; /// Different objects, negative induction, multi-array @@ -556,51 +371,24 @@ for.end: ; preds = %for.cond define i32 @noAlias11(i32 %a) #0 { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %N = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 10, ptr %N, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %sub = sub nsw i32 100, %1 - %sub1 = sub nsw i32 %sub, 1 - %2 = load i32, ptr %N, align 4 - %arrayidx = getelementptr inbounds [100 x [100 x i32]], ptr getelementptr inbounds (%struct.anon.0, ptr @Bar, i32 0, i32 2), i32 0, i32 %2 - %arrayidx2 = getelementptr inbounds [100 x i32], ptr %arrayidx, i32 0, i32 %sub1 - %3 = load i32, ptr %arrayidx2, align 4 - %4 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %3, %4 - %5 = load i32, ptr %i, align 4 - %sub3 = sub nsw i32 100, %5 - %sub4 = sub nsw i32 %sub3, 1 - %6 = load i32, ptr %N, align 4 - %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %6 - %arrayidx6 = getelementptr inbounds [100 x i32], ptr %arrayidx5, i32 0, i32 %sub4 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %sub1 = sub nuw nsw i32 99, %i.07 + %arrayidx2 = getelementptr inbounds %struct.anon.0, ptr @Bar, i32 0, i32 2, i32 10, i32 %sub1 + %0 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %0, %a + %arrayidx6 = getelementptr inbounds %struct.anon.0, ptr @Bar, i32 0, i32 0, i32 10, i32 %sub1 store i32 %add, ptr %arrayidx6, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %7 = load i32, ptr %i, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %8 = load i32, ptr %a.addr, align 4 - %9 = load i32, ptr %N, align 4 - %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %9 - %arrayidx8 = getelementptr inbounds [100 x i32], ptr %arrayidx7, i32 0, i32 %8 - %10 = load i32, ptr %arrayidx8, align 4 
- ret i32 %10 + %inc = add nuw nsw i32 %i.07, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx8 = getelementptr inbounds %struct.anon.0, ptr @Bar, i32 0, i32 0, i32 10, i32 %a + %1 = load i32, ptr %arrayidx8, align 4 + ret i32 %1 } ; /// Same objects, negative induction, multi-array, different sub-elements @@ -616,52 +404,24 @@ for.end: ; preds = %for.cond define i32 @noAlias12(i32 %a) #0 { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %N = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 10, ptr %N, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %sub = sub nsw i32 100, %1 - %sub1 = sub nsw i32 %sub, 1 - %2 = load i32, ptr %N, align 4 - %add = add nsw i32 %2, 1 - %arrayidx = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %add - %arrayidx2 = getelementptr inbounds [100 x i32], ptr %arrayidx, i32 0, i32 %sub1 - %3 = load i32, ptr %arrayidx2, align 4 - %4 = load i32, ptr %a.addr, align 4 - %add3 = add nsw i32 %3, %4 - %5 = load i32, ptr %i, align 4 - %sub4 = sub nsw i32 100, %5 - %sub5 = sub nsw i32 %sub4, 1 - %6 = load i32, ptr %N, align 4 - %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %6 - %arrayidx7 = getelementptr inbounds [100 x i32], ptr %arrayidx6, i32 0, i32 %sub5 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %sub1 = sub nuw nsw i32 99, %i.07 + %arrayidx2 = getelementptr inbounds %struct.anon.0, ptr @Bar, i32 0, i32 0, i32 11, i32 %sub1 + %0 = load i32, ptr %arrayidx2, align 4 + %add3 = add nsw i32 %0, %a + %arrayidx7 = getelementptr inbounds %struct.anon.0, ptr @Bar, i32 0, i32 0, i32 10, i32 %sub1 store i32 %add3, ptr %arrayidx7, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %7 = load i32, ptr %i, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %8 = load i32, ptr %a.addr, align 4 - %9 = load i32, ptr %N, align 4 - %arrayidx8 = getelementptr inbounds [100 x [100 x i32]], ptr @Bar, i32 0, i32 %9 - %arrayidx9 = getelementptr inbounds [100 x i32], ptr %arrayidx8, i32 0, i32 %8 - %10 = load i32, ptr %arrayidx9, align 4 - ret i32 %10 + %inc = add nuw nsw i32 %i.07, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx9 = getelementptr inbounds %struct.anon.0, ptr @Bar, i32 0, i32 0, i32 10, i32 %a + %1 = load i32, ptr %arrayidx9, align 4 + ret i32 %1 } ; /// Same objects, positive induction, constant distance, just enough for vector size @@ -677,40 +437,24 @@ for.end: ; preds = %for.cond define i32 @noAlias13(i32 %a) #0 { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %add = add nsw i32 %1, 4 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %add = add 
nuw nsw i32 %i.05, 4 %arrayidx = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %add - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add1 = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %4 + %0 = load i32, ptr %arrayidx, align 4 + %add1 = add nsw i32 %0, %a + %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %i.05 store i32 %add1, ptr %arrayidx2, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx3, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx3, align 4 + ret i32 %1 } ; /// Same objects, negative induction, constant distance, just enough for vector size @@ -726,43 +470,25 @@ for.end: ; preds = %for.cond define i32 @noAlias14(i32 %a) #0 { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %sub = sub nsw i32 100, %1 - %sub1 = sub nsw i32 %sub, 5 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %sub1 = sub nsw i32 95, %i.05 %arrayidx = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub1 - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %sub2 = sub nsw i32 100, %4 - %sub3 = sub nsw i32 %sub2, 1 + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %a + %sub3 = sub nuw nsw i32 99, %i.05 %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub3 store i32 %add, ptr %arrayidx4, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx5 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx5, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx5 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx5, align 4 + ret i32 %1 } @@ -782,41 +508,24 @@ for.end: ; preds = %for.cond define i32 @mayAlias01(i32 %a) nounwind { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %sub = sub nsw i32 100, %1 - 
%sub1 = sub nsw i32 %sub, 1 - %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1 - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %4 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %sub1 = sub nuw nsw i32 99, %i.05 + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i32 0, i32 2, i32 %sub1 + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %a + %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %i.05 store i32 %add, ptr %arrayidx2, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx3, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx3, align 4 + ret i32 %1 } ; /// Different objects, swapped induction, alias at the beginning @@ -832,41 +541,24 @@ for.end: ; preds = %for.cond define i32 @mayAlias02(i32 %a) nounwind { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %1 - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %sub = sub nsw i32 100, %4 - %sub1 = sub nsw i32 %sub, 1 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i32 0, i32 2, i32 %i.05 + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %a + %sub1 = sub nuw nsw i32 99, %i.05 %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %sub1 store i32 %add, ptr %arrayidx2, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx3, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx3, align 4 + ret i32 %1 } ; /// Pointer access, run-time check added @@ -882,48 +574,31 @@ for.end: ; preds = %for.cond define i32 @mayAlias03(i32 %a) nounwind { entry: - %a.addr = alloca 
i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load ptr, ptr @PB, align 4 - %add.ptr = getelementptr inbounds i32, ptr %1, i32 100 - %2 = load i32, ptr %i, align 4 - %idx.neg = sub i32 0, %2 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = load ptr, ptr @PB, align 4 + %add.ptr = getelementptr inbounds i8, ptr %0, i32 400 + %idx.neg = sub nsw i32 0, %i.05 %add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i32 %idx.neg - %add.ptr2 = getelementptr inbounds i32, ptr %add.ptr1, i32 -1 - %3 = load i32, ptr %add.ptr2, align 4 - %4 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %3, %4 - %5 = load ptr, ptr @PA, align 4 - %6 = load i32, ptr %i, align 4 - %add.ptr3 = getelementptr inbounds i32, ptr %5, i32 %6 + %add.ptr2 = getelementptr inbounds i8, ptr %add.ptr1, i32 -4 + %1 = load i32, ptr %add.ptr2, align 4 + %add = add nsw i32 %1, %a + %2 = load ptr, ptr @PA, align 4 + %add.ptr3 = getelementptr inbounds i32, ptr %2, i32 %i.05 store i32 %add, ptr %add.ptr3, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %7 = load i32, ptr %i, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %8 = load ptr, ptr @PA, align 4 - %9 = load i32, ptr %a.addr, align 4 - %add.ptr4 = getelementptr inbounds i32, ptr %8, i32 %9 - %10 = load i32, ptr %add.ptr4, align 4 - ret i32 %10 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %3 = load ptr, ptr @PA, align 4 + %add.ptr4 = getelementptr inbounds i32, ptr %3, i32 %a + %4 = load i32, ptr %add.ptr4, align 4 + ret i32 %4 } - ;; === Finally, the tests that should only vectorize with care (or if we ignore undefined behaviour at all) === @@ -939,42 +614,25 @@ for.end: ; preds = %for.cond define i32 @mustAlias01(i32 %a) nounwind { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %sub = sub nsw i32 100, %1 - %sub1 = sub nsw i32 %sub, 1 - %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1 - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %add2 = add nsw i32 %4, 10 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %sub1 = sub nuw nsw i32 99, %i.05 + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i32 0, i32 2, i32 %sub1 + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %a + %add2 = add nuw nsw i32 %i.05, 10 %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %add2 store i32 %add, ptr %arrayidx3, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 
4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx4, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx4, align 4 + ret i32 %1 } ; int mustAlias02 (int a) { @@ -989,41 +647,24 @@ for.end: ; preds = %for.cond define i32 @mustAlias02(i32 %a) nounwind { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %sub = sub nsw i32 100, %1 - %sub1 = sub nsw i32 %sub, 10 - %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1 - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %4 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %sub1 = sub nsw i32 90, %i.05 + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, i32 0, i32 2, i32 %sub1 + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %a + %arrayidx2 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %i.05 store i32 %add, ptr %arrayidx2, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx3, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx3, align 4 + ret i32 %1 } ; int mustAlias03 (int a) { @@ -1038,40 +679,23 @@ for.end: ; preds = %for.cond define i32 @mustAlias03(i32 %a) nounwind { entry: - %a.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %a, ptr %a.addr, align 4 - store i32 0, ptr %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %0, 100 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, ptr %i, align 4 - %sub = sub nsw i32 100, %1 - %sub1 = sub nsw i32 %sub, 10 - %arrayidx = getelementptr inbounds [100 x i32], ptr getelementptr inbounds (%struct.anon, ptr @Foo, i32 0, i32 2), i32 0, i32 %sub1 - %2 = load i32, ptr %arrayidx, align 4 - %3 = load i32, ptr %a.addr, align 4 - %add = add nsw i32 %2, %3 - %4 = load i32, ptr %i, align 4 - %add2 = add nsw i32 %4, 10 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %sub1 = sub nsw i32 90, %i.05 + %arrayidx = getelementptr inbounds %struct.anon, ptr @Foo, 
i32 0, i32 2, i32 %sub1 + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %a + %add2 = add nuw nsw i32 %i.05, 10 %arrayidx3 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %add2 store i32 %add, ptr %arrayidx3, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, ptr %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, ptr %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %6 = load i32, ptr %a.addr, align 4 - %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %6 - %7 = load i32, ptr %arrayidx4, align 4 - ret i32 %7 + %inc = add nuw nsw i32 %i.05, 1 + %exitcond.not = icmp eq i32 %inc, 100 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + %arrayidx4 = getelementptr inbounds [100 x i32], ptr @Foo, i32 0, i32 %a + %1 = load i32, ptr %arrayidx4, align 4 + ret i32 %1 } diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll index 7e0727348b018..692615a49ad9a 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -791,7 +791,6 @@ define i32 @predicated_udiv_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; CHECK: pred.udiv.continue2: ; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP15]], [[PRED_UDIV_IF1]] ] -; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -858,8 +857,6 @@ define i32 @predicated_udiv_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) { ; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.udiv.continue3: ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF2]] ] -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = xor i1 [[C]], true -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = xor i1 [[C]], true ; UNROLL-NO-VF-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], i32 [[TMP8]], i32 [[TMP4]] ; UNROLL-NO-VF-NEXT: [[PREDPHI4:%.*]] = select i1 [[C]], i32 [[TMP11]], i32 [[TMP5]] ; UNROLL-NO-VF-NEXT: [[TMP14]] = add i32 [[VEC_PHI]], [[PREDPHI]] diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index d5a26e97eec35..e9761a60fd6eb 100644 --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -612,7 +612,6 @@ for.end: ; preds = %for.body, %entry ; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer ; CHECK-DAG: %[[M1:.*]] = fmul fast <4 x float> %[[V0]], %[[V0]], %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer ; CHECK: %[[S1:.*]] = select <4 x i1> %[[C22]], <4 x float> %[[M1]], <4 x float> %[[M2]] ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C1]], <4 x float> %[[V0]], <4 x float> %[[S1]] ; CHECK: fadd fast <4 x float> %[[S2]], diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index ba98391e0b0c3..8d4be05a4390e 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -42,7 +42,6 @@ define i16 
@test_access_size_not_multiple_of_align(i64 %len, ptr %test_base) { ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP3]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer ; CHECK-NEXT: [[TMP16]] = add <2 x i16> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -140,7 +139,6 @@ define i32 @test_access_size_multiple_of_align_but_offset_by_1(i64 %len, ptr %te ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP3]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP14]], <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP16]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index 2a58748d8fb67..337e1592a8304 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -95,7 +95,6 @@ define i8 @PR34687_no_undef(i1 %c, i32 %x, i32 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> , [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[PREDPHI]] diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll index e8c75635f2c69..6a9f83a9e0aa2 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll @@ -4,7 +4,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) { ; CHECK-VF2IC1-LABEL: @pred_select_const_i32_from_icmp( ; CHECK-VF2IC1: vector.body: -; CHECK-VF2IC1: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue2 ] +; CHECK-VF2IC1: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue2 ] ; CHECK-VF2IC1: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr {{%.*}}, align 4 ; CHECK-VF2IC1-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], ; CHECK-VF2IC1-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 @@ -26,14 +26,13 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-VF2IC1: pred.load.continue2: ; CHECK-VF2IC1-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %pred.load.continue ], [ [[TMP14]], %pred.load.if1 ] ; CHECK-VF2IC1-NEXT: [[TMP16:%.*]] = icmp eq <2 x i32> [[TMP15]], -; CHECK-VF2IC1-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x i32> , <2 x i32> [[VEC_PHI]] -; CHECK-VF2IC1-NEXT: [[TMP18:%.*]] = xor <2 x i1> [[TMP4]], 
-; CHECK-VF2IC1-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP17]], <2 x i32> [[VEC_PHI]] +; CHECK-VF2IC1-NEXT: [[TMP17:%.*]] = or <2 x i1> [[VEC_PHI]], [[TMP16]] +; CHECK-VF2IC1-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP17]], <2 x i1> [[VEC_PHI]] ; CHECK-VF2IC1: br i1 {{%.*}}, label %middle.block, label %vector.body ; CHECK-VF2IC1: middle.block: -; CHECK-VF2IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i32> [[PREDPHI]], zeroinitializer -; CHECK-VF2IC1-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) -; CHECK-VF2IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 1, i32 0 +; CHECK-VF2IC1-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[PREDPHI]]) +; CHECK-VF2IC1-NEXT: [[FR_TMP20:%.*]] = freeze i1 [[TMP20]] +; CHECK-VF2IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP20]], i32 1, i32 0 ; CHECK-VF2IC1: scalar.ph: ; CHECK-VF2IC1: [[BC_RESUME_VAL:%.*]] = phi i64 [ {{%.*}}, %middle.block ], [ 0, %entry ] ; CHECK-VF2IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %entry ], [ [[RDX_SELECT]], %middle.block ] @@ -56,8 +55,8 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; ; CHECK-VF1IC2-LABEL: @pred_select_const_i32_from_icmp( ; CHECK-VF1IC2: vector.body: -; CHECK-VF1IC2: [[VEC_PHI:%.*]] = phi i32 [ 0, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue3 ] -; CHECK-VF1IC2-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, %vector.ph ], [ [[PREDPHI5:%.*]], %pred.load.continue3 ] +; CHECK-VF1IC2: [[VEC_PHI:%.*]] = phi i1 [ false, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue3 ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %vector.ph ], [ [[PREDPHI5:%.*]], %pred.load.continue3 ] ; CHECK-VF1IC2: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 {{%.*}} ; CHECK-VF1IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 {{%.*}} ; CHECK-VF1IC2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 @@ -80,16 +79,15 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-VF1IC2-NEXT: [[TMP11:%.*]] = phi i32 [ poison, %pred.load.continue ], [ [[TMP10]], %pred.load.if2 ] ; CHECK-VF1IC2-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 2 ; CHECK-VF1IC2-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP11]], 2 -; CHECK-VF1IC2-NEXT: [[TMP14:%.*]] = select i1 [[TMP12]], i32 1, i32 [[VEC_PHI]] -; CHECK-VF1IC2-NEXT: [[TMP15:%.*]] = select i1 [[TMP13]], i32 1, i32 [[VEC_PHI2]] -; CHECK-VF1IC2-NEXT: [[TMP16:%.*]] = xor i1 [[TMP4]], true -; CHECK-VF1IC2-NEXT: [[TMP17:%.*]] = xor i1 [[TMP5]], true -; CHECK-VF1IC2-NEXT: [[PREDPHI]] = select i1 [[TMP4]], i32 [[TMP14]], i32 [[VEC_PHI]] -; CHECK-VF1IC2-NEXT: [[PREDPHI5]] = select i1 [[TMP5]], i32 [[TMP15]], i32 [[VEC_PHI2]] +; CHECK-VF1IC2-NEXT: [[TMP14:%.*]] = or i1 [[VEC_PHI]], [[TMP12]] +; CHECK-VF1IC2-NEXT: [[TMP15:%.*]] = or i1 [[VEC_PHI2]], [[TMP13]] +; CHECK-VF1IC2-NEXT: [[PREDPHI]] = select i1 [[TMP4]], i1 [[TMP14]], i1 [[VEC_PHI]] +; CHECK-VF1IC2-NEXT: [[PREDPHI5]] = select i1 [[TMP5]], i1 [[TMP15]], i1 [[VEC_PHI2]] ; CHECK-VF1IC2: br i1 {{%.*}}, label %middle.block, label %vector.body ; CHECK-VF1IC2: middle.block: -; CHECK-VF1IC2-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[PREDPHI]], 0 -; CHECK-VF1IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[PREDPHI]], i32 [[PREDPHI5]] +; CHECK-VF1IC2-NEXT: [[OR:%.*]] = or i1 [[PREDPHI5]], [[PREDPHI]] +; CHECK-VF1IC2-NEXT: [[FR_OR:%.*]] = freeze i1 [[OR]] +; CHECK-VF1IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_OR]], i32 1, 
i32 0 ; CHECK-VF1IC2: br i1 {{%.*}}, label %for.end.loopexit, label %scalar.ph ; CHECK-VF1IC2: scalar.ph: ; CHECK-VF1IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ {{%.*}}, %middle.block ], [ 0, %entry ] diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll index c9f2aaef6d5c8..993b56a05207b 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll @@ -5,45 +5,47 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-LABEL: @select_const_i32_from_icmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) +; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 ; CHECK-VF4IC4: vector.body: -; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] +; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] ; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <4 x i32> {{.*}}, ; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <4 x i32> {{.*}}, ; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <4 x i32> {{.*}}, ; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <4 x i32> {{.*}}, -; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = select <4 x i1> [[VEC_ICMP1]], <4 x i32> [[VEC_PHI1]], <4 x i32> -; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = select <4 x i1> [[VEC_ICMP2]], <4 x i32> [[VEC_PHI2]], <4 x i32> -; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = select <4 x i1> [[VEC_ICMP3]], <4 x i32> [[VEC_PHI3]], <4 x i32> -; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = select <4 x i1> [[VEC_ICMP4]], <4 x i32> [[VEC_PHI4]], <4 x i32> +; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor <4 x i1> [[VEC_ICMP1]], +; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor <4 x i1> [[VEC_ICMP2]], +; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor <4 x i1> [[VEC_ICMP3]], +; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = 
xor <4 x i1> [[VEC_ICMP4]], +; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = or <4 x i1> [[VEC_PHI1]], [[NOT1]] +; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = or <4 x i1> [[VEC_PHI2]], [[NOT2]] +; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or <4 x i1> [[VEC_PHI3]], [[NOT3]] +; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = or <4 x i1> [[VEC_PHI4]], [[NOT4]] ; CHECK-VF4IC4: middle.block: -; CHECK-VF4IC4-NEXT: [[VEC_ICMP5:%.*]] = icmp ne <4 x i32> [[VEC_SEL1]], -; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = select <4 x i1> [[VEC_ICMP5]], <4 x i32> [[VEC_SEL1]], <4 x i32> [[VEC_SEL2]] -; CHECK-VF4IC4-NEXT: [[VEC_ICMP6:%.*]] = icmp ne <4 x i32> [[VEC_SEL5]], -; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = select <4 x i1> [[VEC_ICMP6]], <4 x i32> [[VEC_SEL5]], <4 x i32> [[VEC_SEL3]] -; CHECK-VF4IC4-NEXT: [[VEC_ICMP7:%.*]] = icmp ne <4 x i32> [[VEC_SEL6]], -; CHECK-VF4IC4-NEXT: [[VEC_SEL_FIN:%.*]] = select <4 x i1> [[VEC_ICMP7]], <4 x i32> [[VEC_SEL6]], <4 x i32> [[VEC_SEL4]] -; CHECK-VF4IC4-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL_FIN]], -; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) -; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 +; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = or <4 x i1> [[VEC_SEL2]], [[VEC_SEL1]] +; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = or <4 x i1> [[VEC_SEL3]], [[VEC_SEL5]] +; CHECK-VF4IC4-NEXT: [[VEC_SEL7:%.*]] = or <4 x i1> [[VEC_SEL4]], [[VEC_SEL6]] +; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL7]]) +; CHECK-VF4IC4-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] +; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 ; CHECK-VF1IC4: vector.body: -; CHECK-VF1IC4: [[VEC_PHI1:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] +; CHECK-VF1IC4: [[VEC_PHI1:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] ; CHECK-VF1IC4: [[VEC_LOAD1:%.*]] = load i32 ; CHECK-VF1IC4-NEXT: [[VEC_LOAD2:%.*]] = load i32 ; CHECK-VF1IC4-NEXT: [[VEC_LOAD3:%.*]] = load i32 @@ -52,17 +54,20 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF1IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq i32 [[VEC_LOAD2]], 3 ; CHECK-VF1IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq i32 [[VEC_LOAD3]], 3 ; CHECK-VF1IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq i32 [[VEC_LOAD4]], 3 -; CHECK-VF1IC4-NEXT: [[VEC_SEL1]] = select i1 [[VEC_ICMP1]], i32 [[VEC_PHI1]], i32 7 -; CHECK-VF1IC4-NEXT: [[VEC_SEL2]] = select i1 [[VEC_ICMP2]], i32 [[VEC_PHI2]], i32 7 -; CHECK-VF1IC4-NEXT: [[VEC_SEL3]] = select i1 [[VEC_ICMP3]], i32 [[VEC_PHI3]], i32 7 -; CHECK-VF1IC4-NEXT: [[VEC_SEL4]] = select i1 [[VEC_ICMP4]], i32 [[VEC_PHI4]], i32 7 +; CHECK-VF1IC4-NEXT: [[NOT1:%.*]] = xor i1 [[VEC_ICMP1]], true +; CHECK-VF1IC4-NEXT: [[NOT2:%.*]] = xor i1 [[VEC_ICMP2]], true +; CHECK-VF1IC4-NEXT: [[NOT3:%.*]] = xor i1 [[VEC_ICMP3]], true +; CHECK-VF1IC4-NEXT: [[NOT4:%.*]] = xor i1 [[VEC_ICMP4]], 
true +; CHECK-VF1IC4-NEXT: [[VEC_SEL1:%.*]] = or i1 [[VEC_PHI1]], [[NOT1]] +; CHECK-VF1IC4-NEXT: [[VEC_SEL2:%.*]] = or i1 [[VEC_PHI2]], [[NOT2]] +; CHECK-VF1IC4-NEXT: [[VEC_SEL3:%.*]] = or i1 [[VEC_PHI3]], [[NOT3]] +; CHECK-VF1IC4-NEXT: [[VEC_SEL4:%.*]] = or i1 [[VEC_PHI4]], [[NOT4]] ; CHECK-VF1IC4: middle.block: -; CHECK-VF1IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp ne i32 [[VEC_SEL1]], 3 -; CHECK-VF1IC4-NEXT: [[VEC_SEL5:%.*]] = select i1 [[VEC_ICMP4]], i32 [[VEC_SEL1]], i32 [[VEC_SEL2]] -; CHECK-VF1IC4-NEXT: [[VEC_ICMP5:%.*]] = icmp ne i32 [[VEC_SEL5]], 3 -; CHECK-VF1IC4-NEXT: [[VEC_SEL6:%.*]] = select i1 [[VEC_ICMP5]], i32 [[VEC_SEL5]], i32 [[VEC_SEL3]] -; CHECK-VF1IC4-NEXT: [[VEC_ICMP6:%.*]] = icmp ne i32 [[VEC_SEL6]], 3 -; CHECK-VF1IC4-NEXT: {{.*}} = select i1 [[VEC_ICMP6]], i32 [[VEC_SEL6]], i32 [[VEC_SEL4]] +; CHECK-VF1IC4-NEXT: [[VEC_SEL5:%.*]] = or i1 [[VEC_SEL2]], [[VEC_SEL1]] +; CHECK-VF1IC4-NEXT: [[VEC_SEL6:%.*]] = or i1 [[VEC_SEL3]], [[VEC_SEL5]] +; CHECK-VF1IC4-NEXT: [[OR_RDX:%.*]] = or i1 [[VEC_SEL4]], [[VEC_SEL6]] +; CHECK-VF1IC4-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] +; CHECK-VF1IC4-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 entry: br label %for.body @@ -86,14 +91,14 @@ exit: ; preds = %for.body define i32 @select_const_i32_from_icmp2(ptr nocapture readonly %v, i64 %n) { ; CHECK-LABEL: @select_const_i32_from_icmp2 ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> , <4 x i32> [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[VEC_ICMP]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) +; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 entry: br label %for.body @@ -117,21 +122,18 @@ exit: ; preds = %for.body define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) { ; CHECK-LABEL: @select_i32_from_icmp ; CHECK-VF4IC1: vector.ph: -; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 -; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 -; CHECK-VF4IC1-NEXT: [[SPLAT_OF_B:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NOT: shufflevector <4 x i32> +; CHECK-VF4IC1-NOT: shufflevector <4 x i32> ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> 
[[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> [[SPLAT_OF_B]] +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[FIN_INS:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 -; CHECK-VF4IC1-NEXT: [[FIN_SPLAT:%.*]] = shufflevector <4 x i32> [[FIN_INS]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-VF4IC1-NEXT: [[FIN_CMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], [[FIN_SPLAT]] -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_CMP]]) -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 %b, i32 %a +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) +; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 %b, i32 %a entry: br label %for.body @@ -154,14 +156,15 @@ exit: ; preds = %for.body define i32 @select_const_i32_from_fcmp_fast(ptr nocapture readonly %v, i64 %n) { ; CHECK-LABEL: @select_const_i32_from_fcmp_fast ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float> ; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp fast ueq <4 x float> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_FCMP]], <4 x i32> [[VEC_PHI]], <4 x i32> +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2 +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) +; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 1, i32 2 entry: br label %for.body @@ -184,14 +187,15 @@ exit: ; preds = %for.body define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-LABEL: @select_const_i32_from_fcmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ , %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float> ; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp ueq <4 x float> [[VEC_LOAD]], -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_FCMP]], <4 x i32> [[VEC_PHI]], <4 x i32> +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2 +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) +; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 1, i32 2 entry: br label %for.body @@ -216,18 
+220,16 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF4IC1: vector.ph: ; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 ; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 -; CHECK-VF4IC1-NEXT: [[SPLAT_OF_B:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NOT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] -; CHECK-VF4IC1: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_PHI]], -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> [[SPLAT_OF_B]] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[SPLAT_OF_A]], +; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[FIN_INS:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 -; CHECK-VF4IC1-NEXT: [[FIN_SPLAT:%.*]] = shufflevector <4 x i32> [[FIN_INS]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-VF4IC1-NEXT: [[FIN_CMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], [[FIN_SPLAT]] -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_CMP]]) -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 %b, i32 %a +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) +; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 %b, i32 %a entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll index 16ab45415b5cc..55e61158a79c6 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll @@ -8,26 +8,25 @@ define i64 @pr62565_incoming_value_known_undef(i64 %a, ptr %src) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ undef, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, 
ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP4]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_PHI]], <2 x i64> [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[NOT:%*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i64> [[TMP4]], undef -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i64 [[A]], i64 undef +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) +; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 undef ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] @@ -72,26 +71,25 @@ define i64 @pr62565_incoming_value_known_poison(i64 %a, ptr %src) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ poison, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP4]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_PHI]], <2 x i64> [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i64> [[TMP4]], poison -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i64 [[A]], i64 poison +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) +; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 poison ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] @@ -136,30 +134,25 @@ 
define i64 @pr62565_incoming_value_may_be_poison(i64 %a, ptr %src, i64 %start) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 -; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <2 x i64> [[MINMAX_IDENT_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP4]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_PHI]], <2 x i64> [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i64> [[TMP4]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i64 [[A]], i64 [[START]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) +; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 [[START]] ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll index 3bf9e5b5dd037..7f8ad1db456cd 100644 --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -24,7 +24,6 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP4]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[TMP5]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> 
[[TMP5]], <2 x i16> , <2 x i16> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0 @@ -114,7 +113,6 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP6]], ; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP8]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP3]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i16> [[WIDE_LOAD]], <2 x i16> zeroinitializer ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP10]], <2 x i16> , <2 x i16> [[PREDPHI]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP2]] @@ -198,7 +196,6 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) { ; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[VEC_IND3]], <2 x i16> [[VEC_IND1]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP3]] @@ -309,7 +306,6 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP15]], ; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP17]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP2]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP18]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer ; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP19]], <2 x i16> , <2 x i16> [[PREDPHI]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP1]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index c21b4d45e9a08..71eed3b2985d4 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -14,7 +14,6 @@ define void @blend_uniform_iv_trunc(i1 %c) { ; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP1]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT1]], <4 x i16> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[MASK1]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[MASK1]], <4 x i16> [[BROADCAST_SPLAT2]], <4 x i16> undef ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i16> [[PREDPHI]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i16 [[TMP4]] @@ -58,7 +57,6 @@ define void @blend_uniform_iv(i1 %c) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> 
zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[MASK1]], ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[MASK1]], <4 x i64> [[BROADCAST_SPLAT2]], <4 x i64> undef ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP2]] @@ -104,7 +102,6 @@ define void @blend_chain_iv(i1 %c) { ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[MASK1]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[VEC_IND]], <4 x i64> undef -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[MASK1]], ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP8]], <4 x i64> [[PREDPHI]], <4 x i64> undef ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[PREDPHI1]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll new file mode 100644 index 0000000000000..c622925510dd4 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll @@ -0,0 +1,179 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s + +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" + +; Test cases for https://github.com/llvm/llvm-project/issues/87410. +define void @test_not_first_lane_only_constant(ptr %A, ptr noalias %B) { +; CHECK-LABEL: define void @test_not_first_lane_only_constant( +; CHECK-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT3]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 +; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[TMP0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> zeroinitializer, <4 x ptr> poison, <4 x ptr> [[BROADCAST_SPLAT4]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[PREDPHI]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 2 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT5]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 +; CHECK-NEXT: store <4 x i16> [[BROADCAST_SPLAT6]], ptr [[TMP2]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: 
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[IV]] +; CHECK-NEXT: br i1 false, label [[LOOP_LATCH]], label [[ELSE_1:%.*]] +; CHECK: else.1: +; CHECK-NEXT: br i1 false, label [[THEN_2:%.*]], label [[ELSE_2:%.*]] +; CHECK: then.2: +; CHECK-NEXT: br label [[ELSE_2]] +; CHECK: else.2: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[MERGE:%.*]] = phi ptr [ [[B]], [[ELSE_2]] ], [ poison, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[MERGE]], align 2 +; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; CHECK-NEXT: store i16 [[L]], ptr [[GEP_A]], align 2 +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i16 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[C_2]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.A = getelementptr inbounds i16, ptr %A, i16 %iv + br i1 false, label %loop.latch, label %else.1 + +else.1: + br i1 false, label %then.2, label %else.2 + +then.2: + br label %else.2 + +else.2: + br label %loop.latch + +loop.latch: + %merge = phi ptr [ %B, %else.2 ], [ poison, %loop.header ] + %l = load i16, ptr %merge, align 2 + %iv.next = add i16 %iv, 1 + store i16 %l, ptr %gep.A + %c.2 = icmp eq i16 %iv.next, 1000 + br i1 %c.2, label %exit, label %loop.header + +exit: + ret void +} + +define void @test_not_first_lane_only_wide_compare(ptr %A, ptr noalias %B, i16 %x, i16 %y) { +; CHECK-LABEL: define void @test_not_first_lane_only_wide_compare( +; CHECK-SAME: ptr [[A:%.*]], ptr noalias [[B:%.*]], i16 [[X:%.*]], i16 [[Y:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT3]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 +; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x ptr> poison, <4 x ptr> [[BROADCAST_SPLAT4]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[PREDPHI]], i32 0 +; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 2 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x 
i16> [[BROADCAST_SPLATINSERT5]], <4 x i16> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: store <4 x i16> [[BROADCAST_SPLAT6]], ptr [[TMP2]], align 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[IV]] +; CHECK-NEXT: [[L_0:%.*]] = load i16, ptr [[GEP_A]], align 2 +; CHECK-NEXT: [[C_0:%.*]] = icmp ult i16 [[L_0]], [[X]] +; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[ELSE_1:%.*]] +; CHECK: else.1: +; CHECK-NEXT: [[C_1:%.*]] = icmp ult i16 [[L_0]], [[Y]] +; CHECK-NEXT: br i1 [[C_1]], label [[THEN_2:%.*]], label [[ELSE_2:%.*]] +; CHECK: then.2: +; CHECK-NEXT: br label [[ELSE_2]] +; CHECK: else.2: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[MERGE:%.*]] = phi ptr [ [[B]], [[ELSE_2]] ], [ poison, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[MERGE]], align 2 +; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; CHECK-NEXT: store i16 [[L]], ptr [[GEP_A]], align 2 +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i16 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[C_2]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.A = getelementptr inbounds i16, ptr %A, i16 %iv + %l.0 = load i16, ptr %gep.A + %c.0 = icmp ult i16 %l.0, %x + br i1 %c.0, label %loop.latch, label %else.1 + +else.1: + %c.1 = icmp ult i16 %l.0, %y + br i1 %c.1, label %then.2, label %else.2 + +then.2: + br label %else.2 + +else.2: + br label %loop.latch + +loop.latch: + %merge = phi ptr [ %B, %else.2 ], [ poison, %loop.header ] + %l = load i16, ptr %merge, align 2 + %iv.next = add i16 %iv, 1 + store i16 %l, ptr %gep.A + %c.2 = icmp eq i16 %iv.next, 1000 + br i1 %c.2, label %exit, label %loop.header + +exit: + ret void +} +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +;. 
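For orientation, the loop in @test_not_first_lane_only_constant above corresponds roughly to the following C sketch. This is a hypothetical reconstruction from the IR, not part of the patch: the function name and parameter names merely mirror the IR arguments, and because both header branches are constant false, the poison incoming value of %merge is statically dead.

    #include <stdint.h>

    /* Hypothetical C analogue of @test_not_first_lane_only_constant:
     * the branches guarding the poison path are constant false, so the
     * pointer merged in the loop latch is always B. */
    void test(int16_t *A, int16_t *B) {
      for (int16_t iv = 0; iv != 1000; ++iv) {
        int16_t *merge = B; /* poison incoming value is never selected */
        A[iv] = *merge;
      }
    }

The second test, @test_not_first_lane_only_wide_compare, follows the same shape but replaces the constant-false condition with a real comparison against %x, so the poison incoming value is only conditionally dead.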
diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll
new file mode 100644
index 0000000000000..a90b38c6a9605
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -force-target-supports-scalable-vectors -scalable-vectorization=on -S < %s | FileCheck --check-prefix=IF-EVL %s
+
+; RUN: opt -passes=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -force-vector-width=4 \
+; RUN: -force-target-supports-scalable-vectors -scalable-vectorization=on -S < %s | FileCheck --check-prefix=NO-VP %s
+
+; The target does not support predicated vectorization.
+define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
+; IF-EVL-LABEL: @foo(
+; IF-EVL-NEXT: entry:
+; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
+; IF-EVL: for.body:
+; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[IV]]
+; IF-EVL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[IV]]
+; IF-EVL-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]]
+; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
+; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
+; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; IF-EVL: for.cond.cleanup:
+; IF-EVL-NEXT: ret void
+;
+; NO-VP-LABEL: @foo(
+; NO-VP-NEXT: entry:
+; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP0]], 4
+; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP8]]
+; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; NO-VP: vector.ph:
+; NO-VP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP14:%.*]] = mul i64 [[TMP1]], 4
+; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP14]]
+; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; NO-VP-NEXT: [[TMP15:%.*]] = mul i64 [[TMP2]], 4
+; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
+; NO-VP: vector.body:
+; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
+; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]]
+; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
+; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4
+; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP3]]
+; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
+; NO-VP-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
+; NO-VP-NEXT: [[TMP16:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
+; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]]
+; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
+; NO-VP-NEXT: store <vscale x 4 x i32> [[TMP16]], ptr [[TMP10]], align 4
+; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]]
+; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; NO-VP: middle.block:
+; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; NO-VP: scalar.ph:
+; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; NO-VP-NEXT: br label [[FOR_BODY:%.*]]
+; NO-VP: for.body:
+; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
+; NO-VP-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; NO-VP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
+; NO-VP-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
+; NO-VP-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
+; NO-VP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; NO-VP-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4
+; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; NO-VP: for.cond.cleanup:
+; NO-VP-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv
+ %1 = load i32, ptr %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv
+ store i32 %add, ptr %arrayidx4, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/vplan-force-tail-with-evl.ll
new file mode 100644
index 0000000000000..f510d47d06e36
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vplan-force-tail-with-evl.ll
@@ -0,0 +1,41 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl -force-vector-width=4 \
+; RUN: -force-target-supports-scalable-vectors -scalable-vectorization=on \
+; RUN: -disable-output < %s 2>&1 | FileCheck --check-prefixes=NO-VP %s
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -force-target-supports-scalable-vectors -scalable-vectorization=on \
+; RUN: -disable-output < %s 2>&1 | FileCheck --check-prefixes=NO-VP %s
+
+; The target does not support predicated vectorization.
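+; With no target support for predicated (VP) vectorization, the
+; data-with-evl tail-folding style is expected to fall back, so no
+; EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI recipe should appear in either
+; RUN's printed VPlans (hence both share the NO-VP check prefix).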
+define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; NO-VP-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 89178953010fe..7056bbe6ba1b7 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -246,8 +246,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) { ; CHECK-NEXT: Successor(s): if.then.0 ; CHECK-EMPTY: ; CHECK-NEXT: if.then.0: -; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%cmp> -; CHECK-NEXT: BLEND ir<%d> = ir<0>/vp<[[NOT]]> vp<[[PRED]]>/ir<%cmp> +; CHECK-NEXT: BLEND ir<%d> = ir<0> vp<[[PRED]]>/ir<%cmp> ; CHECK-NEXT: CLONE ir<%idx> = getelementptr ir<%x>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%idx> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%d> @@ -455,7 +454,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db ; CHECK-NEXT: if.then.0: ; CHECK-NEXT: EMIT vp<[[NOT2:%.+]]> = not ir<%cmp2> ; CHECK-NEXT: EMIT vp<[[SEL2:%.+]]> = select vp<[[NOT1]]>, vp<[[NOT2]]>, ir<false> -; CHECK-NEXT: BLEND ir<%ysd.0> = vp<[[PHI]]>/vp<[[OR1]]> ir<%psd>/vp<[[SEL2]]> +; CHECK-NEXT: BLEND ir<%ysd.0> = vp<[[PHI]]> ir<%psd>/vp<[[SEL2]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%isd> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%ysd.0> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> @@ -748,8 +747,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) { ; CHECK-EMPTY: ; CHECK-NEXT: if.then.1: ; CHECK-NEXT: WIDEN ir<%fadd> = fadd vp<[[PHI1]]>, vp<[[PHI2]]> -; CHECK-NEXT: EMIT vp<[[NOT_COND:%.+]]> = not ir<%ifcond> -; CHECK-NEXT: BLEND ir<%st.value> = ir<%ld.value>/vp<[[NOT_COND]]> ir<%fadd>/ir<%ifcond> +; CHECK-NEXT: BLEND ir<%st.value> = ir<%ld.value> ir<%fadd>/ir<%ifcond> ; CHECK-NEXT: CLONE ir<%st.addr> = getelementptr inbounds ir<%dest>, vp<[[STEPS]]> ; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer ir<%st.addr> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%st.value> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 89b3a6da16c1f..0cacb02dc4891 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -364,7 +364,7 @@ define void @pred_cfg1(i32 %k, i32 %j) { ; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.1> ; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, vp<[[NOT]]>, ir<false> ; CHECK-NEXT: EMIT vp<[[OR:%.+]]> = or vp<[[MASK2]]>, vp<[[MASK3]]> -; CHECK-NEXT: BLEND ir<%p> = ir<0>/vp<[[MASK3]]> vp<[[PRED]]>/vp<[[MASK2]]> +; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: ;
CHECK-NEXT: pred.store: { @@ -465,7 +465,7 @@ define void @pred_cfg2(i32 %k, i32 %j) { ; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.0> ; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, vp<[[NOT]]>, ir<false> ; CHECK-NEXT: EMIT vp<[[OR:%.+]]> = or vp<[[MASK2]]>, vp<[[MASK3]]> -; CHECK-NEXT: BLEND ir<%p> = ir<0>/vp<[[MASK3]]> vp<[[PRED]]>/vp<[[MASK2]]> +; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]> ; CHECK-NEXT: EMIT vp<[[MASK4:%.+]]> = select vp<[[OR]]>, ir<%c.1>, ir<false> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: @@ -573,7 +573,7 @@ define void @pred_cfg3(i32 %k, i32 %j) { ; CHECK-NEXT: EMIT vp<[[NOT:%.+]]> = not ir<%c.0> ; CHECK-NEXT: EMIT vp<[[MASK3:%.+]]> = select vp<[[MASK1]]>, vp<[[NOT]]>, ir<false> ; CHECK-NEXT: EMIT vp<[[MASK4:%.+]]> = or vp<[[MASK2]]>, vp<[[MASK3]]> -; CHECK-NEXT: BLEND ir<%p> = ir<0>/vp<[[MASK3]]> vp<[[PRED]]>/vp<[[MASK2]]> +; CHECK-NEXT: BLEND ir<%p> = ir<0> vp<[[PRED]]>/vp<[[MASK2]]> ; CHECK-NEXT: EMIT vp<[[MASK5:%.+]]> = select vp<[[MASK4]]>, ir<%c.0>, ir<false> ; CHECK-NEXT: Successor(s): pred.store ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/overlapping-contexts.ll b/llvm/test/Transforms/MemProfContextDisambiguation/overlapping-contexts.ll new file mode 100644 index 0000000000000..7fe9dc96921c6 --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/overlapping-contexts.ll @@ -0,0 +1,232 @@ +;; This test ensures that the logic which assigns calls to stack nodes +;; correctly handles cloning of a callsite for a trimmed cold context +;; that partially overlaps with a longer context for a different allocation. + +;; The profile data and call stacks were all manually added, but the code +;; would be structured something like the following (fairly contrived to +;; result in the type of control flow needed to test): + +;; void A(bool b) { +;; if (b) +;; // cold: stack ids 10, 12, 13, 15 (trimmed ids 19, 21) +;; // not cold: stack ids 10, 12, 13, 14 (trimmed id 20) +;; new char[10]; // stack id 10 +;; else +;; // not cold: stack ids 11, 12, 13, 15, 16, 17 (trimmed id 22) +;; // cold: stack ids 11, 12, 13, 15, 16, 18 (trimmed id 23) +;; new char[10]; // stack id 11 +;; } + +;; void X(bool b) { +;; A(b); // stack id 12 +;; } + +;; void B(bool b) { +;; X(b); // stack id 13 +;; } + +;; void D() { +;; B(true); // stack id 14 +;; } + +;; void C(bool b) { +;; B(b); // stack id 15 +;; } + +;; void E(bool b) { +;; C(b); // stack id 16 +;; } + +;; void F() { +;; E(false); // stack id 17 +;; } + +;; void G() { +;; E(false); // stack id 18 +;; } + +;; void M() { +;; C(true); // stack id 19 +;; } + +;; int main() { +;; D(); // stack id 20 (leads to not cold allocation) +;; M(); // stack id 21 (leads to cold allocation) +;; F(); // stack id 22 (leads to not cold allocation) +;; G(); // stack id 23 (leads to cold allocation) +;; } + +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -stats -pass-remarks=memprof-context-disambiguation \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=IR \ +; RUN: --check-prefix=STATS --check-prefix=REMARKS + +; REMARKS: created clone _Z1Ab.memprof.1 +; REMARKS: created clone _Z1Xb.memprof.1 +; REMARKS: created clone _Z1Bb.memprof.1 +; REMARKS: created clone _Z1Cb.memprof.1 +; REMARKS: created clone _Z1Eb.memprof.1 +; REMARKS: call in clone _Z1Gv assigned to call function clone _Z1Eb.memprof.1 +; REMARKS: call in clone _Z1Eb.memprof.1
assigned to call function clone _Z1Cb.memprof.1 +;; If we don't perform cloning for each allocation separately, we will miss +;; cloning _Z1Cb for the trimmed cold allocation context leading to the +;; allocation at stack id 10. +; REMARKS: call in clone _Z1Cb.memprof.1 assigned to call function clone _Z1Bb.memprof.1 +; REMARKS: call in clone _Z1Fv assigned to call function clone _Z1Eb +; REMARKS: call in clone _Z1Eb assigned to call function clone _Z1Cb +; REMARKS: call in clone _Z1Cb assigned to call function clone _Z1Bb.memprof.1 +; REMARKS: call in clone _Z1Bb.memprof.1 assigned to call function clone _Z1Xb.memprof.1 +; REMARKS: call in clone _Z1Xb.memprof.1 assigned to call function clone _Z1Ab.memprof.1 +; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute cold +; REMARKS: call in clone _Z1Bb.memprof.1 assigned to call function clone _Z1Xb +; REMARKS: call in clone _Z1Dv assigned to call function clone _Z1Bb +; REMARKS: call in clone _Z1Bb assigned to call function clone _Z1Xb +; REMARKS: call in clone _Z1Xb assigned to call function clone _Z1Ab +; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute notcold +; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute cold +; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute notcold + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @_Z1Ab(i1 noundef zeroext %b) { +entry: + br i1 %b, label %if.then, label %if.else + +if.then: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !10 + br label %if.end + +if.else: + %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !5, !callsite !11 + br label %if.end + +if.end: + ret void +} + +; Function Attrs: nobuiltin +declare ptr @_Znam(i64) #0 + +define dso_local void @_Z1Xb(i1 noundef zeroext %b) { +entry: + tail call void @_Z1Ab(i1 noundef zeroext %b), !callsite !12 + ret void +} + +define dso_local void @_Z1Bb(i1 noundef zeroext %b) { +entry: + tail call void @_Z1Xb(i1 noundef zeroext %b), !callsite !13 + ret void +} + +define dso_local void @_Z1Dv() { +entry: + tail call void @_Z1Bb(i1 noundef zeroext true), !callsite !14 + ret void +} + +define dso_local void @_Z1Cb(i1 noundef zeroext %b) { +entry: + tail call void @_Z1Bb(i1 noundef zeroext %b), !callsite !15 + ret void +} + +define dso_local void @_Z1Eb(i1 noundef zeroext %b) { +entry: + tail call void @_Z1Cb(i1 noundef zeroext %b), !callsite !16 + ret void +} + +define dso_local void @_Z1Fv() { +entry: + tail call void @_Z1Eb(i1 noundef zeroext false), !callsite !17 + ret void +} + +define dso_local void @_Z1Gv() { +entry: + tail call void @_Z1Eb(i1 noundef zeroext false), !callsite !18 + ret void +} + +define dso_local void @_Z1Mv() { +entry: + tail call void @_Z1Cb(i1 noundef zeroext true), !callsite !19 + ret void +} + +define dso_local noundef i32 @main() local_unnamed_addr { +entry: + tail call void @_Z1Dv(), !callsite !20 ;; Not cold context + tail call void @_Z1Mv(), !callsite !21 ;; Cold context + tail call void @_Z1Fv(), !callsite !22 ;; Not cold context + tail call void @_Z1Gv(), !callsite !23 ;; Cold context + ret i32 0 +} + +attributes #0 = { nobuiltin } +attributes #7 = { builtin } + +!0 = !{!1, !3} +;; Cold (trimmed) context via call to _Z1Mv +!1 = !{!2, !"cold"} +!2 = !{i64 10, i64 12, i64 13, i64 15} +;; Not cold (trimmed) context via call to _Z1Dv
in main +!3 = !{!4, !"notcold"} +!4 = !{i64 10, i64 12, i64 13, i64 14} +!5 = !{!6, !8} +;; Not cold (trimmed) context via call to _Z1Fv in main +!6 = !{!7, !"notcold"} +!7 = !{i64 11, i64 12, i64 13, i64 15, i64 16, i64 17} +;; Cold (trimmed) context via call to _Z1Gv in main +!8 = !{!9, !"cold"} +!9 = !{i64 11, i64 12, i64 13, i64 15, i64 16, i64 18} +!10 = !{i64 10} +!11 = !{i64 11} +!12 = !{i64 12} +!13 = !{i64 13} +!14 = !{i64 14} +!15 = !{i64 15} +!16 = !{i64 16} +!17 = !{i64 17} +!18 = !{i64 18} +!19 = !{i64 19} +!20 = !{i64 20} +!21 = !{i64 21} +!22 = !{i64 22} +!23 = !{i64 23} + +; IR: define {{.*}} @_Z1Cb(i1 noundef zeroext %b) +; IR-NEXT: entry: +; IR-NEXT: call {{.*}} @_Z1Bb.memprof.1(i1 noundef zeroext %b) + +; IR: define {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext %b) +; IR-NEXT: entry: +; IR-NEXT: br i1 %b, label %if.then, label %if.else +; IR-EMPTY: +; IR-NEXT: if.then: +; IR-NEXT: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]] +; IR-NEXT: br label %if.end +; IR-EMPTY: +; IR-NEXT: if.else: +; IR-NEXT: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]] + +; IR: define {{.*}} @_Z1Xb.memprof.1(i1 noundef zeroext %b) +; IR-NEXT: entry: +; IR-NEXT: call {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext %b) + +; IR: define {{.*}} @_Z1Bb.memprof.1(i1 noundef zeroext %b) +; IR-NEXT: entry: +; IR-NEXT: call {{.*}} @_Z1Xb.memprof.1(i1 noundef zeroext %b) + +; IR: attributes #[[COLD]] = { builtin "memprof"="cold" } + +; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) +; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) +; STATS: 5 memprof-context-disambiguation - Number of function clones created during whole program analysis diff --git a/llvm/test/Transforms/RemoveTraps/remove-traps.ll b/llvm/test/Transforms/RemoveTraps/remove-traps.ll deleted file mode 100644 index 71549e7d9b412..0000000000000 --- a/llvm/test/Transforms/RemoveTraps/remove-traps.ll +++ /dev/null @@ -1,397 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt < %s -passes='function(remove-traps)' -S | FileCheck %s --check-prefixes=NOPROFILE -; RUN: opt < %s -passes='function(remove-traps)' -remove-traps-random-rate=1 -S | FileCheck %s --check-prefixes=ALL -; RUN: opt < %s -passes='require,function(remove-traps)' -S | FileCheck %s --check-prefixes=HOT -; RUN: opt < %s -passes='require,function(remove-traps)' -remove-traps-percentile-cutoff-hot=700000 -S | FileCheck %s --check-prefixes=HOT70 - -target triple = "x86_64-pc-linux-gnu" - -declare void @llvm.ubsantrap(i8 immarg) - -define dso_local noundef i32 @simple(ptr noundef readonly %0) { -; NOPROFILE-LABEL: define dso_local noundef i32 @simple( -; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) { -; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; NOPROFILE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; NOPROFILE: 3: -; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) -; NOPROFILE-NEXT: unreachable -; NOPROFILE: 4: -; NOPROFILE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; NOPROFILE-NEXT: ret i32 [[TMP5]] -; -; ALL-LABEL: define dso_local noundef i32 @simple( -; ALL-SAME: ptr noundef readonly [[TMP0:%.*]]) { -; ALL-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; ALL-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; ALL: 3: -; ALL-NEXT: unreachable -; ALL: 4: -; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; ALL-NEXT: ret i32 [[TMP5]] -; -; 
HOT-LABEL: define dso_local noundef i32 @simple( -; HOT-SAME: ptr noundef readonly [[TMP0:%.*]]) { -; HOT-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; HOT: 3: -; HOT-NEXT: tail call void @llvm.ubsantrap(i8 22) -; HOT-NEXT: unreachable -; HOT: 4: -; HOT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; HOT-NEXT: ret i32 [[TMP5]] -; -; HOT70-LABEL: define dso_local noundef i32 @simple( -; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) { -; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; HOT70: 3: -; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) -; HOT70-NEXT: unreachable -; HOT70: 4: -; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; HOT70-NEXT: ret i32 [[TMP5]] -; - %2 = icmp eq ptr %0, null - br i1 %2, label %3, label %4 - -3: - tail call void @llvm.ubsantrap(i8 22) - unreachable - -4: - %5 = load i32, ptr %0, align 4 - ret i32 %5 -} - - -define dso_local noundef i32 @hot(ptr noundef readonly %0) !prof !36 { -; NOPROFILE-LABEL: define dso_local noundef i32 @hot( -; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { -; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; NOPROFILE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; NOPROFILE: 3: -; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) -; NOPROFILE-NEXT: unreachable -; NOPROFILE: 4: -; NOPROFILE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; NOPROFILE-NEXT: ret i32 [[TMP5]] -; -; ALL-LABEL: define dso_local noundef i32 @hot( -; ALL-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { -; ALL-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; ALL-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; ALL: 3: -; ALL-NEXT: unreachable -; ALL: 4: -; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; ALL-NEXT: ret i32 [[TMP5]] -; -; HOT-LABEL: define dso_local noundef i32 @hot( -; HOT-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { -; HOT-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; HOT: 3: -; HOT-NEXT: unreachable -; HOT: 4: -; HOT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; HOT-NEXT: ret i32 [[TMP5]] -; -; HOT70-LABEL: define dso_local noundef i32 @hot( -; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { -; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; HOT70: 3: -; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) -; HOT70-NEXT: unreachable -; HOT70: 4: -; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; HOT70-NEXT: ret i32 [[TMP5]] -; - %2 = icmp eq ptr %0, null - br i1 %2, label %3, label %4 - -3: - tail call void @llvm.ubsantrap(i8 22) - unreachable - -4: - %5 = load i32, ptr %0, align 4 - ret i32 %5 -} - -define dso_local noundef i32 @veryHot(ptr noundef readonly %0) !prof !39 { -; NOPROFILE-LABEL: define dso_local noundef i32 @veryHot( -; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { -; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; NOPROFILE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; NOPROFILE: 3: -; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) -; NOPROFILE-NEXT: unreachable -; NOPROFILE: 4: -; NOPROFILE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; 
NOPROFILE-NEXT: ret i32 [[TMP5]] -; -; ALL-LABEL: define dso_local noundef i32 @veryHot( -; ALL-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { -; ALL-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; ALL-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; ALL: 3: -; ALL-NEXT: unreachable -; ALL: 4: -; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; ALL-NEXT: ret i32 [[TMP5]] -; -; HOT-LABEL: define dso_local noundef i32 @veryHot( -; HOT-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { -; HOT-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; HOT: 3: -; HOT-NEXT: unreachable -; HOT: 4: -; HOT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; HOT-NEXT: ret i32 [[TMP5]] -; -; HOT70-LABEL: define dso_local noundef i32 @veryHot( -; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { -; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; HOT70: 3: -; HOT70-NEXT: unreachable -; HOT70: 4: -; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; HOT70-NEXT: ret i32 [[TMP5]] -; - %2 = icmp eq ptr %0, null - br i1 %2, label %3, label %4 - -3: - tail call void @llvm.ubsantrap(i8 22) - unreachable - -4: - %5 = load i32, ptr %0, align 4 - ret i32 %5 -} - - -define dso_local noundef i32 @branchColdFnHot(i32 noundef %0, ptr noundef readonly %1) !prof !39 { -; NOPROFILE-LABEL: define dso_local noundef i32 @branchColdFnHot( -; NOPROFILE-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { -; NOPROFILE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; NOPROFILE-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] -; NOPROFILE: 4: -; NOPROFILE-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; NOPROFILE-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; NOPROFILE: 6: -; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) -; NOPROFILE-NEXT: unreachable -; NOPROFILE: 7: -; NOPROFILE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 -; NOPROFILE-NEXT: br label [[TMP9]] -; NOPROFILE: 9: -; NOPROFILE-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] -; NOPROFILE-NEXT: ret i32 [[TMP10]] -; -; ALL-LABEL: define dso_local noundef i32 @branchColdFnHot( -; ALL-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { -; ALL-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; ALL-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] -; ALL: 4: -; ALL-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; ALL-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; ALL: 6: -; ALL-NEXT: unreachable -; ALL: 7: -; ALL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 -; ALL-NEXT: br label [[TMP9]] -; ALL: 9: -; ALL-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] -; ALL-NEXT: ret i32 [[TMP10]] -; -; HOT-LABEL: define dso_local noundef i32 @branchColdFnHot( -; HOT-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { -; HOT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; HOT-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] -; HOT: 4: -; HOT-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; HOT-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; HOT: 6: -; HOT-NEXT: tail call void @llvm.ubsantrap(i8 22) -; 
HOT-NEXT: unreachable -; HOT: 7: -; HOT-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 -; HOT-NEXT: br label [[TMP9]] -; HOT: 9: -; HOT-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] -; HOT-NEXT: ret i32 [[TMP10]] -; -; HOT70-LABEL: define dso_local noundef i32 @branchColdFnHot( -; HOT70-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { -; HOT70-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; HOT70-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] -; HOT70: 4: -; HOT70-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; HOT70-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; HOT70: 6: -; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) -; HOT70-NEXT: unreachable -; HOT70: 7: -; HOT70-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 -; HOT70-NEXT: br label [[TMP9]] -; HOT70: 9: -; HOT70-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] -; HOT70-NEXT: ret i32 [[TMP10]] -; - %3 = icmp eq i32 %0, 0 - br i1 %3, label %9, label %4, !prof !38 - -4: - %5 = icmp eq ptr %1, null - br i1 %5, label %6, label %7 - -6: - tail call void @llvm.ubsantrap(i8 22) #2 - unreachable - -7: - %8 = load i32, ptr %1, align 4 - br label %9 - -9: - %10 = phi i32 [ %8, %7 ], [ 0, %2 ] - ret i32 %10 -} - -define dso_local noundef i32 @branchHotFnCold(i32 noundef %0, ptr noundef readonly %1) !prof !36 { -; NOPROFILE-LABEL: define dso_local noundef i32 @branchHotFnCold( -; NOPROFILE-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { -; NOPROFILE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; NOPROFILE-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] -; NOPROFILE: 4: -; NOPROFILE-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; NOPROFILE-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; NOPROFILE: 6: -; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) -; NOPROFILE-NEXT: unreachable -; NOPROFILE: 7: -; NOPROFILE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 -; NOPROFILE-NEXT: br label [[TMP9]] -; NOPROFILE: 9: -; NOPROFILE-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] -; NOPROFILE-NEXT: ret i32 [[TMP10]] -; -; ALL-LABEL: define dso_local noundef i32 @branchHotFnCold( -; ALL-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { -; ALL-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; ALL-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] -; ALL: 4: -; ALL-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; ALL-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; ALL: 6: -; ALL-NEXT: unreachable -; ALL: 7: -; ALL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 -; ALL-NEXT: br label [[TMP9]] -; ALL: 9: -; ALL-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] -; ALL-NEXT: ret i32 [[TMP10]] -; -; HOT-LABEL: define dso_local noundef i32 @branchHotFnCold( -; HOT-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { -; HOT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; HOT-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] -; HOT: 4: -; HOT-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; HOT-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; HOT: 6: -; HOT-NEXT: unreachable -; HOT: 7: -; HOT-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 -; HOT-NEXT: br label 
[[TMP9]] -; HOT: 9: -; HOT-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] -; HOT-NEXT: ret i32 [[TMP10]] -; -; HOT70-LABEL: define dso_local noundef i32 @branchHotFnCold( -; HOT70-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { -; HOT70-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; HOT70-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] -; HOT70: 4: -; HOT70-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; HOT70-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; HOT70: 6: -; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) -; HOT70-NEXT: unreachable -; HOT70: 7: -; HOT70-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 -; HOT70-NEXT: br label [[TMP9]] -; HOT70: 9: -; HOT70-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] -; HOT70-NEXT: ret i32 [[TMP10]] -; - %3 = icmp eq i32 %0, 0 - br i1 %3, label %9, label %4, !prof !37 - -4: - %5 = icmp eq ptr %1, null - br i1 %5, label %6, label %7 - -6: - tail call void @llvm.ubsantrap(i8 22) #2 - unreachable - -7: - %8 = load i32, ptr %1, align 4 - br label %9 - -9: - %10 = phi i32 [ %8, %7 ], [ 0, %2 ] - ret i32 %10 -} - -!llvm.module.flags = !{!6} -!6 = !{i32 1, !"ProfileSummary", !7} -!7 = !{!8, !9, !10, !11, !12, !13, !14, !17} -!8 = !{!"ProfileFormat", !"InstrProf"} -!9 = !{!"TotalCount", i64 30000} -!10 = !{!"MaxCount", i64 10000} -!11 = !{!"MaxInternalCount", i64 10000} -!12 = !{!"MaxFunctionCount", i64 10000} -!13 = !{!"NumCounts", i64 3} -!14 = !{!"NumFunctions", i64 5} -!17 = !{!"DetailedSummary", !18} -!18 = !{!19, !29, !30, !32, !34} -!19 = !{i32 10000, i64 10000, i32 3} -!29 = !{i32 950000, i64 5000, i32 3} -!30 = !{i32 990000, i64 500, i32 4} -!32 = !{i32 999900, i64 250, i32 4} -!34 = !{i32 999999, i64 1, i32 6} - -!36 = !{!"function_entry_count", i64 1000} -!39 = !{!"function_entry_count", i64 7000} - -!37 = !{!"branch_weights", i32 1, i32 1000} -!38 = !{!"branch_weights", i32 1000, i32 1} - -;. -; NOPROFILE: [[PROF16]] = !{!"function_entry_count", i64 1000} -; NOPROFILE: [[PROF17]] = !{!"function_entry_count", i64 7000} -; NOPROFILE: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} -; NOPROFILE: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} -;. -; ALL: [[PROF16]] = !{!"function_entry_count", i64 1000} -; ALL: [[PROF17]] = !{!"function_entry_count", i64 7000} -; ALL: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} -; ALL: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} -;. -; HOT: [[PROF16]] = !{!"function_entry_count", i64 1000} -; HOT: [[PROF17]] = !{!"function_entry_count", i64 7000} -; HOT: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} -; HOT: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} -;. -; HOT70: [[PROF16]] = !{!"function_entry_count", i64 1000} -; HOT70: [[PROF17]] = !{!"function_entry_count", i64 7000} -; HOT70: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} -; HOT70: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} -;. 
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll index 44542f32bf145..d2711d0546c0a 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll @@ -81,8 +81,9 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i64> [[TMP4FT_0_LCSSA]], <2 x i64> [[TMP4TF_0_LCSSA]], <2 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP4FF_0_LCSSA]], <2 x i64> [[TMP4TT_0_LCSSA]], <2 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], <4 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i64> [[TMP12]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = trunc <4 x i64> [[TMP16]] to <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i64> [[TMP12]] to <4 x i32> +; CHECK-NEXT: [[TMP57:%.*]] = trunc <4 x i64> [[TMP15]] to <4 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], [[TMP57]] ; CHECK-NEXT: [[AND:%.*]] = and i32 [[NUMBEROFBOOLS]], 127 ; CHECK-NEXT: [[CMP86284:%.*]] = icmp ugt i32 [[AND]], 31 ; CHECK-NEXT: br i1 [[CMP86284]], label [[WHILE_BODY88:%.*]], label [[WHILE_END122:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-external-user.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-external-user.ll new file mode 100644 index 0000000000000..5df6b8581dd72 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalable-external-user.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +; Protect against a crash with scalable vector users + +define i1 @crash(i32 %a, i32 %b) { +; CHECK-LABEL: @crash( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV_I446:%.*]] = sext i32 [[A:%.*]] to i64 +; CHECK-NEXT: [[CMP_I618870_NOT_NOT:%.*]] = icmp ult i64 0, [[CONV_I446]] +; CHECK-NEXT: [[CONV_I401:%.*]] = sext i32 [[B:%.*]] to i64 +; CHECK-NEXT: [[CMP_I407876_NOT_NOT:%.*]] = icmp ult i64 0, [[CONV_I401]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 0, i64 [[CONV_I401]]) +; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP_I618870_NOT_NOT]], i1 [[CMP_I407876_NOT_NOT]], i1 false +; CHECK-NEXT: ret i1 [[R]] +; +entry: + %conv.i446 = sext i32 %a to i64 + %cmp.i618870.not.not = icmp ult i64 0, %conv.i446 + %conv.i401 = sext i32 %b to i64 + %cmp.i407876.not.not = icmp ult i64 0, %conv.i401 + %0 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 0, i64 %conv.i401) + %r = select i1 %cmp.i618870.not.not, i1 %cmp.i407876.not.not, i1 0 + ret i1 %r +} diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index 500f10659f04c..1e7eb4a416724 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -801,10 +801,20 @@ entry: define i64 @red_zext_ld_4xi64(ptr %ptr) { ; CHECK-LABEL: @red_zext_ld_4xi64( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16> -; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]]) -; CHECK-NEXT:
[[TMP3:%.*]] = zext i16 [[TMP2]] to i64 +; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LD0]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP]], align 1 +; CHECK-NEXT: [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64 +; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]] +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 2 +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_1]], align 1 +; CHECK-NEXT: [[ZEXT_2:%.*]] = zext i8 [[LD2]] to i64 +; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i64 [[ADD_1]], [[ZEXT_2]] +; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 3 +; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[GEP_2]], align 1 +; CHECK-NEXT: [[ZEXT_3:%.*]] = zext i8 [[LD3]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[ADD_2]], [[ZEXT_3]] ; CHECK-NEXT: ret i64 [[TMP3]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll index 2d69c7c984dcd..04d275742832e 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll @@ -8,10 +8,11 @@ define i32 @test() { ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> , i32 4) -; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = zext i16 [[TMP4]] to i32 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP5]], i32 1) ; CHECK-NEXT: ret i32 [[TMP6]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll index 27c9655f94d3c..82966124d3baa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll @@ -11,9 +11,7 @@ define void @test(ptr %0, i8 %1, i1 %cmp12.i) { ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[PRE:%.*]] ; CHECK: pre: -; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i8> [[TMP5]] to <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> [[TMP6]], <8 x i32> ) -; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i8> +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP5]], <8 x i8> ) ; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i8> [[TMP8]], ; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP9]], <8 x i8> [[TMP5]] ; CHECK-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP0]], align 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-after-intrinsic-call-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-after-intrinsic-call-minbitwidth.ll index a05d4fdd6315b..9fa88084aaa0a 
100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-after-intrinsic-call-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-after-intrinsic-call-minbitwidth.ll @@ -5,12 +5,14 @@ define void @test() { ; CHECK-LABEL: define void @test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> zeroinitializer, <2 x i32> zeroinitializer) -; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i32> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[ADD:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i2> @llvm.smin.v2i2(<2 x i2> zeroinitializer, <2 x i2> zeroinitializer) +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> zeroinitializer, <2 x i2> zeroinitializer, <2 x i2> [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i2> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i2> [[TMP2]], i32 1 +; CHECK-NEXT: [[ADD:%.*]] = zext i2 [[TMP3]] to i32 ; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[ADD]], 0 -; CHECK-NEXT: [[ADD45:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i2> [[TMP2]], i32 0 +; CHECK-NEXT: [[ADD45:%.*]] = zext i2 [[TMP5]] to i32 ; CHECK-NEXT: [[ADD152:%.*]] = or i32 [[ADD45]], [[ADD]] ; CHECK-NEXT: [[IDXPROM153:%.*]] = sext i32 [[ADD152]] to i64 ; CHECK-NEXT: [[ARRAYIDX154:%.*]] = getelementptr i8, ptr null, i64 [[IDXPROM153]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll new file mode 100644 index 0000000000000..84f7e219f5066 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +@e = global i8 0 +@c = global i16 0 +@d = global i32 0 + +define i8 @test() { +; CHECK-LABEL: define i8 @test() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @e, align 1 +; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @c, align 2 +; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[CONV1]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP8]]) +; CHECK-NEXT: [[CONV4_30:%.*]] = trunc i32 [[TMP11]] to i8 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP4]], i32 7 +; CHECK-NEXT: [[XOR_31:%.*]] = and i32 [[TMP13]], -2 +; CHECK-NEXT: store i32 [[XOR_31]], ptr @d, align 4 +; CHECK-NEXT: ret i8 [[CONV4_30]] +; +entry: + %0 = load i8, ptr @e, align 1 + %conv = sext i8 %0 to i32 + %1 = load i16, ptr @c, align 2 + %conv1 = zext i16 %1 to i32 + %or.16 = or i32 %conv, 1 + %add.16 = add nsw i32 %or.16, %conv1 + %or.18 = or i32 %conv, 1 + %add.18 = add nsw i32 %or.18, 
%conv1 + %conv4.181 = or i32 %add.16, %add.18 + %or.20 = or i32 %conv, 1 + %add.20 = add nsw i32 %or.20, %conv1 + %conv4.202 = or i32 %conv4.181, %add.20 + %or.22 = or i32 %conv, 1 + %add.22 = add nsw i32 %or.22, %conv1 + %conv4.223 = or i32 %conv4.202, %add.22 + %or.24 = or i32 %conv, 1 + %add.24 = add nsw i32 %or.24, %conv1 + %conv4.244 = or i32 %conv4.223, %add.24 + %or.26 = or i32 %conv, 1 + %add.26 = add nsw i32 %or.26, %conv1 + %conv4.265 = or i32 %conv4.244, %add.26 + %or.28 = or i32 %conv, 1 + %add.28 = add nsw i32 %or.28, %conv1 + %conv4.286 = or i32 %conv4.265, %add.28 + %or.30 = or i32 %conv, 32769 + %add.30 = add nsw i32 %or.30, %conv1 + %conv4.307 = or i32 %conv4.286, %add.30 + %conv4.30 = trunc i32 %conv4.307 to i8 + %xor.31 = and i32 %or.30, -2 + store i32 %xor.31, ptr @d, align 4 + ret i8 %conv4.30 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-vecop-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-vecop-vectorized.ll new file mode 100644 index 0000000000000..a7dbe7d0b43fe --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-vecop-vectorized.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt --passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mattr=+avx512vl | FileCheck %s + +define i32 @test() { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: br label [[TMP1:%.*]] +; CHECK: 1: +; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x double> [ zeroinitializer, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[TMP1]] ] +; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.fma.v4f64(<4 x double> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x double> zeroinitializer, <8 x double> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP6]] = shufflevector <4 x double> [[TMP5]], <4 x double> , <4 x i32> +; CHECK-NEXT: br label [[TMP1]] +; + br label %1 + +1: + %.i489 = phi double [ 0.000000e+00, %0 ], [ 0.000000e+00, %1 ] + %.i1102 = phi double [ 0.000000e+00, %0 ], [ %.i1110, %1 ] + %.i4105 = phi double [ 0.000000e+00, %0 ], [ %.i4113, %1 ] + %.i14525 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i1102) + %.i24526 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i489) + %.i44529 = call double @llvm.fma.f64(double 0.000000e+00, double 0.000000e+00, double %.i4105) + %.upto16034 = insertelement <8 x double> zeroinitializer, double %.i14525, i64 1 + %.upto26035 = insertelement <8 x double> %.upto16034, double %.i24526, i64 2 + %.upto36036 = insertelement <8 x double> %.upto26035, double %.i14525, i64 3 + %.upto46037 = insertelement <8 x double> %.upto36036, double %.i44529, i64 0 + %.i1110 = extractelement <8 x double> %.upto46037, i64 0 + %.i4113 = extractelement <8 x double> zeroinitializer, i64 0 + br label %1 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll index f4a471493f1b3..55da3e5f9f37c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/int-bitcast-minbitwidth.ll @@ -7,9 +7,10 @@ define void @t(i64 %v) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = 
shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP3]]) +; CHECK-NEXT: [[TMP5:%.*]] = sext i16 [[TMP4]] to i32 ; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 65535 ; CHECK-NEXT: store i32 [[TMP6]], ptr null, align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll index 44738aa1a6747..a8d481a3e28a5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll @@ -5,17 +5,22 @@ define i32 @test() { ; CHECK-LABEL: define i32 @test() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i8, ptr null, align 1 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[A_PROMOTED]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i16> -; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP5]], -; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP6]]) -; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32 +; CHECK-NEXT: [[DEC_4:%.*]] = add i8 [[A_PROMOTED]], 0 +; CHECK-NEXT: [[CONV_I_4:%.*]] = zext i8 [[DEC_4]] to i32 +; CHECK-NEXT: [[SUB_I_4:%.*]] = add nuw nsw i32 [[CONV_I_4]], 0 +; CHECK-NEXT: [[DEC_5:%.*]] = add i8 [[A_PROMOTED]], 0 +; CHECK-NEXT: [[CONV_I_5:%.*]] = zext i8 [[DEC_5]] to i32 +; CHECK-NEXT: [[SUB_I_5:%.*]] = add nuw nsw i32 [[CONV_I_5]], 65535 +; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[SUB_I_4]], [[SUB_I_5]] +; CHECK-NEXT: [[DEC_6:%.*]] = or i8 [[A_PROMOTED]], 0 +; CHECK-NEXT: [[CONV_I_6:%.*]] = zext i8 [[DEC_6]] to i32 +; CHECK-NEXT: [[SUB_I_6:%.*]] = add nuw nsw i32 [[CONV_I_6]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP0]], [[SUB_I_6]] +; CHECK-NEXT: [[TMP10:%.*]] = or i8 [[A_PROMOTED]], 0 +; CHECK-NEXT: [[CONV_I_7:%.*]] = zext i8 [[TMP10]] to i32 +; CHECK-NEXT: [[SUB_I_7:%.*]] = add nuw nsw i32 [[CONV_I_7]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP1]], [[SUB_I_7]] ; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 65535 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i8> [[TMP4]], i32 3 ; CHECK-NEXT: store i8 [[TMP10]], ptr null, align 1 ; CHECK-NEXT: [[CALL3:%.*]] = tail call i32 (ptr, ...) 
null(ptr null, i32 [[TMP9]]) ; CHECK-NEXT: ret i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll index 4acd63078b82e..4af69dff179e2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-6 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-7 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s define void @test(i64 %d.promoted.i) { ; CHECK-LABEL: define void @test( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/store-abs-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/store-abs-minbitwidth.ll index e8b854b7cea6c..df7312e3d2b56 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/store-abs-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/store-abs-minbitwidth.ll @@ -13,14 +13,13 @@ define i32 @test(ptr noalias %in, ptr noalias %inn, ptr %out) { ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[TMP7]] to <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[TMP7]] to <4 x i16> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> [[TMP10]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i8> [[TMP11]] to <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = sub <4 x i32> [[TMP12]], [[TMP8]] -; CHECK-NEXT: [[TMP14:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP13]], i1 true) -; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i8> [[TMP11]] to <4 x i16> +; CHECK-NEXT: [[TMP13:%.*]] = sub <4 x i16> [[TMP12]], [[TMP8]] +; CHECK-NEXT: [[TMP15:%.*]] = call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[TMP13]], i1 false) ; CHECK-NEXT: store <4 x i16> [[TMP15]], ptr [[OUT:%.*]], align 2 ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext-incoming-for-neg-icmp.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext-incoming-for-neg-icmp.ll new file mode 100644 index 0000000000000..7f086d17ca4c0 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/zext-incoming-for-neg-icmp.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @test(i32 %a, i8 %b, i8 %c) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: i32 [[A:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[C]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> 
[[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[TMP4]] to <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp sle <4 x i16> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP7]], [[A]] +; CHECK-NEXT: ret i32 [[OP_RDX]] +; +entry: + %0 = add i8 %c, -3 + %dec19 = add i8 %c, -1 + %conv20 = zext i8 %dec19 to i32 + %conv16.1 = sext i8 %b to i32 + %cmp17.1 = icmp sle i32 %conv20, %conv16.1 + %conv18.1 = zext i1 %cmp17.1 to i32 + %a.1 = add nsw i32 %conv18.1, %a + %dec19.1 = add i8 %c, -2 + %conv20.1 = zext i8 %dec19.1 to i32 + %conv16.2 = sext i8 %b to i32 + %cmp17.2 = icmp sle i32 %conv20.1, %conv16.2 + %conv18.2 = zext i1 %cmp17.2 to i32 + %a.2 = add nsw i32 %a.1, %conv18.2 + %1 = zext i8 %0 to i32 + %conv16.158 = sext i8 %b to i32 + %cmp17.159 = icmp sle i32 %1, %conv16.158 + %conv18.160 = zext i1 %cmp17.159 to i32 + %a.161 = add nsw i32 %a.2, %conv18.160 + %dec19.162 = add i8 %c, -4 + %conv20.163 = zext i8 %dec19.162 to i32 + %conv16.1.1 = sext i8 %b to i32 + %cmp17.1.1 = icmp sle i32 %conv20.163, %conv16.1.1 + %conv18.1.1 = zext i1 %cmp17.1.1 to i32 + %a.1.1 = add nsw i32 %a.161, %conv18.1.1 + ret i32 %a.1.1 +} + diff --git a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll index 4a4c94098ab94..887d182016818 100644 --- a/llvm/test/Transforms/SimplifyCFG/HoistCode.ll +++ b/llvm/test/Transforms/SimplifyCFG/HoistCode.ll @@ -125,6 +125,37 @@ F: ret i32 %z2 } + +define float @hoist_uitofp_flags_preserve(i1 %C, i8 %x) { +; CHECK-LABEL: @hoist_uitofp_flags_preserve( +; CHECK-NEXT: common.ret: +; CHECK-NEXT: [[Z1:%.*]] = uitofp nneg i8 [[X:%.*]] to float +; CHECK-NEXT: ret float [[Z1]] +; + br i1 %C, label %T, label %F +T: + %z1 = uitofp nneg i8 %x to float + ret float %z1 +F: + %z2 = uitofp nneg i8 %x to float + ret float %z2 +} + +define float @hoist_uitofp_flags_drop(i1 %C, i8 %x) { +; CHECK-LABEL: @hoist_uitofp_flags_drop( +; CHECK-NEXT: common.ret: +; CHECK-NEXT: [[Z1:%.*]] = uitofp i8 [[X:%.*]] to float +; CHECK-NEXT: ret float [[Z1]] +; + br i1 %C, label %T, label %F +T: + %z1 = uitofp nneg i8 %x to float + ret float %z1 +F: + %z2 = uitofp i8 %x to float + ret float %z2 +} + define i32 @hoist_or_flags_preserve(i1 %C, i32 %x, i32 %y) { ; CHECK-LABEL: @hoist_or_flags_preserve( ; CHECK-NEXT: common.ret: diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll index d86e44f199b39..0e005ae75ef5a 100644 --- a/llvm/test/Transforms/Util/add-TLI-mappings.ll +++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll @@ -46,15 +46,21 @@ ; SLEEFGNUABI-SAME: ptr @_ZGVsNxvl4l4_sincospif, ; SLEEFGNUABI_SAME; ptr @_ZGVnN4v_log10f, ; SLEEFGNUABI-SAME: ptr @_ZGVsMxv_log10f -; ARMPL-SAME: [10 x ptr] [ +; ARMPL-SAME: [16 x ptr] [ ; ARMPL-SAME: ptr @armpl_vmodfq_f64, +; ARMPL-SAME: ptr @armpl_svmodf_f64_x, ; ARMPL-SAME: ptr @armpl_vmodfq_f32, +; ARMPL-SAME: ptr @armpl_svmodf_f32_x, ; ARMPL-SAME: ptr @armpl_vsinq_f64, ; ARMPL-SAME: ptr @armpl_svsin_f64_x, ; ARMPL-SAME: ptr @armpl_vsincosq_f64, +; ARMPL-SAME: ptr @armpl_svsincos_f64_x, ; ARMPL-SAME: ptr @armpl_vsincosq_f32, +; ARMPL-SAME: ptr @armpl_svsincos_f32_x, ; ARMPL-SAME: ptr @armpl_vsincospiq_f64, +; ARMPL-SAME: ptr @armpl_svsincospi_f64_x, ; ARMPL-SAME: ptr @armpl_vsincospiq_f32, +; ARMPL-SAME: 
ptr @armpl_svsincospi_f32_x, ; ARMPL-SAME: ptr @armpl_vlog10q_f32, ; ARMPL-SAME: ptr @armpl_svlog10_f32_x ; COMMON-SAME: ], section "llvm.metadata" @@ -195,13 +201,19 @@ declare float @llvm.log10.f32(float) #0 ; SLEEFGNUABI: declare <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float>, <vscale x 4 x i1>) ; ARMPL: declare <2 x double> @armpl_vmodfq_f64(<2 x double>, ptr) +; ARMPL: declare <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double>, ptr, <vscale x 2 x i1>) ; ARMPL: declare <4 x float> @armpl_vmodfq_f32(<4 x float>, ptr) +; ARMPL: declare <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float>, ptr, <vscale x 4 x i1>) ; ARMPL: declare <2 x double> @armpl_vsinq_f64(<2 x double>) ; ARMPL: declare <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double>, <vscale x 2 x i1>) ; ARMPL: declare void @armpl_vsincosq_f64(<2 x double>, ptr, ptr) +; ARMPL: declare void @armpl_svsincos_f64_x(<vscale x 2 x double>, ptr, ptr, <vscale x 2 x i1>) ; ARMPL: declare void @armpl_vsincosq_f32(<4 x float>, ptr, ptr) +; ARMPL: declare void @armpl_svsincos_f32_x(<vscale x 4 x float>, ptr, ptr, <vscale x 4 x i1>) ; ARMPL: declare void @armpl_vsincospiq_f64(<2 x double>, ptr, ptr) +; ARMPL: declare void @armpl_svsincospi_f64_x(<vscale x 2 x double>, ptr, ptr, <vscale x 2 x i1>) ; ARMPL: declare void @armpl_vsincospiq_f32(<4 x float>, ptr, ptr) +; ARMPL: declare void @armpl_svsincospi_f32_x(<vscale x 4 x float>, ptr, ptr, <vscale x 4 x i1>) ; ARMPL: declare <4 x float> @armpl_vlog10q_f32(<4 x float>) ; ARMPL: declare <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float>, <vscale x 4 x i1>) @@ -255,20 +267,26 @@ attributes #0 = { nounwind readnone } ; SLEEFGNUABI-SAME: _ZGVsMxv_llvm.log10.f32(_ZGVsMxv_log10f)" } ; ARMPL: attributes #[[MODF]] = { "vector-function-abi-variant"= -; ARMPL-SAME: "_ZGV_LLVM_N2vl8_modf(armpl_vmodfq_f64)" } +; ARMPL-SAME: "_ZGV_LLVM_N2vl8_modf(armpl_vmodfq_f64), +; ARMPL-SAME: _ZGVsMxvl8_modf(armpl_svmodf_f64_x)" } ; ARMPL: attributes #[[MODFF]] = { "vector-function-abi-variant"= -; ARMPL-SAME: "_ZGV_LLVM_N4vl4_modff(armpl_vmodfq_f32)" } +; ARMPL-SAME: "_ZGV_LLVM_N4vl4_modff(armpl_vmodfq_f32), +; ARMPL-SAME: _ZGVsMxvl4_modff(armpl_svmodf_f32_x)" } ; ARMPL: attributes #[[SIN]] = { "vector-function-abi-variant"= ; ARMPL-SAME: "_ZGV_LLVM_N2v_sin(armpl_vsinq_f64), ; ARMPL-SAME: _ZGVsMxv_sin(armpl_svsin_f64_x)" } ; ARMPL: attributes #[[SINCOS]] = { "vector-function-abi-variant"= -; ARMPL-SAME: "_ZGV_LLVM_N2vl8l8_sincos(armpl_vsincosq_f64)" } +; ARMPL-SAME: "_ZGV_LLVM_N2vl8l8_sincos(armpl_vsincosq_f64), +; ARMPL-SAME: _ZGVsMxvl8l8_sincos(armpl_svsincos_f64_x)" } ; ARMPL: attributes #[[SINCOSF]] = { "vector-function-abi-variant"= -; ARMPL-SAME: "_ZGV_LLVM_N4vl4l4_sincosf(armpl_vsincosq_f32)" } +; ARMPL-SAME: "_ZGV_LLVM_N4vl4l4_sincosf(armpl_vsincosq_f32), +; ARMPL-SAME: _ZGVsMxvl4l4_sincosf(armpl_svsincos_f32_x)" } ; ARMPL: attributes #[[SINCOSPI]] = { "vector-function-abi-variant"= -; ARMPL-SAME: "_ZGV_LLVM_N2vl8l8_sincospi(armpl_vsincospiq_f64)" } +; ARMPL-SAME: "_ZGV_LLVM_N2vl8l8_sincospi(armpl_vsincospiq_f64), +; ARMPL-SAME: _ZGVsMxvl8l8_sincospi(armpl_svsincospi_f64_x)" } ; ARMPL: attributes #[[SINCOSPIF]] = { "vector-function-abi-variant"= -; ARMPL-SAME: "_ZGV_LLVM_N4vl4l4_sincospif(armpl_vsincospiq_f32)" } +; ARMPL-SAME: "_ZGV_LLVM_N4vl4l4_sincospif(armpl_vsincospiq_f32), +; ARMPL-SAME: _ZGVsMxvl4l4_sincospif(armpl_svsincospi_f32_x)" } ; ARMPL: attributes #[[LOG10]] = { "vector-function-abi-variant"= ; ARMPL-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(armpl_vlog10q_f32), ; ARMPL-SAME: _ZGVsMxv_llvm.log10.f32(armpl_svlog10_f32_x)" } diff --git a/llvm/test/Transforms/lower-builtin-allow-check.ll b/llvm/test/Transforms/lower-builtin-allow-check.ll new file mode 100644 index 0000000000000..05d940a46716c --- /dev/null +++ b/llvm/test/Transforms/lower-builtin-allow-check.ll @@ -0,0 +1,462 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt
< %s -passes='function(lower-allow-check)' -S | FileCheck %s --check-prefixes=NOPROFILE +; RUN: opt < %s -passes='function(lower-allow-check)' -lower-allow-check-random-rate=0 -S | FileCheck %s --check-prefixes=NONE +; RUN: opt < %s -passes='require,function(lower-allow-check)' -lower-allow-check-percentile-cutoff-hot=990000 -S | FileCheck %s --check-prefixes=HOT99 +; RUN: opt < %s -passes='require,function(lower-allow-check)' -lower-allow-check-percentile-cutoff-hot=700000 -S | FileCheck %s --check-prefixes=HOT70 + +target triple = "x86_64-pc-linux-gnu" + +declare void @llvm.ubsantrap(i8 immarg) +declare i1 @llvm.allow.ubsan.check(i8 immarg) + +define dso_local noundef i32 @simple(ptr noundef readonly %0) { +; NOPROFILE-LABEL: define dso_local noundef i32 @simple( +; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; NOPROFILE-NEXT: [[HOT:%.*]] = xor i1 true, true +; NOPROFILE-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; NOPROFILE-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NOPROFILE: 3: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; NOPROFILE-NEXT: ret i32 [[TMP5]] +; +; NONE-LABEL: define dso_local noundef i32 @simple( +; NONE-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; NONE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; NONE-NEXT: [[HOT:%.*]] = xor i1 false, true +; NONE-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; NONE-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NONE: 3: +; NONE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NONE-NEXT: unreachable +; NONE: 4: +; NONE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; NONE-NEXT: ret i32 [[TMP5]] +; +; HOT99-LABEL: define dso_local noundef i32 @simple( +; HOT99-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; HOT99-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT99-NEXT: [[HOT:%.*]] = xor i1 true, true +; HOT99-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; HOT99-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT99: 3: +; HOT99-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT99-NEXT: unreachable +; HOT99: 4: +; HOT99-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT99-NEXT: ret i32 [[TMP5]] +; +; HOT70-LABEL: define dso_local noundef i32 @simple( +; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT70-NEXT: [[HOT:%.*]] = xor i1 true, true +; HOT70-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; HOT70-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT70: 3: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT70-NEXT: ret i32 [[TMP5]] +; + %chk = icmp eq ptr %0, null + %allow = call i1 @llvm.allow.ubsan.check(i8 22) + %hot = xor i1 %allow, true + %2 = or i1 %chk, %hot + br i1 %2, label %3, label %4 + +3: + tail call void @llvm.ubsantrap(i8 22) + unreachable + +4: + %5 = load i32, ptr %0, align 4 + ret i32 %5 +} + + +define dso_local noundef i32 @hot(ptr noundef readonly %0) !prof !36 { +; NOPROFILE-LABEL: define dso_local noundef i32 @hot( +; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; NOPROFILE-NEXT: [[HOT:%.*]] = xor i1 true, true +; NOPROFILE-NEXT: [[TMP6:%.*]] = or i1 
[[TMP2]], [[HOT]] +; NOPROFILE-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NOPROFILE: 3: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; NOPROFILE-NEXT: ret i32 [[TMP5]] +; +; NONE-LABEL: define dso_local noundef i32 @hot( +; NONE-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; NONE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; NONE-NEXT: [[HOT:%.*]] = xor i1 false, true +; NONE-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; NONE-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NONE: 3: +; NONE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NONE-NEXT: unreachable +; NONE: 4: +; NONE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; NONE-NEXT: ret i32 [[TMP5]] +; +; HOT99-LABEL: define dso_local noundef i32 @hot( +; HOT99-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; HOT99-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT99-NEXT: [[HOT:%.*]] = xor i1 false, true +; HOT99-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; HOT99-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT99: 3: +; HOT99-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT99-NEXT: unreachable +; HOT99: 4: +; HOT99-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT99-NEXT: ret i32 [[TMP5]] +; +; HOT70-LABEL: define dso_local noundef i32 @hot( +; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT70-NEXT: [[HOT:%.*]] = xor i1 true, true +; HOT70-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; HOT70-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT70: 3: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT70-NEXT: ret i32 [[TMP5]] +; + %chk = icmp eq ptr %0, null + %allow = call i1 @llvm.allow.ubsan.check(i8 22) + %hot = xor i1 %allow, true + %2 = or i1 %chk, %hot + br i1 %2, label %3, label %4 + +3: + tail call void @llvm.ubsantrap(i8 22) + unreachable + +4: + %5 = load i32, ptr %0, align 4 + ret i32 %5 +} + +define dso_local noundef i32 @veryHot(ptr noundef readonly %0) !prof !39 { +; NOPROFILE-LABEL: define dso_local noundef i32 @veryHot( +; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; NOPROFILE-NEXT: [[HOT:%.*]] = xor i1 true, true +; NOPROFILE-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; NOPROFILE-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NOPROFILE: 3: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; NOPROFILE-NEXT: ret i32 [[TMP5]] +; +; NONE-LABEL: define dso_local noundef i32 @veryHot( +; NONE-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; NONE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; NONE-NEXT: [[HOT:%.*]] = xor i1 false, true +; NONE-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; NONE-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NONE: 3: +; NONE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NONE-NEXT: unreachable +; NONE: 4: +; NONE-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; NONE-NEXT: ret i32 [[TMP5]] +; +; HOT99-LABEL: define dso_local noundef i32 
@veryHot( +; HOT99-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; HOT99-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT99-NEXT: [[HOT:%.*]] = xor i1 false, true +; HOT99-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; HOT99-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT99: 3: +; HOT99-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT99-NEXT: unreachable +; HOT99: 4: +; HOT99-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT99-NEXT: ret i32 [[TMP5]] +; +; HOT70-LABEL: define dso_local noundef i32 @veryHot( +; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT70-NEXT: [[HOT:%.*]] = xor i1 false, true +; HOT70-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; HOT70-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT70: 3: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT70-NEXT: ret i32 [[TMP5]] +; + %chk = icmp eq ptr %0, null + %allow = call i1 @llvm.allow.ubsan.check(i8 22) + %hot = xor i1 %allow, true + %2 = or i1 %chk, %hot + br i1 %2, label %3, label %4 + +3: + tail call void @llvm.ubsantrap(i8 22) + unreachable + +4: + %5 = load i32, ptr %0, align 4 + ret i32 %5 +} + + +define dso_local noundef i32 @branchColdFnHot(i32 noundef %0, ptr noundef readonly %1) !prof !39 { +; NOPROFILE-LABEL: define dso_local noundef i32 @branchColdFnHot( +; NOPROFILE-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; NOPROFILE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; NOPROFILE-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; NOPROFILE-NEXT: [[HOT:%.*]] = xor i1 true, true +; NOPROFILE-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; NOPROFILE-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; NOPROFILE: 6: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 7: +; NOPROFILE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; NOPROFILE-NEXT: br label [[TMP9]] +; NOPROFILE: 9: +; NOPROFILE-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; NOPROFILE-NEXT: ret i32 [[TMP10]] +; +; NONE-LABEL: define dso_local noundef i32 @branchColdFnHot( +; NONE-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; NONE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; NONE-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; NONE: 4: +; NONE-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; NONE-NEXT: [[HOT:%.*]] = xor i1 false, true +; NONE-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; NONE-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; NONE: 6: +; NONE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NONE-NEXT: unreachable +; NONE: 7: +; NONE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; NONE-NEXT: br label [[TMP9]] +; NONE: 9: +; NONE-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; NONE-NEXT: ret i32 [[TMP10]] +; +; HOT99-LABEL: define dso_local noundef i32 @branchColdFnHot( +; HOT99-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; HOT99-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT99-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], 
label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; HOT99: 4: +; HOT99-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT99-NEXT: [[HOT:%.*]] = xor i1 true, true +; HOT99-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; HOT99-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT99: 6: +; HOT99-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT99-NEXT: unreachable +; HOT99: 7: +; HOT99-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT99-NEXT: br label [[TMP9]] +; HOT99: 9: +; HOT99-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT99-NEXT: ret i32 [[TMP10]] +; +; HOT70-LABEL: define dso_local noundef i32 @branchColdFnHot( +; HOT70-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; HOT70-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT70-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT70-NEXT: [[HOT:%.*]] = xor i1 true, true +; HOT70-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; HOT70-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT70: 6: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 7: +; HOT70-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT70-NEXT: br label [[TMP9]] +; HOT70: 9: +; HOT70-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT70-NEXT: ret i32 [[TMP10]] +; + %3 = icmp eq i32 %0, 0 + br i1 %3, label %9, label %4, !prof !38 + +4: + %chk = icmp eq ptr %1, null + %allow = call i1 @llvm.allow.ubsan.check(i8 22) + %hot = xor i1 %allow, true + %5 = or i1 %chk, %hot + br i1 %5, label %6, label %7 + +6: + tail call void @llvm.ubsantrap(i8 22) #2 + unreachable + +7: + %8 = load i32, ptr %1, align 4 + br label %9 + +9: + %10 = phi i32 [ %8, %7 ], [ 0, %2 ] + ret i32 %10 +} + +define dso_local noundef i32 @branchHotFnCold(i32 noundef %0, ptr noundef readonly %1) !prof !36 { +; NOPROFILE-LABEL: define dso_local noundef i32 @branchHotFnCold( +; NOPROFILE-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; NOPROFILE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; NOPROFILE-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; NOPROFILE: 4: +; NOPROFILE-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; NOPROFILE-NEXT: [[HOT:%.*]] = xor i1 true, true +; NOPROFILE-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; NOPROFILE-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; NOPROFILE: 6: +; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) +; NOPROFILE-NEXT: unreachable +; NOPROFILE: 7: +; NOPROFILE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; NOPROFILE-NEXT: br label [[TMP9]] +; NOPROFILE: 9: +; NOPROFILE-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; NOPROFILE-NEXT: ret i32 [[TMP10]] +; +; NONE-LABEL: define dso_local noundef i32 @branchHotFnCold( +; NONE-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; NONE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; NONE-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; NONE: 4: +; NONE-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; NONE-NEXT: [[HOT:%.*]] = xor i1 false, true +; NONE-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; NONE-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; NONE: 6: +; NONE-NEXT: tail 
call void @llvm.ubsantrap(i8 22) +; NONE-NEXT: unreachable +; NONE: 7: +; NONE-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; NONE-NEXT: br label [[TMP9]] +; NONE: 9: +; NONE-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; NONE-NEXT: ret i32 [[TMP10]] +; +; HOT99-LABEL: define dso_local noundef i32 @branchHotFnCold( +; HOT99-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; HOT99-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT99-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; HOT99: 4: +; HOT99-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT99-NEXT: [[HOT:%.*]] = xor i1 false, true +; HOT99-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; HOT99-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT99: 6: +; HOT99-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT99-NEXT: unreachable +; HOT99: 7: +; HOT99-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT99-NEXT: br label [[TMP9]] +; HOT99: 9: +; HOT99-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT99-NEXT: ret i32 [[TMP10]] +; +; HOT70-LABEL: define dso_local noundef i32 @branchHotFnCold( +; HOT70-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; HOT70-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT70-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; HOT70: 4: +; HOT70-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT70-NEXT: [[HOT:%.*]] = xor i1 true, true +; HOT70-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; HOT70-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT70: 6: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT70-NEXT: unreachable +; HOT70: 7: +; HOT70-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT70-NEXT: br label [[TMP9]] +; HOT70: 9: +; HOT70-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT70-NEXT: ret i32 [[TMP10]] +; + %3 = icmp eq i32 %0, 0 + br i1 %3, label %9, label %4, !prof !37 + +4: + %chk = icmp eq ptr %1, null + %allow = call i1 @llvm.allow.ubsan.check(i8 22) + %hot = xor i1 %allow, true + %5 = or i1 %chk, %hot + br i1 %5, label %6, label %7 + +6: + tail call void @llvm.ubsantrap(i8 22) #2 + unreachable + +7: + %8 = load i32, ptr %1, align 4 + br label %9 + +9: + %10 = phi i32 [ %8, %7 ], [ 0, %2 ] + ret i32 %10 +} + +!llvm.module.flags = !{!6} +!6 = !{i32 1, !"ProfileSummary", !7} +!7 = !{!8, !9, !10, !11, !12, !13, !14, !17} +!8 = !{!"ProfileFormat", !"InstrProf"} +!9 = !{!"TotalCount", i64 30000} +!10 = !{!"MaxCount", i64 10000} +!11 = !{!"MaxInternalCount", i64 10000} +!12 = !{!"MaxFunctionCount", i64 10000} +!13 = !{!"NumCounts", i64 3} +!14 = !{!"NumFunctions", i64 5} +!17 = !{!"DetailedSummary", !18} +!18 = !{!19, !29, !30, !32, !34} +!19 = !{i32 10000, i64 10000, i32 3} +!29 = !{i32 950000, i64 5000, i32 3} +!30 = !{i32 990000, i64 500, i32 4} +!32 = !{i32 999900, i64 250, i32 4} +!34 = !{i32 999999, i64 1, i32 6} + +!36 = !{!"function_entry_count", i64 1000} +!39 = !{!"function_entry_count", i64 7000} + +!37 = !{!"branch_weights", i32 1, i32 1000} +!38 = !{!"branch_weights", i32 1000, i32 1} + +;. +; NOPROFILE: [[PROF16]] = !{!"function_entry_count", i64 1000} +; NOPROFILE: [[PROF17]] = !{!"function_entry_count", i64 7000} +; NOPROFILE: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; NOPROFILE: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +;. 
+; NONE: [[PROF16]] = !{!"function_entry_count", i64 1000} +; NONE: [[PROF17]] = !{!"function_entry_count", i64 7000} +; NONE: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; NONE: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +;. +; HOT99: [[PROF16]] = !{!"function_entry_count", i64 1000} +; HOT99: [[PROF17]] = !{!"function_entry_count", i64 7000} +; HOT99: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; HOT99: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +;. +; HOT70: [[PROF16]] = !{!"function_entry_count", i64 1000} +; HOT70: [[PROF17]] = !{!"function_entry_count", i64 7000} +; HOT70: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; HOT70: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +;. diff --git a/llvm/test/Verifier/module-flags-note-gnu-property-elf-pauthabi.ll b/llvm/test/Verifier/module-flags-note-gnu-property-elf-pauthabi.ll new file mode 100644 index 0000000000000..435073d01c8e6 --- /dev/null +++ b/llvm/test/Verifier/module-flags-note-gnu-property-elf-pauthabi.ll @@ -0,0 +1,19 @@ +; RUN: rm -rf %t && split-file %s %t && cd %t + +; CHECK: either both or no 'aarch64-elf-pauthabi-platform' and 'aarch64-elf-pauthabi-version' module flags must be present + +;--- err1.ll + +; RUN: not llvm-as err1.ll -o /dev/null 2>&1 | FileCheck %s + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 2} + +;--- err2.ll + +; RUN: not llvm-as err2.ll -o /dev/null 2>&1 | FileCheck %s + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 31} diff --git a/llvm/test/Verifier/pr69428.ll b/llvm/test/Verifier/pr69428.ll new file mode 100644 index 0000000000000..be8733bea1ab3 --- /dev/null +++ b/llvm/test/Verifier/pr69428.ll @@ -0,0 +1,48 @@ +; RUN: llvm-as -disable-output %s + +%struct._List_node_emplace_op2 = type { i8 } + +@"?_List@@3HA" = global i32 0, align 4 + +define void @"?ExecutionEngineaddExecutableDependency@@YAXXZ"() personality ptr @__CxxFrameHandler3 { +entry: + %agg.tmp.ensured.i = alloca %struct._List_node_emplace_op2, align 1 + %0 = load i32, ptr @"?_List@@3HA", align 4 + %call.i = call noundef ptr @"??0?$_List_node_emplace_op2@H@@QEAA@H@Z"(ptr %agg.tmp.ensured.i, i32 %0) + invoke void @llvm.seh.scope.begin() + to label %invoke.cont.i unwind label %ehcleanup.i + +invoke.cont.i: ; preds = %entry + invoke void @llvm.seh.scope.end() + to label %invoke.cont2.i unwind label %ehcleanup.i + +invoke.cont2.i: ; preds = %invoke.cont.i + call void @"??1?$_List_node_emplace_op2@H@@QEAA@XZ"(ptr %agg.tmp.ensured.i) #6 + unreachable + +ehcleanup.i: ; preds = %invoke.cont.i, %entry + %1 = cleanuppad within none [] + invoke void @llvm.seh.scope.begin() + to label %invoke.cont.i.i unwind label %ehcleanup.i.i + +invoke.cont.i.i: ; preds = %ehcleanup.i + invoke void @llvm.seh.scope.end() + to label %"??1?$_List_node_emplace_op2@H@@QEAA@XZ.exit.i" unwind label %ehcleanup.i.i + +ehcleanup.i.i: ; preds = %invoke.cont.i.i, %ehcleanup.i + %2 = cleanuppad within %1 [] + call void @"??1_Alloc_construct_ptr@@QEAA@XZ"(ptr %agg.tmp.ensured.i) #6 [ "funclet"(token %2) ] + cleanupret from %2 unwind to caller + +"??1?$_List_node_emplace_op2@H@@QEAA@XZ.exit.i": ; preds = %invoke.cont.i.i + call void @"??1_Alloc_construct_ptr@@QEAA@XZ"(ptr %agg.tmp.ensured.i) #6 [ "funclet"(token %1) ] + cleanupret from %1 unwind to caller +} + +declare i32 @__CxxFrameHandler3(...) 
+declare void @llvm.seh.scope.begin() +declare void @llvm.seh.scope.end() + +declare void @"??1?$_List_node_emplace_op2@H@@QEAA@XZ"(ptr) +declare void @"??1_Alloc_construct_ptr@@QEAA@XZ"(ptr) +declare ptr @"??0?$_List_node_emplace_op2@H@@QEAA@H@Z"(ptr, i32) diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s index ca1faf62aa898..1b196b4355a6d 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s @@ -1632,17 +1632,17 @@ vzeroupper # CHECK-NEXT: 4 17 2.00 * vrcpps (%rax), %ymm2 # CHECK-NEXT: 1 5 1.00 vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 1.00 * vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundpd $1, %xmm0, %xmm2 # CHECK-NEXT: 3 11 2.00 * vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 6 2.00 vroundpd $1, %ymm0, %ymm2 # CHECK-NEXT: 3 12 2.00 * vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: 1 6 0.50 vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundps $1, %xmm0, %xmm2 # CHECK-NEXT: 3 11 2.00 * vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 6 2.00 vroundps $1, %ymm0, %ymm2 # CHECK-NEXT: 3 12 2.00 * vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: 1 6 0.50 vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundsd $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 3 11 2.00 * vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundss $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 3 11 2.00 * vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 5 1.00 vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: 2 10 1.00 * vrsqrtps (%rax), %xmm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 257.00 216.25 235.25 176.17 176.17 38.00 424.25 3.25 12.67 +# CHECK-NEXT: - 257.00 216.25 247.25 173.17 173.17 38.00 424.25 3.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2342,17 +2342,17 @@ vzeroupper # CHECK-NEXT: - - 2.33 0.33 0.50 0.50 - 0.33 - - vrcpps (%rax), %ymm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundpd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundpd $1, %ymm0, %ymm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundps $1, %ymm0, %ymm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - 
- - - vroundss $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrsqrtps (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s index dcc535306c85a..4865121308bbd 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s @@ -243,13 +243,13 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 3 15 2.00 * pmulld (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 ptest %xmm0, %xmm1 # CHECK-NEXT: 3 7 1.00 * ptest (%rax), %xmm1 -# CHECK-NEXT: 1 6 0.50 roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundpd $1, %xmm0, %xmm2 # CHECK-NEXT: 3 11 2.00 * roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundps $1, %xmm0, %xmm2 # CHECK-NEXT: 3 11 2.00 * roundps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundsd $1, %xmm0, %xmm2 # CHECK-NEXT: 3 11 2.00 * roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundss $1, %xmm0, %xmm2 # CHECK-NEXT: 3 11 2.00 * roundss $1, (%rax), %xmm2 # CHECK: Resources: @@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 23.83 22.33 25.67 25.67 5.00 80.33 0.50 1.67 +# CHECK-NEXT: - - 23.83 30.33 23.67 23.67 5.00 80.33 0.50 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -358,11 +358,11 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - 2.00 - 0.50 0.50 - - - - pmulld (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - ptest %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - ptest (%rax), %xmm1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundpd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundsd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundss $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundss $1, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s index cff60c9ce3ab3..05c476079c0f9 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ -1632,17 +1632,17 @@ vzeroupper # CHECK-NEXT: 4 18 2.00 * vrcpps (%rax), %ymm2 # CHECK-NEXT: 1 5 1.00 vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 1.00 * vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundpd $1, %xmm0, %xmm2 # CHECK-NEXT: 3 12 2.00 * vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 6 2.00 vroundpd $1, %ymm0, %ymm2 # CHECK-NEXT: 3 13 2.00 * vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: 1 6 0.50 
vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundps $1, %xmm0, %xmm2 # CHECK-NEXT: 3 12 2.00 * vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 6 2.00 vroundps $1, %ymm0, %ymm2 # CHECK-NEXT: 3 13 2.00 * vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: 1 6 0.50 vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundsd $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 3 12 2.00 * vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundss $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 3 12 2.00 * vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 5 1.00 vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: 2 11 1.00 * vrsqrtps (%rax), %xmm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 336.00 215.58 236.58 176.17 176.17 38.00 427.58 3.25 12.67 +# CHECK-NEXT: - 336.00 215.58 248.58 173.17 173.17 38.00 427.58 3.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2342,17 +2342,17 @@ vzeroupper # CHECK-NEXT: - - 2.33 0.33 0.50 0.50 - 0.33 - - vrcpps (%rax), %ymm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundpd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundpd $1, %ymm0, %ymm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundps $1, %ymm0, %ymm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrsqrtps (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s index c2d07735f1cb6..62dfa23a6bad0 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s @@ -243,13 +243,13 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 3 16 2.00 * pmulld (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 ptest %xmm0, %xmm1 # CHECK-NEXT: 3 8 1.00 * ptest (%rax), %xmm1 -# CHECK-NEXT: 1 6 0.50 roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundpd $1, %xmm0, %xmm2 # CHECK-NEXT: 3 12 2.00 * roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundps $1, %xmm0, %xmm2 # CHECK-NEXT: 3 12 2.00 * roundps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 roundsd $1, %xmm0, %xmm2 +# 
CHECK-NEXT: 2 6 2.00 roundsd $1, %xmm0, %xmm2 # CHECK-NEXT: 3 12 2.00 * roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundss $1, %xmm0, %xmm2 # CHECK-NEXT: 3 12 2.00 * roundss $1, (%rax), %xmm2 # CHECK: Resources: @@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 23.83 22.33 25.67 25.67 5.00 80.33 0.50 1.67 +# CHECK-NEXT: - - 23.83 30.33 23.67 23.67 5.00 80.33 0.50 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -358,11 +358,11 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - 2.00 - 0.50 0.50 - - - - pmulld (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - ptest %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - ptest (%rax), %xmm1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundpd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundsd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundss $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundss $1, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s b/llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s index 4654ce10dffd8..349abec66457c 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s @@ -68,12 +68,12 @@ cmovael %eax, %ecx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -145,12 +145,12 @@ cmovael %eax, %ecx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -223,12 +223,12 @@ cmovael %eax, %ecx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: 
[11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -306,12 +306,12 @@ cmovael %eax, %ecx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -389,12 +389,12 @@ cmovael %eax, %ecx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -472,12 +472,12 @@ cmovael %eax, %ecx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s index 12d6f399d4295..0fcd6f5074326 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s @@ -46,12 +46,12 @@ add %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -122,12 +122,12 @@ add %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-lo-reg-use.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-lo-reg-use.s index 93f8d7616cb9c..cd427bb5912e3 100644 --- 
a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-lo-reg-use.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-lo-reg-use.s @@ -41,12 +41,12 @@ mulxq %rax, %rax, %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -112,12 +112,12 @@ mulxq %rax, %rax, %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s index 13ef5bcb11ca9..bf82486cf737f 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s @@ -43,12 +43,12 @@ mulxq (%rdi), %rax, %rdx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -115,12 +115,12 @@ mulxq (%rdi), %rax, %rdx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-same-regs.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-same-regs.s index bfe8be85086f9..8a5a0148cf589 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-same-regs.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-same-regs.s @@ -44,12 +44,12 @@ mulxq %rax, %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - 
Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -116,12 +116,12 @@ mulxq %rax, %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-xmm.s b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-xmm.s index 1431875ade93a..f0e16a8d8b99c 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-xmm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-xmm.s @@ -68,12 +68,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -159,12 +159,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -250,12 +250,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -341,12 +341,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-ymm.s b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-ymm.s index eb2bb979b7f5f..97f6a34acac19 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-ymm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-ymm.s @@ -68,12 
+68,12 @@ vpaddq %ymm0, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -159,12 +159,12 @@ vpaddq %ymm0, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -250,12 +250,12 @@ vpaddq %ymm0, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -341,12 +341,12 @@ vpaddq %ymm0, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-mmx.s b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-mmx.s index 5909af853dd5a..c733f639819be 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-mmx.s @@ -63,12 +63,12 @@ paddd %mm0, %mm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -154,12 +154,12 @@ paddd %mm0, %mm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: 
[10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -245,12 +245,12 @@ paddd %mm0, %mm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-sse-xmm.s b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-sse-xmm.s index 5a05487f217a4..63df99e56f8cd 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-sse-xmm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-sse-xmm.s @@ -68,12 +68,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -159,12 +159,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -250,12 +250,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -341,12 +341,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-3.s index 7ac674c5a6b59..66c1322fa7902 100644 --- 
a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-3.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-3.s @@ -40,12 +40,12 @@ xor %bx, %dx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-4.s index 582da14211d0d..4ed529ee14465 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-4.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-4.s @@ -40,12 +40,12 @@ add %cx, %bx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-5.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-5.s index dda87e9ebc922..58941116d31f0 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-5.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-5.s @@ -33,12 +33,12 @@ lzcnt %ax, %bx ## partial register stall. 
# CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-6.s index 71520ea1ce4b5..fdbf4d99fbf6e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-6.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-6.s @@ -42,12 +42,12 @@ lzcnt 2(%rsp), %cx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-xmm.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-xmm.s index 7afa80cb0c9a8..f3e515ce7e210 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-xmm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-xmm.s @@ -180,12 +180,12 @@ vmovdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -474,12 +474,12 @@ vmovdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -768,12 +768,12 @@ vmovdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1062,12 +1062,12 @@ vmovdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - 
Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1356,12 +1356,12 @@ vmovdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1650,12 +1650,12 @@ vmovdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-ymm.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-ymm.s index 8b81d55ca5db9..a484a7562fdcb 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-ymm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-ymm.s @@ -180,12 +180,12 @@ vmovdqu %ymm15, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -474,12 +474,12 @@ vmovdqu %ymm15, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -768,12 +768,12 @@ vmovdqu %ymm15, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: 
[10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1062,12 +1062,12 @@ vmovdqu %ymm15, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1356,12 +1356,12 @@ vmovdqu %ymm15, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1650,12 +1650,12 @@ vmovdqu %ymm15, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-gpr.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-gpr.s index f359048e52984..eb20d13e8c944 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-gpr.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-gpr.s @@ -134,12 +134,12 @@ xchgq %r15, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -402,12 +402,12 @@ xchgq %r15, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -670,12 +670,12 @@ xchgq %r15, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - 
Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -938,12 +938,12 @@ xchgq %r15, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-mmx.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-mmx.s index b556fd61f0778..e17d6717e1366 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-mmx.s @@ -61,12 +61,12 @@ movq %mm7, %mm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-sse-xmm.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-sse-xmm.s index 147cb0fca2851..b45fd172bd555 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-sse-xmm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-sse-xmm.s @@ -180,12 +180,12 @@ movdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -474,12 +474,12 @@ movdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -768,12 +768,12 @@ movdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# 
CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1062,12 +1062,12 @@ movdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1356,12 +1356,12 @@ movdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1650,12 +1650,12 @@ movdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-x87.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-x87.s index de59edf2352ef..0465d413a854b 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-x87.s @@ -67,12 +67,12 @@ fxch %st(0) # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-adx.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-adx.s index 4e024e5846f18..9c5a19b1a0a2f 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-adx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-adx.s @@ -38,12 +38,12 @@ adox (%rbx), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] 
- Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-aes.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-aes.s index 5abf3cca9211c..d1086961682d9 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-aes.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-aes.s @@ -50,12 +50,12 @@ aeskeygenassist $22, (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s index 146b3ce62964a..4f0b4843d1704 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s @@ -1731,12 +1731,12 @@ vzeroupper # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s index 3c6b31a1ca011..1a8b9e2de1d8e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s @@ -771,12 +771,12 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi1.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi1.s index 8c0e84135750e..26002373e9f5c 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi1.s @@ -85,12 +85,12 @@ tzcnt (%rax), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# 
CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s index 8d00c99982b07..0664c1dc7eed5 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s @@ -100,12 +100,12 @@ shrx %rax, (%rbx), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-clflushopt.s index 3e7219c9c6d0f..b40d155e13f56 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-clflushopt.s @@ -23,12 +23,12 @@ clflushopt (%rax) # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-clzero.s index 0dc89faad77c4..0f9935c5ba14d 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-clzero.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-clzero.s @@ -23,12 +23,12 @@ clzero # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmov.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmov.s index e0e46afd0c0b4..8118e40ca5beb 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmov.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmov.s @@ -218,12 +218,12 @@ cmovgq (%rax), %rdi # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] 
- Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmpxchg.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmpxchg.s index 03763e5a2bfe7..9ab877636b29a 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmpxchg.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmpxchg.s @@ -25,12 +25,12 @@ cmpxchg16b (%rax) # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-f16c.s index bb995d588c43c..345ae02b30028 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-f16c.s @@ -40,12 +40,12 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-fma.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-fma.s index 9af180d8cb5c3..af207f029c82e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-fma.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-fma.s @@ -500,12 +500,12 @@ vfnmsub231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-fsgsbase.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-fsgsbase.s index 142508c4477e5..3e651837c2bd0 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-fsgsbase.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-fsgsbase.s @@ -40,12 +40,12 @@ wrgsbase %rdi # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: 
[14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-lea.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-lea.s index 1545a228d9c55..02572026003c1 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-lea.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-lea.s @@ -293,12 +293,12 @@ lea 1024(%rax, %rbx, 2), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-lzcnt.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-lzcnt.s index ffbe414bac6c0..735287a6f564e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-lzcnt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-lzcnt.s @@ -35,12 +35,12 @@ lzcntq (%rax), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-mmx.s index 75dbf95f4caae..2bc6177c80310 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-mmx.s @@ -279,12 +279,12 @@ pxor (%rax), %mm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-movbe.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-movbe.s index 144e97fbaf58f..6eeabbd2d7d40 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-movbe.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-movbe.s @@ -35,12 +35,12 @@ movbe (%rax), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-mwaitx.s 
b/llvm/test/tools/llvm-mca/X86/Znver3/resources-mwaitx.s index 3b343d733425e..103fd3ebf2392 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-mwaitx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-mwaitx.s @@ -25,12 +25,12 @@ mwaitx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-pclmul.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-pclmul.s index 2d9f0e9b14e2b..893f47612f2fd 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-pclmul.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-pclmul.s @@ -25,12 +25,12 @@ pclmulqdq $11, (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-popcnt.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-popcnt.s index cce078f2a8d8e..29bcc5c62b4c1 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-popcnt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-popcnt.s @@ -35,12 +35,12 @@ popcntq (%rax), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-prefetchw.s index 5423b6bcff1f7..b80e8f7f8b028 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-prefetchw.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-prefetchw.s @@ -25,12 +25,12 @@ prefetchw (%rax) # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdrand.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdrand.s index fb09253644496..649eb10c22afd 
100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdrand.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdrand.s @@ -27,12 +27,12 @@ rdrand %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdseed.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdseed.s index f10a90ff9bc4f..44e0eebe4a2ac 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdseed.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdseed.s @@ -27,12 +27,12 @@ rdseed %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s index 360a667edfaa7..e6d5ab90a2acc 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s @@ -55,12 +55,12 @@ sha256rnds2 (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse1.s index 9816b87df4620..4c7a3f06d7624 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse1.s @@ -328,12 +328,12 @@ xorps (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse2.s index f69c535385af5..d24aebfdf675e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse2.s @@ 
-684,12 +684,12 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse3.s index 8110390219c7c..51bb95feb091b 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse3.s @@ -74,12 +74,12 @@ mwait # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse41.s index 0cc6c6a5c25ed..e952a169db8a0 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse41.s @@ -261,12 +261,12 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse42.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse42.s index 873e4f406426e..8afcd809b67c8 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse42.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse42.s @@ -70,12 +70,12 @@ pcmpgtq (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse4a.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse4a.s index 1c1b0b24222fb..6606a3ef2b2b7 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse4a.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse4a.s @@ -35,12 +35,12 @@ movntss %xmm0, (%rax) # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - 
Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-ssse3.s index aeec49351a110..66688700404e1 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-ssse3.s @@ -180,12 +180,12 @@ psignw (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-vaes.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-vaes.s index 076094ffe1959..81afc7df85ef4 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-vaes.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-vaes.s @@ -40,12 +40,12 @@ vaesenclast (%rax), %ymm1, %ymm3 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-vpclmulqdq.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-vpclmulqdq.s index 31680d562a423..10440e904b01b 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-vpclmulqdq.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-vpclmulqdq.s @@ -25,12 +25,12 @@ vpclmulqdq $11, (%rax), %ymm1, %ymm3 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_32.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_32.s index fb09b650840eb..8f627ca18771b 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_32.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_32.s @@ -56,12 +56,12 @@ salc # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# 
CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s index fedb3d242c452..41ec631dc3fbb 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s @@ -1957,12 +1957,12 @@ xorq (%rax), %rdi # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x87.s index 9a92bd0f2e7d3..cd8a06a57f784 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x87.s @@ -364,12 +364,12 @@ fyl2xp1 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-xsave.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-xsave.s index 819361c0a025d..f348ff8696f63 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-xsave.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-xsave.s @@ -35,12 +35,12 @@ xsetbv # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-xmm.s b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-xmm.s index 33657e6b66b8f..ed4e8f90dd7b0 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-xmm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-xmm.s @@ -138,12 +138,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - 
Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -229,12 +229,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -320,12 +320,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -411,12 +411,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -502,12 +502,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -593,12 +593,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -684,12 +684,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# 
CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -775,12 +775,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -866,12 +866,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -957,12 +957,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1048,12 +1048,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1139,12 +1139,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1230,12 +1230,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] 
- Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1321,12 +1321,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1412,12 +1412,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1503,12 +1503,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1594,12 +1594,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1685,12 +1685,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-ymm.s b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-ymm.s index ba7f51eb245a1..24043369c6f14 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-ymm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-ymm.s @@ -148,12 +148,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 
-# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -239,12 +239,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -330,12 +330,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -421,12 +421,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -512,12 +512,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -603,12 +603,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -694,12 +694,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - 
Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -785,12 +785,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -876,12 +876,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -967,12 +967,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1058,12 +1058,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1149,12 +1149,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1240,12 +1240,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# 
CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1331,12 +1331,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1422,12 +1422,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1513,12 +1513,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1604,12 +1604,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1695,12 +1695,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1786,12 +1786,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: 
[12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1878,12 +1878,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s index 018adc261b08e..4d648f7fb9dea 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s @@ -68,12 +68,12 @@ addq %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -175,12 +175,12 @@ addq %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -282,12 +282,12 @@ addq %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -389,12 +389,12 @@ addq %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-sse-xmm.s 
b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-sse-xmm.s index 935881a914215..aca39c52f36e5 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-sse-xmm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-sse-xmm.s @@ -138,12 +138,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -229,12 +229,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -320,12 +320,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -411,12 +411,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -502,12 +502,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -593,12 +593,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] 
- Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -684,12 +684,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -775,12 +775,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -866,12 +866,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -957,12 +957,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1048,12 +1048,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1139,12 +1139,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - 
Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1230,12 +1230,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1321,12 +1321,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1412,12 +1412,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1503,12 +1503,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1594,12 +1594,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1685,12 +1685,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git 
a/llvm/test/tools/llvm-objdump/ELF/dynamic-malformed.test b/llvm/test/tools/llvm-objdump/ELF/dynamic-malformed.test index b10e4f5e44f18..b1e3ca17b5049 100644 --- a/llvm/test/tools/llvm-objdump/ELF/dynamic-malformed.test +++ b/llvm/test/tools/llvm-objdump/ELF/dynamic-malformed.test @@ -12,7 +12,6 @@ FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB Type: ET_EXEC - Machine: EM_X86_64 Sections: - Name: .dynamic Type: SHT_DYNAMIC @@ -29,10 +28,35 @@ FileHeader: Class: ELFCLASS64 Data: ELFDATA2LSB Type: ET_EXEC - Machine: EM_X86_64 Sections: - Name: .dynamic Type: SHT_DYNAMIC Entries: - Tag: DT_SONAME Value: 1 + +# RUN: yaml2obj %s --docnum=3 -o %t.invalidaddr +# RUN: llvm-objdump -p %t.invalidaddr 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.invalidaddr --implicit-check-not=warning: --check-prefix=ADDR + +# ADDR: Dynamic Section: +# ADDR-NEXT: warning: '[[FILE]]': virtual address is not in any segment: 0x474 +# ADDR-NEXT: NEEDED 0xffffffffbe5a0b5f +# ADDR-NEXT: STRTAB 0x0000000000000474 + +--- +!ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN +Sections: + - Name: .dynamic + Type: SHT_DYNAMIC + Entries: + - Tag: DT_NEEDED + Value: 0xFFFFFFFFBE5A0B5F + - Tag: DT_STRTAB + Value: 0x474 + - Tag: DT_NULL + Value: 0x0 diff --git a/llvm/test/tools/llvm-profdata/memprof-merge-v0.test b/llvm/test/tools/llvm-profdata/memprof-merge-v0.test new file mode 100644 index 0000000000000..68132961eb78d --- /dev/null +++ b/llvm/test/tools/llvm-profdata/memprof-merge-v0.test @@ -0,0 +1,22 @@ +REQUIRES: x86_64-linux + +RUN: echo ":ir" > %t.proftext +RUN: echo "main" >> %t.proftext +RUN: echo "742261418966908927" >> %t.proftext +RUN: echo "1" >> %t.proftext +RUN: echo "1" >> %t.proftext + +To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang +RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=0 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v0 +RUN: llvm-profdata show %t.prof.v0 | FileCheck %s + +RUN: llvm-profdata merge %t.proftext %p/Inputs/basic.memprofraw --memprof-version=1 --profiled-binary %p/Inputs/basic.memprofexe -o %t.prof.v1 +RUN: llvm-profdata show %t.prof.v1 | FileCheck %s + +For now we only check the validity of the instrumented profile since we don't +have a way to display the contents of the memprof indexed format yet. 
+ +CHECK: Instrumentation level: IR entry_first = 0 +CHECK: Total functions: 1 +CHECK: Maximum function count: 1 +CHECK: Maximum internal block count: 0 diff --git a/llvm/test/tools/llvm-ranlib/help-message.test b/llvm/test/tools/llvm-ranlib/help-message.test index 8d8824ac46121..97212aa035b31 100644 --- a/llvm/test/tools/llvm-ranlib/help-message.test +++ b/llvm/test/tools/llvm-ranlib/help-message.test @@ -5,13 +5,22 @@ # RUN: llvm-ranlib -help | FileCheck %s --check-prefix=HELP # RUN: llvm-ranlib --help | FileCheck %s --check-prefix=HELP # RUN: llvm-ranlib --version | FileCheck %s --check-prefix=VERSION -# RUN: llvm-ranlib -version | FileCheck %s --check-prefix=VERSION -# RUN: llvm-ranlib -v | FileCheck %s --check-prefix=VERSION +# RUN: llvm-ranlib -V | FileCheck %s --check-prefix=VERSION ## Also check combined options (first -h/-v flag wins) # RUN: llvm-ranlib -Dh | FileCheck %s --check-prefix=HELP -# RUN: llvm-ranlib -Dvh | FileCheck %s --check-prefix=VERSION -# RUN: llvm-ranlib -Dhv | FileCheck %s --check-prefix=HELP +# RUN: llvm-ranlib -DVh | FileCheck %s --check-prefix=VERSION +# RUN: llvm-ranlib -DhV | FileCheck %s --check-prefix=HELP # HELP: USAGE: llvm-ranlib # VERSION: version + +## -v enables verbose output in BSD ranlib and GNU ar but is another alias +## for --version in GNU ranlib. Reject -v. +# RUN: not llvm-ranlib -v 2>&1 | FileCheck %s --check-prefix=ERR1 +# RUN: not llvm-ranlib -version 2>&1 | FileCheck %s --check-prefix=ERR2 +# RUN: not llvm-ranlib -Dvh 2>&1 | FileCheck %s --check-prefix=ERR3 + +# ERR1: error: Invalid option: '-v' +# ERR2: error: Invalid option: '-version' +# ERR3: error: Invalid option: '-vh' diff --git a/llvm/test/tools/llvm-reduce/reduce-flags.ll b/llvm/test/tools/llvm-reduce/reduce-flags.ll index 036bfdc84ac43..5d6d1260ac50e 100644 --- a/llvm/test/tools/llvm-reduce/reduce-flags.ll +++ b/llvm/test/tools/llvm-reduce/reduce-flags.ll @@ -232,3 +232,35 @@ define i32 @or_disjoint_keep(i32 %a, i32 %b) { %op = or disjoint i32 %a, %b ret i32 %op } + +; CHECK-LABEL: @trunc_nuw_drop( +; INTERESTING: = trunc +; RESULT: trunc i64 +define i32 @trunc_nuw_drop(i64 %a) { + %op = trunc nuw i64 %a to i32 + ret i32 %op +} + +; CHECK-LABEL: @trunc_nuw_keep( +; INTERESTING: = trunc nuw +; RESULT: trunc nuw i64 +define i32 @trunc_nuw_keep(i64 %a) { + %op = trunc nuw i64 %a to i32 + ret i32 %op +} + +; CHECK-LABEL: @trunc_nsw_drop( +; INTERESTING: = trunc +; RESULT: trunc i64 +define i32 @trunc_nsw_drop(i64 %a) { + %op = trunc nsw i64 %a to i32 + ret i32 %op +} + +; CHECK-LABEL: @trunc_nsw_keep( +; INTERESTING: = trunc nsw +; RESULT: trunc nsw i64 +define i32 @trunc_nsw_keep(i64 %a) { + %op = trunc nsw i64 %a to i32 + ret i32 %op +} diff --git a/llvm/tools/gold/CMakeLists.txt b/llvm/tools/gold/CMakeLists.txt index 58b323805c7dd..5c78529e38f48 100644 --- a/llvm/tools/gold/CMakeLists.txt +++ b/llvm/tools/gold/CMakeLists.txt @@ -12,7 +12,7 @@ if( LLVM_ENABLE_PIC AND LLVM_BINUTILS_INCDIR ) TargetParser ) - add_llvm_library(LLVMgold MODULE + add_llvm_library(LLVMgold MODULE INSTALL_WITH_TOOLCHAIN gold-plugin.cpp ) diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp index 294b8531b08f1..3b842b76d5c87 100644 --- a/llvm/tools/llvm-ar/llvm-ar.cpp +++ b/llvm/tools/llvm-ar/llvm-ar.cpp @@ -65,7 +65,7 @@ static void printRanLibHelp(StringRef ToolName) { << "USAGE: " + ToolName + " archive...\n\n" << "OPTIONS:\n" << " -h --help - Display available options\n" - << " -v --version - Display the version of this program\n" + << " -V --version - Display the version of 
this program\n" << " -D - Use zero for timestamps and uids/gids " "(default)\n" << " -U - Use actual timestamps and uids/gids\n" @@ -1439,7 +1439,7 @@ static int ranlib_main(int argc, char **argv) { } else if (arg.front() == 'h') { printHelpMessage(); return 0; - } else if (arg.front() == 'v') { + } else if (arg.front() == 'V') { cl::PrintVersionMessage(); return 0; } else if (arg.front() == 'X') { diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp index 9e7f2c3ebac43..8f80cc26f3166 100644 --- a/llvm/tools/llvm-link/llvm-link.cpp +++ b/llvm/tools/llvm-link/llvm-link.cpp @@ -135,6 +135,9 @@ static cl::opt TryUseNewDbgInfoFormat( cl::init(false)); extern cl::opt UseNewDbgInfoFormat; +extern cl::opt PreserveInputDbgFormat; + +extern cl::opt LoadBitcodeIntoNewDbgInfoFormat; static ExitOnError ExitOnErr; @@ -480,12 +483,20 @@ int main(int argc, char **argv) { cl::HideUnrelatedOptions({&LinkCategory, &getColorCategory()}); cl::ParseCommandLineOptions(argc, argv, "llvm linker\n"); + // Load bitcode into the new debug info format by default. + if (LoadBitcodeIntoNewDbgInfoFormat == cl::boolOrDefault::BOU_UNSET) + LoadBitcodeIntoNewDbgInfoFormat = cl::boolOrDefault::BOU_TRUE; + // RemoveDIs debug-info transition: tests may request that we /try/ to use the // new debug-info format. if (TryUseNewDbgInfoFormat) { // Turn the new debug-info format on. UseNewDbgInfoFormat = true; } + // Since llvm-link collects multiple IR modules together, for simplicity's + // sake we disable the "PreserveInputDbgFormat" flag to enforce a single + // debug info format. + PreserveInputDbgFormat = cl::boolOrDefault::BOU_FALSE; LLVMContext Context; Context.setDiagnosticHandler(std::make_unique(), diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp index 3c452b650cee1..f310097eec634 100644 --- a/llvm/tools/llvm-lto/llvm-lto.cpp +++ b/llvm/tools/llvm-lto/llvm-lto.cpp @@ -271,6 +271,7 @@ static cl::opt TryUseNewDbgInfoFormat( extern cl::opt UseNewDbgInfoFormat; extern cl::opt LoadBitcodeIntoNewDbgInfoFormat; +extern cl::opt PreserveInputDbgFormat; namespace { @@ -954,6 +955,10 @@ int main(int argc, char **argv) { // Turn the new debug-info format on. UseNewDbgInfoFormat = true; } + // Since llvm-lto collects multiple IR modules together, for simplicity's sake + // we disable the "PreserveInputDbgFormat" flag to enforce a single debug info + // format. + PreserveInputDbgFormat = cl::boolOrDefault::BOU_FALSE; if (OptLevel < '0' || OptLevel > '3') error("optimization level must be between 0 and 3"); diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index f222d02bd7cea..faed9ff9939bd 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -194,6 +194,7 @@ static cl::opt TryUseNewDbgInfoFormat( extern cl::opt UseNewDbgInfoFormat; extern cl::opt LoadBitcodeIntoNewDbgInfoFormat; +extern cl::opt PreserveInputDbgFormat; static void check(Error E, std::string Msg) { if (!E) @@ -239,6 +240,10 @@ static int run(int argc, char **argv) { // Turn the new debug-info format on. UseNewDbgInfoFormat = true; } + // Since llvm-lto2 collects multiple IR modules together, for simplicity's + // sake we disable the "PreserveInputDbgFormat" flag to enforce a single debug + // info format. 
+ PreserveInputDbgFormat = cl::boolOrDefault::BOU_FALSE; // FIXME: Workaround PR30396 which means that a symbol can appear // more than once if it is defined in module-level assembly and diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp index fda99bd6d33e1..8c184fc1fbb66 100644 --- a/llvm/tools/llvm-objdump/ELFDump.cpp +++ b/llvm/tools/llvm-objdump/ELFDump.cpp @@ -68,7 +68,7 @@ static Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> &Elf) { if (Dyn.d_tag == ELF::DT_STRTAB) { auto MappedAddrOrError = Elf.toMappedAddr(Dyn.getPtr()); if (!MappedAddrOrError) - consumeError(MappedAddrOrError.takeError()); + return MappedAddrOrError.takeError(); return StringRef(reinterpret_cast<const char *>(*MappedAddrOrError)); } } @@ -223,7 +223,6 @@ template <class ELFT> void ELFDumper<ELFT>::printDynamicSection() { continue; std::string Str = Elf.getDynamicTagAsString(Dyn.d_tag); - outs() << format(TagFmt.c_str(), Str.c_str()); const char *Fmt = ELFT::Is64Bits ? "0x%016" PRIx64 "\n" : "0x%08" PRIx64 "\n"; @@ -232,14 +231,16 @@ template <class ELFT> void ELFDumper<ELFT>::printDynamicSection() { Dyn.d_tag == ELF::DT_AUXILIARY || Dyn.d_tag == ELF::DT_FILTER) { Expected<StringRef> StrTabOrErr = getDynamicStrTab(Elf); if (StrTabOrErr) { - const char *Data = StrTabOrErr.get().data(); - outs() << (Data + Dyn.d_un.d_val) << "\n"; + const char *Data = StrTabOrErr->data(); + outs() << format(TagFmt.c_str(), Str.c_str()) << Data + Dyn.getVal() + << "\n"; continue; } reportWarning(toString(StrTabOrErr.takeError()), Obj.getFileName()); consumeError(StrTabOrErr.takeError()); } - outs() << format(Fmt, (uint64_t)Dyn.d_un.d_val); + outs() << format(TagFmt.c_str(), Str.c_str()) + << format(Fmt, (uint64_t)Dyn.getVal()); } } diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlags.cpp b/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlags.cpp index 7b6fe7e5f9173..ad619a6c02a4d 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlags.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructionFlags.cpp @@ -27,6 +27,11 @@ static void reduceFlagsInModule(Oracle &O, ReducerWorkItem &WorkItem) { I.setHasNoSignedWrap(false); if (OBO->hasNoUnsignedWrap() && !O.shouldKeep()) I.setHasNoUnsignedWrap(false); + } else if (auto *Trunc = dyn_cast<TruncInst>(&I)) { + if (Trunc->hasNoSignedWrap() && !O.shouldKeep()) + Trunc->setHasNoSignedWrap(false); + if (Trunc->hasNoUnsignedWrap() && !O.shouldKeep()) + Trunc->setHasNoUnsignedWrap(false); } else if (auto *PE = dyn_cast<PossiblyExactOperator>(&I)) { if (PE->isExact() && !O.shouldKeep()) I.setIsExact(false); diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp index d5ef63e38e279..76fc26412407e 100644 --- a/llvm/unittests/ADT/APIntTest.cpp +++ b/llvm/unittests/ADT/APIntTest.cpp @@ -3249,21 +3249,11 @@ TEST(APIntTest, SolveQuadraticEquationWrap) { } TEST(APIntTest, MultiplicativeInverseExaustive) { - for (unsigned BitWidth = 1; BitWidth <= 16; ++BitWidth) { - for (unsigned Value = 0; Value < (1u << BitWidth); ++Value) { + for (unsigned BitWidth = 1; BitWidth <= 8; ++BitWidth) { + for (unsigned Value = 1; Value < (1u << BitWidth); Value += 2) { + // Multiplicative inverse exists for all odd numbers. APInt V = APInt(BitWidth, Value); - APInt MulInv = - V.zext(BitWidth + 1) .multiplicativeInverse(APInt::getSignedMinValue(BitWidth + 1)) .trunc(BitWidth); - APInt One = V * MulInv; - if (!V.isZero() && V.countr_zero() == 0) { - // Multiplicative inverse exists for all odd numbers. - EXPECT_TRUE(One.isOne()); - } else { - // Multiplicative inverse does not exist for even numbers (and 0).
- EXPECT_TRUE(MulInv.isZero()); - } + EXPECT_EQ(V * V.multiplicativeInverse(), 1); } } } diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt index 12d7325036bf0..17c5c9d1c59ce 100644 --- a/llvm/unittests/ADT/CMakeLists.txt +++ b/llvm/unittests/ADT/CMakeLists.txt @@ -44,6 +44,7 @@ add_llvm_unittest(ADTTests ImmutableMapTest.cpp ImmutableSetTest.cpp IntEqClassesTest.cpp + Interleave.cpp IntervalMapTest.cpp IntervalTreeTest.cpp IntrusiveRefCntPtrTest.cpp diff --git a/llvm/unittests/ADT/Interleave.cpp b/llvm/unittests/ADT/Interleave.cpp new file mode 100644 index 0000000000000..bc1ab1fae725e --- /dev/null +++ b/llvm/unittests/ADT/Interleave.cpp @@ -0,0 +1,42 @@ +//===- unittests/ADT/Interleave.cpp - Interleave unit tests ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" + +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(InterleaveTest, Interleave) { + std::string Str; + raw_string_ostream OS(Str); + + // Check that interleave works on a SmallVector. + SmallVector<const char *> Doodles = {"golden", "berna", "labra"}; + interleave( + Doodles, OS, [&](const char *Name) { OS << Name << "doodle"; }, ", "); + + EXPECT_EQ(OS.str(), "goldendoodle, bernadoodle, labradoodle"); +} + +TEST(InterleaveTest, InterleaveComma) { + std::string Str; + raw_string_ostream OS(Str); + + // Check that interleaveComma uses ADL to find begin/end on an array. + const StringRef LongDogs[] = {"dachshund", "doxie", "dackel", "teckel"}; + interleaveComma(LongDogs, OS); + + EXPECT_EQ(OS.str(), "dachshund, doxie, dackel, teckel"); +} + +} // anonymous namespace diff --git a/llvm/unittests/ADT/IteratorTest.cpp b/llvm/unittests/ADT/IteratorTest.cpp index 3a4a5b02b1040..a0d3c9b564d85 100644 --- a/llvm/unittests/ADT/IteratorTest.cpp +++ b/llvm/unittests/ADT/IteratorTest.cpp @@ -395,6 +395,21 @@ TEST(PointerIterator, Range) { EXPECT_EQ(A + I++, P); } +namespace rbegin_detail { +struct WithFreeRBegin { + int data[3] = {42, 43, 44}; +}; + +auto rbegin(const WithFreeRBegin &X) { return std::rbegin(X.data); } +auto rend(const WithFreeRBegin &X) { return std::rend(X.data); } +} // namespace rbegin_detail + +TEST(ReverseTest, ADL) { + // Check that we can find the rbegin/rend functions via ADL.
+ rbegin_detail::WithFreeRBegin Foo; + EXPECT_THAT(reverse(Foo), ElementsAre(44, 43, 42)); +} + TEST(ZipIteratorTest, Basic) { using namespace std; const SmallVector<unsigned, 6> pi{3, 1, 4, 1, 5, 9}; diff --git a/llvm/unittests/ADT/RangeAdapterTest.cpp b/llvm/unittests/ADT/RangeAdapterTest.cpp index eb75ac301805b..c1a8a984f233b 100644 --- a/llvm/unittests/ADT/RangeAdapterTest.cpp +++ b/llvm/unittests/ADT/RangeAdapterTest.cpp @@ -150,10 +150,6 @@ TYPED_TEST(RangeAdapterRValueTest, TrivialOperation) { TestRev(reverse(TypeParam({0, 1, 2, 3}))); } -TYPED_TEST(RangeAdapterRValueTest, HasRbegin) { - static_assert(has_rbegin<TypeParam>::value, "rbegin() should be defined"); -} - TYPED_TEST(RangeAdapterRValueTest, RangeType) { static_assert( std::is_same_v()).begin()), diff --git a/llvm/unittests/ADT/STLExtrasTest.cpp b/llvm/unittests/ADT/STLExtrasTest.cpp index b73891b59f026..3927bc59c031a 100644 --- a/llvm/unittests/ADT/STLExtrasTest.cpp +++ b/llvm/unittests/ADT/STLExtrasTest.cpp @@ -405,6 +405,14 @@ std::vector<int>::const_iterator end(const some_struct &s) { return s.data.end(); } +std::vector<int>::const_reverse_iterator rbegin(const some_struct &s) { + return s.data.rbegin(); +} + +std::vector<int>::const_reverse_iterator rend(const some_struct &s) { + return s.data.rend(); +} + void swap(some_struct &lhs, some_struct &rhs) { // make swap visible as non-adl swap would even seem to // work with std::swap which defaults to moving @@ -573,6 +581,8 @@ TEST(STLExtrasTest, ADLTest) { EXPECT_EQ(*adl_begin(s), 1); EXPECT_EQ(*(adl_end(s) - 1), 5); + EXPECT_EQ(*adl_rbegin(s), 5); + EXPECT_EQ(*(adl_rend(s) - 1), 1); adl_swap(s, s2); EXPECT_EQ(s.swap_val, "lhs"); diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index 6c6897d83a256..b4d2270d70703 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -2016,6 +2016,27 @@ TEST_F(ComputeKnownFPClassTest, SqrtNszSignBit) { } } +TEST_F(ComputeKnownFPClassTest, Constants) { + parseAssembly("declare float @func()\n" + "define float @test() {\n" + " %A = call float @func()\n" + " ret float %A\n" + "}\n"); + + Type *F32 = Type::getFloatTy(Context); + Type *V4F32 = FixedVectorType::get(F32, 4); + + { + KnownFPClass ConstAggZero = computeKnownFPClass( + ConstantAggregateZero::get(V4F32), M->getDataLayout(), fcAllFlags, 0, + nullptr, nullptr, nullptr, nullptr); + + EXPECT_EQ(fcPosZero, ConstAggZero.KnownFPClasses); + ASSERT_TRUE(ConstAggZero.SignBit); + EXPECT_FALSE(*ConstAggZero.SignBit); + } +} + TEST_F(ValueTrackingTest, isNonZeroRecurrence) { parseAssembly(R"( define i1 @test(i8 %n, i8 %r) { diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 5c415cadcd686..db1c4a8951ad2 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -6975,16 +6975,16 @@ TEST_F(OpenMPIRBuilderTest, registerTargetGlobalVariable) { // Clauses for data_int_0 with To + Any clauses for the host std::vector<GlobalVariable *> OffloadEntries; - OffloadEntries.push_back(M->getNamedGlobal(".omp_offloading.entry_name")); + OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name")); OffloadEntries.push_back( - M->getNamedGlobal(".omp_offloading.entry.test_data_int_0")); + M->getNamedGlobal(".offloading.entry.test_data_int_0")); // Clauses for data_int_1 with Link + Any clauses for the host OffloadEntries.push_back( M->getNamedGlobal("test_data_int_1_decl_tgt_ref_ptr")); -
OffloadEntries.push_back(M->getNamedGlobal(".omp_offloading.entry_name.1")); - OffloadEntries.push_back(M->getNamedGlobal( - ".omp_offloading.entry.test_data_int_1_decl_tgt_ref_ptr")); + OffloadEntries.push_back(M->getNamedGlobal(".offloading.entry_name.1")); + OffloadEntries.push_back( + M->getNamedGlobal(".offloading.entry.test_data_int_1_decl_tgt_ref_ptr")); for (unsigned I = 0; I < OffloadEntries.size(); ++I) EXPECT_NE(OffloadEntries[I], nullptr); diff --git a/llvm/unittests/IR/VerifierTest.cpp b/llvm/unittests/IR/VerifierTest.cpp index 93023a671bc23..13c7b5d5327f0 100644 --- a/llvm/unittests/IR/VerifierTest.cpp +++ b/llvm/unittests/IR/VerifierTest.cpp @@ -368,5 +368,33 @@ TEST(VerifierTest, CrossFunctionRef) { Store->eraseFromParent(); } +TEST(VerifierTest, AtomicRMW) { + LLVMContext C; + Module M("M", C); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false); + Function *F = Function::Create(FTy, Function::ExternalLinkage, "foo", M); + BasicBlock *Entry = BasicBlock::Create(C, "entry", F); + Value *Ptr = PoisonValue::get(PointerType::get(C, 0)); + + Type *FPTy = Type::getFloatTy(C); + Constant *CF = ConstantFP::getZero(FPTy); + + // Invalid scalable type : atomicrmw () + Constant *CV = ConstantVector::getSplat(ElementCount::getScalable(2), CF); + new AtomicRMWInst(AtomicRMWInst::FAdd, Ptr, CV, Align(8), + AtomicOrdering::SequentiallyConsistent, SyncScope::System, + Entry); + ReturnInst::Create(C, Entry); + + std::string Error; + raw_string_ostream ErrorOS(Error); + EXPECT_TRUE(verifyFunction(*F, &ErrorOS)); + EXPECT_TRUE( + StringRef(ErrorOS.str()) + .starts_with("atomicrmw fadd operand must have floating-point or " + "fixed vector of floating-point type!")) + << ErrorOS.str(); +} + } // end anonymous namespace } // end namespace llvm diff --git a/llvm/unittests/Object/GOFFObjectFileTest.cpp b/llvm/unittests/Object/GOFFObjectFileTest.cpp index 734dac6b8507a..69f60d016a808 100644 --- a/llvm/unittests/Object/GOFFObjectFileTest.cpp +++ b/llvm/unittests/Object/GOFFObjectFileTest.cpp @@ -502,3 +502,100 @@ TEST(GOFFObjectFileTest, InvalidERSymbolType) { FailedWithMessage("ESD record 1 has unknown Executable type 0x03")); } } + +TEST(GOFFObjectFileTest, TXTConstruct) { + char GOFFData[GOFF::RecordLength * 6] = {}; + + // HDR record. + GOFFData[0] = 0x03; + GOFFData[1] = 0xF0; + GOFFData[50] = 0x01; + + // ESD record. + GOFFData[GOFF::RecordLength] = 0x03; + GOFFData[GOFF::RecordLength + 7] = 0x01; // ESDID. + GOFFData[GOFF::RecordLength + 71] = 0x05; // Size of symbol name. + GOFFData[GOFF::RecordLength + 72] = 0xa5; // Symbol name is v. + GOFFData[GOFF::RecordLength + 73] = 0x81; // Symbol name is a. + GOFFData[GOFF::RecordLength + 74] = 0x99; // Symbol name is r. + GOFFData[GOFF::RecordLength + 75] = 0x7b; // Symbol name is #. + GOFFData[GOFF::RecordLength + 76] = 0x83; // Symbol name is c. + + // ESD record. + GOFFData[GOFF::RecordLength * 2] = 0x03; + GOFFData[GOFF::RecordLength * 2 + 3] = 0x01; + GOFFData[GOFF::RecordLength * 2 + 7] = 0x02; // ESDID. + GOFFData[GOFF::RecordLength * 2 + 11] = 0x01; // Parent ESDID. + GOFFData[GOFF::RecordLength * 2 + 27] = 0x08; // Length. + GOFFData[GOFF::RecordLength * 2 + 40] = 0x01; // Name Space ID. + GOFFData[GOFF::RecordLength * 2 + 41] = 0x80; + GOFFData[GOFF::RecordLength * 2 + 60] = 0x04; // Size of symbol name. + GOFFData[GOFF::RecordLength * 2 + 61] = 0x04; // Size of symbol name. + GOFFData[GOFF::RecordLength * 2 + 63] = 0x0a; // Size of symbol name. 
+ GOFFData[GOFF::RecordLength * 2 + 66] = 0x03; // Size of symbol name. + GOFFData[GOFF::RecordLength * 2 + 71] = 0x08; // Size of symbol name. + GOFFData[GOFF::RecordLength * 2 + 72] = 0xc3; // Symbol name is c. + GOFFData[GOFF::RecordLength * 2 + 73] = 0x6d; // Symbol name is _. + GOFFData[GOFF::RecordLength * 2 + 74] = 0xc3; // Symbol name is c. + GOFFData[GOFF::RecordLength * 2 + 75] = 0xd6; // Symbol name is o. + GOFFData[GOFF::RecordLength * 2 + 76] = 0xc4; // Symbol name is D. + GOFFData[GOFF::RecordLength * 2 + 77] = 0xc5; // Symbol name is E. + GOFFData[GOFF::RecordLength * 2 + 78] = 0xf6; // Symbol name is 6. + GOFFData[GOFF::RecordLength * 2 + 79] = 0xf4; // Symbol name is 4. + + // ESD record. + GOFFData[GOFF::RecordLength * 3] = 0x03; + GOFFData[GOFF::RecordLength * 3 + 3] = 0x02; + GOFFData[GOFF::RecordLength * 3 + 7] = 0x03; // ESDID. + GOFFData[GOFF::RecordLength * 3 + 11] = 0x02; // Parent ESDID. + GOFFData[GOFF::RecordLength * 3 + 71] = 0x05; // Size of symbol name. + GOFFData[GOFF::RecordLength * 3 + 72] = 0xa5; // Symbol name is v. + GOFFData[GOFF::RecordLength * 3 + 73] = 0x81; // Symbol name is a. + GOFFData[GOFF::RecordLength * 3 + 74] = 0x99; // Symbol name is r. + GOFFData[GOFF::RecordLength * 3 + 75] = 0x7b; // Symbol name is #. + GOFFData[GOFF::RecordLength * 3 + 76] = 0x83; // Symbol name is c. + + // TXT record. + GOFFData[GOFF::RecordLength * 4] = 0x03; + GOFFData[GOFF::RecordLength * 4 + 1] = 0x10; + GOFFData[GOFF::RecordLength * 4 + 7] = 0x02; + GOFFData[GOFF::RecordLength * 4 + 23] = 0x08; // Data Length. + GOFFData[GOFF::RecordLength * 4 + 24] = 0x12; + GOFFData[GOFF::RecordLength * 4 + 25] = 0x34; + GOFFData[GOFF::RecordLength * 4 + 26] = 0x56; + GOFFData[GOFF::RecordLength * 4 + 27] = 0x78; + GOFFData[GOFF::RecordLength * 4 + 28] = 0x9a; + GOFFData[GOFF::RecordLength * 4 + 29] = 0xbc; + GOFFData[GOFF::RecordLength * 4 + 30] = 0xde; + GOFFData[GOFF::RecordLength * 4 + 31] = 0xf0; + + // END record. 
+ GOFFData[GOFF::RecordLength * 5] = 0x03; + GOFFData[GOFF::RecordLength * 5 + 1] = 0x40; + GOFFData[GOFF::RecordLength * 5 + 11] = 0x06; + + StringRef Data(GOFFData, GOFF::RecordLength * 6); + + Expected<std::unique_ptr<ObjectFile>> GOFFObjOrErr = + object::ObjectFile::createGOFFObjectFile( + MemoryBufferRef(Data, "dummyGOFF")); + + ASSERT_THAT_EXPECTED(GOFFObjOrErr, Succeeded()); + + GOFFObjectFile *GOFFObj = dyn_cast<GOFFObjectFile>((*GOFFObjOrErr).get()); + auto Symbols = GOFFObj->symbols(); + ASSERT_EQ(std::distance(Symbols.begin(), Symbols.end()), 1); + SymbolRef Symbol = *Symbols.begin(); + Expected<StringRef> SymbolNameOrErr = GOFFObj->getSymbolName(Symbol); + ASSERT_THAT_EXPECTED(SymbolNameOrErr, Succeeded()); + StringRef SymbolName = SymbolNameOrErr.get(); + EXPECT_EQ(SymbolName, "var#c"); + + auto Sections = GOFFObj->sections(); + ASSERT_EQ(std::distance(Sections.begin(), Sections.end()), 1); + SectionRef Section = *Sections.begin(); + Expected<StringRef> SectionContent = Section.getContents(); + ASSERT_THAT_EXPECTED(SectionContent, Succeeded()); + StringRef Contents = SectionContent.get(); + EXPECT_EQ(Contents, "\x12\x34\x56\x78\x9a\xbc\xde\xf0"); +} diff --git a/llvm/unittests/Option/OptionMarshallingTest.cpp b/llvm/unittests/Option/OptionMarshallingTest.cpp index 339d825c2016b..0464e27d5248a 100644 --- a/llvm/unittests/Option/OptionMarshallingTest.cpp +++ b/llvm/unittests/Option/OptionMarshallingTest.cpp @@ -19,9 +19,9 @@ struct OptionWithMarshallingInfo { static const OptionWithMarshallingInfo MarshallingTable[] = { #define OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, PREFIXED_NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ - VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES, SHOULD_PARSE, ALWAYS_EMIT, \ - KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, \ - DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ + VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ + SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, IMPLIED_CHECK, \ + IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, TABLE_INDEX) \ {PREFIXED_NAME, #KEYPATH, #IMPLIED_CHECK, #IMPLIED_VALUE}, #include "Opts.inc" #undef OPTION_WITH_MARSHALLING diff --git a/llvm/unittests/Support/RISCVISAInfoTest.cpp b/llvm/unittests/Support/RISCVISAInfoTest.cpp index a331e6a74ceb6..67012d2e6dc72 100644 --- a/llvm/unittests/Support/RISCVISAInfoTest.cpp +++ b/llvm/unittests/Support/RISCVISAInfoTest.cpp @@ -764,6 +764,7 @@ R"(All available -march extensions for RISC-V zihintntl 1.0 zihintpause 2.0 zihpm 2.0 + zimop 1.0 zmmul 1.0 za128rs 1.0 za64rs 1.0 @@ -779,6 +780,7 @@ R"(All available -march extensions for RISC-V zcd 1.0 zce 1.0 zcf 1.0 + zcmop 1.0 zcmp 1.0 zcmt 1.0 zba 1.0 @@ -890,13 +892,11 @@ R"(All available -march extensions for RISC-V Experimental extensions zicfilp 0.4 This is a long dummy description zicfiss 0.4 - zimop 0.1 zaamo 0.2 zabha 1.0 zalasr 0.1 zalrsc 0.2 zfbfmin 1.0 - zcmop 0.2 ztso 0.1 zvfbfmin 1.0 zvfbfwma 1.0 diff --git a/llvm/unittests/Support/raw_socket_stream_test.cpp b/llvm/unittests/Support/raw_socket_stream_test.cpp index 6903862e54031..a8536228666db 100644 --- a/llvm/unittests/Support/raw_socket_stream_test.cpp +++ b/llvm/unittests/Support/raw_socket_stream_test.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #ifdef _WIN32 #include "llvm/Support/Windows/WindowsSupport.h" @@ -32,10 +33,10 @@ TEST(raw_socket_streamTest, CLIENT_TO_SERVER_AND_SERVER_TO_CLIENT) { GTEST_SKIP(); SmallString<100> SocketPath; - llvm::sys::fs::createUniquePath("test_raw_socket_stream.sock", SocketPath, - true); +
llvm::sys::fs::createUniquePath("client_server_comms.sock", SocketPath, true); - char Bytes[8]; + // Make sure socket file does not exist. May still be there from the last test + std::remove(SocketPath.c_str()); Expected MaybeServerListener = ListeningSocket::createUnix(SocketPath); @@ -58,6 +59,7 @@ TEST(raw_socket_streamTest, CLIENT_TO_SERVER_AND_SERVER_TO_CLIENT) { Client << "01234567"; Client.flush(); + char Bytes[8]; ssize_t BytesRead = Server.read(Bytes, 8); std::string string(Bytes, 8); @@ -65,4 +67,67 @@ TEST(raw_socket_streamTest, CLIENT_TO_SERVER_AND_SERVER_TO_CLIENT) { ASSERT_EQ(8, BytesRead); ASSERT_EQ("01234567", string); } -} // namespace \ No newline at end of file + +TEST(raw_socket_streamTest, TIMEOUT_PROVIDED) { + if (!hasUnixSocketSupport()) + GTEST_SKIP(); + + SmallString<100> SocketPath; + llvm::sys::fs::createUniquePath("timout_provided.sock", SocketPath, true); + + // Make sure socket file does not exist. May still be there from the last test + std::remove(SocketPath.c_str()); + + Expected MaybeServerListener = + ListeningSocket::createUnix(SocketPath); + ASSERT_THAT_EXPECTED(MaybeServerListener, llvm::Succeeded()); + ListeningSocket ServerListener = std::move(*MaybeServerListener); + + std::chrono::milliseconds Timeout = std::chrono::milliseconds(100); + Expected> MaybeServer = + ServerListener.accept(Timeout); + + ASSERT_THAT_EXPECTED(MaybeServer, Failed()); + llvm::Error Err = MaybeServer.takeError(); + llvm::handleAllErrors(std::move(Err), [&](const llvm::StringError &SE) { + std::error_code EC = SE.convertToErrorCode(); + ASSERT_EQ(EC, std::errc::timed_out); + }); +} + +TEST(raw_socket_streamTest, FILE_DESCRIPTOR_CLOSED) { + if (!hasUnixSocketSupport()) + GTEST_SKIP(); + + SmallString<100> SocketPath; + llvm::sys::fs::createUniquePath("fd_closed.sock", SocketPath, true); + + // Make sure socket file does not exist. May still be there from the last test + std::remove(SocketPath.c_str()); + + Expected MaybeServerListener = + ListeningSocket::createUnix(SocketPath); + ASSERT_THAT_EXPECTED(MaybeServerListener, llvm::Succeeded()); + ListeningSocket ServerListener = std::move(*MaybeServerListener); + + // Create a separate thread to close the socket after a delay. 
Simulates a + // signal handler calling ServerListener::shutdown + std::thread CloseThread([&]() { + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + ServerListener.shutdown(); + }); + + Expected> MaybeServer = + ServerListener.accept(); + + // Wait for the CloseThread to finish + CloseThread.join(); + + ASSERT_THAT_EXPECTED(MaybeServer, Failed()); + llvm::Error Err = MaybeServer.takeError(); + llvm::handleAllErrors(std::move(Err), [&](const llvm::StringError &SE) { + std::error_code EC = SE.convertToErrorCode(); + ASSERT_EQ(EC, std::errc::operation_canceled); + }); +} +} // namespace diff --git a/llvm/unittests/TextAPI/TextStubV5Tests.cpp b/llvm/unittests/TextAPI/TextStubV5Tests.cpp index c77d13ef8f230..62fdd79ae4970 100644 --- a/llvm/unittests/TextAPI/TextStubV5Tests.cpp +++ b/llvm/unittests/TextAPI/TextStubV5Tests.cpp @@ -722,7 +722,7 @@ TEST(TBDv5, WriteFile) { File.setInstallName("@rpath/S/L/F/Foo.framework/Foo"); File.setCurrentVersion(PackedVersion(1, 2, 0)); File.setCompatibilityVersion(PackedVersion(1, 1, 0)); - File.addRPath(AllTargets[0], "@executable_path/.../Frameworks"); + File.addRPath("@executable_path/.../Frameworks", AllTargets[0]); for (const auto &Targ : AllTargets) { File.addParentUmbrella(Targ, "System"); @@ -897,7 +897,7 @@ TEST(TBDv5, WriteMultipleDocuments) { NestedFile.setTwoLevelNamespace(); NestedFile.setApplicationExtensionSafe(false); NestedFile.setCurrentVersion(PackedVersion(2, 1, 1)); - NestedFile.addRPath(AllTargets[0], "@executable_path/.../Frameworks"); + NestedFile.addRPath("@executable_path/.../Frameworks", AllTargets[0]); for (const auto &Targ : AllTargets) NestedFile.addReexportedLibrary("@rpath/libfoo.dylib", Targ); NestedFile.addSymbol(EncodeKind::GlobalSymbol, "_funcFoo", AllTargets, diff --git a/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp b/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp index 8554d1a33cade..dbc1e21527855 100644 --- a/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp @@ -673,6 +673,11 @@ TEST(CodeMoverUtils, IsSafeToMoveTest4) { // Can move as %add2 and %sub2 are control flow equivalent, // although %add2 does not strictly dominate %sub2. 
EXPECT_TRUE(isSafeToMoveBefore(*SubInst2, *AddInst2, DT, &PDT, &DI)); + + BasicBlock *BB0 = getBasicBlockByName(F, "if.then.first"); + BasicBlock *BB1 = getBasicBlockByName(F, "if.then.second"); + EXPECT_TRUE( + isSafeToMoveBefore(*BB0, *BB1->getTerminator(), DT, &PDT, &DI)); }); } diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 02e7ca341fe22..73687846e281a 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -972,11 +972,13 @@ TEST(VPRecipeTest, CastVPBlendRecipeToVPUser) { IntegerType *Int32 = IntegerType::get(C, 32); auto *Phi = PHINode::Create(Int32, 1); - VPValue Op1; - VPValue Op2; + VPValue I1; + VPValue I2; + VPValue M2; SmallVector<VPValue *, 4> Args; - Args.push_back(&Op1); - Args.push_back(&Op2); + Args.push_back(&I1); + Args.push_back(&I2); + Args.push_back(&M2); VPBlendRecipe Recipe(Phi, Args); EXPECT_TRUE(isa<VPUser>(&Recipe)); VPRecipeBase *BaseR = &Recipe; @@ -1119,7 +1121,7 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { VPValue VecOp; VPValue CondOp; VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, &ChainOp, &CondOp, - &VecOp); + &VecOp, false); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1287,7 +1289,7 @@ TEST(VPRecipeTest, CastVPReductionRecipeToVPUser) { VPValue VecOp; VPValue CondOp; VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, &ChainOp, &CondOp, - &VecOp, false); EXPECT_TRUE(isa<VPUser>(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa<VPUser>(BaseR)); diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp index 076d0427a8597..7a5d2be3ae95b 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp @@ -3858,8 +3858,10 @@ void CodeGenDAGPatterns::parseInstructionPattern(CodeGenInstruction &CGI, for (unsigned i = NumResults, e = CGI.Operands.size(); i != e; ++i) { CGIOperandList::OperandInfo &Op = CGI.Operands[i]; const std::string &OpName = Op.Name; - if (OpName.empty()) + if (OpName.empty()) { I.error("Operand #" + Twine(i) + " in operands list has no name!"); + continue; + } if (!InstInputs.count(OpName)) { // If this is an operand with a DefaultOps set filled in, we can ignore @@ -3872,16 +3874,19 @@ void CodeGenDAGPatterns::parseInstructionPattern(CodeGenInstruction &CGI, } I.error("Operand $" + OpName + " does not appear in the instruction pattern"); + continue; } TreePatternNodePtr InVal = InstInputs[OpName]; InstInputs.erase(OpName); // It occurred, remove from map. if (InVal->isLeaf() && isa<DefInit>(InVal->getLeafValue())) { Record *InRec = cast<DefInit>(InVal->getLeafValue())->getDef(); - if (!checkOperandClass(Op, InRec)) + if (!checkOperandClass(Op, InRec)) { I.error("Operand $" + OpName + "'s register class disagrees" " between the operand and pattern"); + continue; + } } Operands.push_back(Op.Rec); diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp index 6334af53f88f6..4fb1ca18ac11d 100644 --- a/llvm/utils/TableGen/OptParserEmitter.cpp +++ b/llvm/utils/TableGen/OptParserEmitter.cpp @@ -191,6 +191,62 @@ static MarshallingInfo createMarshallingInfo(const Record &R) { return Ret; } +static void EmitHelpTextsForVariants( + raw_ostream &OS, std::vector<std::pair<std::vector<std::string>, StringRef>> + HelpTextsForVariants) { + // OptTable must be constexpr so it uses std::arrays with these capacities.
+ const unsigned MaxVisibilityPerHelp = 2; + const unsigned MaxVisibilityHelp = 1; + + assert(HelpTextsForVariants.size() <= MaxVisibilityHelp && + "Too many help text variants to store in " + "OptTable::HelpTextsForVariants"); + + // This function must initialise any unused elements of those arrays. + for (auto [Visibilities, _] : HelpTextsForVariants) + while (Visibilities.size() < MaxVisibilityPerHelp) + Visibilities.push_back("0"); + + while (HelpTextsForVariants.size() < MaxVisibilityHelp) + HelpTextsForVariants.push_back( + {std::vector(MaxVisibilityPerHelp, "0"), ""}); + + OS << ", (std::array, const char*>, " << MaxVisibilityHelp << ">{{ "; + + auto VisibilityHelpEnd = HelpTextsForVariants.cend(); + for (auto VisibilityHelp = HelpTextsForVariants.cbegin(); + VisibilityHelp != VisibilityHelpEnd; ++VisibilityHelp) { + auto [Visibilities, Help] = *VisibilityHelp; + + assert(Visibilities.size() <= MaxVisibilityPerHelp && + "Too many visibilities to store in an " + "OptTable::HelpTextsForVariants entry"); + OS << "std::make_pair(std::array{{"; + + auto VisibilityEnd = Visibilities.cend(); + for (auto Visibility = Visibilities.cbegin(); Visibility != VisibilityEnd; + ++Visibility) { + OS << *Visibility; + if (std::next(Visibility) != VisibilityEnd) + OS << ", "; + } + + OS << "}}, "; + + if (Help.size()) + write_cstring(OS, Help); + else + OS << "nullptr"; + OS << ")"; + + if (std::next(VisibilityHelp) != VisibilityHelpEnd) + OS << ", "; + } + OS << " }})"; +} + /// OptParserEmitter - This tablegen backend takes an input .td file /// describing a list of options and emits a data structure for parsing and /// working with those options when given an input command line. @@ -312,6 +368,9 @@ static void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { } else OS << ", nullptr"; + // Not using Visibility specific text for group help. + EmitHelpTextsForVariants(OS, {}); + // The option meta-variable name (unused). OS << ", nullptr"; @@ -410,6 +469,22 @@ static void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { } else OS << ", nullptr"; + std::vector, StringRef>> + HelpTextsForVariants; + for (Record *VisibilityHelp : + R.getValueAsListOfDefs("HelpTextsForVariants")) { + ArrayRef Visibilities = + VisibilityHelp->getValueAsListInit("Visibilities")->getValues(); + + std::vector VisibilityNames; + for (Init *Visibility : Visibilities) + VisibilityNames.push_back(Visibility->getAsUnquotedString()); + + HelpTextsForVariants.push_back(std::make_pair( + VisibilityNames, VisibilityHelp->getValueAsString("Text"))); + } + EmitHelpTextsForVariants(OS, HelpTextsForVariants); + // The option meta-variable name. 
OS << ", "; if (!isa(R.getValueInit("MetaVarName"))) diff --git a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def index 77cf65be68425..665a394f57a6a 100644 --- a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def +++ b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def @@ -197,12 +197,12 @@ ENTRY(VPUNPCKLQDQZ128rm, VPUNPCKLQDQrm) ENTRY(VPUNPCKLQDQZ128rr, VPUNPCKLQDQrr) ENTRY(VPXORQZ128rm, VPXORrm) ENTRY(VPXORQZ128rr, VPXORrr) -ENTRY(VRNDSCALEPDZ128rmi, VROUNDPDm) -ENTRY(VRNDSCALEPDZ128rri, VROUNDPDr) -ENTRY(VRNDSCALESDZm, VROUNDSDm) -ENTRY(VRNDSCALESDZm_Int, VROUNDSDm_Int) -ENTRY(VRNDSCALESDZr, VROUNDSDr) -ENTRY(VRNDSCALESDZr_Int, VROUNDSDr_Int) +ENTRY(VRNDSCALEPDZ128rmi, VROUNDPDmi) +ENTRY(VRNDSCALEPDZ128rri, VROUNDPDri) +ENTRY(VRNDSCALESDZm, VROUNDSDmi) +ENTRY(VRNDSCALESDZm_Int, VROUNDSDmi_Int) +ENTRY(VRNDSCALESDZr, VROUNDSDri) +ENTRY(VRNDSCALESDZr_Int, VROUNDSDri_Int) ENTRY(VSHUFPDZ128rmi, VSHUFPDrmi) ENTRY(VSHUFPDZ128rri, VSHUFPDrri) ENTRY(VSQRTPDZ128m, VSQRTPDm) @@ -306,8 +306,8 @@ ENTRY(VPUNPCKLQDQZ256rm, VPUNPCKLQDQYrm) ENTRY(VPUNPCKLQDQZ256rr, VPUNPCKLQDQYrr) ENTRY(VPXORQZ256rm, VPXORYrm) ENTRY(VPXORQZ256rr, VPXORYrr) -ENTRY(VRNDSCALEPDZ256rmi, VROUNDPDYm) -ENTRY(VRNDSCALEPDZ256rri, VROUNDPDYr) +ENTRY(VRNDSCALEPDZ256rmi, VROUNDPDYmi) +ENTRY(VRNDSCALEPDZ256rri, VROUNDPDYri) ENTRY(VSHUFPDZ256rmi, VSHUFPDYrmi) ENTRY(VSHUFPDZ256rri, VSHUFPDYrri) ENTRY(VSQRTPDZ256m, VSQRTPDYm) diff --git a/llvm/utils/gn/secondary/bolt/lib/Profile/BUILD.gn b/llvm/utils/gn/secondary/bolt/lib/Profile/BUILD.gn index 5fead24afb451..dc85fb0641211 100644 --- a/llvm/utils/gn/secondary/bolt/lib/Profile/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/lib/Profile/BUILD.gn @@ -12,7 +12,6 @@ static_library("Profile") { "DataAggregator.cpp", "DataReader.cpp", "Heatmap.cpp", - "ProfileReaderBase.cpp", "StaleProfileMatching.cpp", "YAMLProfileReader.cpp", "YAMLProfileWriter.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn index 33fdecfd948e1..59dc38c8c4d8a 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn @@ -25,6 +25,7 @@ static_library("readability") { "DeleteNullPointerCheck.cpp", "DuplicateIncludeCheck.cpp", "ElseAfterReturnCheck.cpp", + "EnumInitialValueCheck.cpp", "FunctionCognitiveComplexityCheck.cpp", "FunctionSizeCheck.cpp", "IdentifierLengthCheck.cpp", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn index da3a37d461530..adcebcab7ef7d 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/utils/BUILD.gn @@ -13,6 +13,7 @@ static_library("utils") { sources = [ "ASTUtils.cpp", "Aliasing.cpp", + "BracesAroundStatement.cpp", "DeclRefExprUtils.cpp", "DesignatedInitializers.cpp", "ExceptionAnalyzer.cpp", diff --git a/llvm/utils/gn/secondary/clang/include/clang/Basic/BUILD.gn b/llvm/utils/gn/secondary/clang/include/clang/Basic/BUILD.gn index b18b109dcff56..39ff9d0f18fa1 100644 --- a/llvm/utils/gn/secondary/clang/include/clang/Basic/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/include/clang/Basic/BUILD.gn @@ -68,16 +68,6 @@ clang_tablegen("AttrList") { td_file = "Attr.td" } -clang_tablegen("AttrLeftSideCanPrintList") { - args = [ 
"-gen-clang-attr-can-print-left-list" ] - td_file = "Attr.td" -} - -clang_tablegen("AttrLeftSideMustPrintList") { - args = [ "-gen-clang-attr-must-print-left-list" ] - td_file = "Attr.td" -} - clang_tablegen("AttrSubMatchRulesList") { args = [ "-gen-clang-attr-subject-match-rule-list" ] td_file = "Attr.td" diff --git a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn index a405878fecb2c..0cf99256c9bdb 100644 --- a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn @@ -127,6 +127,7 @@ static_library("AST") { "ODRDiagsEmitter.cpp", "ODRHash.cpp", "OSLog.cpp", + "OpenACCClause.cpp", "OpenMPClause.cpp", "ParentMap.cpp", "ParentMapContext.cpp", diff --git a/llvm/utils/gn/secondary/clang/lib/Basic/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Basic/BUILD.gn index bbe9373016204..05504ddb79f87 100644 --- a/llvm/utils/gn/secondary/clang/lib/Basic/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Basic/BUILD.gn @@ -21,8 +21,6 @@ static_library("Basic") { configs += [ "//llvm/utils/gn/build:clang_code" ] public_deps = [ # public_dep because public header Version.h includes generated Version.inc. - "//clang/include/clang/Basic:AttrLeftSideCanPrintList", - "//clang/include/clang/Basic:AttrLeftSideMustPrintList", "//clang/include/clang/Basic:AttrList", "//clang/include/clang/Basic:AttrSubMatchRulesList", "//clang/include/clang/Basic:Builtins", diff --git a/llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn index 2f43a915b2a66..9f7749d6847c2 100644 --- a/llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/InstallAPI/BUILD.gn @@ -8,6 +8,7 @@ static_library("InstallAPI") { "//llvm/lib/TextAPI", ] sources = [ + "DiagnosticBuilderWrappers.cpp", "DylibVerifier.cpp", "FileList.cpp", "Frontend.cpp", diff --git a/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn index b03032300a764..e9ba5fb132b02 100644 --- a/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Sema/BUILD.gn @@ -54,6 +54,7 @@ static_library("Sema") { "SemaAccess.cpp", "SemaAttr.cpp", "SemaAvailability.cpp", + "SemaBase.cpp", "SemaCUDA.cpp", "SemaCXXScopeSpec.cpp", "SemaCast.cpp", diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 8a2ab18bf953a..66e8084d5808a 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -352,6 +352,7 @@ if (current_toolchain == default_toolchain) { "__chrono/formatter.h", "__chrono/hh_mm_ss.h", "__chrono/high_resolution_clock.h", + "__chrono/leap_second.h", "__chrono/literals.h", "__chrono/month.h", "__chrono/month_weekday.h", @@ -778,12 +779,12 @@ if (current_toolchain == default_toolchain) { "__tree", "__tuple/find_index.h", "__tuple/make_tuple_types.h", - "__tuple/pair_like.h", "__tuple/sfinae_helpers.h", "__tuple/tuple_element.h", "__tuple/tuple_indices.h", "__tuple/tuple_like.h", "__tuple/tuple_like_ext.h", + "__tuple/tuple_like_no_subrange.h", "__tuple/tuple_size.h", "__tuple/tuple_types.h", "__type_traits/add_const.h", @@ -806,6 +807,7 @@ if (current_toolchain == default_toolchain) { "__type_traits/datasizeof.h", "__type_traits/decay.h", "__type_traits/dependent_type.h", + "__type_traits/desugars_to.h", "__type_traits/disjunction.h", "__type_traits/enable_if.h", "__type_traits/extent.h", @@ -890,7 +892,6 @@ if 
(current_toolchain == default_toolchain) { "__type_traits/nat.h", "__type_traits/negation.h", "__type_traits/noexcept_move_assign_container.h", - "__type_traits/operation_traits.h", "__type_traits/promote.h", "__type_traits/rank.h", "__type_traits/remove_all_extents.h", diff --git a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn index 55309720725b4..90f6f5d0f1458 100644 --- a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn @@ -315,6 +315,7 @@ if (libcxx_enable_experimental) { sources = [ "experimental/keep.cpp" ] if (libcxx_enable_filesystem && libcxx_enable_time_zone_database) { sources += [ + "include/tzdb/leap_second_private.h", "include/tzdb/time_zone_link_private.h", "include/tzdb/time_zone_private.h", "include/tzdb/types_private.h", diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index e5fb529b455fc..80a91507fcc69 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -122,6 +122,7 @@ write_cmake_config("config") { "HOST_LINK_VERSION=", "LIBPFM_HAS_FIELD_CYCLES=", "LLVM_TARGET_TRIPLE_ENV=", + "LLVM_VERSION_PRINTER_SHOW_BUILD_CONFIG=1", "LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO=1", "LLVM_WINDOWS_PREFER_FORWARD_SLASH=", "PACKAGE_BUGREPORT=https://github.com/llvm/llvm-project/issues/", diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 27f7e2b395ec0..4f03e01c39c1e 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -65,8 +65,6 @@ static_library("Analysis") { "InstructionPrecedenceTracking.cpp", "InstructionSimplify.cpp", "InteractiveModelRunner.cpp", - "Interval.cpp", - "IntervalPartition.cpp", "LazyBlockFrequencyInfo.cpp", "LazyBranchProbabilityInfo.cpp", "LazyCallGraph.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn index 15766d4a7e346..fba81184d9920 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn @@ -17,6 +17,7 @@ static_library("GlobalISel") { "CallLowering.cpp", "Combiner.cpp", "CombinerHelper.cpp", + "CombinerHelperVectorOps.cpp", "GIMatchTableExecutor.cpp", "GISelChangeObserver.cpp", "GISelKnownBits.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/TextAPI/BinaryReader/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/TextAPI/BinaryReader/BUILD.gn index 3eeb32aae4e3c..24130bc2e08fc 100644 --- a/llvm/utils/gn/secondary/llvm/lib/TextAPI/BinaryReader/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/TextAPI/BinaryReader/BUILD.gn @@ -1,6 +1,7 @@ static_library("BinaryReader") { output_name = "LLVMTextAPIBinaryReader" deps = [ + "//llvm/lib/DebugInfo/DWARF", "//llvm/lib/Object", "//llvm/lib/Support", "//llvm/lib/TargetParser", diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn index 131308db2aa55..d79b5efe69eb0 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Instrumentation/BUILD.gn @@ -23,13 +23,13 @@ static_library("Instrumentation") { "InstrProfiling.cpp", "Instrumentation.cpp", "KCFI.cpp", + "LowerAllowCheckPass.cpp", 
"MemProfiler.cpp", "MemorySanitizer.cpp", "PGOForceFunctionAttrs.cpp", "PGOInstrumentation.cpp", "PGOMemOPSizeOpt.cpp", "PoisonChecking.cpp", - "RemoveTrapsPass.cpp", "SanitizerBinaryMetadata.cpp", "SanitizerCoverage.cpp", "ThreadSanitizer.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn index 15c198c73f941..edf06df4f6091 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn @@ -52,6 +52,7 @@ unittest("ADTTests") { "ImmutableMapTest.cpp", "ImmutableSetTest.cpp", "IntEqClassesTest.cpp", + "Interleave.cpp", "IntervalMapTest.cpp", "IntervalTreeTest.cpp", "IntrusiveRefCntPtrTest.cpp", diff --git a/mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake b/mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake index fff0424593fb9..7bc13287cd655 100644 --- a/mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake +++ b/mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake @@ -41,19 +41,18 @@ function(check_hwcap hwcap_spec output) string(REPLACE "" ${hwcap_vec} hwcap_test_src "${hwcap_test_src}") string(REPLACE "" ${hwcap_spec} hwcap_test_src "${hwcap_test_src}") - set(hwcap_test_file ${CMAKE_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/hwcap_check.c) + set(hwcap_test_file ${CMAKE_BINARY_DIR}/temp/hwcap_check.c) file(WRITE ${hwcap_test_file} "${hwcap_test_src}") # Compile _and_ run try_run( test_run_result test_compile_result - ${CMAKE_BINARY_DIR} - ${hwcap_test_file} + "${CMAKE_BINARY_DIR}" + "${hwcap_test_file}" ) # Compilation will fail if hwcap_spec is not defined - this usually means # that your Linux kernel is too old. if(${test_compile_result} AND (DEFINED test_run_result)) - message(${test_run_result}) message(STATUS "Checking whether ${hwcap_spec} is supported by the host system: ${test_run_result}") set(${output} ${test_run_result} PARENT_SCOPE) else() diff --git a/mlir/docs/DefiningDialects/AttributesAndTypes.md b/mlir/docs/DefiningDialects/AttributesAndTypes.md index 950acb842022d..d6941c0b681f8 100644 --- a/mlir/docs/DefiningDialects/AttributesAndTypes.md +++ b/mlir/docs/DefiningDialects/AttributesAndTypes.md @@ -14,8 +14,8 @@ from the [MLIR LangRef](../LangRef.md). Attributes are the mechanism for specifying constant data on operations in places where a variable is never allowed - e.g. the comparison predicate of a -[`arith.cmpi` operation](../Dialects/ArithOps.md#arithcmpi-mlirarithcmpiop), or -the underlying value of a [`arith.constant` operation](../Dialects/ArithOps.md#arithconstant-mlirarithconstantop). +[`arith.cmpi` operation](../Dialects/ArithOps.md/#arithcmpi-arithcmpiop), or +the underlying value of a [`arith.constant` operation](../Dialects/ArithOps.md/#arithconstant-arithconstantop). Each operation has an attribute dictionary, which associates a set of attribute names to attribute values. @@ -24,7 +24,7 @@ names to attribute values. Every SSA value, such as operation results or block arguments, in MLIR has a type defined by the type system. MLIR has an open type system with no fixed list of types, and there are no restrictions on the abstractions they represent. 
For example, take -the following [Arithmetic AddI operation](../Dialects/ArithOps.md#arithaddi-mlirarithaddiop): +the following [Arithmetic AddI operation](../Dialects/ArithOps.md/#arithaddi-arithaddiop): ```mlir %result = arith.addi %lhs, %rhs : i64 @@ -32,7 +32,7 @@ the following [Arithmetic AddI operation](../Dialects/ArithOps.md#arithaddi-mlir It takes two input SSA values (`%lhs` and `%rhs`), and returns a single SSA value (`%result`). The inputs and outputs of this operation are of type `i64`, -which is an instance of the [Builtin IntegerType](../Dialects/Builtin.md#integertype). +which is an instance of the [Builtin IntegerType](../Dialects/Builtin.md/#integertype). ## Attributes and Types diff --git a/mlir/docs/DefiningDialects/_index.md b/mlir/docs/DefiningDialects/_index.md index 5a3993508fce8..83070b616d0ef 100644 --- a/mlir/docs/DefiningDialects/_index.md +++ b/mlir/docs/DefiningDialects/_index.md @@ -10,8 +10,8 @@ Before diving into how to define these constructs, below is a quick refresher from the [MLIR LangRef](../LangRef.md). Dialects are the mechanism by which to engage with and extend the MLIR -ecosystem. They allow for defining new [attributes](../LangRef.md#attributes), -[operations](../LangRef.md#operations), and [types](../LangRef.md#type-system). +ecosystem. They allow for defining new [attributes](../LangRef.md/#attributes), +[operations](../LangRef.md/#operations), and [types](../LangRef.md/#type-system). Dialects are used to model a variety of different abstractions; from traditional [arithmetic](../Dialects/ArithOps.md) to [pattern rewrites](../Dialects/PDLOps.md); and is one of the most fundamental @@ -196,7 +196,7 @@ only the declaration of the destructor is generated for the Dialect class. ### Discardable Attribute Verification -As described by the [MLIR Language Reference](../LangRef.md#attributes), +As described by the [MLIR Language Reference](../LangRef.md/#attributes), *discardable attribute* are a type of attribute that has its semantics defined by the dialect whose name prefixes that of the attribute. For example, if an operation has an attribute named `gpu.contained_module`, the `gpu` dialect @@ -270,7 +270,7 @@ void *MyDialect::getRegisteredInterfaceForOp(TypeID typeID, StringAttr opName); ``` For a more detail description of the expected usages of this hook, view the detailed -[interface documentation](../Interfaces.md#dialect-fallback-for-opinterface). +[interface documentation](../Interfaces.md/#dialect-fallback-for-opinterface). ### Default Attribute/Type Parsers and Printers diff --git a/mlir/docs/Dialects/Linalg/_index.md b/mlir/docs/Dialects/Linalg/_index.md index c8fa0374334ec..fbd1a451dc094 100644 --- a/mlir/docs/Dialects/Linalg/_index.md +++ b/mlir/docs/Dialects/Linalg/_index.md @@ -279,7 +279,7 @@ loops but previous experience shows that the abstraction generalizes. A `linalg.generic` op has a compute payload that is fully generic thanks to the use of -[Regions](https://github.com/llvm/llvm-project/blob/58265ad42a90ae8905be6a447cb42e53529a54a0/mlir/docs/LangRef.md#regions). +[Regions](https://github.com/llvm/llvm-project/blob/58265ad42a90ae8905be6a447cb42e53529a54a0/mlir/docs/LangRef.md/#regions). The region takes as arguments the scalar elemental types of the tensor or buffer operands of the `linalg.generic`. 
For flexibility and ability to match library diff --git a/mlir/docs/Dialects/Transform.md b/mlir/docs/Dialects/Transform.md index 768de9561b951..02f4a766ff429 100644 --- a/mlir/docs/Dialects/Transform.md +++ b/mlir/docs/Dialects/Transform.md @@ -84,11 +84,11 @@ try to be explicitly typed when possible. The transform IR values have transform IR types, which should implement exactly one of: - * [TransformHandleTypeInterface](Transform.md#transformhandletypeinterface-transformhandletypeinterface), + * [TransformHandleTypeInterface](#transformhandletypeinterface-transformhandletypeinterface), - * [TransformValueHandleTypeInterface](Transform.md#transformvaluehandletypeinterface-transformvaluehandletypeinterface), + * [TransformValueHandleTypeInterface](#transformvaluehandletypeinterface-transformvaluehandletypeinterface), - * [TransformParamTypeInterface](Transform.md##transformparamtypeinterface-transformparamtypeinterface). + * [TransformParamTypeInterface](#transformparamtypeinterface-transformparamtypeinterface). The goal of these type interfaces, beyond providing a common base for accepted types, is to verify the properties of the associated objects. For example, a diff --git a/mlir/docs/LangRef.md b/mlir/docs/LangRef.md index 8efc88815b887..21cfdc78a6a43 100644 --- a/mlir/docs/LangRef.md +++ b/mlir/docs/LangRef.md @@ -761,7 +761,7 @@ attribute-value ::= attribute-alias | dialect-attribute | builtin-attribute Attributes are the mechanism for specifying constant data on operations in places where a variable is never allowed - e.g. the comparison predicate of a -[`cmpi` operation](Dialects/ArithOps.md#arithcmpi-mlirarithcmpiop). Each operation has an +[`cmpi` operation](Dialects/ArithOps.md/#arithcmpi-arithcmpiop). Each operation has an attribute dictionary, which associates a set of attribute names to attribute values. MLIR's builtin dialect provides a rich set of [builtin attribute values](#builtin-attribute-values) out of the box (such as diff --git a/mlir/docs/Rationale/SideEffectsAndSpeculation.md b/mlir/docs/Rationale/SideEffectsAndSpeculation.md index 0cd5ea59822a0..8b08b757531be 100644 --- a/mlir/docs/Rationale/SideEffectsAndSpeculation.md +++ b/mlir/docs/Rationale/SideEffectsAndSpeculation.md @@ -4,8 +4,8 @@ This document outlines how MLIR models side effects and how speculation works in MLIR. This rationale only applies to operations used in -[CFG regions](../LangRef.md#control-flow-and-ssacfg-regions). Side effect -modeling in [graph regions](../LangRef.md#graph-regions) is TBD. +[CFG regions](../LangRef.md/#control-flow-and-ssacfg-regions). Side effect +modeling in [graph regions](../LangRef.md/#graph-regions) is TBD. [TOC] diff --git a/mlir/docs/TargetLLVMIR.md b/mlir/docs/TargetLLVMIR.md index df281f65768e8..96a4589eb80e7 100644 --- a/mlir/docs/TargetLLVMIR.md +++ b/mlir/docs/TargetLLVMIR.md @@ -41,7 +41,7 @@ they use or produce `memref`-typed values. The process relies on the [Dialect Conversion](DialectConversion.md) infrastructure and, in particular, on the -[materialization](DialectConversion.md#type-conversion) hooks of `TypeConverter` +[materialization](DialectConversion.md/#type-conversion) hooks of `TypeConverter` to support progressive lowering by injecting `unrealized_conversion_cast` operations between converted and unconverted operations. After multiple partial conversions to the LLVM dialect are performed, the cast operations that became @@ -58,7 +58,7 @@ same type converter. 
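The progressive-lowering setup this paragraph describes can be sketched as follows. This is a minimal sketch with assumed pass boilerplate; the populate call is the conventional func-to-LLVM one and is not part of this patch.

```c++
// Partial conversion to the LLVM dialect. The TypeConverter's
// materialization hooks insert unrealized_conversion_cast ops at the
// boundaries between converted and not-yet-converted operations.
LLVMTypeConverter typeConverter(&getContext());
LLVMConversionTarget target(getContext());
RewritePatternSet patterns(&getContext());
populateFuncToLLVMConversionPatterns(typeConverter, patterns);
if (failed(applyPartialConversion(getOperation(), target,
                                  std::move(patterns))))
  signalPassFailure();
```

After several such partial conversions, the casts that became dead are folded away, as the surrounding text explains.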
#### LLVM Dialect-compatible Types -The types [compatible](Dialects/LLVM.md#built-in-type-compatibility) with the +The types [compatible](Dialects/LLVM.md/#built-in-type-compatibility) with the LLVM dialect are kept as is. #### Complex Type @@ -188,7 +188,7 @@ Function types are converted to LLVM dialect function types as follows: arguments to allow for specifying metadata such as aliasing information on individual pointers; - the conversion of `memref`-typed arguments is subject to - [calling conventions](TargetLLVMIR.md#calling-conventions). + [calling conventions](#calling-conventions). - if a function type has boolean attribute `func.varargs` being set, the converted LLVM function will be variadic. @@ -364,7 +364,7 @@ llvm.func @bar() { The default calling convention converts `memref`-typed function arguments to LLVM dialect literal structs -[defined above](TargetLLVMIR.md#ranked-memref-types) before unbundling them into +[defined above](#ranked-memref-types) before unbundling them into individual scalar arguments. Examples: diff --git a/mlir/docs/Tutorials/Toy/Ch-4.md b/mlir/docs/Tutorials/Toy/Ch-4.md index ae10dc4a0113d..b753ee7a5332f 100644 --- a/mlir/docs/Tutorials/Toy/Ch-4.md +++ b/mlir/docs/Tutorials/Toy/Ch-4.md @@ -383,7 +383,7 @@ void MulOp::inferShapes() { getResult().setType(getLhs().getType()); } At this point, each of the necessary Toy operations provide a mechanism by which to infer their output shapes. The ShapeInferencePass will operate on functions: it will run on each function in isolation. MLIR also supports general -[OperationPasses](../../PassManagement.md#operation-pass) that run on any +[OperationPasses](../../PassManagement.md/#operation-pass) that run on any isolated operation, but here our module only contains functions, so there is no need to generalize to all operations. diff --git a/mlir/docs/Tutorials/transform/Ch4.md b/mlir/docs/Tutorials/transform/Ch4.md index c3159eb991d1e..81263e10b0984 100644 --- a/mlir/docs/Tutorials/transform/Ch4.md +++ b/mlir/docs/Tutorials/transform/Ch4.md @@ -27,7 +27,7 @@ directly within the transform dialect. ## Simple match Let us reconsider the “fully connected layer” example from [Chapter -1](Ch1.md#chaining-transformations-with-handles), reproduced below for +1](Ch1.md/#chaining-transformations-with-handles), reproduced below for convenience. diff --git a/mlir/include/mlir-c/Dialect/LLVM.h b/mlir/include/mlir-c/Dialect/LLVM.h index 4f1d646f5bc83..bd9b7dd26f5e9 100644 --- a/mlir/include/mlir-c/Dialect/LLVM.h +++ b/mlir/include/mlir-c/Dialect/LLVM.h @@ -257,11 +257,19 @@ enum MlirLLVMDIEmissionKind { }; typedef enum MlirLLVMDIEmissionKind MlirLLVMDIEmissionKind; +enum MlirLLVMDINameTableKind { + MlirLLVMDINameTableKindDefault = 0, + MlirLLVMDINameTableKindGNU = 1, + MlirLLVMDINameTableKindNone = 2, + MlirLLVMDINameTableKindApple = 3, +}; +typedef enum MlirLLVMDINameTableKind MlirLLVMDINameTableKind; + /// Creates a LLVM DICompileUnit attribute. MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDICompileUnitAttrGet( MlirContext ctx, MlirAttribute id, unsigned int sourceLanguage, MlirAttribute file, MlirAttribute producer, bool isOptimized, - MlirLLVMDIEmissionKind emissionKind); + MlirLLVMDIEmissionKind emissionKind, MlirLLVMDINameTableKind nameTableKind); /// Creates a LLVM DIFlags attribute. 
MLIR_CAPI_EXPORTED MlirAttribute mlirLLVMDIFlagsAttrGet(MlirContext ctx, diff --git a/mlir/include/mlir/Dialect/Affine/LoopUtils.h b/mlir/include/mlir/Dialect/Affine/LoopUtils.h index 723a262f24acc..d143954b78fc1 100644 --- a/mlir/include/mlir/Dialect/Affine/LoopUtils.h +++ b/mlir/include/mlir/Dialect/Affine/LoopUtils.h @@ -299,53 +299,8 @@ LogicalResult separateFullTiles(MutableArrayRef nest, SmallVectorImpl *fullTileNest = nullptr); -/// Walk either an scf.for or an affine.for to find a band to coalesce. -template -LogicalResult coalescePerfectlyNestedLoops(LoopOpTy op) { - LogicalResult result(failure()); - SmallVector loops; - getPerfectlyNestedLoops(loops, op); - - // Look for a band of loops that can be coalesced, i.e. perfectly nested - // loops with bounds defined above some loop. - // 1. For each loop, find above which parent loop its operands are - // defined. - SmallVector operandsDefinedAbove(loops.size()); - for (unsigned i = 0, e = loops.size(); i < e; ++i) { - operandsDefinedAbove[i] = i; - for (unsigned j = 0; j < i; ++j) { - if (areValuesDefinedAbove(loops[i].getOperands(), loops[j].getRegion())) { - operandsDefinedAbove[i] = j; - break; - } - } - } - - // 2. Identify bands of loops such that the operands of all of them are - // defined above the first loop in the band. Traverse the nest bottom-up - // so that modifications don't invalidate the inner loops. - for (unsigned end = loops.size(); end > 0; --end) { - unsigned start = 0; - for (; start < end - 1; ++start) { - auto maxPos = - *std::max_element(std::next(operandsDefinedAbove.begin(), start), - std::next(operandsDefinedAbove.begin(), end)); - if (maxPos > start) - continue; - assert(maxPos == start && - "expected loop bounds to be known at the start of the band"); - auto band = llvm::MutableArrayRef(loops.data() + start, end - start); - if (succeeded(coalesceLoops(band))) - result = success(); - break; - } - // If a band was found and transformed, keep looking at the loops above - // the outermost transformed loop. - if (start != end - 1) - end = start + 1; - } - return result; -} +/// Walk an affine.for to find a band to coalesce. +LogicalResult coalescePerfectlyNestedAffineLoops(AffineForOp op); } // namespace affine } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td index ead19c69a0831..4e4c6fd601777 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td @@ -343,7 +343,9 @@ def Arith_SubIOp : Arith_IntBinaryOpWithOverflowFlags<"subi"> { // MulIOp //===----------------------------------------------------------------------===// -def Arith_MulIOp : Arith_IntBinaryOpWithOverflowFlags<"muli", [Commutative]> { +def Arith_MulIOp : Arith_IntBinaryOpWithOverflowFlags<"muli", + [Commutative, DeclareOpInterfaceMethods] +> { let summary = [{ Integer multiplication operation. 
}]; diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td index 91bd3702f93b9..cc849cb7c978d 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMAttrDefs.td @@ -350,8 +350,20 @@ def LLVM_DICompileUnitAttr : LLVM_Attr<"DICompileUnit", "di_compile_unit", "DIFileAttr":$file, OptionalParameter<"StringAttr">:$producer, "bool":$isOptimized, - "DIEmissionKind":$emissionKind + "DIEmissionKind":$emissionKind, + OptionalParameter<"DINameTableKind">:$nameTableKind ); + let builders = [ + AttrBuilderWithInferredContext<(ins + "DistinctAttr":$id, "unsigned":$sourceLanguage, "DIFileAttr":$file, + "StringAttr":$producer, "bool":$isOptimized, + "DIEmissionKind":$emissionKind, + CArg<"DINameTableKind", "DINameTableKind::Default">:$nameTableKind + ), [{ + return $_get(id.getContext(), id, sourceLanguage, file, producer, + isOptimized, emissionKind, nameTableKind); + }]> + ]; let assemblyFormat = "`<` struct(params) `>`"; } diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td index 04d797031245e..a93964abcb42e 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMEnums.td @@ -393,6 +393,26 @@ def DIFlags : I32BitEnumAttr< let printBitEnumPrimaryGroups = 1; } +//===----------------------------------------------------------------------===// +// DINameTableKind +//===----------------------------------------------------------------------===// + +def LLVM_DINameTableDefault : I64EnumAttrCase<"Default", 0>; +def LLVM_DINameTableGNU : I64EnumAttrCase<"GNU", 1>; +def LLVM_DINameTableNone : I64EnumAttrCase<"None", 2>; +def LLVM_DINameTableApple : I64EnumAttrCase<"Apple", 3>; + +def LLVM_DINameTableKind : I64EnumAttr< + "DINameTableKind", + "LLVM debug name table kind", [ + LLVM_DINameTableDefault, + LLVM_DINameTableGNU, + LLVM_DINameTableNone, + LLVM_DINameTableApple, + ]> { + let cppNamespace = "::mlir::LLVM"; +} + //===----------------------------------------------------------------------===// // DISubprogramFlags //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index 28526f1a1560c..a52cca3c95dee 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -562,8 +562,10 @@ class LLVM_DbgIntrOp traits = []> }]; } -def LLVM_DbgDeclareOp : LLVM_DbgIntrOp<"dbg.declare", "addr", - [DeclareOpInterfaceMethods]> { +def LLVM_DbgDeclareOp : LLVM_DbgIntrOp<"dbg.declare", "addr", [ + DeclareOpInterfaceMethods]> { let summary = "Describes how the address relates to a source language variable."; let arguments = (ins LLVM_AnyPointer:$addr, diff --git a/mlir/include/mlir/Dialect/LLVMIR/Transforms/TypeConsistency.h b/mlir/include/mlir/Dialect/LLVMIR/Transforms/TypeConsistency.h index cacb241bfd7a1..a4bb380b99b86 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/Transforms/TypeConsistency.h +++ b/mlir/include/mlir/Dialect/LLVMIR/Transforms/TypeConsistency.h @@ -56,17 +56,6 @@ class SplitStores : public OpRewritePattern { PatternRewriter &rewrite) const override; }; -/// Transforms type-inconsistent stores, aka stores where the type hint of -/// the address contradicts the value stored, by inserting a bitcast if -/// possible. 
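A hedged sketch of exercising the new `nameTableKind` parameter through the inferred-context builder declared above; `distinctId`, `fileAttr`, and `producerAttr` are assumed to be created elsewhere in the caller.

```c++
// Constructing a DICompileUnitAttr with the new optional name table kind.
auto cuAttr = LLVM::DICompileUnitAttr::get(
    distinctId, /*sourceLanguage=*/llvm::dwarf::DW_LANG_C11, fileAttr,
    producerAttr, /*isOptimized=*/true, LLVM::DIEmissionKind::Full,
    /*nameTableKind=*/LLVM::DINameTableKind::Apple);
```

Omitting the final argument falls back to `DINameTableKind::Default`, matching the `CArg` default in the builder.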
-class BitcastStores : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(StoreOp store, - PatternRewriter &rewriter) const override; -}; - /// Splits GEPs with more than two indices into multiple GEPs with exactly /// two indices. The created GEPs are then guaranteed to index into only /// one aggregate at a time. diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index c260fe3f7a46a..8edaa7db6cef3 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -2138,25 +2138,16 @@ def VectorizeOp : Op:$vector_sizes, + Variadic:$vector_sizes, + DefaultValuedOptionalAttr: + $static_vector_sizes, OptionalAttr:$vectorize_nd_extract, DefaultValuedOptionalAttr: - $scalable_sizes, - DefaultValuedOptionalAttr: - $static_vector_sizes); + $scalable_sizes); let results = (outs); - let assemblyFormat = [{ - $target oilist( - `vector_sizes` custom($vector_sizes, - $static_vector_sizes, - type($vector_sizes), - $scalable_sizes) | - `vectorize_nd_extract` $vectorize_nd_extract - ) - attr-dict - `:` type($target) - }]; + + let hasCustomAssemblyFormat = 1; let hasVerifier = 1; let extraClassDeclaration = [{ diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h new file mode 100644 index 0000000000000..6454076f7593b --- /dev/null +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -0,0 +1,300 @@ +//===-- OpenMPClauseOperands.h ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares the structures defining MLIR operands associated with each +// OpenMP clause, and structures grouping the appropriate operands for each +// construct. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_OPENMP_OPENMPCLAUSEOPERANDS_H_ +#define MLIR_DIALECT_OPENMP_OPENMPCLAUSEOPERANDS_H_ + +#include "mlir/IR/BuiltinAttributes.h" +#include "llvm/ADT/SmallVector.h" + +#include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc" + +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h.inc" + +namespace mlir { +namespace omp { + +//===----------------------------------------------------------------------===// +// Mixin structures defining MLIR operands associated with each OpenMP clause. +//===----------------------------------------------------------------------===// + +struct AlignedClauseOps { + llvm::SmallVector alignedVars; + llvm::SmallVector alignmentAttrs; +}; + +struct AllocateClauseOps { + llvm::SmallVector allocatorVars, allocateVars; +}; + +struct CollapseClauseOps { + llvm::SmallVector loopLBVar, loopUBVar, loopStepVar; +}; + +struct CopyprivateClauseOps { + llvm::SmallVector copyprivateVars; + llvm::SmallVector copyprivateFuncs; +}; + +struct DependClauseOps { + llvm::SmallVector dependTypeAttrs; + llvm::SmallVector dependVars; +}; + +struct DeviceClauseOps { + Value deviceVar; +}; + +struct DeviceTypeClauseOps { + // The default capture type. 
+ DeclareTargetDeviceType deviceType = DeclareTargetDeviceType::any; +}; + +struct DistScheduleClauseOps { + UnitAttr distScheduleStaticAttr; + Value distScheduleChunkSizeVar; +}; + +struct DoacrossClauseOps { + llvm::SmallVector doacrossVectorVars; + ClauseDependAttr doacrossDependTypeAttr; + IntegerAttr doacrossNumLoopsAttr; +}; + +struct FinalClauseOps { + Value finalVar; +}; + +struct GrainsizeClauseOps { + Value grainsizeVar; +}; + +struct HintClauseOps { + IntegerAttr hintAttr; +}; + +struct IfClauseOps { + Value ifVar; +}; + +struct InReductionClauseOps { + llvm::SmallVector inReductionVars; + llvm::SmallVector inReductionDeclSymbols; +}; + +struct LinearClauseOps { + llvm::SmallVector linearVars, linearStepVars; +}; + +struct LoopRelatedOps { + UnitAttr loopInclusiveAttr; +}; + +struct MapClauseOps { + llvm::SmallVector mapVars; +}; + +struct MergeableClauseOps { + UnitAttr mergeableAttr; +}; + +struct NameClauseOps { + StringAttr nameAttr; +}; + +struct NogroupClauseOps { + UnitAttr nogroupAttr; +}; + +struct NontemporalClauseOps { + llvm::SmallVector nontemporalVars; +}; + +struct NowaitClauseOps { + UnitAttr nowaitAttr; +}; + +struct NumTasksClauseOps { + Value numTasksVar; +}; + +struct NumTeamsClauseOps { + Value numTeamsLowerVar, numTeamsUpperVar; +}; + +struct NumThreadsClauseOps { + Value numThreadsVar; +}; + +struct OrderClauseOps { + ClauseOrderKindAttr orderAttr; +}; + +struct OrderedClauseOps { + IntegerAttr orderedAttr; +}; + +struct ParallelizationLevelClauseOps { + UnitAttr parLevelSimdAttr; +}; + +struct PriorityClauseOps { + Value priorityVar; +}; + +struct PrivateClauseOps { + // SSA values that correspond to "original" values being privatized. + // They refer to the SSA value outside the OpenMP region from which a clone is + // created inside the region. + llvm::SmallVector privateVars; + // The list of symbols referring to delayed privatizer ops (i.e. `omp.private` + // ops). + llvm::SmallVector privatizers; +}; + +struct ProcBindClauseOps { + ClauseProcBindKindAttr procBindKindAttr; +}; + +struct ReductionClauseOps { + llvm::SmallVector reductionVars; + llvm::SmallVector reductionDeclSymbols; + UnitAttr reductionByRefAttr; +}; + +struct SafelenClauseOps { + IntegerAttr safelenAttr; +}; + +struct ScheduleClauseOps { + ClauseScheduleKindAttr scheduleValAttr; + ScheduleModifierAttr scheduleModAttr; + Value scheduleChunkVar; + UnitAttr scheduleSimdAttr; +}; + +struct SimdlenClauseOps { + IntegerAttr simdlenAttr; +}; + +struct TaskReductionClauseOps { + llvm::SmallVector taskReductionVars; + llvm::SmallVector taskReductionDeclSymbols; +}; + +struct ThreadLimitClauseOps { + Value threadLimitVar; +}; + +struct UntiedClauseOps { + UnitAttr untiedAttr; +}; + +struct UseDeviceClauseOps { + llvm::SmallVector useDevicePtrVars, useDeviceAddrVars; +}; + +//===----------------------------------------------------------------------===// +// Structures defining clause operands associated with each OpenMP leaf +// construct. +// +// These mirror the arguments expected by the corresponding OpenMP MLIR ops. +//===----------------------------------------------------------------------===// + +namespace detail { +template +struct Clauses : public Mixins... {}; +} // namespace detail + +using CriticalClauseOps = detail::Clauses; + +// TODO `indirect` clause. +using DeclareTargetClauseOps = detail::Clauses; + +using DistributeClauseOps = + detail::Clauses; + +// TODO `filter` clause. 
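The per-construct aliases that follow compose these mixins and feed the new clause-based op builders declared in OpenMPOps.td later in this patch. A hedged sketch of the intended call pattern, assuming `ParallelClauseOps` mixes in `IfClauseOps` and `NumThreadsClauseOps`, and that `builder`, `loc`, `ifCond`, and `numThreads` come from the surrounding lowering:

```c++
// Populate only the clauses present on the source construct; unset
// fields stay null and are simply not attached to the op.
mlir::omp::ParallelClauseOps clauseOps;
clauseOps.ifVar = ifCond;                // from IfClauseOps
clauseOps.numThreadsVar = numThreads;    // from NumThreadsClauseOps
auto parallelOp =
    builder.create<mlir::omp::ParallelOp>(loc, clauseOps);
```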
+using MaskedClauseOps = detail::Clauses<>; + +using OrderedOpClauseOps = detail::Clauses; + +using OrderedRegionClauseOps = detail::Clauses; + +using ParallelClauseOps = + detail::Clauses; + +using SectionsClauseOps = detail::Clauses; + +// TODO `linear` clause. +using SimdLoopClauseOps = + detail::Clauses; + +using SingleClauseOps = detail::Clauses; + +// TODO `defaultmap`, `has_device_addr`, `is_device_ptr`, `uses_allocators` +// clauses. +using TargetClauseOps = + detail::Clauses; + +using TargetDataClauseOps = detail::Clauses; + +using TargetEnterExitUpdateDataClauseOps = + detail::Clauses; + +// TODO `affinity`, `detach` clauses. +using TaskClauseOps = + detail::Clauses; + +using TaskgroupClauseOps = + detail::Clauses; + +using TaskloopClauseOps = + detail::Clauses; + +using TaskwaitClauseOps = detail::Clauses; + +using TeamsClauseOps = + detail::Clauses; + +using WsloopClauseOps = + detail::Clauses; + +} // namespace omp +} // namespace mlir + +#endif // MLIR_DIALECT_OPENMP_OPENMPCLAUSEOPERANDS_H_ diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h index 23509c5b60701..c656bdc870976 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h @@ -26,11 +26,10 @@ #include "mlir/Dialect/OpenMP/OpenMPOpsTypes.h.inc" #include "mlir/Dialect/OpenMP/OpenMPOpsDialect.h.inc" -#include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc" -#include "mlir/Dialect/OpenMP/OpenMPTypeInterfaces.h.inc" -#define GET_ATTRDEF_CLASSES -#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h.inc" +#include "mlir/Dialect/OpenMP/OpenMPClauseOperands.h" + +#include "mlir/Dialect/OpenMP/OpenMPTypeInterfaces.h.inc" #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 457451886e14e..a38a82f9cc607 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -287,7 +287,8 @@ def ParallelOp : OpenMP_Op<"parallel", [ let regions = (region AnyRegion:$region); let builders = [ - OpBuilder<(ins CArg<"ArrayRef", "{}">:$attributes)> + OpBuilder<(ins CArg<"ArrayRef", "{}">:$attributes)>, + OpBuilder<(ins CArg<"const ParallelClauseOps &">:$clauses)> ]; let extraClassDeclaration = [{ /// Returns the number of reduction variables. @@ -362,6 +363,10 @@ def TeamsOp : OpenMP_Op<"teams", [ let regions = (region AnyRegion:$region); + let builders = [ + OpBuilder<(ins CArg<"const TeamsClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ oilist( `num_teams` `(` ( $num_teams_lower^ `:` type($num_teams_lower) )? 
`to` @@ -451,6 +456,10 @@ def SectionsOp : OpenMP_Op<"sections", [AttrSizedOperandSegments, let regions = (region SizedRegion<1>:$region); + let builders = [ + OpBuilder<(ins CArg<"const SectionsClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ oilist( `reduction` `(` custom( @@ -495,6 +504,10 @@ def SingleOp : OpenMP_Op<"single", [AttrSizedOperandSegments]> { let regions = (region AnyRegion:$region); + let builders = [ + OpBuilder<(ins CArg<"const SingleClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ oilist(`allocate` `(` custom( @@ -601,6 +614,7 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments, OpBuilder<(ins "ValueRange":$lowerBound, "ValueRange":$upperBound, "ValueRange":$step, CArg<"ArrayRef", "{}">:$attributes)>, + OpBuilder<(ins CArg<"const WsloopClauseOps &">:$clauses)> ]; let regions = (region AnyRegion:$region); @@ -698,6 +712,11 @@ def SimdLoopOp : OpenMP_Op<"simdloop", [AttrSizedOperandSegments, ); let regions = (region AnyRegion:$region); + + let builders = [ + OpBuilder<(ins CArg<"const SimdLoopClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ oilist(`aligned` `(` custom($aligned_vars, type($aligned_vars), @@ -781,6 +800,10 @@ def DistributeOp : OpenMP_Op<"distribute", [AttrSizedOperandSegments, let regions = (region AnyRegion:$region); + let builders = [ + OpBuilder<(ins CArg<"const DistributeClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ oilist(`dist_schedule_static` $dist_schedule_static |`chunk_size` `(` $chunk_size `:` type($chunk_size) `)` @@ -883,6 +906,9 @@ def TaskOp : OpenMP_Op<"task", [AttrSizedOperandSegments, Variadic:$allocate_vars, Variadic:$allocators_vars); let regions = (region AnyRegion:$region); + let builders = [ + OpBuilder<(ins CArg<"const TaskClauseOps &">:$clauses)> + ]; let assemblyFormat = [{ oilist(`if` `(` $if_expr `)` |`final` `(` $final_expr `)` @@ -1037,6 +1063,10 @@ def TaskloopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments, let regions = (region AnyRegion:$region); + let builders = [ + OpBuilder<(ins CArg<"const TaskloopClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ oilist(`if` `(` $if_expr `)` |`final` `(` $final_expr `)` @@ -1106,6 +1136,10 @@ def TaskgroupOp : OpenMP_Op<"taskgroup", [AttrSizedOperandSegments, let regions = (region AnyRegion:$region); + let builders = [ + OpBuilder<(ins CArg<"const TaskgroupClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ oilist(`task_reduction` `(` custom( @@ -1432,6 +1466,10 @@ def TargetDataOp: OpenMP_Op<"target_data", [AttrSizedOperandSegments, let regions = (region AnyRegion:$region); + let builders = [ + OpBuilder<(ins CArg<"const TargetDataClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ oilist(`if` `(` $if_expr `:` type($if_expr) `)` | `device` `(` $device `:` type($device) `)` @@ -1486,6 +1524,10 @@ def TargetEnterDataOp: OpenMP_Op<"target_enter_data", UnitAttr:$nowait, Variadic:$map_operands); + let builders = [ + OpBuilder<(ins CArg<"const TargetEnterExitUpdateDataClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ oilist(`if` `(` $if_expr `:` type($if_expr) `)` | `device` `(` $device `:` type($device) `)` @@ -1540,6 +1582,10 @@ def TargetExitDataOp: OpenMP_Op<"target_exit_data", UnitAttr:$nowait, Variadic:$map_operands); + let builders = [ + OpBuilder<(ins CArg<"const TargetEnterExitUpdateDataClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ oilist(`if` `(` $if_expr `:` type($if_expr) `)` | `device` `(` $device `:` type($device) `)` @@ -1596,6 +1642,10 @@ def TargetUpdateOp: OpenMP_Op<"target_update", 
[AttrSizedOperandSegments, UnitAttr:$nowait, Variadic:$map_operands); + let builders = [ + OpBuilder<(ins CArg<"const TargetEnterExitUpdateDataClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ oilist(`if` `(` $if_expr `:` type($if_expr) `)` | `device` `(` $device `:` type($device) `)` @@ -1649,6 +1699,10 @@ def TargetOp : OpenMP_Op<"target", [IsolatedFromAbove, MapClauseOwningOpInterfac let regions = (region AnyRegion:$region); + let builders = [ + OpBuilder<(ins CArg<"const TargetClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ oilist( `if` `(` $if_expr `)` | `device` `(` $device `:` type($device) `)` @@ -1693,6 +1747,10 @@ def CriticalDeclareOp : OpenMP_Op<"critical.declare", [Symbol]> { let arguments = (ins SymbolNameAttr:$sym_name, DefaultValuedAttr:$hint_val); + let builders = [ + OpBuilder<(ins CArg<"const CriticalClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ $sym_name oilist(`hint` `(` custom($hint_val) `)`) attr-dict @@ -1773,6 +1831,10 @@ def OrderedOp : OpenMP_Op<"ordered"> { ConfinedAttr, [IntMinValue<0>]>:$num_loops_val, Variadic:$depend_vec_vars); + let builders = [ + OpBuilder<(ins CArg<"const OrderedOpClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ ( `depend_type` `` $depend_type_val^ )? ( `depend_vec` `(` $depend_vec_vars^ `:` type($depend_vec_vars) `)` )? @@ -1797,6 +1859,10 @@ def OrderedRegionOp : OpenMP_Op<"ordered.region"> { let regions = (region AnyRegion:$region); + let builders = [ + OpBuilder<(ins CArg<"const OrderedRegionClauseOps &">:$clauses)> + ]; + let assemblyFormat = [{ ( `simd` $simd^ )? $region attr-dict}]; let hasVerifier = 1; } @@ -1812,6 +1878,10 @@ def TaskwaitOp : OpenMP_Op<"taskwait"> { of the current task. }]; + let builders = [ + OpBuilder<(ins CArg<"const TaskwaitClauseOps &">:$clauses)> + ]; + let assemblyFormat = "attr-dict"; } diff --git a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h index 883d11bcc4df0..bc09cc7f7fa5e 100644 --- a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h @@ -100,11 +100,16 @@ getSCFMinMaxExpr(Value value, SmallVectorImpl<Value> &dims, /// `loops` contains a list of perfectly nested loops with bounds and steps /// independent of any loop induction variable involved in the nest. LogicalResult coalesceLoops(MutableArrayRef<scf::ForOp> loops); +LogicalResult coalesceLoops(RewriterBase &rewriter, + MutableArrayRef<scf::ForOp>); + +/// Walk an scf.for to find a band to coalesce. +LogicalResult coalescePerfectlyNestedSCFForLoops(scf::ForOp op); /// Take the ParallelLoop and for each set of dimension indices, combine them /// into a single dimension. combinedDimensions must contain each index into /// loops exactly once. -void collapseParallelLoops(scf::ParallelOp loops, +void collapseParallelLoops(RewriterBase &rewriter, scf::ParallelOp loops, ArrayRef<std::vector<unsigned>> combinedDimensions); /// Unrolls this for operation by the specified unroll factor.
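The template helper removed from Affine/LoopUtils.h is thus split into two concrete entry points, one per loop dialect. A hedged driver sketch for the scf variant, with `funcOp` assumed to be provided; the upstream coalescing passes may walk loops more selectively than this:

```c++
// Attempt to coalesce every perfect scf.for nest in a function using the
// new scf-specific entry point declared above.
funcOp.walk([](mlir::scf::ForOp forOp) {
  (void)mlir::coalescePerfectlyNestedSCFForLoops(forOp);
});
```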
Returns failure diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index 06360bd10e525..147bc2354977d 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -1325,7 +1325,7 @@ def Vector_TransferReadOp : // Update the temporary gathered slice with the individual element %slice = memref.load %tmp : memref> -> vector<3x4x5xf32> %updated = vector.insert %a, %slice[%i, %j, %k] : f32 into vector<3x4x5xf32> - memref.store %updated, %temp : memref> + memref.store %updated, %tmp : memref> }}} // At this point we gathered the elements from the original // memref into the desired vector layout, stored in the `%tmp` allocation. @@ -1348,7 +1348,7 @@ def Vector_TransferReadOp : %slice = memref.load %tmp : memref> -> vector<3x4x5xf32> // Here we only store to the first element in dimension one %updated = vector.insert %a, %slice[%i, 0, %k] : f32 into vector<3x4x5xf32> - memref.store %updated, %temp : memref> + memref.store %updated, %tmp : memref> }} // At this point we gathered the elements from the original // memref into the desired vector layout, stored in the `%tmp` allocation. diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h index bf89b01e2b60c..911402551e14d 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h @@ -9,6 +9,7 @@ #ifndef MLIR_DIALECT_VECTOR_TRANSFORMS_PASSES_H_ #define MLIR_DIALECT_VECTOR_TRANSFORMS_PASSES_H_ +#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" #include "mlir/Pass/Pass.h" namespace mlir { @@ -22,6 +23,11 @@ std::unique_ptr createVectorBufferizePass(); /// Creates an instance of the `vector.mask` lowering pass. std::unique_ptr createLowerVectorMaskPass(); +/// Creates an instance of the `vector.multi_reduction` lowering pass. 
+std::unique_ptr createLowerVectorMultiReductionPass( + VectorMultiReductionLowering option = + VectorMultiReductionLowering::InnerParallel); + //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.td b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.td index 4911a61ab3c25..31a0b3b2f0c53 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.td @@ -21,4 +21,22 @@ def LowerVectorMaskPass : Pass<"lower-vector-mask", "func::FuncOp"> { let constructor = "mlir::vector::createLowerVectorMaskPass()"; } +def LowerVectorMultiReduction : Pass<"lower-vector-multi-reduction", "func::FuncOp"> { + let summary = "Lower 'vector.multi_reduction' operations"; + let constructor = "mlir::vector::createLowerVectorMultiReductionPass()"; + let options = [ + Option<"loweringStrategy", "lowering-strategy", "mlir::vector::VectorMultiReductionLowering", + /*default=*/"mlir::vector::VectorMultiReductionLowering::InnerParallel", + "Select the strategy to control how multi_reduction is lowered.", + [{::llvm::cl::values( + clEnumValN(mlir::vector::VectorMultiReductionLowering::InnerParallel, + "inner-parallel", + "Lower multi_reduction into outer-reduction and inner-parallel ops."), + clEnumValN(mlir::vector::VectorMultiReductionLowering::InnerReduction, + "inner-reduction", + "Lower multi_reduction into outer-parallel and inner-reduction ops.") + )}]> + ]; +} + #endif // MLIR_DIALECT_VECTOR_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/IR/AffineExprVisitor.h b/mlir/include/mlir/IR/AffineExprVisitor.h index 3e1bbb4b3fa0e..27c49cd80018e 100644 --- a/mlir/include/mlir/IR/AffineExprVisitor.h +++ b/mlir/include/mlir/IR/AffineExprVisitor.h @@ -222,7 +222,7 @@ class AffineExprVisitor : public AffineExprVisitorBase { walkPostOrder(expr.getLHS()); } if constexpr (std::is_same::value) { - if (walkPostOrder(expr.getLHS()).wasInterrupted()) + if (walkPostOrder(expr.getRHS()).wasInterrupted()) return WalkResult::interrupt(); return WalkResult::advance(); } else { diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index 90e63ff8fcb38..2c1c490aac49b 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -1136,6 +1136,13 @@ class OpPrintingFlags { /// elements. OpPrintingFlags &elideLargeElementsAttrs(int64_t largeElementLimit = 16); + /// Enables the printing of large element attributes with a hex string. The + /// `largeElementLimit` is used to configure what is considered to be a + /// "large" ElementsAttr by providing an upper limit to the number of + /// elements. Use -1 to disable the hex printing. + OpPrintingFlags & + printLargeElementsAttrWithHex(int64_t largeElementLimit = 100); + /// Enables the elision of large resources strings by omitting them from the /// `dialect_resources` section. The `largeResourceLimit` is used to configure /// what is considered to be a "large" resource by providing an upper limit to @@ -1169,9 +1176,15 @@ class OpPrintingFlags { /// Return if the given ElementsAttr should be elided. bool shouldElideElementsAttr(ElementsAttr attr) const; + /// Return if the given ElementsAttr should be printed as hex string. + bool shouldPrintElementsAttrWithHex(ElementsAttr attr) const; + /// Return the size limit for printing large ElementsAttr. 
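A hedged sketch of scheduling the new standalone lowering pass, assuming the usual `PassManager` boilerplate and a `context` provided by the caller:

```c++
// Nested on func.func, matching the pass definition in Passes.td.
mlir::PassManager pm(&context);
pm.addNestedPass<mlir::func::FuncOp>(
    mlir::vector::createLowerVectorMultiReductionPass(
        mlir::vector::VectorMultiReductionLowering::InnerReduction));
```

From `mlir-opt`, the equivalent invocation is `--lower-vector-multi-reduction=lowering-strategy=inner-reduction`, per the option names above.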
std::optional<int64_t> getLargeElementsAttrLimit() const; + /// Return the size limit for printing large ElementsAttr as hex string. + int64_t getLargeElementsAttrHexLimit() const; + /// Return the size limit in chars for printing large resources. std::optional<int64_t> getLargeResourceStringLimit() const; @@ -1204,6 +1217,10 @@ class OpPrintingFlags { /// Elide printing large resources based on size of string. std::optional<int64_t> resourceStringCharLimit; + /// Print large element attributes with hex strings if the number of elements + /// is larger than the upper limit. + int64_t elementsAttrHexElementLimit = 100; + /// Print debug information. bool printDebugInfoFlag : 1; bool printDebugInfoPrettyFormFlag : 1; diff --git a/mlir/include/mlir/IR/PatternMatch.h b/mlir/include/mlir/IR/PatternMatch.h index 15b1c38929485..2562301e499dd 100644 --- a/mlir/include/mlir/IR/PatternMatch.h +++ b/mlir/include/mlir/IR/PatternMatch.h @@ -15,6 +15,7 @@ #include "llvm/Support/TypeName.h" #include <optional> +using llvm::SmallPtrSetImpl; namespace mlir { class PatternRewriter; @@ -704,6 +705,8 @@ class RewriterBase : public OpBuilder { return user != exceptedUser; }); } + void replaceAllUsesExcept(Value from, Value to, + const SmallPtrSetImpl<Operation *> &preservedUsers); /// Used to notify the listener that the IR failed to be rewritten because of /// a match failure, and provide a callback to populate a diagnostic with the diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td index e10e2d4e104c3..9db89361c7800 100644 --- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td +++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td @@ -229,6 +229,36 @@ def PromotableOpInterface : OpInterface<"PromotableOpInterface"> { (ins "const ::llvm::SmallPtrSetImpl<mlir::OpOperand *> &":$blockingUses, "::mlir::RewriterBase &":$rewriter) >, + InterfaceMethod<[{ + This method allows the promoted operation to visit the SSA values used + in place of the memory slot once the promotion process of the memory + slot is complete. + + If this method returns true, the `visitReplacedValues` method on this + operation will be called after the main mutation stage finishes + (i.e., after all ops have been processed with `removeBlockingUses`). + + Operations should only use the replaced values if the intended + transformation applies to all the replaced values. Furthermore, replaced + values must not be deleted. + }], "bool", "requiresReplacedValues", (ins), [{}], + [{ return false; }] + >, + InterfaceMethod<[{ + Transforms the IR using the SSA values that replaced the memory slot. + + This method will only be called after all blocking uses have been + scheduled for removal and if `requiresReplacedValues` returned + true. + + The rewriter is located after the promotable operation on call. All IR + mutations must happen through the rewriter. During the transformation, + *no operation should be deleted*.
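A hedged C++ sketch of an operation implementing the two new hooks specified above; `MyPromotableOp` is invented for illustration, and the parameter types follow the `visitReplacedValues` declaration:

```c++
// Opt in so visitReplacedValues runs after the main mutation stage.
bool MyPromotableOp::requiresReplacedValues() { return true; }

void MyPromotableOp::visitReplacedValues(
    ArrayRef<std::pair<Operation *, Value>> mutatedDefs,
    RewriterBase &rewriter) {
  for (auto [definingOp, replacedValue] : mutatedDefs) {
    // Inspect or annotate `replacedValue`; per the contract above, nothing
    // may be deleted here and all mutations go through `rewriter`.
    rewriter.setInsertionPointAfter(definingOp);
  }
}
```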
+ }], + "void", "visitReplacedValues", + (ins "::llvm::ArrayRef>":$mutatedDefs, + "::mlir::RewriterBase &":$rewriter), [{}], [{ return; }] + >, ]; } diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h index 83107a3f5f941..3543ab52407a3 100644 --- a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h +++ b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h @@ -203,6 +203,26 @@ class ValueBoundsConstraintSet std::optional dim1 = std::nullopt, std::optional dim2 = std::nullopt); + /// Traverse the IR starting from the given value/dim and populate constraints + /// as long as the stop condition holds. Also process all values/dims that are + /// already on the worklist. + void populateConstraints(Value value, std::optional dim); + + /// Comparison operator for `ValueBoundsConstraintSet::compare`. + enum ComparisonOperator { LT, LE, EQ, GT, GE }; + + /// Try to prove that, based on the current state of this constraint set + /// (i.e., without analyzing additional IR or adding new constraints), the + /// "lhs" value/dim is LE/LT/EQ/GT/GE than the "rhs" value/dim. + /// + /// Return "true" if the specified relation between the two values/dims was + /// proven to hold. Return "false" if the specified relation could not be + /// proven. This could be because the specified relation does in fact not hold + /// or because there is not enough information in the constraint set. In other + /// words, if we do not know for sure, this function returns "false". + bool compare(Value lhs, std::optional lhsDim, ComparisonOperator cmp, + Value rhs, std::optional rhsDim); + /// Compute whether the given values/dimensions are equal. Return "failure" if /// equality could not be determined. /// @@ -274,13 +294,13 @@ class ValueBoundsConstraintSet ValueBoundsConstraintSet(MLIRContext *ctx, StopConditionFn stopCondition); - /// Populates the constraint set for a value/map without actually computing - /// the bound. Returns the position for the value/map (via the return value - /// and `posOut` output parameter). - int64_t populateConstraintsSet(Value value, - std::optional dim = std::nullopt); - int64_t populateConstraintsSet(AffineMap map, ValueDimList mapOperands, - int64_t *posOut = nullptr); + /// Given an affine map with a single result (and map operands), add a new + /// column to the constraint set that represents the result of the map. + /// Traverse additional IR starting from the map operands as needed (as long + /// as the stop condition is not satisfied). Also process all values/dims that + /// are already on the worklist. Return the position of the newly added + /// column. + int64_t populateConstraints(AffineMap map, ValueDimList mapOperands); /// Iteratively process all elements on the worklist until an index-typed /// value or shaped value meets `stopCondition`. Such values are not processed @@ -295,14 +315,19 @@ class ValueBoundsConstraintSet /// value/dimension exists in the constraint set. int64_t getPos(Value value, std::optional dim = std::nullopt) const; + /// Return an affine expression that represents column `pos` in the constraint + /// set. + AffineExpr getPosExpr(int64_t pos); + /// Insert a value/dimension into the constraint set. If `isSymbol` is set to /// "false", a dimension is added. The value/dimension is added to the - /// worklist. + /// worklist if `addToWorklist` is set. /// /// Note: There are certain affine restrictions wrt. dimensions. E.g., they /// cannot be multiplied. 
Furthermore, bounds can only be queried for /// dimensions but not for symbols. - int64_t insert(Value value, std::optional dim, bool isSymbol = true); + int64_t insert(Value value, std::optional dim, bool isSymbol = true, + bool addToWorklist = true); /// Insert an anonymous column into the constraint set. The column is not /// bound to any value/dimension. If `isSymbol` is set to "false", a dimension diff --git a/mlir/include/mlir/TableGen/Class.h b/mlir/include/mlir/TableGen/Class.h index 81cdf7dbef5f7..92fec6a3b11d9 100644 --- a/mlir/include/mlir/TableGen/Class.h +++ b/mlir/include/mlir/TableGen/Class.h @@ -681,7 +681,7 @@ class Class { Method *addMethod(RetTypeT &&retType, NameT &&name, Method::Properties properties, ArrayRef parameters) { - // If the class has template parameters, the has to defined inline. + // If the class has template parameters, then it has to be defined inline. if (!templateParams.empty()) properties |= Method::Inline; return addMethodAndPrune(Method(std::forward(retType), diff --git a/mlir/include/mlir/Target/LLVMIR/LLVMImportInterface.h b/mlir/include/mlir/Target/LLVMIR/LLVMImportInterface.h index 9f8da83ae9c20..86bcd580c1b44 100644 --- a/mlir/include/mlir/Target/LLVMIR/LLVMImportInterface.h +++ b/mlir/include/mlir/Target/LLVMIR/LLVMImportInterface.h @@ -52,6 +52,15 @@ class LLVMImportDialectInterface return failure(); } + /// Hook for derived dialect interfaces to implement the import of + /// instructions into MLIR. + virtual LogicalResult + convertInstruction(OpBuilder &builder, llvm::Instruction *inst, + ArrayRef llvmOperands, + LLVM::ModuleImport &moduleImport) const { + return failure(); + } + /// Hook for derived dialect interfaces to implement the import of metadata /// into MLIR. Attaches the converted metadata kind and node to the provided /// operation. @@ -66,6 +75,14 @@ class LLVMImportDialectInterface /// returns the list of supported intrinsic identifiers. virtual ArrayRef getSupportedIntrinsics() const { return {}; } + /// Hook for derived dialect interfaces to publish the supported instructions. + /// As every LLVM IR instruction has a unique integer identifier, the function + /// returns the list of supported instruction identifiers. These identifiers + /// will then be used to match LLVM instructions to the appropriate import + /// interface and `convertInstruction` method. It is an error to have multiple + /// interfaces overriding the same instruction. + virtual ArrayRef getSupportedInstructions() const { return {}; } + /// Hook for derived dialect interfaces to publish the supported metadata /// kinds. As every metadata kind has a unique integer identifier, the /// function returns the list of supported metadata identifiers. @@ -88,21 +105,40 @@ class LLVMImportInterface LogicalResult initializeImport() { for (const LLVMImportDialectInterface &iface : *this) { // Verify the supported intrinsics have not been mapped before. 
-      const auto *it =
+      const auto *intrinsicIt =
           llvm::find_if(iface.getSupportedIntrinsics(), [&](unsigned id) {
             return intrinsicToDialect.count(id);
           });
-      if (it != iface.getSupportedIntrinsics().end()) {
+      if (intrinsicIt != iface.getSupportedIntrinsics().end()) {
+        return emitError(
+            UnknownLoc::get(iface.getContext()),
+            llvm::formatv(
+                "expected unique conversion for intrinsic ({0}), but "
+                "got conflicting {1} and {2} conversions",
+                *intrinsicIt, iface.getDialect()->getNamespace(),
+                intrinsicToDialect.lookup(*intrinsicIt)->getNamespace()));
+      }
+      const auto *instructionIt =
+          llvm::find_if(iface.getSupportedInstructions(), [&](unsigned id) {
+            return instructionToDialect.count(id);
+          });
+      if (instructionIt != iface.getSupportedInstructions().end()) {
         return emitError(
             UnknownLoc::get(iface.getContext()),
-            llvm::formatv("expected unique conversion for intrinsic ({0}), but "
-                          "got conflicting {1} and {2} conversions",
-                          *it, iface.getDialect()->getNamespace(),
-                          intrinsicToDialect.lookup(*it)->getNamespace()));
+            llvm::formatv(
+                "expected unique conversion for instruction ({0}), but "
+                "got conflicting {1} and {2} conversions",
+                *instructionIt, iface.getDialect()->getNamespace(),
+                instructionToDialect.lookup(*instructionIt)
+                    ->getDialect()
+                    ->getNamespace()));
       }
       // Add a mapping for all supported intrinsic identifiers.
       for (unsigned id : iface.getSupportedIntrinsics())
         intrinsicToDialect[id] = iface.getDialect();
+      // Add a mapping for all supported instruction identifiers.
+      for (unsigned id : iface.getSupportedInstructions())
+        instructionToDialect[id] = &iface;
       // Add a mapping for all supported metadata kinds.
       for (unsigned kind : iface.getSupportedMetadata())
         metadataToDialect[kind].push_back(iface.getDialect());
@@ -132,6 +168,26 @@ class LLVMImportInterface
     return intrinsicToDialect.count(id);
   }
 
+  /// Converts the LLVM instruction to an MLIR operation if a conversion exists.
+  /// Returns failure otherwise.
+  LogicalResult convertInstruction(OpBuilder &builder, llvm::Instruction *inst,
+                                   ArrayRef<llvm::Value *> llvmOperands,
+                                   LLVM::ModuleImport &moduleImport) const {
+    // Lookup the dialect interface for the given instruction.
+    const LLVMImportDialectInterface *iface =
+        instructionToDialect.lookup(inst->getOpcode());
+    if (!iface)
+      return failure();
+
+    return iface->convertInstruction(builder, inst, llvmOperands, moduleImport);
+  }
+
+  /// Returns true if the given LLVM IR instruction is convertible to an MLIR
+  /// operation.
+  bool isConvertibleInstruction(unsigned id) {
+    return instructionToDialect.count(id);
+  }
+
   /// Attaches the given LLVM metadata to the imported operation if a conversion
   /// to one or more MLIR dialect attributes exists and succeeds.
Returns /// success if at least one of the conversions is successful and failure if @@ -166,6 +222,7 @@ class LLVMImportInterface private: DenseMap intrinsicToDialect; + DenseMap instructionToDialect; DenseMap> metadataToDialect; }; diff --git a/mlir/lib/CAPI/Dialect/LLVM.cpp b/mlir/lib/CAPI/Dialect/LLVM.cpp index 71f2b73dd73bc..4669c40f843d9 100644 --- a/mlir/lib/CAPI/Dialect/LLVM.cpp +++ b/mlir/lib/CAPI/Dialect/LLVM.cpp @@ -206,11 +206,13 @@ MlirAttribute mlirLLVMDICompileUnitAttrGet(MlirContext ctx, MlirAttribute id, unsigned int sourceLanguage, MlirAttribute file, MlirAttribute producer, bool isOptimized, - MlirLLVMDIEmissionKind emissionKind) { + MlirLLVMDIEmissionKind emissionKind, + MlirLLVMDINameTableKind nameTableKind) { return wrap(DICompileUnitAttr::get( unwrap(ctx), cast(unwrap(id)), sourceLanguage, cast(unwrap(file)), cast(unwrap(producer)), - isOptimized, DIEmissionKind(emissionKind))); + isOptimized, DIEmissionKind(emissionKind), + DINameTableKind(nameTableKind))); } MlirAttribute mlirLLVMDIFlagsAttrGet(MlirContext ctx, uint64_t value) { diff --git a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp index 17f64f1b65b7c..9c3c4d96a301e 100644 --- a/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +++ b/mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp @@ -918,6 +918,7 @@ struct SignOpConversion : public OpConversionPattern { auto type = cast(adaptor.getComplex().getType()); auto elementType = cast(type.getElementType()); mlir::ImplicitLocOpBuilder b(op.getLoc(), rewriter); + arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr(); Value real = b.create(elementType, adaptor.getComplex()); Value imag = b.create(elementType, adaptor.getComplex()); @@ -928,9 +929,9 @@ struct SignOpConversion : public OpConversionPattern { Value imagIsZero = b.create(arith::CmpFPredicate::OEQ, imag, zero); Value isZero = b.create(realIsZero, imagIsZero); - auto abs = b.create(elementType, adaptor.getComplex()); - Value realSign = b.create(real, abs); - Value imagSign = b.create(imag, abs); + auto abs = b.create(elementType, adaptor.getComplex(), fmf); + Value realSign = b.create(real, abs, fmf); + Value imagSign = b.create(imag, abs, fmf); Value sign = b.create(type, realSign, imagSign); rewriter.replaceOpWithNewOp(op, isZero, adaptor.getComplex(), sign); @@ -945,9 +946,11 @@ struct TanOpConversion : public OpConversionPattern { matchAndRewrite(complex::TanOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const override { auto loc = op.getLoc(); - Value cos = rewriter.create(loc, adaptor.getComplex()); - Value sin = rewriter.create(loc, adaptor.getComplex()); - rewriter.replaceOpWithNewOp(op, sin, cos); + arith::FastMathFlagsAttr fmf = op.getFastMathFlagsAttr(); + + Value cos = rewriter.create(loc, adaptor.getComplex(), fmf); + Value sin = rewriter.create(loc, adaptor.getComplex(), fmf); + rewriter.replaceOpWithNewOp(op, sin, cos, fmf); return success(); } }; diff --git a/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp b/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp index d1372576407f9..5b1c59d0c95e9 100644 --- a/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp +++ b/mlir/lib/Conversion/MathToLibm/MathToLibm.cpp @@ -162,6 +162,7 @@ ScalarOpToLibmCall::matchAndRewrite(Op op, void mlir::populateMathToLibmConversionPatterns(RewritePatternSet &patterns) { MLIRContext *ctx = patterns.getContext(); + populatePatternsForOp(patterns, ctx, "fabsf", "fabs"); populatePatternsForOp(patterns, ctx, "acosf", 
"acos"); populatePatternsForOp(patterns, ctx, "acoshf", "acosh"); populatePatternsForOp(patterns, ctx, "asinf", "asin"); @@ -174,14 +175,22 @@ void mlir::populateMathToLibmConversionPatterns(RewritePatternSet &patterns) { populatePatternsForOp(patterns, ctx, "cosf", "cos"); populatePatternsForOp(patterns, ctx, "coshf", "cosh"); populatePatternsForOp(patterns, ctx, "erff", "erf"); + populatePatternsForOp(patterns, ctx, "expf", "exp"); + populatePatternsForOp(patterns, ctx, "exp2f", "exp2"); populatePatternsForOp(patterns, ctx, "expm1f", "expm1"); populatePatternsForOp(patterns, ctx, "floorf", "floor"); + populatePatternsForOp(patterns, ctx, "fmaf", "fma"); + populatePatternsForOp(patterns, ctx, "logf", "log"); + populatePatternsForOp(patterns, ctx, "log2f", "log2"); + populatePatternsForOp(patterns, ctx, "log10f", "log10"); populatePatternsForOp(patterns, ctx, "log1pf", "log1p"); + populatePatternsForOp(patterns, ctx, "powf", "pow"); populatePatternsForOp(patterns, ctx, "roundevenf", "roundeven"); populatePatternsForOp(patterns, ctx, "roundf", "round"); populatePatternsForOp(patterns, ctx, "sinf", "sin"); populatePatternsForOp(patterns, ctx, "sinhf", "sinh"); + populatePatternsForOp(patterns, ctx, "sqrtf", "sqrt"); populatePatternsForOp(patterns, ctx, "tanf", "tan"); populatePatternsForOp(patterns, ctx, "tanhf", "tanh"); populatePatternsForOp(patterns, ctx, "truncf", "trunc"); diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index 337f8bb6ab99e..85d10f326e260 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -1082,14 +1082,8 @@ class VectorExtractOpConversion if (!llvmResultType) return failure(); - SmallVector positionVec; - for (auto [idx, pos] : llvm::enumerate(extractOp.getMixedPosition())) { - if (pos.is()) - // Make sure we use the value that has been already converted to LLVM. - positionVec.push_back(adaptor.getDynamicPosition()[idx]); - else - positionVec.push_back(pos); - } + SmallVector positionVec = getMixedValues( + adaptor.getStaticPosition(), adaptor.getDynamicPosition(), rewriter); // Extract entire vector. Should be handled by folder, but just to be safe. ArrayRef position(positionVec); @@ -1209,14 +1203,8 @@ class VectorInsertOpConversion if (!llvmResultType) return failure(); - SmallVector positionVec; - for (auto [idx, pos] : llvm::enumerate(insertOp.getMixedPosition())) { - if (pos.is()) - // Make sure we use the value that has been already converted to LLVM. - positionVec.push_back(adaptor.getDynamicPosition()[idx]); - else - positionVec.push_back(pos); - } + SmallVector positionVec = getMixedValues( + adaptor.getStaticPosition(), adaptor.getDynamicPosition(), rewriter); // Overwrite entire vector with value. Should be handled by folder, but // just to be safe. 
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp index 1dc69ab493d47..05c77070a70c1 100644 --- a/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp @@ -39,9 +39,9 @@ struct LoopCoalescingPass func::FuncOp func = getOperation(); func.walk([](Operation *op) { if (auto scfForOp = dyn_cast(op)) - (void)coalescePerfectlyNestedLoops(scfForOp); + (void)coalescePerfectlyNestedSCFForLoops(scfForOp); else if (auto affineForOp = dyn_cast(op)) - (void)coalescePerfectlyNestedLoops(affineForOp); + (void)coalescePerfectlyNestedAffineLoops(affineForOp); }); } }; diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index af59973d7a92c..268050a30e002 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -2765,3 +2765,51 @@ mlir::affine::separateFullTiles(MutableArrayRef inputNest, return success(); } + +LogicalResult affine::coalescePerfectlyNestedAffineLoops(AffineForOp op) { + LogicalResult result(failure()); + SmallVector loops; + getPerfectlyNestedLoops(loops, op); + if (loops.size() <= 1) + return success(); + + // Look for a band of loops that can be coalesced, i.e. perfectly nested + // loops with bounds defined above some loop. + // 1. For each loop, find above which parent loop its operands are + // defined. + SmallVector operandsDefinedAbove(loops.size()); + for (unsigned i = 0, e = loops.size(); i < e; ++i) { + operandsDefinedAbove[i] = i; + for (unsigned j = 0; j < i; ++j) { + if (areValuesDefinedAbove(loops[i].getOperands(), loops[j].getRegion())) { + operandsDefinedAbove[i] = j; + break; + } + } + } + + // 2. Identify bands of loops such that the operands of all of them are + // defined above the first loop in the band. Traverse the nest bottom-up + // so that modifications don't invalidate the inner loops. + for (unsigned end = loops.size(); end > 0; --end) { + unsigned start = 0; + for (; start < end - 1; ++start) { + auto maxPos = + *std::max_element(std::next(operandsDefinedAbove.begin(), start), + std::next(operandsDefinedAbove.begin(), end)); + if (maxPos > start) + continue; + assert(maxPos == start && + "expected loop bounds to be known at the start of the band"); + auto band = llvm::MutableArrayRef(loops.data() + start, end - start); + if (succeeded(coalesceLoops(band))) + result = success(); + break; + } + // If a band was found and transformed, keep looking at the loops above + // the outermost transformed loop. + if (start != end - 1) + end = start + 1; + } + return result; +} diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index ce0602c853e3c..6f995b93bc3ec 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -213,6 +213,15 @@ LogicalResult arith::ConstantOp::verify() { return emitOpError( "value must be an integer, float, or elements attribute"); } + + // Note, we could relax this for vectors with 1 scalable dim, e.g.: + // * arith.constant dense<[[3, 3], [1, 1]]> : vector<2 x [2] x i32> + // However, this would most likely require updating the lowerings to LLVM. 
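In IR terms, the check added below accepts a splat such as `arith.constant dense<1> : vector<[4]xi32>` but rejects `arith.constant dense<[1, 2, 3, 4]> : vector<[4]xi32>`, since the element count of a scalable vector is unknown until runtime. A standalone restatement of the rule (hypothetical helper name; it mirrors the hunk that follows):

    // True if `value` is an acceptable initializer for an arith.constant of
    // type `type` as far as scalability is concerned.
    static bool isScalableInitSupported(Type type, Attribute value) {
      auto vecType = dyn_cast<VectorType>(type);
      if (!vecType || !vecType.isScalable())
        return true; // Fixed shapes may use arbitrary dense element lists.
      // Scalable vectors support only splats, e.g. dense<1>.
      return isa<SplatElementsAttr>(value);
    }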
+  auto vecType = dyn_cast<VectorType>(type);
+  if (vecType && vecType.isScalable() && !isa<SplatElementsAttr>(getValue()))
+    return emitOpError(
+        "initializing scalable vectors with elements attribute is not supported"
+        " unless it's a vector splat");
   return success();
 }
 
@@ -423,6 +432,33 @@ OpFoldResult arith::MulIOp::fold(FoldAdaptor adaptor) {
       [](const APInt &a, const APInt &b) { return a * b; });
 }
 
+void arith::MulIOp::getAsmResultNames(
+    function_ref<void(Value, StringRef)> setNameFn) {
+  if (!isa<IndexType>(getType()))
+    return;
+
+  // Match vector.vscale by name to avoid depending on the vector dialect
+  // (which is a circular dependency).
+  auto isVscale = [](Operation *op) {
+    return op && op->getName().getStringRef() == "vector.vscale";
+  };
+
+  IntegerAttr baseValue;
+  auto isVscaleExpr = [&](Value a, Value b) {
+    return matchPattern(a, m_Constant(&baseValue)) &&
+           isVscale(b.getDefiningOp());
+  };
+
+  if (!isVscaleExpr(getLhs(), getRhs()) && !isVscaleExpr(getRhs(), getLhs()))
+    return;
+
+  // Name `base * vscale` or `vscale * base` as `c<base>_vscale`.
+  SmallString<32> specialNameBuffer;
+  llvm::raw_svector_ostream specialName(specialNameBuffer);
+  specialName << 'c' << baseValue.getInt() << "_vscale";
+  setNameFn(getResult(), specialName.str());
+}
+
 void arith::MulIOp::getCanonicalizationPatterns(RewritePatternSet &patterns,
                                                 MLIRContext *context) {
   patterns.add(context);
diff --git a/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp b/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp
index 9c6b50e767ea2..f0d43808bc45d 100644
--- a/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp
@@ -24,8 +24,15 @@ struct AddIOpInterface
     auto addIOp = cast<AddIOp>(op);
     assert(value == addIOp.getResult() && "invalid value");
 
-    cstr.bound(value) ==
-        cstr.getExpr(addIOp.getLhs()) + cstr.getExpr(addIOp.getRhs());
+    // Note: `getExpr` has a side effect: it may add a new column to the
+    // constraint system. The evaluation order of addition operands is
+    // unspecified in C++. To make sure that all compilers produce the exact
+    // same results (that can be FileCheck'd), it is important that `getExpr`
+    // is called first and assigned to temporary variables, and the addition
+    // is performed afterwards.
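The note above describes a general C++ pitfall rather than anything specific to this interface; schematically:

    // Fragile: `getExpr` may add a column to the constraint system, and C++
    // leaves the evaluation order of the operands of `+` unspecified, so the
    // column numbering (and any FileCheck'd output derived from it) could
    // differ between compilers:
    //   cstr.bound(value) == cstr.getExpr(lhs) + cstr.getExpr(rhs);
    // Deterministic: sequence the side effects explicitly, as the hunk below
    // does for addi/subi/muli.
    AffineExpr lhsExpr = cstr.getExpr(addIOp.getLhs());
    AffineExpr rhsExpr = cstr.getExpr(addIOp.getRhs());
    cstr.bound(value) == lhsExpr + rhsExpr;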
+    AffineExpr lhs = cstr.getExpr(addIOp.getLhs());
+    AffineExpr rhs = cstr.getExpr(addIOp.getRhs());
+    cstr.bound(value) == lhs + rhs;
   }
 };
 
@@ -49,8 +56,9 @@ struct SubIOpInterface
     auto subIOp = cast<SubIOp>(op);
     assert(value == subIOp.getResult() && "invalid value");
 
-    cstr.bound(value) ==
-        cstr.getExpr(subIOp.getLhs()) - cstr.getExpr(subIOp.getRhs());
+    AffineExpr lhs = cstr.getExpr(subIOp.getLhs());
+    AffineExpr rhs = cstr.getExpr(subIOp.getRhs());
+    cstr.bound(value) == lhs - rhs;
   }
 };
 
@@ -61,11 +69,81 @@ struct MulIOpInterface
     auto mulIOp = cast<MulIOp>(op);
     assert(value == mulIOp.getResult() && "invalid value");
 
-    cstr.bound(value) ==
-        cstr.getExpr(mulIOp.getLhs()) * cstr.getExpr(mulIOp.getRhs());
+    AffineExpr lhs = cstr.getExpr(mulIOp.getLhs());
+    AffineExpr rhs = cstr.getExpr(mulIOp.getRhs());
+    cstr.bound(value) == lhs * rhs;
   }
 };
 
+struct SelectOpInterface
+    : public ValueBoundsOpInterface::ExternalModel<SelectOpInterface,
+                                                   SelectOp> {
+
+  static void populateBounds(SelectOp selectOp, std::optional<int64_t> dim,
+                             ValueBoundsConstraintSet &cstr) {
+    Value value = selectOp.getResult();
+    Value condition = selectOp.getCondition();
+    Value trueValue = selectOp.getTrueValue();
+    Value falseValue = selectOp.getFalseValue();
+
+    if (isa<ShapedType>(condition.getType())) {
+      // If the condition is a shaped type, the condition is applied
+      // element-wise. All three operands must have the same shape.
+      cstr.bound(value)[*dim] == cstr.getExpr(trueValue, dim);
+      cstr.bound(value)[*dim] == cstr.getExpr(falseValue, dim);
+      cstr.bound(value)[*dim] == cstr.getExpr(condition, dim);
+      return;
+    }
+
+    // Populate constraints for the true/false values (and all values on the
+    // backward slice, as long as the current stop condition is not satisfied).
+    cstr.populateConstraints(trueValue, dim);
+    cstr.populateConstraints(falseValue, dim);
+    auto boundsBuilder = cstr.bound(value);
+    if (dim)
+      boundsBuilder[*dim];
+
+    // Compare yielded values.
+ // If trueValue <= falseValue: + // * result <= falseValue + // * result >= trueValue + if (cstr.compare(trueValue, dim, + ValueBoundsConstraintSet::ComparisonOperator::LE, + falseValue, dim)) { + if (dim) { + cstr.bound(value)[*dim] >= cstr.getExpr(trueValue, dim); + cstr.bound(value)[*dim] <= cstr.getExpr(falseValue, dim); + } else { + cstr.bound(value) >= trueValue; + cstr.bound(value) <= falseValue; + } + } + // If falseValue <= trueValue: + // * result <= trueValue + // * result >= falseValue + if (cstr.compare(falseValue, dim, + ValueBoundsConstraintSet::ComparisonOperator::LE, + trueValue, dim)) { + if (dim) { + cstr.bound(value)[*dim] >= cstr.getExpr(falseValue, dim); + cstr.bound(value)[*dim] <= cstr.getExpr(trueValue, dim); + } else { + cstr.bound(value) >= falseValue; + cstr.bound(value) <= trueValue; + } + } + } + + void populateBoundsForIndexValue(Operation *op, Value value, + ValueBoundsConstraintSet &cstr) const { + populateBounds(cast(op), /*dim=*/std::nullopt, cstr); + } + + void populateBoundsForShapedValueDim(Operation *op, Value value, int64_t dim, + ValueBoundsConstraintSet &cstr) const { + populateBounds(cast(op), dim, cstr); + } +}; } // namespace } // namespace arith } // namespace mlir @@ -77,5 +155,6 @@ void mlir::arith::registerValueBoundsOpInterfaceExternalModels( arith::ConstantOp::attachInterface(*ctx); arith::SubIOp::attachInterface(*ctx); arith::MulIOp::attachInterface(*ctx); + arith::SelectOp::attachInterface(*ctx); }); } diff --git a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp index 5d11f8f6cc458..1320db3f9e543 100644 --- a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp +++ b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp @@ -531,8 +531,8 @@ static ParseResult parseSwitchOpCases( failed(parser.parseSuccessor(destination))) return failure(); if (succeeded(parser.parseOptionalLParen())) { - if (failed(parser.parseOperandList(operands, OpAsmParser::Delimiter::None, - /*allowResultNumber=*/false)) || + if (failed(parser.parseOperandList(operands, + OpAsmParser::Delimiter::None)) || failed(parser.parseColonTypeList(operandTypes)) || failed(parser.parseRParen())) return failure(); diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index e5c19a916392e..f90240a67dcc5 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -2761,17 +2761,27 @@ LogicalResult SExtOp::verify() { return verifyExtOp(*this); } // Folder and verifier for LLVM::BitcastOp //===----------------------------------------------------------------------===// -OpFoldResult LLVM::BitcastOp::fold(FoldAdaptor adaptor) { - // bitcast(x : T0, T0) -> x - if (getArg().getType() == getType()) - return getArg(); - // bitcast(bitcast(x : T0, T1), T0) -> x - if (auto prev = getArg().getDefiningOp()) - if (prev.getArg().getType() == getType()) +/// Folds a cast op that can be chained. 
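The helper that follows subsumes both per-op folders; the three rewrites it performs, sketched here on `llvm.bitcast`:

    // 1. Identity:         bitcast x : T0 to T0                  -> x
    // 2. Round trip:       bitcast (bitcast x : T0 to T1) to T0  -> x
    // 3. Chain shortening: bitcast (bitcast x : T0 to T1) to T2
    //                      -> bitcast x : T0 to T2 (the outer op's operand
    //                         is updated in place)

Case 3 is the one the old folders lacked; returning `Value{castOp}` after the in-place operand update tells the folding driver that the op changed without allocating a new operation.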
+template <typename T>
+static Value foldChainableCast(T castOp, typename T::FoldAdaptor adaptor) {
+  // cast(x : T0, T0) -> x
+  if (castOp.getArg().getType() == castOp.getType())
+    return castOp.getArg();
+  if (auto prev = castOp.getArg().template getDefiningOp<T>()) {
+    // cast(cast(x : T0, T1), T0) -> x
+    if (prev.getArg().getType() == castOp.getType())
+      return prev.getArg();
+    // cast(cast(x : T0, T1), T2) -> cast(x : T0, T2)
+    castOp.getArgMutable().set(prev.getArg());
+    return Value{castOp};
+  }
+  return {};
+}
+
+OpFoldResult LLVM::BitcastOp::fold(FoldAdaptor adaptor) {
+  return foldChainableCast(*this, adaptor);
+}
+
 LogicalResult LLVM::BitcastOp::verify() {
   auto resultType = llvm::dyn_cast<LLVMPointerType>(
       extractVectorElementType(getResult().getType()));
@@ -2811,14 +2821,7 @@ LogicalResult LLVM::BitcastOp::verify()
 //===----------------------------------------------------------------------===//
 
 OpFoldResult LLVM::AddrSpaceCastOp::fold(FoldAdaptor adaptor) {
-  // addrcast(x : T0, T0) -> x
-  if (getArg().getType() == getType())
-    return getArg();
-  // addrcast(addrcast(x : T0, T1), T0) -> x
-  if (auto prev = getArg().getDefiningOp<AddrSpaceCastOp>())
-    if (prev.getArg().getType() == getType())
-      return prev.getArg();
-  return {};
+  return foldChainableCast(*this, adaptor);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
index 06c1fdd2eb2d9..c7ca0b4a5843a 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp
@@ -122,8 +122,37 @@ bool LLVM::StoreOp::storesTo(const MemorySlot &slot) {
   return getAddr() == slot.ptr;
 }
 
+/// Checks that two types are the same or can be cast into one another.
+static bool areCastCompatible(const DataLayout &layout, Type lhs, Type rhs) {
+  return lhs == rhs || (!isa<LLVM::LLVMStructType, LLVM::LLVMArrayType>(lhs) &&
+                        !isa<LLVM::LLVMStructType, LLVM::LLVMArrayType>(rhs) &&
+                        layout.getTypeSize(lhs) == layout.getTypeSize(rhs));
+}
+
+/// Constructs operations that convert `inputValue` into a new value of type
+/// `targetType`. Assumes that this conversion is possible.
+static Value createConversionSequence(RewriterBase &rewriter, Location loc,
+                                      Value inputValue, Type targetType) {
+  if (inputValue.getType() == targetType)
+    return inputValue;
+
+  if (!isa<LLVM::LLVMPointerType>(targetType) &&
+      !isa<LLVM::LLVMPointerType>(inputValue.getType()))
+    return rewriter.createOrFold<LLVM::BitcastOp>(loc, targetType, inputValue);
+
+  if (!isa<LLVM::LLVMPointerType>(targetType))
+    return rewriter.createOrFold<LLVM::PtrToIntOp>(loc, targetType, inputValue);
+
+  if (!isa<LLVM::LLVMPointerType>(inputValue.getType()))
+    return rewriter.createOrFold<LLVM::IntToPtrOp>(loc, targetType, inputValue);
+
+  return rewriter.createOrFold<LLVM::AddrSpaceCastOp>(loc, targetType,
+                                                      inputValue);
+}
+
 Value LLVM::StoreOp::getStored(const MemorySlot &slot, RewriterBase &rewriter) {
-  return getValue();
+  return createConversionSequence(rewriter, getLoc(), getValue(),
+                                  slot.elemType);
 }
 
 bool LLVM::LoadOp::canUsesBeRemoved(
@@ -138,7 +167,8 @@ bool LLVM::LoadOp::canUsesBeRemoved(
   // be removed (provided it loads the exact stored value and is not
   // volatile).
   return blockingUse == slot.ptr && getAddr() == slot.ptr &&
-         getResult().getType() == slot.elemType && !getVolatile_();
+         areCastCompatible(dataLayout, getResult().getType(), slot.elemType) &&
+         !getVolatile_();
 }
 
 DeletionKind LLVM::LoadOp::removeBlockingUses(
@@ -146,7 +176,9 @@ DeletionKind LLVM::LoadOp::removeBlockingUses(
     RewriterBase &rewriter, Value reachingDefinition) {
   // `canUsesBeRemoved` checked this blocking use must be the loaded slot
   // pointer.
- rewriter.replaceAllUsesWith(getResult(), reachingDefinition); + Value newResult = createConversionSequence( + rewriter, getLoc(), reachingDefinition, getResult().getType()); + rewriter.replaceAllUsesWith(getResult(), newResult); return DeletionKind::Delete; } @@ -161,20 +193,14 @@ bool LLVM::StoreOp::canUsesBeRemoved( // fine, provided we are currently promoting its target value. Don't allow a // store OF the slot pointer, only INTO the slot pointer. return blockingUse == slot.ptr && getAddr() == slot.ptr && - getValue() != slot.ptr && getValue().getType() == slot.elemType && + getValue() != slot.ptr && + areCastCompatible(dataLayout, slot.elemType, getValue().getType()) && !getVolatile_(); } DeletionKind LLVM::StoreOp::removeBlockingUses( const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, RewriterBase &rewriter, Value reachingDefinition) { - // `canUsesBeRemoved` checked this blocking use must be the stored slot - // pointer. - for (Operation *user : slot.ptr.getUsers()) - if (auto declareOp = dyn_cast(user)) - rewriter.create(declareOp->getLoc(), getValue(), - declareOp.getVarInfo(), - declareOp.getLocationExpr()); return DeletionKind::Delete; } @@ -407,6 +433,18 @@ DeletionKind LLVM::DbgValueOp::removeBlockingUses( return DeletionKind::Keep; } +bool LLVM::DbgDeclareOp::requiresReplacedValues() { return true; } + +void LLVM::DbgDeclareOp::visitReplacedValues( + ArrayRef> definitions, + RewriterBase &rewriter) { + for (auto [op, value] : definitions) { + rewriter.setInsertionPointAfter(op); + rewriter.create(getLoc(), value, getVarInfo(), + getLocationExpr()); + } +} + //===----------------------------------------------------------------------===// // Interfaces for GEPOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/DIScopeForLLVMFuncOp.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/DIScopeForLLVMFuncOp.cpp index 2960cc6220d9f..395ff6ed1e48e 100644 --- a/mlir/lib/Dialect/LLVMIR/Transforms/DIScopeForLLVMFuncOp.cpp +++ b/mlir/lib/Dialect/LLVMIR/Transforms/DIScopeForLLVMFuncOp.cpp @@ -115,8 +115,8 @@ struct DIScopeForLLVMFuncOp } compileUnitAttr = LLVM::DICompileUnitAttr::get( - context, DistinctAttr::create(UnitAttr::get(context)), - llvm::dwarf::DW_LANG_C, fileAttr, StringAttr::get(context, "MLIR"), + DistinctAttr::create(UnitAttr::get(context)), llvm::dwarf::DW_LANG_C, + fileAttr, StringAttr::get(context, "MLIR"), /*isOptimized=*/true, LLVM::DIEmissionKind::LineTablesOnly); } diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp index 3d700fe94e3b9..b264e9ff9283d 100644 --- a/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp +++ b/mlir/lib/Dialect/LLVMIR/Transforms/TypeConsistency.cpp @@ -42,13 +42,6 @@ static Type isElementTypeInconsistent(Value addr, Type expectedType) { return elemType; } -/// Checks that two types are the same or can be bitcast into one another. 
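Taken together, the hunks above (and the TypeConsistency removal that follows) move type adaptation from a dedicated rewrite pattern into mem2reg itself: a promotable slot may now be loaded or stored at a different type of the same size, with the conversion materialized on demand by `createConversionSequence`. Schematically (types chosen for illustration only):

    // Before promotion, a slot written as i64 but read back as f64:
    //   llvm.store %v, %slot : i64, !llvm.ptr
    //   %r = llvm.load %slot : !llvm.ptr -> f64
    // After promotion the memory disappears; the load's uses are rewired to a
    // conversion of the reaching definition (same size, non-aggregate, so a
    // plain bitcast suffices here):
    //   %r = llvm.bitcast %v : i64 to f64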
-static bool areBitcastCompatible(DataLayout &layout, Type lhs, Type rhs) { - return lhs == rhs || (!isa(lhs) && - !isa(rhs) && - layout.getTypeSize(lhs) == layout.getTypeSize(rhs)); -} - //===----------------------------------------------------------------------===// // CanonicalizeAlignedGep //===----------------------------------------------------------------------===// @@ -518,26 +511,6 @@ LogicalResult SplitStores::matchAndRewrite(StoreOp store, return success(); } -LogicalResult BitcastStores::matchAndRewrite(StoreOp store, - PatternRewriter &rewriter) const { - Type sourceType = store.getValue().getType(); - Type typeHint = isElementTypeInconsistent(store.getAddr(), sourceType); - if (!typeHint) { - // Nothing to do, since it is already consistent. - return failure(); - } - - auto dataLayout = DataLayout::closest(store); - if (!areBitcastCompatible(dataLayout, typeHint, sourceType)) - return failure(); - - auto bitcastOp = - rewriter.create(store.getLoc(), typeHint, store.getValue()); - rewriter.modifyOpInPlace(store, - [&] { store.getValueMutable().assign(bitcastOp); }); - return success(); -} - LogicalResult SplitGEP::matchAndRewrite(GEPOp gepOp, PatternRewriter &rewriter) const { FailureOr typeHint = getRequiredConsistentGEPType(gepOp); @@ -588,7 +561,6 @@ struct LLVMTypeConsistencyPass RewritePatternSet rewritePatterns(&getContext()); rewritePatterns.add(&getContext()); rewritePatterns.add(&getContext(), maxVectorSplitSize); - rewritePatterns.add(&getContext()); rewritePatterns.add(&getContext()); FrozenRewritePatternSet frozen(std::move(rewritePatterns)); diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 88819cd964354..7e7cf1d024461 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -3122,6 +3122,81 @@ transform::VectorizeChildrenAndApplyPatternsOp::applyToOne( //===----------------------------------------------------------------------===// // VectorizeOp //===----------------------------------------------------------------------===// + +static const StringLiteral kVectorSizesKeyword = "vector_sizes"; + +ParseResult transform::VectorizeOp::parse(OpAsmParser &parser, + OperationState &result) { + OpAsmParser::UnresolvedOperand target; + SmallVector dynamicSizes; + DenseI64ArrayAttr staticSizes; + SmallVector operandTypes; + llvm::SMLoc operandLoc; + DenseBoolArrayAttr scalableVals; + + if (parser.parseOperand(target) || parser.getCurrentLocation(&operandLoc)) + return ParseResult::failure(); + + if (succeeded(parser.parseOptionalKeyword(kVectorSizesKeyword))) { + if (failed(parseDynamicIndexList(parser, dynamicSizes, staticSizes, + scalableVals))) + return ParseResult::failure(); + } + + if (succeeded(parser.parseOptionalKeyword( + getVectorizeNdExtractAttrName(result.name)))) + result.addAttribute(getVectorizeNdExtractAttrName(result.name), + parser.getBuilder().getUnitAttr()); + + if (parser.parseOptionalAttrDict(result.attributes) || + parser.parseColonTypeList(operandTypes)) + return ParseResult::failure(); + + if (operandTypes.size() != dynamicSizes.size() + 1) { + return parser.emitError(operandLoc) + << "expected " << dynamicSizes.size() + 1 << " operand type(s)"; + } + if (parser.resolveOperand(target, operandTypes.front(), result.operands) || + parser.resolveOperands(dynamicSizes, ArrayRef(operandTypes).drop_front(), + operandLoc, result.operands)) { + return failure(); + } + + if 
(scalableVals) + result.addAttribute(getScalableSizesAttrName(result.name), scalableVals); + if (staticSizes) + result.addAttribute(getStaticVectorSizesAttrName(result.name), staticSizes); + + return success(); +} + +void transform::VectorizeOp::print(OpAsmPrinter &p) { + p << ' ' << getTarget() << ' '; + if (!getMixedVectorSizes().empty()) { + p << kVectorSizesKeyword << ' '; + printDynamicIndexList(p, getOperation(), getVectorSizes(), + getStaticVectorSizesAttr(), + /*valueTypes=*/{}, getScalableSizesAttr(), + OpAsmParser::Delimiter::Square); + } + + if (getVectorizeNdExtract()) + p << getVectorizeNdExtractAttrName() << ' '; + + p.printOptionalAttrDict( + (*this)->getAttrs(), + /*elidedAttrs=*/{ + getScalableSizesAttrName(getOperation()->getName()), + getStaticVectorSizesAttrName(getOperation()->getName())}); + p << " : "; + p << getTarget().getType(); + if (!getVectorSizes().empty()) { + p << ", "; + llvm::interleaveComma(getVectorSizes(), p, + [&](Value operand) { p << operand.getType(); }); + } +} + DiagnosedSilenceableFailure transform::VectorizeOp::apply( transform::TransformRewriter &rewriter, mlir::transform::TransformResults &transformResults, @@ -3136,6 +3211,13 @@ DiagnosedSilenceableFailure transform::VectorizeOp::apply( auto attr = sz.get(); vectorSizes.push_back(cast(attr).getInt()); continue; + } else if (sz.is() && isa(sz.get().getType())) { + ArrayRef params = state.getParams(sz.get()); + if (params.size() != 1) + return emitSilenceableFailure(getLoc()) << "expected a single param"; + vectorSizes.push_back( + cast(params.front()).getValue().getSExtValue()); + continue; } auto szPayloads = state.getPayloadOps(sz.get()); diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp index 9453502a253f1..373e9cfc3ce71 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @@ -1497,9 +1497,10 @@ LinalgOp cloneToCollapsedOp(RewriterBase &rewriter, LinalgOp origOp, SmallVector resultTypes; collapseOperandsAndResults(origOp, collapsingInfo, rewriter, inputOperands, outputOperands, resultTypes); - return cast(clone( + + return clone( rewriter, origOp, resultTypes, - llvm::to_vector(llvm::concat(inputOperands, outputOperands)))); + llvm::to_vector(llvm::concat(inputOperands, outputOperands))); } /// Collapse a `GenericOp` diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index a04343154a4de..543655338db8c 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -41,6 +41,11 @@ using namespace mlir; using namespace mlir::omp; +static ArrayAttr makeArrayAttr(MLIRContext *context, + llvm::ArrayRef attrs) { + return attrs.empty() ? nullptr : ArrayAttr::get(context, attrs); +} + namespace { struct MemRefPointerLikeModel : public PointerLikeType::ExternalModel static LogicalResult verifyPrivateVarList(OpType &op) { auto privateVars = op.getPrivateVars(); @@ -1280,6 +1364,17 @@ static bool opInGlobalImplicitParallelRegion(Operation *op) { return true; } +void TeamsOp::build(OpBuilder &builder, OperationState &state, + const TeamsClauseOps &clauses) { + MLIRContext *ctx = builder.getContext(); + // TODO Store clauses in op: reductionByRefAttr, privateVars, privatizers. 
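(The delegation to the generated builder continues below.) The point of these `*ClauseOps` overloads, here and for the ops that follow, is to let clause-processing code fill in a struct and build the op in one step instead of threading a long positional argument list. A hypothetical caller, with field names as used in the hunk below:

    // Values gathered while processing the source construct's clauses.
    omp::TeamsClauseOps clauses;
    clauses.numTeamsLowerVar = lowerBound;
    clauses.numTeamsUpperVar = upperBound;
    clauses.threadLimitVar = threadLimit;
    // One call builds the op; unset entries stay null/empty.
    auto teamsOp = builder.create<omp::TeamsOp>(loc, clauses);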
+ TeamsOp::build(builder, state, clauses.numTeamsLowerVar, + clauses.numTeamsUpperVar, clauses.ifVar, + clauses.threadLimitVar, clauses.allocateVars, + clauses.allocatorVars, clauses.reductionVars, + makeArrayAttr(ctx, clauses.reductionDeclSymbols)); +} + LogicalResult TeamsOp::verify() { // Check parent region // TODO If nested inside of a target region, also check that it does not @@ -1312,9 +1407,19 @@ LogicalResult TeamsOp::verify() { } //===----------------------------------------------------------------------===// -// Verifier for SectionsOp +// SectionsOp //===----------------------------------------------------------------------===// +void SectionsOp::build(OpBuilder &builder, OperationState &state, + const SectionsClauseOps &clauses) { + MLIRContext *ctx = builder.getContext(); + // TODO Store clauses in op: reductionByRefAttr, privateVars, privatizers. + SectionsOp::build(builder, state, clauses.reductionVars, + makeArrayAttr(ctx, clauses.reductionDeclSymbols), + clauses.allocateVars, clauses.allocatorVars, + clauses.nowaitAttr); +} + LogicalResult SectionsOp::verify() { if (getAllocateVars().size() != getAllocatorsVars().size()) return emitError( @@ -1334,6 +1439,20 @@ LogicalResult SectionsOp::verifyRegions() { return success(); } +//===----------------------------------------------------------------------===// +// SingleOp +//===----------------------------------------------------------------------===// + +void SingleOp::build(OpBuilder &builder, OperationState &state, + const SingleClauseOps &clauses) { + MLIRContext *ctx = builder.getContext(); + // TODO Store clauses in op: privateVars, privatizers. + SingleOp::build(builder, state, clauses.allocateVars, clauses.allocatorVars, + clauses.copyprivateVars, + makeArrayAttr(ctx, clauses.copyprivateFuncs), + clauses.nowaitAttr); +} + LogicalResult SingleOp::verify() { // Check for allocate clause restrictions if (getAllocateVars().size() != getAllocatorsVars().size()) @@ -1481,9 +1600,21 @@ void printLoopControl(OpAsmPrinter &p, Operation *op, Region ®ion, } //===----------------------------------------------------------------------===// -// Verifier for Simd construct [2.9.3.1] +// Simd construct [2.9.3.1] //===----------------------------------------------------------------------===// +void SimdLoopOp::build(OpBuilder &builder, OperationState &state, + const SimdLoopClauseOps &clauses) { + MLIRContext *ctx = builder.getContext(); + // TODO Store clauses in op: privateVars, reductionByRefAttr, reductionVars, + // privatizers, reductionDeclSymbols. + SimdLoopOp::build( + builder, state, clauses.loopLBVar, clauses.loopUBVar, clauses.loopStepVar, + clauses.alignedVars, makeArrayAttr(ctx, clauses.alignmentAttrs), + clauses.ifVar, clauses.nontemporalVars, clauses.orderAttr, + clauses.simdlenAttr, clauses.safelenAttr, clauses.loopInclusiveAttr); +} + LogicalResult SimdLoopOp::verify() { if (this->getLowerBound().empty()) { return emitOpError() << "empty lowerbound for simd loop operation"; @@ -1504,9 +1635,17 @@ LogicalResult SimdLoopOp::verify() { } //===----------------------------------------------------------------------===// -// Verifier for Distribute construct [2.9.4.1] +// Distribute construct [2.9.4.1] //===----------------------------------------------------------------------===// +void DistributeOp::build(OpBuilder &builder, OperationState &state, + const DistributeClauseOps &clauses) { + // TODO Store clauses in op: privateVars, privatizers. 
+ DistributeOp::build(builder, state, clauses.distScheduleStaticAttr, + clauses.distScheduleChunkSizeVar, clauses.allocateVars, + clauses.allocatorVars, clauses.orderAttr); +} + LogicalResult DistributeOp::verify() { if (this->getChunkSize() && !this->getDistScheduleStatic()) return emitOpError() << "chunk size set without " @@ -1630,6 +1769,19 @@ LogicalResult ReductionOp::verify() { //===----------------------------------------------------------------------===// // TaskOp //===----------------------------------------------------------------------===// + +void TaskOp::build(OpBuilder &builder, OperationState &state, + const TaskClauseOps &clauses) { + MLIRContext *ctx = builder.getContext(); + // TODO Store clauses in op: privateVars, privatizers. + TaskOp::build( + builder, state, clauses.ifVar, clauses.finalVar, clauses.untiedAttr, + clauses.mergeableAttr, clauses.inReductionVars, + makeArrayAttr(ctx, clauses.inReductionDeclSymbols), clauses.priorityVar, + makeArrayAttr(ctx, clauses.dependTypeAttrs), clauses.dependVars, + clauses.allocateVars, clauses.allocatorVars); +} + LogicalResult TaskOp::verify() { LogicalResult verifyDependVars = verifyDependVarList(*this, getDepends(), getDependVars()); @@ -1642,6 +1794,15 @@ LogicalResult TaskOp::verify() { //===----------------------------------------------------------------------===// // TaskgroupOp //===----------------------------------------------------------------------===// + +void TaskgroupOp::build(OpBuilder &builder, OperationState &state, + const TaskgroupClauseOps &clauses) { + MLIRContext *ctx = builder.getContext(); + TaskgroupOp::build(builder, state, clauses.taskReductionVars, + makeArrayAttr(ctx, clauses.taskReductionDeclSymbols), + clauses.allocateVars, clauses.allocatorVars); +} + LogicalResult TaskgroupOp::verify() { return verifyReductionVarList(*this, getTaskReductions(), getTaskReductionVars()); @@ -1650,6 +1811,21 @@ LogicalResult TaskgroupOp::verify() { //===----------------------------------------------------------------------===// // TaskloopOp //===----------------------------------------------------------------------===// + +void TaskloopOp::build(OpBuilder &builder, OperationState &state, + const TaskloopClauseOps &clauses) { + MLIRContext *ctx = builder.getContext(); + // TODO Store clauses in op: reductionByRefAttr, privateVars, privatizers. + TaskloopOp::build( + builder, state, clauses.loopLBVar, clauses.loopUBVar, clauses.loopStepVar, + clauses.loopInclusiveAttr, clauses.ifVar, clauses.finalVar, + clauses.untiedAttr, clauses.mergeableAttr, clauses.inReductionVars, + makeArrayAttr(ctx, clauses.inReductionDeclSymbols), clauses.reductionVars, + makeArrayAttr(ctx, clauses.reductionDeclSymbols), clauses.priorityVar, + clauses.allocateVars, clauses.allocatorVars, clauses.grainsizeVar, + clauses.numTasksVar, clauses.nogroupAttr); +} + SmallVector TaskloopOp::getAllReductionVars() { SmallVector allReductionNvars(getInReductionVars().begin(), getInReductionVars().end()); @@ -1703,14 +1879,33 @@ void WsloopOp::build(OpBuilder &builder, OperationState &state, state.addAttributes(attributes); } +void WsloopOp::build(OpBuilder &builder, OperationState &state, + const WsloopClauseOps &clauses) { + MLIRContext *ctx = builder.getContext(); + // TODO Store clauses in op: allocateVars, allocatorVars, privateVars, + // privatizers. 
+ WsloopOp::build( + builder, state, clauses.loopLBVar, clauses.loopUBVar, clauses.loopStepVar, + clauses.linearVars, clauses.linearStepVars, clauses.reductionVars, + makeArrayAttr(ctx, clauses.reductionDeclSymbols), clauses.scheduleValAttr, + clauses.scheduleChunkVar, clauses.scheduleModAttr, + clauses.scheduleSimdAttr, clauses.nowaitAttr, clauses.reductionByRefAttr, + clauses.orderedAttr, clauses.orderAttr, clauses.loopInclusiveAttr); +} + LogicalResult WsloopOp::verify() { return verifyReductionVarList(*this, getReductions(), getReductionVars()); } //===----------------------------------------------------------------------===// -// Verifier for critical construct (2.17.1) +// Critical construct (2.17.1) //===----------------------------------------------------------------------===// +void CriticalDeclareOp::build(OpBuilder &builder, OperationState &state, + const CriticalClauseOps &clauses) { + CriticalDeclareOp::build(builder, state, clauses.nameAttr, clauses.hintAttr); +} + LogicalResult CriticalDeclareOp::verify() { return verifySynchronizationHint(*this, getHintVal()); } @@ -1730,9 +1925,15 @@ LogicalResult CriticalOp::verifySymbolUses(SymbolTableCollection &symbolTable) { } //===----------------------------------------------------------------------===// -// Verifier for ordered construct +// Ordered construct //===----------------------------------------------------------------------===// +void OrderedOp::build(OpBuilder &builder, OperationState &state, + const OrderedOpClauseOps &clauses) { + OrderedOp::build(builder, state, clauses.doacrossDependTypeAttr, + clauses.doacrossNumLoopsAttr, clauses.doacrossVectorVars); +} + LogicalResult OrderedOp::verify() { auto container = (*this)->getParentOfType(); if (!container || !container.getOrderedValAttr() || @@ -1749,6 +1950,11 @@ LogicalResult OrderedOp::verify() { return success(); } +void OrderedRegionOp::build(OpBuilder &builder, OperationState &state, + const OrderedRegionClauseOps &clauses) { + OrderedRegionOp::build(builder, state, clauses.parLevelSimdAttr); +} + LogicalResult OrderedRegionOp::verify() { // TODO: The code generation for ordered simd directive is not supported yet. if (getSimd()) @@ -1765,6 +1971,16 @@ LogicalResult OrderedRegionOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// TaskwaitOp +//===----------------------------------------------------------------------===// + +void TaskwaitOp::build(OpBuilder &builder, OperationState &state, + const TaskwaitClauseOps &clauses) { + // TODO Store clauses in op: dependTypeAttrs, dependVars, nowaitAttr. + TaskwaitOp::build(builder, state); +} + //===----------------------------------------------------------------------===// // Verifier for AtomicReadOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp index 1e13e60068ee7..72c5aaa230678 100644 --- a/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp @@ -12,7 +12,6 @@ #include "mlir/Interfaces/ValueBoundsOpInterface.h" using namespace mlir; -using presburger::BoundType; namespace mlir { namespace scf { @@ -21,7 +20,28 @@ namespace { struct ForOpInterface : public ValueBoundsOpInterface::ExternalModel { - /// Populate bounds of values/dimensions for iter_args/OpResults. + /// Populate bounds of values/dimensions for iter_args/OpResults. 
If the
+  /// value/dimension size does not change in an iteration, we can deduce that
+  /// it is the same as the initial value/dimension.
+  ///
+  /// Example 1:
+  /// %0 = scf.for ... iter_args(%arg0 = %t) -> tensor<?xf32> {
+  ///   ...
+  ///   %1 = tensor.insert %f into %arg0[...] : tensor<?xf32>
+  ///   scf.yield %1 : tensor<?xf32>
+  /// }
+  /// --> bound(%0)[0] == bound(%t)[0]
+  /// --> bound(%arg0)[0] == bound(%t)[0]
+  ///
+  /// Example 2:
+  /// %0 = scf.for ... iter_args(%arg0 = %t) -> tensor<?xf32> {
+  ///   %sz = tensor.dim %arg0 : tensor<?xf32>
+  ///   %incr = arith.addi %sz, %c1 : index
+  ///   %1 = tensor.empty(%incr) : tensor<?xf32>
+  ///   scf.yield %1 : tensor<?xf32>
+  /// }
+  /// --> The yielded tensor dimension size changes with each iteration. Such
+  ///     loops are not supported and no constraints are added.
   static void populateIterArgBounds(scf::ForOp forOp, Value value,
                                     std::optional<int64_t> dim,
                                     ValueBoundsConstraintSet &cstr) {
@@ -33,59 +53,31 @@ struct ForOpInterface
       iterArgIdx = llvm::cast<OpResult>(value).getResultNumber();
     }
 
-    // An EQ constraint can be added if the yielded value (dimension size)
-    // equals the corresponding block argument (dimension size).
     Value yieldedValue = cast<scf::YieldOp>(forOp.getBody()->getTerminator())
                              .getOperand(iterArgIdx);
     Value iterArg = forOp.getRegionIterArg(iterArgIdx);
     Value initArg = forOp.getInitArgs()[iterArgIdx];
 
-    auto addEqBound = [&]() {
+    // Populate constraints for the yielded value.
+    cstr.populateConstraints(yieldedValue, dim);
+    // Populate constraints for the iter_arg. This is just to ensure that the
+    // iter_arg is mapped in the constraint set, which is a prerequisite for
+    // `compare`. It may lead to a recursive call to this function in case the
+    // iter_arg was not visited when the constraints for the yielded value were
+    // populated, but no additional work is done.
+    cstr.populateConstraints(iterArg, dim);
+
+    // An EQ constraint can be added if the yielded value (dimension size)
+    // equals the corresponding block argument (dimension size).
+    if (cstr.compare(yieldedValue, dim,
+                     ValueBoundsConstraintSet::ComparisonOperator::EQ, iterArg,
+                     dim)) {
       if (dim.has_value()) {
         cstr.bound(value)[*dim] == cstr.getExpr(initArg, dim);
       } else {
         cstr.bound(value) == initArg;
       }
-    };
-
-    if (yieldedValue == iterArg) {
-      addEqBound();
-      return;
     }
-
-    // Compute EQ bound for yielded value.
-    AffineMap bound;
-    ValueDimList boundOperands;
-    LogicalResult status = ValueBoundsConstraintSet::computeBound(
-        bound, boundOperands, BoundType::EQ, yieldedValue, dim,
-        [&](Value v, std::optional<int64_t> d, ValueBoundsConstraintSet &cstr) {
-          // Stop when reaching a block argument of the loop body.
-          if (auto bbArg = llvm::dyn_cast<BlockArgument>(v))
-            return bbArg.getOwner()->getParentOp() == forOp;
-          // Stop when reaching a value that is defined outside of the loop. It
-          // is impossible to reach an iter_arg from there.
-          Operation *op = v.getDefiningOp();
-          return forOp.getRegion().findAncestorOpInRegion(*op) == nullptr;
-        });
-    if (failed(status))
-      return;
-    if (bound.getNumResults() != 1)
-      return;
-
-    // Check if computed bound equals the corresponding iter_arg.
- Value singleValue = nullptr; - std::optional singleDim; - if (auto dimExpr = dyn_cast(bound.getResult(0))) { - int64_t idx = dimExpr.getPosition(); - singleValue = boundOperands[idx].first; - singleDim = boundOperands[idx].second; - } else if (auto symExpr = dyn_cast(bound.getResult(0))) { - int64_t idx = symExpr.getPosition() + bound.getNumDims(); - singleValue = boundOperands[idx].first; - singleDim = boundOperands[idx].second; - } - if (singleValue == iterArg && singleDim == dim) - addEqBound(); } void populateBoundsForIndexValue(Operation *op, Value value, @@ -111,6 +103,66 @@ struct ForOpInterface } }; +struct IfOpInterface + : public ValueBoundsOpInterface::ExternalModel { + + static void populateBounds(scf::IfOp ifOp, Value value, + std::optional dim, + ValueBoundsConstraintSet &cstr) { + unsigned int resultNum = cast(value).getResultNumber(); + Value thenValue = ifOp.thenYield().getResults()[resultNum]; + Value elseValue = ifOp.elseYield().getResults()[resultNum]; + + // Populate constraints for the yielded value (and all values on the + // backward slice, as long as the current stop condition is not satisfied). + cstr.populateConstraints(thenValue, dim); + cstr.populateConstraints(elseValue, dim); + auto boundsBuilder = cstr.bound(value); + if (dim) + boundsBuilder[*dim]; + + // Compare yielded values. + // If thenValue <= elseValue: + // * result <= elseValue + // * result >= thenValue + if (cstr.compare(thenValue, dim, + ValueBoundsConstraintSet::ComparisonOperator::LE, + elseValue, dim)) { + if (dim) { + cstr.bound(value)[*dim] >= cstr.getExpr(thenValue, dim); + cstr.bound(value)[*dim] <= cstr.getExpr(elseValue, dim); + } else { + cstr.bound(value) >= thenValue; + cstr.bound(value) <= elseValue; + } + } + // If elseValue <= thenValue: + // * result <= thenValue + // * result >= elseValue + if (cstr.compare(elseValue, dim, + ValueBoundsConstraintSet::ComparisonOperator::LE, + thenValue, dim)) { + if (dim) { + cstr.bound(value)[*dim] >= cstr.getExpr(elseValue, dim); + cstr.bound(value)[*dim] <= cstr.getExpr(thenValue, dim); + } else { + cstr.bound(value) >= elseValue; + cstr.bound(value) <= thenValue; + } + } + } + + void populateBoundsForIndexValue(Operation *op, Value value, + ValueBoundsConstraintSet &cstr) const { + populateBounds(cast(op), value, /*dim=*/std::nullopt, cstr); + } + + void populateBoundsForShapedValueDim(Operation *op, Value value, int64_t dim, + ValueBoundsConstraintSet &cstr) const { + populateBounds(cast(op), value, dim, cstr); + } +}; + } // namespace } // namespace scf } // namespace mlir @@ -119,5 +171,6 @@ void mlir::scf::registerValueBoundsOpInterfaceExternalModels( DialectRegistry ®istry) { registry.addExtension(+[](MLIRContext *ctx, scf::SCFDialect *dialect) { scf::ForOp::attachInterface(*ctx); + scf::IfOp::attachInterface(*ctx); }); } diff --git a/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp b/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp index c091841482080..7e4faf8b73afb 100644 --- a/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp +++ b/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp @@ -332,9 +332,9 @@ transform::LoopCoalesceOp::applyToOne(transform::TransformRewriter &rewriter, transform::TransformState &state) { LogicalResult result(failure()); if (scf::ForOp scfForOp = dyn_cast(op)) - result = coalescePerfectlyNestedLoops(scfForOp); + result = coalescePerfectlyNestedSCFForLoops(scfForOp); else if (AffineForOp affineForOp = dyn_cast(op)) - result = coalescePerfectlyNestedLoops(affineForOp); + result = 
coalescePerfectlyNestedAffineLoops(affineForOp); results.push_back(op); if (failed(result)) { diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp index a69df025bcba8..6ba7020e86fa6 100644 --- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp @@ -28,6 +28,7 @@ namespace { struct TestSCFParallelLoopCollapsing : public impl::TestSCFParallelLoopCollapsingBase< TestSCFParallelLoopCollapsing> { + void runOnOperation() override { Operation *module = getOperation(); @@ -88,6 +89,7 @@ struct TestSCFParallelLoopCollapsing // Only apply the transformation on parallel loops where the specified // transformation is valid, but do NOT early abort in the case of invalid // loops. + IRRewriter rewriter(&getContext()); module->walk([&](scf::ParallelOp op) { if (flattenedCombinedLoops.size() != op.getNumLoops()) { op.emitOpError("has ") @@ -97,7 +99,7 @@ struct TestSCFParallelLoopCollapsing << flattenedCombinedLoops.size() << " iter args."; return; } - collapseParallelLoops(op, combinedLoops); + collapseParallelLoops(rewriter, op, combinedLoops); }); } }; diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp index 914aeb4fa79fd..9279081cfd45d 100644 --- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/SCF/Utils/Utils.h" #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/IR/BuiltinOps.h" @@ -472,18 +473,23 @@ LogicalResult mlir::loopUnrollByFactor( return success(); } -/// Return the new lower bound, upper bound, and step in that order. Insert any -/// additional bounds calculations before the given builder and any additional -/// conversion back to the original loop induction value inside the given Block. -static LoopParams normalizeLoop(OpBuilder &boundsBuilder, - OpBuilder &insideLoopBuilder, Location loc, - Value lowerBound, Value upperBound, Value step, - Value inductionVar) { +/// Transform a loop with a strictly positive step +/// for %i = %lb to %ub step %s +/// into a 0-based loop with step 1 +/// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 { +/// %i = %ii * %s + %lb +/// Insert the induction variable remapping in the body of `inner`, which is +/// expected to be either `loop` or another loop perfectly nested under `loop`. +/// Insert the definition of new bounds immediate before `outer`, which is +/// expected to be either `loop` or its parent in the loop nest. +static LoopParams emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, + Value lb, Value ub, Value step) { + // For non-index types, generate `arith` instructions // Check if the loop is already known to have a constant zero lower bound or // a constant one step. bool isZeroBased = false; - if (auto ubCst = getConstantIntValue(lowerBound)) - isZeroBased = ubCst.value() == 0; + if (auto lbCst = getConstantIntValue(lb)) + isZeroBased = lbCst.value() == 0; bool isStepOne = false; if (auto stepCst = getConstantIntValue(step)) @@ -493,62 +499,90 @@ static LoopParams normalizeLoop(OpBuilder &boundsBuilder, // assuming the step is strictly positive. Update the bounds and the step // of the loop to go from 0 to the number of iterations, if necessary. 
if (isZeroBased && isStepOne) - return {/*lowerBound=*/lowerBound, /*upperBound=*/upperBound, - /*step=*/step}; + return {lb, ub, step}; - Value diff = boundsBuilder.create(loc, upperBound, lowerBound); + Value diff = isZeroBased ? ub : rewriter.create(loc, ub, lb); Value newUpperBound = - boundsBuilder.create(loc, diff, step); - - Value newLowerBound = - isZeroBased ? lowerBound - : boundsBuilder.create( - loc, boundsBuilder.getZeroAttr(lowerBound.getType())); - Value newStep = - isStepOne ? step - : boundsBuilder.create( - loc, boundsBuilder.getIntegerAttr(step.getType(), 1)); - - // Insert code computing the value of the original loop induction variable - // from the "normalized" one. - Value scaled = - isStepOne - ? inductionVar - : insideLoopBuilder.create(loc, inductionVar, step); - Value shifted = - isZeroBased - ? scaled - : insideLoopBuilder.create(loc, scaled, lowerBound); - - SmallPtrSet preserve{scaled.getDefiningOp(), - shifted.getDefiningOp()}; - inductionVar.replaceAllUsesExcept(shifted, preserve); - return {/*lowerBound=*/newLowerBound, /*upperBound=*/newUpperBound, - /*step=*/newStep}; + isStepOne ? diff : rewriter.create(loc, diff, step); + + Value newLowerBound = isZeroBased + ? lb + : rewriter.create( + loc, rewriter.getZeroAttr(lb.getType())); + Value newStep = isStepOne + ? step + : rewriter.create( + loc, rewriter.getIntegerAttr(step.getType(), 1)); + + return {newLowerBound, newUpperBound, newStep}; } -/// Transform a loop with a strictly positive step -/// for %i = %lb to %ub step %s -/// into a 0-based loop with step 1 -/// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 { -/// %i = %ii * %s + %lb -/// Insert the induction variable remapping in the body of `inner`, which is -/// expected to be either `loop` or another loop perfectly nested under `loop`. -/// Insert the definition of new bounds immediate before `outer`, which is -/// expected to be either `loop` or its parent in the loop nest. -static void normalizeLoop(scf::ForOp loop, scf::ForOp outer, scf::ForOp inner) { - OpBuilder builder(outer); - OpBuilder innerBuilder = OpBuilder::atBlockBegin(inner.getBody()); - auto loopPieces = normalizeLoop(builder, innerBuilder, loop.getLoc(), - loop.getLowerBound(), loop.getUpperBound(), - loop.getStep(), loop.getInductionVar()); - - loop.setLowerBound(loopPieces.lowerBound); - loop.setUpperBound(loopPieces.upperBound); - loop.setStep(loopPieces.step); +/// Get back the original induction variable values after loop normalization +static void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, + Value normalizedIv, Value origLb, + Value origStep) { + Value denormalizedIv; + SmallPtrSet preserve; + bool isStepOne = isConstantIntValue(origStep, 1); + bool isZeroBased = isConstantIntValue(origLb, 0); + + Value scaled = normalizedIv; + if (!isStepOne) { + scaled = rewriter.create(loc, normalizedIv, origStep); + preserve.insert(scaled.getDefiningOp()); + } + denormalizedIv = scaled; + if (!isZeroBased) { + denormalizedIv = rewriter.create(loc, scaled, origLb); + preserve.insert(denormalizedIv.getDefiningOp()); + } + + rewriter.replaceAllUsesExcept(normalizedIv, denormalizedIv, preserve); } -LogicalResult mlir::coalesceLoops(MutableArrayRef loops) { +/// Helper function to multiply a sequence of values. 
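Before the multiply helper that this comment introduces, a worked example of the two helpers above with %lb = 4, %ub = 19, %s = 3:

    // emitNormalizedLoopBounds: newUpperBound = ceildiv(19 - 4, 3) = 5, so
    // the loop becomes `for %ii = 0 to 5 step 1` (five iterations).
    // denormalizeInductionVariable then rebuilds the original value in the
    // body:
    //   %i = %ii * 3 + 4   // 4, 7, 10, 13, 16
    // and rewires every other use of %ii to %i; the muli/addi defining %i are
    // placed in the `preserve` set so they are not rewritten into themselves.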
+static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc, + ArrayRef values) { + assert(!values.empty() && "unexpected empty list"); + Value productOf = values.front(); + for (auto v : values.drop_front()) { + productOf = rewriter.create(loc, productOf, v); + } + return productOf; +} + +/// For each original loop, the value of the +/// induction variable can be obtained by dividing the induction variable of +/// the linearized loop by the total number of iterations of the loops nested +/// in it modulo the number of iterations in this loop (remove the values +/// related to the outer loops): +/// iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i. +/// Compute these iteratively from the innermost loop by creating a "running +/// quotient" of division by the range. +static std::pair, SmallPtrSet> +delinearizeInductionVariable(RewriterBase &rewriter, Location loc, + Value linearizedIv, ArrayRef ubs) { + Value previous = linearizedIv; + SmallVector delinearizedIvs(ubs.size()); + SmallPtrSet preservedUsers; + for (unsigned i = 0, e = ubs.size(); i < e; ++i) { + unsigned idx = ubs.size() - i - 1; + if (i != 0) { + previous = rewriter.create(loc, previous, ubs[idx + 1]); + preservedUsers.insert(previous.getDefiningOp()); + } + Value iv = previous; + if (i != e - 1) { + iv = rewriter.create(loc, previous, ubs[idx]); + preservedUsers.insert(iv.getDefiningOp()); + } + delinearizedIvs[idx] = iv; + } + return {delinearizedIvs, preservedUsers}; +} + +LogicalResult mlir::coalesceLoops(RewriterBase &rewriter, + MutableArrayRef loops) { if (loops.size() < 2) return failure(); @@ -557,57 +591,148 @@ LogicalResult mlir::coalesceLoops(MutableArrayRef loops) { // 1. Make sure all loops iterate from 0 to upperBound with step 1. This // allows the following code to assume upperBound is the number of iterations. - for (auto loop : loops) - normalizeLoop(loop, outermost, innermost); + for (auto loop : loops) { + OpBuilder::InsertionGuard g(rewriter); + rewriter.setInsertionPoint(outermost); + Value lb = loop.getLowerBound(); + Value ub = loop.getUpperBound(); + Value step = loop.getStep(); + auto newLoopParams = + emitNormalizedLoopBounds(rewriter, loop.getLoc(), lb, ub, step); + + rewriter.modifyOpInPlace(loop, [&]() { + loop.setLowerBound(newLoopParams.lowerBound); + loop.setUpperBound(newLoopParams.upperBound); + loop.setStep(newLoopParams.step); + }); + + rewriter.setInsertionPointToStart(innermost.getBody()); + denormalizeInductionVariable(rewriter, loop.getLoc(), + loop.getInductionVar(), lb, step); + } // 2. Emit code computing the upper bound of the coalesced loop as product // of the number of iterations of all loops. - OpBuilder builder(outermost); + OpBuilder::InsertionGuard g(rewriter); + rewriter.setInsertionPoint(outermost); Location loc = outermost.getLoc(); - Value upperBound = outermost.getUpperBound(); - for (auto loop : loops.drop_front()) - upperBound = - builder.create(loc, upperBound, loop.getUpperBound()); + SmallVector upperBounds = llvm::map_to_vector( + loops, [](auto loop) { return loop.getUpperBound(); }); + Value upperBound = getProductOfIntsOrIndexes(rewriter, loc, upperBounds); outermost.setUpperBound(upperBound); - builder.setInsertionPointToStart(outermost.getBody()); - - // 3. Remap induction variables. 
+
+LogicalResult mlir::coalesceLoops(RewriterBase &rewriter,
+                                  MutableArrayRef<scf::ForOp> loops) {
   if (loops.size() < 2)
     return failure();
 
@@ -557,57 +591,148 @@ LogicalResult mlir::coalesceLoops(MutableArrayRef<scf::ForOp> loops) {
 
   // 1. Make sure all loops iterate from 0 to upperBound with step 1. This
   // allows the following code to assume upperBound is the number of iterations.
-  for (auto loop : loops)
-    normalizeLoop(loop, outermost, innermost);
+  for (auto loop : loops) {
+    OpBuilder::InsertionGuard g(rewriter);
+    rewriter.setInsertionPoint(outermost);
+    Value lb = loop.getLowerBound();
+    Value ub = loop.getUpperBound();
+    Value step = loop.getStep();
+    auto newLoopParams =
+        emitNormalizedLoopBounds(rewriter, loop.getLoc(), lb, ub, step);
+
+    rewriter.modifyOpInPlace(loop, [&]() {
+      loop.setLowerBound(newLoopParams.lowerBound);
+      loop.setUpperBound(newLoopParams.upperBound);
+      loop.setStep(newLoopParams.step);
+    });
+
+    rewriter.setInsertionPointToStart(innermost.getBody());
+    denormalizeInductionVariable(rewriter, loop.getLoc(),
+                                 loop.getInductionVar(), lb, step);
+  }
 
   // 2. Emit code computing the upper bound of the coalesced loop as product
   // of the number of iterations of all loops.
-  OpBuilder builder(outermost);
+  OpBuilder::InsertionGuard g(rewriter);
+  rewriter.setInsertionPoint(outermost);
   Location loc = outermost.getLoc();
-  Value upperBound = outermost.getUpperBound();
-  for (auto loop : loops.drop_front())
-    upperBound =
-        builder.create<arith::MulIOp>(loc, upperBound, loop.getUpperBound());
+  SmallVector<Value> upperBounds = llvm::map_to_vector(
+      loops, [](auto loop) { return loop.getUpperBound(); });
+  Value upperBound = getProductOfIntsOrIndexes(rewriter, loc, upperBounds);
   outermost.setUpperBound(upperBound);
 
-  builder.setInsertionPointToStart(outermost.getBody());
-
-  // 3. Remap induction variables. For each original loop, the value of the
-  // induction variable can be obtained by dividing the induction variable of
-  // the linearized loop by the total number of iterations of the loops nested
-  // in it modulo the number of iterations in this loop (remove the values
-  // related to the outer loops):
-  //   iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i.
-  // Compute these iteratively from the innermost loop by creating a "running
-  // quotient" of division by the range.
-  Value previous = outermost.getInductionVar();
+  rewriter.setInsertionPointToStart(innermost.getBody());
+  auto [delinearizeIvs, preservedUsers] = delinearizeInductionVariable(
+      rewriter, loc, outermost.getInductionVar(), upperBounds);
+  rewriter.replaceAllUsesExcept(outermost.getInductionVar(), delinearizeIvs[0],
+                                preservedUsers);
+
+  for (int i = loops.size() - 1; i > 0; --i) {
+    auto outerLoop = loops[i - 1];
+    auto innerLoop = loops[i];
+
+    Operation *innerTerminator = innerLoop.getBody()->getTerminator();
+    auto yieldedVals = llvm::to_vector(innerTerminator->getOperands());
+    rewriter.eraseOp(innerTerminator);
+
+    SmallVector<Value> innerBlockArgs;
+    innerBlockArgs.push_back(delinearizeIvs[i]);
+    llvm::append_range(innerBlockArgs, outerLoop.getRegionIterArgs());
+    rewriter.inlineBlockBefore(innerLoop.getBody(), outerLoop.getBody(),
+                               Block::iterator(innerLoop), innerBlockArgs);
+    rewriter.replaceOp(innerLoop, yieldedVals);
+  }
+  return success();
+}
+
+LogicalResult mlir::coalesceLoops(MutableArrayRef<scf::ForOp> loops) {
+  if (loops.empty()) {
+    return failure();
+  }
+  IRRewriter rewriter(loops.front().getContext());
+  return coalesceLoops(rewriter, loops);
+}
+
+LogicalResult mlir::coalescePerfectlyNestedSCFForLoops(scf::ForOp op) {
+  LogicalResult result(failure());
+  SmallVector<scf::ForOp> loops;
+  getPerfectlyNestedLoops(loops, op);
+
+  // Look for a band of loops that can be coalesced, i.e. perfectly nested
+  // loops with bounds defined above some loop.
+
+  // 1. For each loop, find above which parent loop its bounds operands are
+  // defined.
+  SmallVector<unsigned> operandsDefinedAbove(loops.size());
   for (unsigned i = 0, e = loops.size(); i < e; ++i) {
-    unsigned idx = loops.size() - i - 1;
-    if (i != 0)
-      previous = builder.create<arith::DivSIOp>(loc, previous,
-                                                loops[idx + 1].getUpperBound());
-
-    Value iv = (i == e - 1) ? previous
-                            : builder.create<arith::RemSIOp>(
-                                  loc, previous, loops[idx].getUpperBound());
-    replaceAllUsesInRegionWith(loops[idx].getInductionVar(), iv,
-                               loops.back().getRegion());
+    operandsDefinedAbove[i] = i;
+    for (unsigned j = 0; j < i; ++j) {
+      SmallVector<Value> boundsOperands = {loops[i].getLowerBound(),
+                                           loops[i].getUpperBound(),
+                                           loops[i].getStep()};
+      if (areValuesDefinedAbove(boundsOperands, loops[j].getRegion())) {
+        operandsDefinedAbove[i] = j;
+        break;
+      }
+    }
   }
 
-  // 4. Move the operations from the innermost just above the second-outermost
-  // loop, delete the extra terminator and the second-outermost loop.
-  scf::ForOp second = loops[1];
-  innermost.getBody()->back().erase();
-  outermost.getBody()->getOperations().splice(
-      Block::iterator(second.getOperation()),
-      innermost.getBody()->getOperations());
-  second.erase();
-  return success();
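To make the interplay of step 1 and the band search in step 3 below concrete, here is a scalar model with an invented `operandsDefinedAbove` vector (loop 2's bounds are computed inside loop 0, so they are only available above loop 1):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      std::vector<unsigned> operandsDefinedAbove = {0, 0, 1};
      unsigned end = operandsDefinedAbove.size(); // try the full nest first
      unsigned start = 0;
      for (; start < end - 1; ++start) {
        unsigned maxPos = *std::max_element(operandsDefinedAbove.begin() + start,
                                            operandsDefinedAbove.begin() + end);
        if (maxPos <= start)
          break; // band [start, end) only uses values defined above `start`
      }
      assert(start == 1 && "loops 1 and 2 form the coalescable band");
    }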
+  // 2. For each inner loop, check that the region iter_args of the
+  // immediately outer loop are the inits of the immediately inner loop, and
+  // that the results of the inner loop are the values yielded back to the
+  // immediately outer loop. Keep track of where the chain starts from for
+  // each loop.
+  SmallVector<unsigned> iterArgChainStart(loops.size());
+  iterArgChainStart[0] = 0;
+  for (unsigned i = 1, e = loops.size(); i < e; ++i) {
+    // By default set the start of the chain to itself.
+    iterArgChainStart[i] = i;
+    auto outerLoop = loops[i - 1];
+    auto innerLoop = loops[i];
+    if (outerLoop.getNumRegionIterArgs() != innerLoop.getNumRegionIterArgs()) {
+      continue;
+    }
+    if (!llvm::equal(outerLoop.getRegionIterArgs(), innerLoop.getInitArgs())) {
+      continue;
+    }
+    auto outerLoopTerminator = outerLoop.getBody()->getTerminator();
+    if (!llvm::equal(outerLoopTerminator->getOperands(),
+                     innerLoop.getResults())) {
+      continue;
+    }
+    iterArgChainStart[i] = iterArgChainStart[i - 1];
+  }
+
+  // 3. Identify bands of loops such that the operands of all of them are
+  // defined above the first loop in the band. Traverse the nest bottom-up
+  // so that modifications don't invalidate the inner loops.
+  for (unsigned end = loops.size(); end > 0; --end) {
+    unsigned start = 0;
+    for (; start < end - 1; ++start) {
+      auto maxPos =
+          *std::max_element(std::next(operandsDefinedAbove.begin(), start),
+                            std::next(operandsDefinedAbove.begin(), end));
+      if (maxPos > start)
+        continue;
+      if (iterArgChainStart[end - 1] > start)
+        continue;
+      auto band = llvm::MutableArrayRef(loops.data() + start, end - start);
+      if (succeeded(coalesceLoops(band)))
+        result = success();
+      break;
+    }
+    // If a band was found and transformed, keep looking at the loops above
+    // the outermost transformed loop.
+    if (start != end - 1)
+      end = start + 1;
+  }
+  return result;
 }
 
 void mlir::collapseParallelLoops(
-    scf::ParallelOp loops, ArrayRef<std::vector<unsigned>> combinedDimensions) {
-  OpBuilder outsideBuilder(loops);
+    RewriterBase &rewriter, scf::ParallelOp loops,
+    ArrayRef<std::vector<unsigned>> combinedDimensions) {
+  OpBuilder::InsertionGuard g(rewriter);
+  rewriter.setInsertionPoint(loops);
   Location loc = loops.getLoc();
 
   // Presort combined dimensions.
@@ -619,25 +744,29 @@ void mlir::collapseParallelLoops(
   SmallVector<Value> normalizedLowerBounds, normalizedSteps,
       normalizedUpperBounds;
   for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
-    OpBuilder insideLoopBuilder = OpBuilder::atBlockBegin(loops.getBody());
-    auto resultBounds =
-        normalizeLoop(outsideBuilder, insideLoopBuilder, loc,
-                      loops.getLowerBound()[i], loops.getUpperBound()[i],
-                      loops.getStep()[i], loops.getBody()->getArgument(i));
-
-    normalizedLowerBounds.push_back(resultBounds.lowerBound);
-    normalizedUpperBounds.push_back(resultBounds.upperBound);
-    normalizedSteps.push_back(resultBounds.step);
+    OpBuilder::InsertionGuard g2(rewriter);
+    rewriter.setInsertionPoint(loops);
+    Value lb = loops.getLowerBound()[i];
+    Value ub = loops.getUpperBound()[i];
+    Value step = loops.getStep()[i];
+    auto newLoopParams = emitNormalizedLoopBounds(rewriter, loc, lb, ub, step);
+    normalizedLowerBounds.push_back(newLoopParams.lowerBound);
+    normalizedUpperBounds.push_back(newLoopParams.upperBound);
+    normalizedSteps.push_back(newLoopParams.step);
+
+    rewriter.setInsertionPointToStart(loops.getBody());
+    denormalizeInductionVariable(rewriter, loc, loops.getInductionVars()[i], lb,
+                                 step);
   }
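For the combination step that follows, each group of normalized dimensions collapses into one dimension whose extent is the product of the group's upper bounds. A sketch with invented bounds {4, 5, 6} and groups {{0, 2}, {1}}:

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> normalizedUb = {4, 5, 6};
      std::vector<std::vector<unsigned>> groups = {{0, 2}, {1}};
      std::vector<int> combinedUb;
      for (const auto &group : groups) {
        int ub = 1;
        for (unsigned idx : group)
          ub *= normalizedUb[idx]; // one collapsed dimension per group
        combinedUb.push_back(ub);
      }
      assert(combinedUb[0] == 24 && combinedUb[1] == 5);
    }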
   // Combine iteration spaces.
   SmallVector<Value> lowerBounds, upperBounds, steps;
-  auto cst0 = outsideBuilder.create<arith::ConstantIndexOp>(loc, 0);
-  auto cst1 = outsideBuilder.create<arith::ConstantIndexOp>(loc, 1);
+  auto cst0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+  auto cst1 = rewriter.create<arith::ConstantIndexOp>(loc, 1);
   for (auto &sortedDimension : sortedDimensions) {
-    Value newUpperBound = outsideBuilder.create<arith::ConstantIndexOp>(loc, 1);
+    Value newUpperBound = rewriter.create<arith::ConstantIndexOp>(loc, 1);
     for (auto idx : sortedDimension) {
-      newUpperBound = outsideBuilder.create<arith::MulIOp>(
+      newUpperBound = rewriter.create<arith::MulIOp>(
           loc, newUpperBound, normalizedUpperBounds[idx]);
     }
     lowerBounds.push_back(cst0);
@@ -651,7 +780,7 @@ void mlir::collapseParallelLoops(
   // value. The remainders then determine based on that range, which iteration
   // of the original induction value this represents. This is a normalized value
   // that is un-normalized already by the previous logic.
-  auto newPloop = outsideBuilder.create<scf::ParallelOp>(
+  auto newPloop = rewriter.create<scf::ParallelOp>(
       loc, lowerBounds, upperBounds, steps,
       [&](OpBuilder &insideBuilder, Location, ValueRange ploopIVs) {
         for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt
index 3c0f82fc00bb9..af3a1b48f45af 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt
@@ -20,7 +20,7 @@ add_mlir_dialect_library(MLIRSparseTensorTransforms
   Utils/IterationGraphSorter.cpp
   Utils/LoopEmitter.cpp
   Utils/SparseTensorDescriptor.cpp
-  Utils/SparseTensorLevel.cpp
+  Utils/SparseTensorIterator.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/SparseTensor
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.h b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.h
index b5a0ac8484abd..59c3e49264dbe 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/LoopEmitter.h
@@ -11,7 +11,7 @@
 
 #include
 
-#include "SparseTensorLevel.h"
+#include "SparseTensorIterator.h"
 
 #include "mlir/Dialect/SparseTensor/IR/Enums.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorLevel.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.cpp
similarity index 96%
rename from mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorLevel.cpp
rename to mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.cpp
index bc27fae5d1948..60dca3c55dec3 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorLevel.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.cpp
@@ -1,4 +1,4 @@
-//===- SparseTensorLevel.cpp - Tensor management class -------------------===//
+//===- SparseTensorIterator.cpp -------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "SparseTensorLevel.h"
+#include "SparseTensorIterator.h"
 #include "CodegenUtils.h"
 
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
@@ -46,21 +46,41 @@ using ValueTuple = std::tuple<Value, Value, Value>;
 
 namespace {
 
+template <bool hasPosBuffer>
 class SparseLevel : public SparseTensorLevel {
+  // It is either an array of size 2 or size 1 depending on whether the sparse
+  // level requires a position array.
+  using BufferT = std::conditional_t<hasPosBuffer, std::array<Value, 2>,
+                                     std::array<Value, 1>>;
+
 public:
   SparseLevel(unsigned tid, Level lvl, LevelType lt, Value lvlSize,
-              Value crdBuffer)
-      : SparseTensorLevel(tid, lvl, lt, lvlSize), crdBuffer(crdBuffer) {}
+              BufferT buffers)
+      : SparseTensorLevel(tid, lvl, lt, lvlSize), buffers(buffers) {}
+
+  ValueRange getLvlBuffers() const override { return buffers; }
 
   Value peekCrdAt(OpBuilder &b, Location l, ValueRange batchPrefix,
                   Value iv) const override {
     SmallVector<Value> memCrd(batchPrefix);
     memCrd.push_back(iv);
-    return genIndexLoad(b, l, crdBuffer, memCrd);
+    return genIndexLoad(b, l, getCrdBuf(), memCrd);
   }
 
 protected:
-  const Value crdBuffer;
+  template <typename T = void, typename = std::enable_if_t<hasPosBuffer, T>>
+  Value getPosBuf() const {
+    return buffers[0];
+  }
+
+  Value getCrdBuf() const {
+    if constexpr (hasPosBuffer)
+      return buffers[1];
+    else
+      return buffers[0];
+  }
+
+  const BufferT buffers;
 };
 
 class DenseLevel : public SparseTensorLevel {
@@ -72,6 +92,8 @@ class DenseLevel : public SparseTensorLevel {
     llvm_unreachable("locate random-accessible level instead");
   }
 
+  ValueRange getLvlBuffers() const override { return {}; }
+
   ValuePair peekRangeAt(OpBuilder &b, Location l, ValueRange, Value p,
                         Value max) const override {
     Value posLo = MULI(p, lvlSize);
@@ -88,6 +110,8 @@ class BatchLevel : public SparseTensorLevel {
     llvm_unreachable("locate random-accessible level instead");
   }
 
+  ValueRange getLvlBuffers() const override { return {}; }
+
   ValuePair peekRangeAt(OpBuilder &b, Location l, ValueRange, Value p,
                         Value max) const override {
     assert(max == nullptr && "Dense level can not be non-unique.");
@@ -96,11 +120,11 @@ class BatchLevel : public SparseTensorLevel {
   }
 };
 
-class CompressedLevel : public SparseLevel {
+class CompressedLevel : public SparseLevel<true> {
 public:
   CompressedLevel(unsigned tid, Level lvl, LevelType lt, Value lvlSize,
                   Value posBuffer, Value crdBuffer)
-      : SparseLevel(tid, lvl, lt, lvlSize, crdBuffer), posBuffer(posBuffer) {}
+      : SparseLevel(tid, lvl, lt, lvlSize, {posBuffer, crdBuffer}) {}
 
   ValuePair peekRangeAt(OpBuilder &b, Location l, ValueRange batchPrefix,
                         Value p, Value max) const override {
@@ -109,21 +133,18 @@ class CompressedLevel : public SparseLevel {
 
     SmallVector<Value> memCrd(batchPrefix);
     memCrd.push_back(p);
-    Value pLo = genIndexLoad(b, l, posBuffer, memCrd);
+    Value pLo = genIndexLoad(b, l, getPosBuf(), memCrd);
     memCrd.back() = ADDI(p, C_IDX(1));
-    Value pHi = genIndexLoad(b, l, posBuffer, memCrd);
+    Value pHi = genIndexLoad(b, l, getPosBuf(), memCrd);
     return {pLo, pHi};
   }
-
-private:
-  const Value posBuffer;
 };
 
-class LooseCompressedLevel : public SparseLevel {
+class LooseCompressedLevel : public SparseLevel<true> {
 public:
   LooseCompressedLevel(unsigned tid, Level lvl, LevelType lt, Value lvlSize,
                        Value posBuffer, Value crdBuffer)
-      : SparseLevel(tid, lvl, lt, lvlSize, crdBuffer), posBuffer(posBuffer) {}
+      : SparseLevel(tid, lvl, lt, lvlSize, {posBuffer, crdBuffer}) {}
 
   ValuePair peekRangeAt(OpBuilder &b, Location l, ValueRange batchPrefix,
                         Value p, Value max) const override {
@@ -133,21 +154,18 @@ class LooseCompressedLevel : public SparseLevel {
     p = MULI(p, C_IDX(2));
     memCrd.push_back(p);
-    Value pLo = genIndexLoad(b, l, posBuffer, memCrd);
+    Value pLo = genIndexLoad(b, l, getPosBuf(), memCrd);
     memCrd.back() = ADDI(p, C_IDX(1));
-    Value pHi = genIndexLoad(b, l, posBuffer, memCrd);
+    Value pHi = genIndexLoad(b, l, getPosBuf(), memCrd);
     return {pLo, pHi};
   }
-
-private:
-  const Value posBuffer;
 };
 
-class SingletonLevel : public SparseLevel {
+class SingletonLevel : public SparseLevel<false> {
 public:
   SingletonLevel(unsigned
tid, Level lvl, LevelType lt, Value lvlSize, Value crdBuffer) - : SparseLevel(tid, lvl, lt, lvlSize, crdBuffer) {} + : SparseLevel(tid, lvl, lt, lvlSize, {crdBuffer}) {} ValuePair peekRangeAt(OpBuilder &b, Location l, ValueRange batchPrefix, Value p, Value segHi) const override { @@ -159,11 +177,11 @@ class SingletonLevel : public SparseLevel { } }; -class NOutOfMLevel : public SparseLevel { +class NOutOfMLevel : public SparseLevel { public: NOutOfMLevel(unsigned tid, Level lvl, LevelType lt, Value lvlSize, Value crdBuffer) - : SparseLevel(tid, lvl, lt, lvlSize, crdBuffer) {} + : SparseLevel(tid, lvl, lt, lvlSize, {crdBuffer}) {} ValuePair peekRangeAt(OpBuilder &b, Location l, ValueRange batchPrefix, Value p, Value max) const override { diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorLevel.h b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.h similarity index 97% rename from mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorLevel.h rename to mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.h index 9f92eecdf75cb..9d69a23355598 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorLevel.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Utils/SparseTensorIterator.h @@ -1,4 +1,4 @@ -//===- SparseTensorLevel.h --------------------------------------*- C++ -*-===// +//===- SparseTensorIterator.h ---------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_UTILS_SPARSETENSORLEVEL_H_ -#define MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_UTILS_SPARSETENSORLEVEL_H_ +#ifndef MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_UTILS_SPARSETENSORITERATOR_H_ +#define MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_UTILS_SPARSETENSORITERATOR_H_ #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" #include "mlir/Dialect/SparseTensor/Transforms/Passes.h" @@ -55,6 +55,7 @@ class SparseTensorLevel { Level getLevel() const { return lvl; } LevelType getLT() const { return lt; } Value getSize() const { return lvlSize; } + virtual ValueRange getLvlBuffers() const = 0; // // Level properties @@ -321,4 +322,4 @@ std::unique_ptr makeTraverseSubSectIterator( } // namespace sparse_tensor } // namespace mlir -#endif // MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_UTILS_SPARSETENSORLEVEL_H_ +#endif // MLIR_DIALECT_SPARSETENSOR_TRANSFORMS_UTILS_SPARSETENSORITERATOR_H_ diff --git a/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp b/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp index 383ef1cea53fd..adde8a66d8354 100644 --- a/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp +++ b/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp @@ -199,8 +199,10 @@ Operation *mlir::clone(OpBuilder &b, Operation *op, TypeRange newResultTypes, IRMapping bvm; OperationState state(op->getLoc(), op->getName(), newOperands, newResultTypes, op->getAttrs()); - for (Region &r : op->getRegions()) - r.cloneInto(state.addRegion(), bvm); + for (Region &r : op->getRegions()) { + Region *newRegion = state.addRegion(); + b.cloneRegionBefore(r, *newRegion, newRegion->begin(), bvm); + } return b.create(state); } diff --git a/mlir/lib/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.cpp b/mlir/lib/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.cpp index 52359fa8a510d..f8df34843a363 100644 --- 
a/mlir/lib/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.cpp +++ b/mlir/lib/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.cpp @@ -59,12 +59,16 @@ ScalableValueBoundsConstraintSet::computeScalableBound( ScalableValueBoundsConstraintSet scalableCstr( value.getContext(), stopCondition ? stopCondition : defaultStopCondition, vscaleMin, vscaleMax); - int64_t pos = scalableCstr.populateConstraintsSet(value, dim); + int64_t pos = scalableCstr.insert(value, dim, /*isSymbol=*/false); + scalableCstr.processWorklist(); - // Project out all variables apart from vscale. - // This should result in constraints in terms of vscale only. + // Project out all columns apart from vscale and the starting point + // (value/dim). This should result in constraints in terms of vscale only. auto projectOutFn = [&](ValueDim p) { - return p.first != scalableCstr.getVscaleValue(); + bool isStartingPoint = + p.first == value && + p.second == dim.value_or(ValueBoundsConstraintSet::kIndexValue); + return p.first != scalableCstr.getVscaleValue() && !isStartingPoint; }; scalableCstr.projectOut(projectOutFn); @@ -72,7 +76,7 @@ ScalableValueBoundsConstraintSet::computeScalableBound( scalableCstr.positionToValueDim.size() && "inconsistent mapping state"); - // Check that the only symbols left are vscale. + // Check that the only columns left are vscale and the starting point. for (int64_t i = 0; i < scalableCstr.cstr.getNumDimAndSymbolVars(); ++i) { if (i == pos) continue; diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp index bed2c2496719d..2f21c50c63473 100644 --- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp @@ -12,9 +12,19 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" +#include "mlir/Dialect/Vector/Transforms/Passes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/TypeUtilities.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +namespace mlir { +namespace vector { +#define GEN_PASS_DEF_LOWERVECTORMULTIREDUCTION +#include "mlir/Dialect/Vector/Transforms/Passes.h.inc" +} // namespace vector +} // namespace mlir #define DEBUG_TYPE "vector-multi-reduction" @@ -461,6 +471,31 @@ struct OneDimMultiReductionToTwoDim return success(); } }; + +struct LowerVectorMultiReductionPass + : public vector::impl::LowerVectorMultiReductionBase< + LowerVectorMultiReductionPass> { + LowerVectorMultiReductionPass(vector::VectorMultiReductionLowering option) { + this->loweringStrategy = option; + } + + void runOnOperation() override { + Operation *op = getOperation(); + MLIRContext *context = op->getContext(); + + RewritePatternSet loweringPatterns(context); + populateVectorMultiReductionLoweringPatterns(loweringPatterns, + this->loweringStrategy); + + if (failed(applyPatternsAndFoldGreedily(op, std::move(loweringPatterns)))) + signalPassFailure(); + } + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } +}; + } // namespace void mlir::vector::populateVectorMultiReductionLoweringPatterns( @@ -476,3 +511,8 @@ void mlir::vector::populateVectorMultiReductionLoweringPatterns( patterns.add(patterns.getContext(), benefit); } + +std::unique_ptr vector::createLowerVectorMultiReductionPass( + 
vector::VectorMultiReductionLowering option) { + return std::make_unique(option); +} diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 456cf6a2c2778..e915b97d9ff17 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -212,6 +212,9 @@ OpPrintingFlags::OpPrintingFlags() return; if (clOptions->elideElementsAttrIfLarger.getNumOccurrences()) elementsAttrElementLimit = clOptions->elideElementsAttrIfLarger; + if (clOptions->printElementsAttrWithHexIfLarger.getNumOccurrences()) + elementsAttrHexElementLimit = + clOptions->printElementsAttrWithHexIfLarger.getValue(); if (clOptions->elideResourceStringsIfLarger.getNumOccurrences()) resourceStringCharLimit = clOptions->elideResourceStringsIfLarger; printDebugInfoFlag = clOptions->printDebugInfoOpt; @@ -233,6 +236,12 @@ OpPrintingFlags::elideLargeElementsAttrs(int64_t largeElementLimit) { return *this; } +OpPrintingFlags & +OpPrintingFlags::printLargeElementsAttrWithHex(int64_t largeElementLimit) { + elementsAttrHexElementLimit = largeElementLimit; + return *this; +} + OpPrintingFlags & OpPrintingFlags::elideLargeResourceString(int64_t largeResourceLimit) { resourceStringCharLimit = largeResourceLimit; @@ -287,11 +296,24 @@ bool OpPrintingFlags::shouldElideElementsAttr(ElementsAttr attr) const { !llvm::isa(attr); } +/// Return if the given ElementsAttr should be printed as hex string. +bool OpPrintingFlags::shouldPrintElementsAttrWithHex(ElementsAttr attr) const { + // -1 is used to disable hex printing. + return (elementsAttrHexElementLimit != -1) && + (elementsAttrHexElementLimit < int64_t(attr.getNumElements())) && + !llvm::isa(attr); +} + /// Return the size limit for printing large ElementsAttr. std::optional OpPrintingFlags::getLargeElementsAttrLimit() const { return elementsAttrElementLimit; } +/// Return the size limit for printing large ElementsAttr as hex string. +int64_t OpPrintingFlags::getLargeElementsAttrHexLimit() const { + return elementsAttrHexElementLimit; +} + /// Return the size limit for printing large ElementsAttr. std::optional OpPrintingFlags::getLargeResourceStringLimit() const { return resourceStringCharLimit; @@ -328,23 +350,6 @@ bool OpPrintingFlags::shouldPrintValueUsers() const { return printValueUsersFlag; } -/// Returns true if an ElementsAttr with the given number of elements should be -/// printed with hex. -static bool shouldPrintElementsAttrWithHex(int64_t numElements) { - // Check to see if a command line option was provided for the limit. - if (clOptions.isConstructed()) { - if (clOptions->printElementsAttrWithHexIfLarger.getNumOccurrences()) { - // -1 is used to disable hex printing. - if (clOptions->printElementsAttrWithHexIfLarger == -1) - return false; - return numElements > clOptions->printElementsAttrWithHexIfLarger; - } - } - - // Otherwise, default to printing with hex if the number of elements is >100. - return numElements > 100; -} - //===----------------------------------------------------------------------===// // NewLineCounter //===----------------------------------------------------------------------===// @@ -2435,9 +2440,7 @@ void AsmPrinter::Impl::printDenseIntOrFPElementsAttr( auto elementType = type.getElementType(); // Check to see if we should format this attribute as a hex string. 
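The flag added above can also be set programmatically rather than via the command line. A hedged usage sketch (the limit of 64 is an arbitrary choice, and `module` is assumed to be a valid ModuleOp obtained elsewhere):

    #include "mlir/IR/BuiltinOps.h"
    #include "mlir/IR/OperationSupport.h"
    #include "llvm/Support/raw_ostream.h"

    // Print `module`, hex-encoding eligible ElementsAttrs with more than 64
    // elements; a limit of -1 disables hex printing, as noted above.
    void printWithHexElision(mlir::ModuleOp module) {
      mlir::OpPrintingFlags flags;
      flags.printLargeElementsAttrWithHex(/*largeElementLimit=*/64);
      module->print(llvm::outs(), flags);
    }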
- auto numElements = type.getNumElements(); - if (!attr.isSplat() && allowHex && - shouldPrintElementsAttrWithHex(numElements)) { + if (allowHex && printerFlags.shouldPrintElementsAttrWithHex(attr)) { ArrayRef rawData = attr.getRawData(); if (llvm::endianness::native == llvm::endianness::big) { // Convert endianess in big-endian(BE) machines. `rawData` is BE in BE diff --git a/mlir/lib/IR/PatternMatch.cpp b/mlir/lib/IR/PatternMatch.cpp index 5944a0ea46a14..286f47ce69136 100644 --- a/mlir/lib/IR/PatternMatch.cpp +++ b/mlir/lib/IR/PatternMatch.cpp @@ -11,6 +11,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/IR/Iterators.h" #include "mlir/IR/RegionKindInterface.h" +#include "llvm/ADT/SmallPtrSet.h" using namespace mlir; @@ -250,6 +251,14 @@ void RewriterBase::finalizeOpModification(Operation *op) { rewriteListener->notifyOperationModified(op); } +void RewriterBase::replaceAllUsesExcept( + Value from, Value to, const SmallPtrSetImpl &preservedUsers) { + return replaceUsesWithIf(from, to, [&](OpOperand &use) { + Operation *user = use.getOwner(); + return !preservedUsers.contains(user); + }); +} + void RewriterBase::replaceUsesWithIf(Value from, Value to, function_ref functor, bool *allUsesReplaced) { diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp index 0d362c7efa0a0..6e3d6dd3c7575 100644 --- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp +++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp @@ -110,25 +110,47 @@ AffineExpr ValueBoundsConstraintSet::getExpr(Value value, assertValidValueDim(value, dim); #endif // NDEBUG + // Check if the value/dim is statically known. In that case, an affine + // constant expression should be returned. This allows us to support + // multiplications with constants. (Multiplications of two columns in the + // constraint set is not supported.) + std::optional constSize = std::nullopt; auto shapedType = dyn_cast(value.getType()); if (shapedType) { - // Static dimension: return constant directly. if (shapedType.hasRank() && !shapedType.isDynamicDim(*dim)) - return builder.getAffineConstantExpr(shapedType.getDimSize(*dim)); - } else { - // Constant index value: return directly. - if (auto constInt = ::getConstantIntValue(value)) - return builder.getAffineConstantExpr(*constInt); + constSize = shapedType.getDimSize(*dim); + } else if (auto constInt = ::getConstantIntValue(value)) { + constSize = *constInt; } - // Dynamic value: add to constraint set. + // If the value/dim is already mapped, return the corresponding expression + // directly. ValueDim valueDim = std::make_pair(value, dim.value_or(kIndexValue)); - if (!valueDimToPosition.contains(valueDim)) - (void)insert(value, dim); - int64_t pos = getPos(value, dim); - return pos < cstr.getNumDimVars() - ? builder.getAffineDimExpr(pos) - : builder.getAffineSymbolExpr(pos - cstr.getNumDimVars()); + if (valueDimToPosition.contains(valueDim)) { + // If it is a constant, return an affine constant expression. Otherwise, + // return an affine expression that represents the respective column in the + // constraint set. + if (constSize) + return builder.getAffineConstantExpr(*constSize); + return getPosExpr(getPos(value, dim)); + } + + if (constSize) { + // Constant index value/dim: add column to the constraint set, add EQ bound + // and return an affine constant expression without pushing the newly added + // column to the worklist. 
+ (void)insert(value, dim, /*isSymbol=*/true, /*addToWorklist=*/false); + if (shapedType) + bound(value)[*dim] == *constSize; + else + bound(value) == *constSize; + return builder.getAffineConstantExpr(*constSize); + } + + // Dynamic value/dim: insert column to the constraint set and put it on the + // worklist. Return an affine expression that represents the newly inserted + // column in the constraint set. + return getPosExpr(insert(value, dim, /*isSymbol=*/true)); } AffineExpr ValueBoundsConstraintSet::getExpr(OpFoldResult ofr) { @@ -145,7 +167,7 @@ AffineExpr ValueBoundsConstraintSet::getExpr(int64_t constant) { int64_t ValueBoundsConstraintSet::insert(Value value, std::optional dim, - bool isSymbol) { + bool isSymbol, bool addToWorklist) { #ifndef NDEBUG assertValidValueDim(value, dim); #endif // NDEBUG @@ -160,7 +182,12 @@ int64_t ValueBoundsConstraintSet::insert(Value value, if (positionToValueDim[i].has_value()) valueDimToPosition[*positionToValueDim[i]] = i; - worklist.push(pos); + if (addToWorklist) { + LLVM_DEBUG(llvm::dbgs() << "Push to worklist: " << value + << " (dim: " << dim.value_or(kIndexValue) << ")\n"); + worklist.push(pos); + } + return pos; } @@ -190,6 +217,13 @@ int64_t ValueBoundsConstraintSet::getPos(Value value, return it->second; } +AffineExpr ValueBoundsConstraintSet::getPosExpr(int64_t pos) { + assert(pos >= 0 && pos < cstr.getNumDimAndSymbolVars() && "invalid position"); + return pos < cstr.getNumDimVars() + ? builder.getAffineDimExpr(pos) + : builder.getAffineSymbolExpr(pos - cstr.getNumDimVars()); +} + static Operation *getOwnerOfValue(Value value) { if (auto bbArg = dyn_cast(value)) return bbArg.getOwner()->getParentOp(); @@ -492,7 +526,7 @@ FailureOr ValueBoundsConstraintSet::computeConstantBound( // Default stop condition if none was specified: Keep adding constraints until // a bound could be computed. - int64_t pos; + int64_t pos = 0; auto defaultStopCondition = [&](Value v, std::optional dim, ValueBoundsConstraintSet &cstr) { return cstr.cstr.getConstantBound64(type, pos).has_value(); @@ -500,7 +534,8 @@ FailureOr ValueBoundsConstraintSet::computeConstantBound( ValueBoundsConstraintSet cstr( map.getContext(), stopCondition ? stopCondition : defaultStopCondition); - cstr.populateConstraintsSet(map, operands, &pos); + pos = cstr.populateConstraints(map, operands); + assert(pos == 0 && "expected `map` is the first column"); // Compute constant bound for `valueDim`. int64_t ubAdjustment = closedUB ? 0 : 1; @@ -509,29 +544,28 @@ FailureOr ValueBoundsConstraintSet::computeConstantBound( return failure(); } -int64_t -ValueBoundsConstraintSet::populateConstraintsSet(Value value, - std::optional dim) { +void ValueBoundsConstraintSet::populateConstraints(Value value, + std::optional dim) { #ifndef NDEBUG assertValidValueDim(value, dim); #endif // NDEBUG - AffineMap map = - AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, - Builder(value.getContext()).getAffineDimExpr(0)); - return populateConstraintsSet(map, {{value, dim}}); + // `getExpr` pushes the value/dim onto the worklist (unless it was already + // analyzed). + (void)getExpr(value, dim); + // Process all values/dims on the worklist. This may traverse and analyze + // additional IR, depending the current stop function. 
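For orientation, the usual entry point into this worklist machinery is computeConstantBound. A sketch under the assumption that the signature shown in this patch is in scope and `value` is index-typed:

    #include "mlir/Interfaces/ValueBoundsOpInterface.h"

    // With no explicit stop condition, constraints keep being added until a
    // constant bound can be derived (the default stop condition above).
    mlir::FailureOr<int64_t> constantUpperBound(mlir::Value value) {
      return mlir::ValueBoundsConstraintSet::computeConstantBound(
          mlir::presburger::BoundType::UB, value,
          /*dim=*/std::nullopt, /*stopCondition=*/nullptr, /*closedUB=*/true);
    }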
+ processWorklist(); } -int64_t ValueBoundsConstraintSet::populateConstraintsSet(AffineMap map, - ValueDimList operands, - int64_t *posOut) { +int64_t ValueBoundsConstraintSet::populateConstraints(AffineMap map, + ValueDimList operands) { assert(map.getNumResults() == 1 && "expected affine map with one result"); int64_t pos = insert(/*isSymbol=*/false); - if (posOut) - *posOut = pos; // Add map and operands to the constraint set. Dimensions are converted to - // symbols. All operands are added to the worklist. + // symbols. All operands are added to the worklist (unless they were already + // processed). auto mapper = [&](std::pair> v) { return getExpr(v.first, v.second); }; @@ -566,6 +600,55 @@ ValueBoundsConstraintSet::computeConstantDelta(Value value1, Value value2, {{value1, dim1}, {value2, dim2}}); } +bool ValueBoundsConstraintSet::compare(Value lhs, std::optional lhsDim, + ComparisonOperator cmp, Value rhs, + std::optional rhsDim) { + // This function returns "true" if "lhs CMP rhs" is proven to hold. + // + // Example for ComparisonOperator::LE and index-typed values: We would like to + // prove that lhs <= rhs. Proof by contradiction: add the inverse + // relation (lhs > rhs) to the constraint set and check if the resulting + // constraint set is "empty" (i.e. has no solution). In that case, + // lhs > rhs must be incorrect and we can deduce that lhs <= rhs holds. + + // We cannot prove anything if the constraint set is already empty. + if (cstr.isEmpty()) { + LLVM_DEBUG( + llvm::dbgs() + << "cannot compare value/dims: constraint system is already empty"); + return false; + } + + // EQ can be expressed as LE and GE. + if (cmp == EQ) + return compare(lhs, lhsDim, ComparisonOperator::LE, rhs, rhsDim) && + compare(lhs, lhsDim, ComparisonOperator::GE, rhs, rhsDim); + + // Construct inequality. For the above example: lhs > rhs. + // `IntegerRelation` inequalities are expressed in the "flattened" form and + // with ">= 0". I.e., lhs - rhs - 1 >= 0. + SmallVector eq(cstr.getNumDimAndSymbolVars() + 1, 0); + if (cmp == LT || cmp == LE) { + ++eq[getPos(lhs, lhsDim)]; + --eq[getPos(rhs, rhsDim)]; + } else if (cmp == GT || cmp == GE) { + --eq[getPos(lhs, lhsDim)]; + ++eq[getPos(rhs, rhsDim)]; + } else { + llvm_unreachable("unsupported comparison operator"); + } + if (cmp == LE || cmp == GE) + eq[cstr.getNumDimAndSymbolVars()] -= 1; + + // Add inequality to the constraint set and check if it made the constraint + // set empty. 
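On concrete numbers, the proof by contradiction works as follows; this toy substitutes a brute-force grid for the Presburger emptiness test:

    #include <cassert>

    // Known fact: y >= x + 1. Goal: prove x <= y. Add the inverse relation
    // x > y, flattened to x - y - 1 >= 0, and check for solutions.
    int main() {
      bool hasSolution = false;
      for (int x = -8; x <= 8; ++x)
        for (int y = -8; y <= 8; ++y)
          if (y - x - 1 >= 0 && x - y - 1 >= 0)
            hasSolution = true;
      assert(!hasSolution && "system is empty, so x <= y is proven");
    }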
+ int64_t ineqPos = cstr.getNumInequalities(); + cstr.addInequality(eq); + bool isEmpty = cstr.isEmpty(); + cstr.removeInequality(ineqPos); + return isEmpty; +} + FailureOr ValueBoundsConstraintSet::areEqual(Value value1, Value value2, std::optional dim1, diff --git a/mlir/lib/Target/LLVMIR/DebugImporter.cpp b/mlir/lib/Target/LLVMIR/DebugImporter.cpp index 779ad26fc847e..4a4e1d1ecdd86 100644 --- a/mlir/lib/Target/LLVMIR/DebugImporter.cpp +++ b/mlir/lib/Target/LLVMIR/DebugImporter.cpp @@ -13,6 +13,7 @@ #include "mlir/IR/Location.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/Constants.h" @@ -25,6 +26,10 @@ using namespace mlir; using namespace mlir::LLVM; using namespace mlir::LLVM::detail; +DebugImporter::DebugImporter(ModuleOp mlirModule) + : recursionPruner(mlirModule.getContext()), + context(mlirModule.getContext()), mlirModule(mlirModule) {} + Location DebugImporter::translateFuncLocation(llvm::Function *func) { llvm::DISubprogram *subprogram = func->getSubprogram(); if (!subprogram) @@ -51,10 +56,14 @@ DIBasicTypeAttr DebugImporter::translateImpl(llvm::DIBasicType *node) { DICompileUnitAttr DebugImporter::translateImpl(llvm::DICompileUnit *node) { std::optional emissionKind = symbolizeDIEmissionKind(node->getEmissionKind()); + std::optional nameTableKind = symbolizeDINameTableKind( + static_cast< + std::underlying_type_t>( + node->getNameTableKind())); return DICompileUnitAttr::get( context, getOrCreateDistinctID(node), node->getSourceLanguage(), translate(node->getFile()), getStringAttrOrNull(node->getRawProducer()), - node->isOptimized(), emissionKind.value()); + node->isOptimized(), emissionKind.value(), nameTableKind.value()); } DICompositeTypeAttr DebugImporter::translateImpl(llvm::DICompositeType *node) { @@ -246,42 +255,13 @@ DINodeAttr DebugImporter::translate(llvm::DINode *node) { if (DINodeAttr attr = nodeToAttr.lookup(node)) return attr; - // If the node type is capable of being recursive, check if it's seen before. - auto recSelfCtor = getRecSelfConstructor(node); - if (recSelfCtor) { - // If a cyclic dependency is detected since the same node is being traversed - // twice, emit a recursive self type, and mark the duplicate node on the - // translationStack so it can emit a recursive decl type. - auto [iter, inserted] = translationStack.try_emplace(node, nullptr); - if (!inserted) { - // The original node may have already been assigned a recursive ID from - // a different self-reference. Use that if possible. - DistinctAttr recId = iter->second; - if (!recId) { - recId = DistinctAttr::create(UnitAttr::get(context)); - iter->second = recId; - } - unboundRecursiveSelfRefs.back().insert(recId); - return cast(recSelfCtor(recId)); - } - } - - unboundRecursiveSelfRefs.emplace_back(); - - auto guard = llvm::make_scope_exit([&]() { - if (recSelfCtor) - translationStack.pop_back(); + // Register with the recursive translator. If it can be handled without + // recursing into it, return the result immediately. + if (DINodeAttr attr = recursionPruner.pruneOrPushTranslationStack(node)) + return attr; - // Copy unboundRecursiveSelfRefs down to the previous level. 
- if (unboundRecursiveSelfRefs.size() == 1) - assert(unboundRecursiveSelfRefs.back().empty() && - "internal error: unbound recursive self reference at top level."); - else - unboundRecursiveSelfRefs[unboundRecursiveSelfRefs.size() - 2].insert( - unboundRecursiveSelfRefs.back().begin(), - unboundRecursiveSelfRefs.back().end()); - unboundRecursiveSelfRefs.pop_back(); - }); + auto guard = llvm::make_scope_exit( + [&]() { recursionPruner.popTranslationStack(node); }); // Convert the debug metadata if possible. auto translateNode = [this](llvm::DINode *node) -> DINodeAttr { @@ -318,22 +298,130 @@ DINodeAttr DebugImporter::translate(llvm::DINode *node) { return nullptr; }; if (DINodeAttr attr = translateNode(node)) { - // If this node was marked as recursive, set its recId. - if (auto recType = dyn_cast(attr)) { - if (DistinctAttr recId = translationStack.lookup(node)) { - attr = cast(recType.withRecId(recId)); - // Remove the unbound recursive ID from the set of unbound self - // references in the translation stack. - unboundRecursiveSelfRefs.back().erase(recId); + auto [result, isSelfContained] = + recursionPruner.finalizeTranslation(node, attr); + // Only cache fully self-contained nodes. + if (isSelfContained) + nodeToAttr.try_emplace(node, result); + return result; + } + return nullptr; +} + +//===----------------------------------------------------------------------===// +// RecursionPruner +//===----------------------------------------------------------------------===// + +/// Get the `getRecSelf` constructor for the translated type of `node` if its +/// translated DITypeAttr supports recursion. Otherwise, returns nullptr. +static function_ref +getRecSelfConstructor(llvm::DINode *node) { + using CtorType = function_ref; + return TypeSwitch(node) + .Case([&](llvm::DICompositeType *) { + return CtorType(DICompositeTypeAttr::getRecSelf); + }) + .Default(CtorType()); +} + +DINodeAttr DebugImporter::RecursionPruner::pruneOrPushTranslationStack( + llvm::DINode *node) { + // If the node type is capable of being recursive, check if it's seen + // before. + auto recSelfCtor = getRecSelfConstructor(node); + if (recSelfCtor) { + // If a cyclic dependency is detected since the same node is being + // traversed twice, emit a recursive self type, and mark the duplicate + // node on the translationStack so it can emit a recursive decl type. + auto [iter, inserted] = translationStack.try_emplace(node); + if (!inserted) { + // The original node may have already been assigned a recursive ID from + // a different self-reference. Use that if possible. + DIRecursiveTypeAttrInterface recSelf = iter->second.recSelf; + if (!recSelf) { + DistinctAttr recId = nodeToRecId.lookup(node); + if (!recId) { + recId = DistinctAttr::create(UnitAttr::get(context)); + nodeToRecId[node] = recId; + } + recSelf = recSelfCtor(recId); + iter->second.recSelf = recSelf; } + // Inject the self-ref into the previous layer. + translationStack.back().second.unboundSelfRefs.insert(recSelf); + return cast(recSelf); } + } - // Only cache fully self-contained nodes. - if (unboundRecursiveSelfRefs.back().empty()) - nodeToAttr.try_emplace(node, attr); - return attr; + return lookup(node); +} + +std::pair +DebugImporter::RecursionPruner::finalizeTranslation(llvm::DINode *node, + DINodeAttr result) { + // If `node` is not a potentially recursive type, it will not be on the + // translation stack. Nothing to set in this case. 
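Stripped of the MLIR types, the pruning idea is ordinary cycle detection with a placeholder for repeated visits. A toy analogy (not the actual API):

    #include <cassert>
    #include <set>
    #include <string>
    #include <vector>

    struct Node {
      std::string name;
      std::vector<Node *> members;
    };

    // A second visit to a node on the active stack emits a self-reference
    // marker, mirroring how getRecSelf breaks recursive DI types.
    std::string translate(Node *n, std::set<Node *> &stack) {
      if (!stack.insert(n).second)
        return "recSelf(" + n->name + ")";
      std::string out = n->name + "{";
      for (Node *m : n->members)
        out += translate(m, stack);
      stack.erase(n); // pop this node's translation frame
      return out + "}";
    }

    int main() {
      Node list{"list", {}};
      list.members.push_back(&list); // self-referential type
      std::set<Node *> stack;
      assert(translate(&list, stack) == "list{recSelf(list)}");
    }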
+ if (translationStack.empty()) + return {result, true}; + if (translationStack.back().first != node) + return {result, translationStack.back().second.unboundSelfRefs.empty()}; + + TranslationState &state = translationStack.back().second; + + // If this node is actually recursive, set the recId onto `result`. + if (DIRecursiveTypeAttrInterface recSelf = state.recSelf) { + auto recType = cast(result); + result = cast(recType.withRecId(recSelf.getRecId())); + // Remove this recSelf from the set of unbound selfRefs. + state.unboundSelfRefs.erase(recSelf); } - return nullptr; + + // Insert the result into our internal cache if it's not self-contained. + if (!state.unboundSelfRefs.empty()) { + [[maybe_unused]] auto [_, inserted] = dependentCache.try_emplace( + node, DependentTranslation{result, state.unboundSelfRefs}); + assert(inserted && "invalid state: caching the same DINode twice"); + return {result, false}; + } + return {result, true}; +} + +void DebugImporter::RecursionPruner::popTranslationStack(llvm::DINode *node) { + // If `node` is not a potentially recursive type, it will not be on the + // translation stack. Nothing to handle in this case. + if (translationStack.empty() || translationStack.back().first != node) + return; + + // At the end of the stack, all unbound self-refs must be resolved already, + // and the entire cache should be accounted for. + TranslationState &currLayerState = translationStack.back().second; + if (translationStack.size() == 1) { + assert(currLayerState.unboundSelfRefs.empty() && + "internal error: unbound recursive self reference at top level."); + translationStack.pop_back(); + return; + } + + // Copy unboundSelfRefs down to the previous level. + TranslationState &nextLayerState = (++translationStack.rbegin())->second; + nextLayerState.unboundSelfRefs.insert(currLayerState.unboundSelfRefs.begin(), + currLayerState.unboundSelfRefs.end()); + translationStack.pop_back(); +} + +DINodeAttr DebugImporter::RecursionPruner::lookup(llvm::DINode *node) { + auto cacheIter = dependentCache.find(node); + if (cacheIter == dependentCache.end()) + return {}; + + DependentTranslation &entry = cacheIter->second; + if (llvm::set_is_subset(entry.unboundSelfRefs, + translationStack.back().second.unboundSelfRefs)) + return entry.attr; + + // Stale cache entry. + dependentCache.erase(cacheIter); + return {}; } //===----------------------------------------------------------------------===// @@ -394,13 +482,3 @@ DistinctAttr DebugImporter::getOrCreateDistinctID(llvm::DINode *node) { id = DistinctAttr::create(UnitAttr::get(context)); return id; } - -function_ref -DebugImporter::getRecSelfConstructor(llvm::DINode *node) { - using CtorType = function_ref; - return TypeSwitch(node) - .Case([&](llvm::DICompositeType *concreteNode) { - return CtorType(DICompositeTypeAttr::getRecSelf); - }) - .Default(CtorType()); -} diff --git a/mlir/lib/Target/LLVMIR/DebugImporter.h b/mlir/lib/Target/LLVMIR/DebugImporter.h index bcf628fc4234f..8b22dc6345677 100644 --- a/mlir/lib/Target/LLVMIR/DebugImporter.h +++ b/mlir/lib/Target/LLVMIR/DebugImporter.h @@ -29,8 +29,7 @@ namespace detail { class DebugImporter { public: - DebugImporter(ModuleOp mlirModule) - : context(mlirModule.getContext()), mlirModule(mlirModule) {} + DebugImporter(ModuleOp mlirModule); /// Translates the given LLVM debug location to an MLIR location. Location translateLoc(llvm::DILocation *loc); @@ -86,24 +85,102 @@ class DebugImporter { /// for it, or create a new one if not. 
DistinctAttr getOrCreateDistinctID(llvm::DINode *node); - /// Get the `getRecSelf` constructor for the translated type of `node` if its - /// translated DITypeAttr supports recursion. Otherwise, returns nullptr. - function_ref - getRecSelfConstructor(llvm::DINode *node); - /// A mapping between LLVM debug metadata and the corresponding attribute. DenseMap nodeToAttr; /// A mapping between distinct LLVM debug metadata nodes and the corresponding /// distinct id attribute. DenseMap nodeToDistinctAttr; - /// A stack that stores the metadata nodes that are being traversed. The stack - /// is used to detect cyclic dependencies during the metadata translation. - /// A node is pushed with a null value. If it is ever seen twice, it is given - /// a recursive id attribute, indicating that it is a recursive node. - llvm::MapVector translationStack; - /// All the unbound recursive self references in the translation stack. - SmallVector> unboundRecursiveSelfRefs; + /// Translation helper for recursive DINodes. + /// Works alongside a stack-based DINode translator (the "main translator") + /// for gracefully handling DINodes that are recursive. + /// + /// Usage: + /// - Before translating a node, call `pruneOrPushTranslationStack` to see if + /// the pruner can preempt this translation. If this is a node that the + /// pruner already knows how to handle, it will return the translated + /// DINodeAttr. + /// - After a node is successfully translated by the main translator, call + /// `finalizeTranslation` to save the translated result with the pruner, and + /// give it a chance to further modify the result. + /// - Regardless of success or failure by the main translator, always call + /// `popTranslationStack` at the end of translating a node. This is + /// necessary to keep the internal book-keeping in sync. + /// + /// This helper maintains an internal cache so that no recursive type will + /// be translated more than once by the main translator. + /// This internal cache is different from the cache maintained by the main + /// translator because it may store nodes that are not self-contained (i.e. + /// contain unbounded recursive self-references). + class RecursionPruner { + public: + RecursionPruner(MLIRContext *context) : context(context) {} + + /// If this node is a recursive instance that was previously seen, returns a + /// self-reference. If this node was previously cached, returns the cached + /// result. Otherwise, returns null attr, and a translation stack frame is + /// created for this node. Expects `finalizeTranslation` & + /// `popTranslationStack` to be called on this node later. + DINodeAttr pruneOrPushTranslationStack(llvm::DINode *node); + + /// Register the translated result of `node`. Returns the finalized result + /// (with recId if recursive) and whether the result is self-contained + /// (i.e. contains no unbound self-refs). + std::pair finalizeTranslation(llvm::DINode *node, + DINodeAttr result); + + /// Pop off a frame from the translation stack after a node is done being + /// translated. + void popTranslationStack(llvm::DINode *node); + + private: + /// Returns the cached result (if exists) or null. + /// The cache entry will be removed if not all of its dependent self-refs + /// exists. + DINodeAttr lookup(llvm::DINode *node); + + MLIRContext *context; + + /// A cached translation that contains the translated attribute as well + /// as any unbound self-references that it depends on. + struct DependentTranslation { + /// The translated attr. 
May contain unbound self-references for other + /// recursive attrs. + DINodeAttr attr; + /// The set of unbound self-refs that this cached entry refers to. All + /// these self-refs must exist for the cached entry to be valid. + DenseSet unboundSelfRefs; + }; + /// A mapping between LLVM debug metadata and the corresponding attribute. + /// Only contains those with unboundSelfRefs. Fully self-contained attrs + /// will be cached by the outer main translator. + DenseMap dependentCache; + + /// Each potentially recursive node will have a TranslationState pushed onto + /// the `translationStack` to keep track of whether this node is actually + /// recursive (i.e. has self-references inside), and other book-keeping. + struct TranslationState { + /// The rec-self if this node is indeed a recursive node (i.e. another + /// instance of itself is seen while translating it). Null if this node + /// has not been seen again deeper in the translation stack. + DIRecursiveTypeAttrInterface recSelf; + /// All the unbound recursive self references in this layer of the + /// translation stack. + DenseSet unboundSelfRefs; + }; + /// A stack that stores the metadata nodes that are being traversed. The + /// stack is used to handle cyclic dependencies during metadata translation. + /// Each node is pushed with an empty TranslationState. If it is ever seen + /// later when the stack is deeper, the node is recursive, and its + /// TranslationState is assigned a recSelf. + llvm::MapVector translationStack; + + /// A mapping between DINodes that are recursive, and their assigned recId. + /// This is kept so that repeated occurrences of the same node can reuse the + /// same ID and be deduplicated. + DenseMap nodeToRecId; + }; + RecursionPruner recursionPruner; MLIRContext *context; ModuleOp mlirModule; diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp index 642359a23756a..46e2e7f2ba5dc 100644 --- a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp @@ -104,7 +104,10 @@ llvm::DICompileUnit *DebugTranslation::translateImpl(DICompileUnitAttr attr) { attr.getIsOptimized(), /*Flags=*/"", /*RV=*/0, /*SplitName=*/{}, static_cast( - attr.getEmissionKind())); + attr.getEmissionKind()), + 0, true, false, + static_cast( + attr.getNameTableKind())); } /// Returns a new `DINodeT` that is either distinct or not, depending on @@ -216,18 +219,15 @@ DebugTranslation::translateImpl(DIGlobalVariableAttr attr) { llvm::DIType * DebugTranslation::translateRecursive(DIRecursiveTypeAttrInterface attr) { DistinctAttr recursiveId = attr.getRecId(); - if (attr.isRecSelf()) { - auto *iter = recursiveTypeMap.find(recursiveId); - assert(iter != recursiveTypeMap.end() && "unbound DI recursive self type"); + if (auto *iter = recursiveTypeMap.find(recursiveId); + iter != recursiveTypeMap.end()) { return iter->second; + } else { + assert(!attr.isRecSelf() && "unbound DI recursive self type"); } auto setRecursivePlaceholder = [&](llvm::DIType *placeholder) { - [[maybe_unused]] auto [iter, inserted] = - recursiveTypeMap.try_emplace(recursiveId, placeholder); - (void)iter; - (void)inserted; - assert(inserted && "illegal reuse of recursive id"); + recursiveTypeMap.try_emplace(recursiveId, placeholder); }; llvm::DIType *result = diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 08ec57803aff8..a59677c02fc39 100644 --- 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3116,6 +3116,174 @@ convertDeclareTargetAttr(Operation *op, mlir::omp::DeclareTargetAttr attribute, return success(); } +// Returns true if the operation is inside a TargetOp or +// is part of a declare target function. +static bool isTargetDeviceOp(Operation *op) { + // Assumes no reverse offloading + if (op->getParentOfType()) + return true; + + if (auto parentFn = op->getParentOfType()) + if (auto declareTargetIface = + llvm::dyn_cast( + parentFn.getOperation())) + if (declareTargetIface.isDeclareTarget() && + declareTargetIface.getDeclareTargetDeviceType() != + mlir::omp::DeclareTargetDeviceType::host) + return true; + + return false; +} + +/// Given an OpenMP MLIR operation, create the corresponding LLVM IR +/// (including OpenMP runtime calls). +static LogicalResult +convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + return llvm::TypeSwitch(op) + .Case([&](omp::BarrierOp) { + ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); + return success(); + }) + .Case([&](omp::TaskwaitOp) { + ompBuilder->createTaskwait(builder.saveIP()); + return success(); + }) + .Case([&](omp::TaskyieldOp) { + ompBuilder->createTaskyield(builder.saveIP()); + return success(); + }) + .Case([&](omp::FlushOp) { + // No support in Openmp runtime function (__kmpc_flush) to accept + // the argument list. + // OpenMP standard states the following: + // "An implementation may implement a flush with a list by ignoring + // the list, and treating it the same as a flush without a list." + // + // The argument list is discarded so that, flush with a list is treated + // same as a flush without a list. 
+ ompBuilder->createFlush(builder.saveIP()); + return success(); + }) + .Case([&](omp::ParallelOp op) { + return convertOmpParallel(op, builder, moduleTranslation); + }) + .Case([&](omp::ReductionOp reductionOp) { + return convertOmpReductionOp(reductionOp, builder, moduleTranslation); + }) + .Case([&](omp::MasterOp) { + return convertOmpMaster(*op, builder, moduleTranslation); + }) + .Case([&](omp::CriticalOp) { + return convertOmpCritical(*op, builder, moduleTranslation); + }) + .Case([&](omp::OrderedRegionOp) { + return convertOmpOrderedRegion(*op, builder, moduleTranslation); + }) + .Case([&](omp::OrderedOp) { + return convertOmpOrdered(*op, builder, moduleTranslation); + }) + .Case([&](omp::WsloopOp) { + return convertOmpWsloop(*op, builder, moduleTranslation); + }) + .Case([&](omp::SimdLoopOp) { + return convertOmpSimdLoop(*op, builder, moduleTranslation); + }) + .Case([&](omp::AtomicReadOp) { + return convertOmpAtomicRead(*op, builder, moduleTranslation); + }) + .Case([&](omp::AtomicWriteOp) { + return convertOmpAtomicWrite(*op, builder, moduleTranslation); + }) + .Case([&](omp::AtomicUpdateOp op) { + return convertOmpAtomicUpdate(op, builder, moduleTranslation); + }) + .Case([&](omp::AtomicCaptureOp op) { + return convertOmpAtomicCapture(op, builder, moduleTranslation); + }) + .Case([&](omp::SectionsOp) { + return convertOmpSections(*op, builder, moduleTranslation); + }) + .Case([&](omp::SingleOp op) { + return convertOmpSingle(op, builder, moduleTranslation); + }) + .Case([&](omp::TeamsOp op) { + return convertOmpTeams(op, builder, moduleTranslation); + }) + .Case([&](omp::TaskOp op) { + return convertOmpTaskOp(op, builder, moduleTranslation); + }) + .Case([&](omp::TaskgroupOp op) { + return convertOmpTaskgroupOp(op, builder, moduleTranslation); + }) + .Case([](auto op) { + // `yield` and `terminator` can be just omitted. The block structure + // was created in the region that handles their parent operation. + // `declare_reduction` will be used by reductions and is not + // converted directly, skip it. + // `critical.declare` is only used to declare names of critical + // sections which will be used by `critical` ops and hence can be + // ignored for lowering. The OpenMP IRBuilder will create unique + // name for critical section names. + return success(); + }) + .Case([&](omp::ThreadprivateOp) { + return convertOmpThreadprivate(*op, builder, moduleTranslation); + }) + .Case([&](auto op) { + return convertOmpTargetData(op, builder, moduleTranslation); + }) + .Case([&](omp::TargetOp) { + return convertOmpTarget(*op, builder, moduleTranslation); + }) + .Case( + [&](auto op) { + // No-op, should be handled by relevant owning operations e.g. + // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp etc. 
+ // and then discarded + return success(); + }) + .Default([&](Operation *inst) { + return inst->emitError("unsupported OpenMP operation: ") + << inst->getName(); + }); +} + +static LogicalResult +convertTargetDeviceOp(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + return convertHostOrTargetOperation(op, builder, moduleTranslation); +} + +static LogicalResult +convertTargetOpsInNest(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + if (isa(op)) + return convertOmpTarget(*op, builder, moduleTranslation); + if (isa(op)) + return convertOmpTargetData(op, builder, moduleTranslation); + bool interrupted = + op->walk([&](Operation *oper) { + if (isa(oper)) { + if (failed(convertOmpTarget(*oper, builder, moduleTranslation))) + return WalkResult::interrupt(); + return WalkResult::skip(); + } + if (isa(oper)) { + if (failed(convertOmpTargetData(oper, builder, moduleTranslation))) + return WalkResult::interrupt(); + return WalkResult::skip(); + } + return WalkResult::advance(); + }).wasInterrupted(); + return failure(interrupted); +} + namespace { /// Implementation of the dialect interface that converts operations belonging @@ -3131,8 +3299,8 @@ class OpenMPDialectLLVMIRTranslationInterface convertOperation(Operation *op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) const final; - /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR, runtime - /// calls, or operation amendments + /// Given an OpenMP MLIR attribute, create the corresponding LLVM-IR, + /// runtime calls, or operation amendments LogicalResult amendOperation(Operation *op, ArrayRef instructions, NamedAttribute attribute, @@ -3237,116 +3405,15 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( LLVM::ModuleTranslation &moduleTranslation) const { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + if (ompBuilder->Config.isTargetDevice()) { + if (isTargetDeviceOp(op)) { + return convertTargetDeviceOp(op, builder, moduleTranslation); + } else { + return convertTargetOpsInNest(op, builder, moduleTranslation); + } + } - return llvm::TypeSwitch(op) - .Case([&](omp::BarrierOp) { - ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); - return success(); - }) - .Case([&](omp::TaskwaitOp) { - ompBuilder->createTaskwait(builder.saveIP()); - return success(); - }) - .Case([&](omp::TaskyieldOp) { - ompBuilder->createTaskyield(builder.saveIP()); - return success(); - }) - .Case([&](omp::FlushOp) { - // No support in Openmp runtime function (__kmpc_flush) to accept - // the argument list. - // OpenMP standard states the following: - // "An implementation may implement a flush with a list by ignoring - // the list, and treating it the same as a flush without a list." - // - // The argument list is discarded so that, flush with a list is treated - // same as a flush without a list. 
- ompBuilder->createFlush(builder.saveIP());
- return success();
- })
- .Case([&](omp::ParallelOp op) {
- return convertOmpParallel(op, builder, moduleTranslation);
- })
- .Case([&](omp::ReductionOp reductionOp) {
- return convertOmpReductionOp(reductionOp, builder, moduleTranslation);
- })
- .Case([&](omp::MasterOp) {
- return convertOmpMaster(*op, builder, moduleTranslation);
- })
- .Case([&](omp::CriticalOp) {
- return convertOmpCritical(*op, builder, moduleTranslation);
- })
- .Case([&](omp::OrderedRegionOp) {
- return convertOmpOrderedRegion(*op, builder, moduleTranslation);
- })
- .Case([&](omp::OrderedOp) {
- return convertOmpOrdered(*op, builder, moduleTranslation);
- })
- .Case([&](omp::WsloopOp) {
- return convertOmpWsloop(*op, builder, moduleTranslation);
- })
- .Case([&](omp::SimdLoopOp) {
- return convertOmpSimdLoop(*op, builder, moduleTranslation);
- })
- .Case([&](omp::AtomicReadOp) {
- return convertOmpAtomicRead(*op, builder, moduleTranslation);
- })
- .Case([&](omp::AtomicWriteOp) {
- return convertOmpAtomicWrite(*op, builder, moduleTranslation);
- })
- .Case([&](omp::AtomicUpdateOp op) {
- return convertOmpAtomicUpdate(op, builder, moduleTranslation);
- })
- .Case([&](omp::AtomicCaptureOp op) {
- return convertOmpAtomicCapture(op, builder, moduleTranslation);
- })
- .Case([&](omp::SectionsOp) {
- return convertOmpSections(*op, builder, moduleTranslation);
- })
- .Case([&](omp::SingleOp op) {
- return convertOmpSingle(op, builder, moduleTranslation);
- })
- .Case([&](omp::TeamsOp op) {
- return convertOmpTeams(op, builder, moduleTranslation);
- })
- .Case([&](omp::TaskOp op) {
- return convertOmpTaskOp(op, builder, moduleTranslation);
- })
- .Case([&](omp::TaskgroupOp op) {
- return convertOmpTaskgroupOp(op, builder, moduleTranslation);
- })
- .Case<omp::YieldOp, omp::TerminatorOp, omp::DeclareReductionOp,
- omp::CriticalDeclareOp>([](auto op) {
- // `yield` and `terminator` can be just omitted. The block structure
- // was created in the region that handles their parent operation.
- // `declare_reduction` will be used by reductions and is not
- // converted directly, skip it.
- // `critical.declare` is only used to declare names of critical
- // sections which will be used by `critical` ops and hence can be
- // ignored for lowering. The OpenMP IRBuilder will create unique
- // name for critical section names.
- return success();
- })
- .Case([&](omp::ThreadprivateOp) {
- return convertOmpThreadprivate(*op, builder, moduleTranslation);
- })
- .Case<omp::TargetDataOp, omp::TargetEnterDataOp, omp::TargetExitDataOp,
- omp::TargetUpdateOp>([&](auto op) {
- return convertOmpTargetData(op, builder, moduleTranslation);
- })
- .Case([&](omp::TargetOp) {
- return convertOmpTarget(*op, builder, moduleTranslation);
- })
- .Case<omp::MapInfoOp, omp::DataBoundsOp>(
- [&](auto op) {
- // No-op, should be handled by relevant owning operations e.g.
- // TargetOp, TargetEnterDataOp, TargetExitDataOp, TargetDataOp etc.
- // and then discarded
- return success();
- })
- .Default([&](Operation *inst) {
- return inst->emitError("unsupported OpenMP operation: ")
- << inst->getName();
- });
+ return convertHostOrTargetOperation(op, builder, moduleTranslation);
}

void mlir::registerOpenMPDialectTranslation(DialectRegistry &registry) {
diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
index 6e70d52fa760b..af998b99d511f 100644
--- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
@@ -123,12 +123,18 @@ static SmallVector<unsigned> getPositionFromIndices(ArrayRef<unsigned> indices) {
 /// access to the private module import methods.
static LogicalResult
convertInstructionImpl(OpBuilder &odsBuilder, llvm::Instruction *inst,
- ModuleImport &moduleImport) {
+ ModuleImport &moduleImport,
+ LLVMImportInterface &iface) {
 // Copy the operands to an LLVM operands array reference for conversion.
 SmallVector<llvm::Value *> operands(inst->operands());
 ArrayRef<llvm::Value *> llvmOperands(operands);

 // Convert all instructions that provide an MLIR builder.
+ if (iface.isConvertibleInstruction(inst->getOpcode()))
+ return iface.convertInstruction(odsBuilder, inst, llvmOperands,
+ moduleImport);
+ // TODO: Implement the `convertInstruction` hooks in the
+ // `LLVMDialectLLVMIRImportInterface` and move the following include there.
 #include "mlir/Dialect/LLVMIR/LLVMOpFromLLVMIRConversions.inc"
 return failure();
}
@@ -1596,7 +1602,7 @@ LogicalResult ModuleImport::convertInstruction(llvm::Instruction *inst) {
 }

 // Convert all instructions that have an mlirBuilder.
- if (succeeded(convertInstructionImpl(builder, inst, *this)))
+ if (succeeded(convertInstructionImpl(builder, inst, *this, iface)))
 return success();

 return emitError(loc) << "unhandled instruction: " << diag(*inst);
diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp
index 80e3b79016329..abe565ea862f8 100644
--- a/mlir/lib/Transforms/Mem2Reg.cpp
+++ b/mlir/lib/Transforms/Mem2Reg.cpp
@@ -202,6 +202,7 @@ class MemorySlotPromoter {
 /// Contains the reaching definition at this operation. Reaching definitions
 /// are only computed for promotable memory operations with blocking uses.
 DenseMap<PromotableMemOpInterface, Value> reachingDefs;
+ DenseMap<PromotableMemOpInterface, Value> replacedValuesMap;
 DominanceInfo &dominance;
 MemorySlotPromotionInfo info;
 const Mem2RegStatistics &statistics;
@@ -438,6 +439,7 @@ Value MemorySlotPromoter::computeReachingDefInBlock(Block *block,
 assert(stored && "a memory operation storing to a slot must provide a "
 "new definition of the slot");
 reachingDef = stored;
+ replacedValuesMap[memOp] = stored;
 }
 }
 }
@@ -552,6 +554,10 @@ void MemorySlotPromoter::removeBlockingUses() {
 dominanceSort(usersToRemoveUses, *slot.ptr.getParentBlock()->getParent());

 llvm::SmallVector<Operation *> toErase;
+ // List of all replaced values in the slot.
+ llvm::SmallVector<std::tuple<Operation *, Value>> replacedValuesList;
+ // Ops to visit with the `visitReplacedValues` method.
+ llvm::SmallVector<PromotableOpInterface> toVisit;
 for (Operation *toPromote : llvm::reverse(usersToRemoveUses)) {
 if (auto toPromoteMemOp = dyn_cast<PromotableMemOpInterface>(toPromote)) {
 Value reachingDef = reachingDefs.lookup(toPromoteMemOp);
@@ -565,7 +571,9 @@ void MemorySlotPromoter::removeBlockingUses() {
 slot, info.userToBlockingUses[toPromote], rewriter,
 reachingDef) == DeletionKind::Delete)
 toErase.push_back(toPromote);
-
+ if (toPromoteMemOp.storesTo(slot))
+ if (Value replacedValue = replacedValuesMap[toPromoteMemOp])
+ replacedValuesList.push_back({toPromoteMemOp, replacedValue});
 continue;
 }

@@ -574,6 +582,12 @@
 if (toPromoteBasic.removeBlockingUses(info.userToBlockingUses[toPromote],
 rewriter) == DeletionKind::Delete)
 toErase.push_back(toPromote);
+ if (toPromoteBasic.requiresReplacedValues())
+ toVisit.push_back(toPromoteBasic);
+ }
+ for (PromotableOpInterface op : toVisit) {
+ rewriter.setInsertionPointAfter(op);
+ op.visitReplacedValues(replacedValuesList, rewriter);
 }

 for (Operation *toEraseOp : toErase)
diff --git a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
index bbecbdb856693..cfd4f9c03aaff 100644
--- a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
+++ b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp
@@ -243,9 +243,8 @@ bool Worklist::empty() const {
 void Worklist::push(Operation *op) {
 assert(op && "cannot push nullptr to worklist");
 // Check to see if the worklist already contains this op.
- if (map.count(op))
+ if (!map.insert({op, list.size()}).second)
 return;
- map[op] = list.size();
 list.push_back(op);
}
diff --git a/mlir/test/CAPI/llvm.c b/mlir/test/CAPI/llvm.c
index 9c3c7da46c4cd..25f900e521cf9 100644
--- a/mlir/test/CAPI/llvm.c
+++ b/mlir/test/CAPI/llvm.c
@@ -264,9 +264,9 @@ static void testDebugInfoAttributes(MlirContext ctx) {
 // CHECK: #llvm.di_file<"foo" in "bar">
 mlirAttributeDump(file);

- MlirAttribute compile_unit =
- mlirLLVMDICompileUnitAttrGet(ctx, id, LLVMDWARFSourceLanguageC99, file,
- foo, false, MlirLLVMDIEmissionKindFull);
+ MlirAttribute compile_unit = mlirLLVMDICompileUnitAttrGet(
+ ctx, id, LLVMDWARFSourceLanguageC99, file, foo, false,
+ MlirLLVMDIEmissionKindFull, MlirLLVMDINameTableKindDefault);
 // CHECK: #llvm.di_compile_unit<{{.*}}>
 mlirAttributeDump(compile_unit);
diff --git a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
index bac94aae6b746..f5d9499eadda4 100644
--- a/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
+++ b/mlir/test/Conversion/ComplexToStandard/convert-to-standard.mlir
@@ -1880,3 +1880,187 @@ func.func @complex_sin_with_fmf(%arg: complex<f32>) -> complex<f32> {
 // CHECK-DAG: %[[RESULT_IMAG:.*]] = arith.mulf %[[EXP_DIFF]], %[[COS]] fastmath<nnan,contract>
 // CHECK-DAG: %[[RESULT:.*]] = complex.create %[[RESULT_REAL]], %[[RESULT_IMAG]] : complex<f32>
 // CHECK: return %[[RESULT]]
+
+// -----
+
+// CHECK-LABEL: func @complex_sign_with_fmf
+// CHECK-SAME: %[[ARG:.*]]: complex<f32>
+func.func @complex_sign_with_fmf(%arg: complex<f32>) -> complex<f32> {
+ %sign = complex.sign %arg fastmath<nnan,contract> : complex<f32>
+ return %sign : complex<f32>
+}
+
+// CHECK: %[[REAL:.*]] = complex.re %[[ARG]] : complex<f32>
+// CHECK: %[[IMAG:.*]] = complex.im %[[ARG]] : complex<f32>
+// CHECK: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[REAL_IS_ZERO:.*]] = arith.cmpf oeq, %[[REAL]], %[[ZERO]] : f32
+// CHECK: %[[IMAG_IS_ZERO:.*]] = arith.cmpf oeq, %[[IMAG]], %[[ZERO]] : f32
+// CHECK: %[[IS_ZERO:.*]] = arith.andi %[[REAL_IS_ZERO]], %[[IMAG_IS_ZERO]] : i1
+// CHECK: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK: %[[REAL2:.*]] = complex.re %[[ARG]] : complex<f32>
+// CHECK: %[[IMAG2:.*]] = complex.im %[[ARG]] : complex<f32>
+// CHECK: %[[IS_REAL_ZERO:.*]] = arith.cmpf oeq, %[[REAL2]], %[[ZERO]] : f32
+// CHECK: %[[IS_IMAG_ZERO:.*]] = arith.cmpf oeq, %[[IMAG2]], %[[ZERO]] : f32
+// CHECK: %[[IMAG_DIV_REAL:.*]] = arith.divf %[[IMAG2]], %[[REAL2]] fastmath<nnan,contract> : f32
+// CHECK: %[[IMAG_SQ:.*]] = arith.mulf %[[IMAG_DIV_REAL]], %[[IMAG_DIV_REAL]] fastmath<nnan,contract> : f32
+// CHECK: %[[IMAG_SQ_PLUS_ONE:.*]] = arith.addf %[[IMAG_SQ]], %[[ONE]] fastmath<nnan,contract> : f32
+// CHECK: %[[IMAG_SQRT:.*]] = math.sqrt %[[IMAG_SQ_PLUS_ONE]] fastmath<nnan,contract> : f32
+// CHECK: %[[REAL_ABS:.*]] = math.absf %[[REAL2]] fastmath<nnan,contract> : f32
+// CHECK: %[[ABS_IMAG:.*]] = arith.mulf %[[IMAG_SQRT]], %[[REAL_ABS]] fastmath<nnan,contract> : f32
+// CHECK: %[[REAL_DIV_IMAG:.*]] = arith.divf %[[REAL2]], %[[IMAG2]] fastmath<nnan,contract> : f32
+// CHECK: %[[REAL_SQ:.*]] = arith.mulf %[[REAL_DIV_IMAG]], %[[REAL_DIV_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[REAL_SQ_PLUS_ONE:.*]] = arith.addf %[[REAL_SQ]], %[[ONE]] fastmath<nnan,contract> : f32
+// CHECK: %[[REAL_SQRT:.*]] = math.sqrt %[[REAL_SQ_PLUS_ONE]] fastmath<nnan,contract> : f32
+// CHECK: %[[IMAG_ABS:.*]] = math.absf %[[IMAG2]] fastmath<nnan,contract> : f32
+// CHECK: %[[ABS_REAL:.*]] = arith.mulf %[[REAL_SQRT]], %[[IMAG_ABS]] fastmath<nnan,contract> : f32
+// CHECK: %[[REAL_GT_IMAG:.*]] = arith.cmpf ogt, %[[REAL2]], %[[IMAG2]] : f32
+// CHECK: %[[ABS1:.*]] = arith.select %[[REAL_GT_IMAG]], %[[ABS_IMAG]], %[[ABS_REAL]] : f32
+// CHECK: %[[ABS2:.*]] = arith.select %[[IS_IMAG_ZERO]], %[[REAL_ABS]], %[[ABS1]] : f32
+// CHECK: %[[NORM:.*]] = arith.select %[[IS_REAL_ZERO]], %[[IMAG_ABS]], %[[ABS2]] : f32
+// CHECK: %[[REAL_SIGN:.*]] = arith.divf %[[REAL]], %[[NORM]] fastmath<nnan,contract> : f32
+// CHECK: %[[IMAG_SIGN:.*]] = arith.divf %[[IMAG]], %[[NORM]] fastmath<nnan,contract> : f32
+// CHECK: %[[SIGN:.*]] = complex.create %[[REAL_SIGN]], %[[IMAG_SIGN]] : complex<f32>
+// CHECK: %[[RESULT:.*]] = arith.select %[[IS_ZERO]], %[[ARG]], %[[SIGN]] : complex<f32>
+// CHECK: return %[[RESULT]] : complex<f32>
+
+// -----
+
+// CHECK-LABEL: func @complex_tan_with_fmf
+// CHECK-SAME: %[[ARG:.*]]: complex<f32>
+func.func @complex_tan_with_fmf(%arg: complex<f32>) -> complex<f32> {
+ %tan = complex.tan %arg fastmath<nnan,contract> : complex<f32>
+ return %tan : complex<f32>
+}
+
+// CHECK-DAG: %[[REAL:.*]] = complex.re %[[ARG]]
+// CHECK-DAG: %[[IMAG:.*]] = complex.im %[[ARG]]
+// CHECK-DAG: %[[HALF:.*]] = arith.constant 5.000000e-01 : f32
+// CHECK-DAG: %[[EXP:.*]] = math.exp %[[IMAG]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[HALF_EXP:.*]] = arith.mulf %[[HALF]], %[[EXP]] fastmath<nnan,contract>
+// CHECK-DAG: %[[HALF_REXP:.*]] = arith.divf %[[HALF]], %[[EXP]] fastmath<nnan,contract>
+// CHECK-DAG: %[[SIN:.*]] = math.sin %[[REAL]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[COS:.*]] = math.cos %[[REAL]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[EXP_SUM:.*]] = arith.addf %[[HALF_REXP]], %[[HALF_EXP]] fastmath<nnan,contract>
+// CHECK-DAG: %[[COS_REAL:.*]] = arith.mulf %[[EXP_SUM]], %[[COS]] fastmath<nnan,contract>
+// CHECK-DAG: %[[EXP_DIFF:.*]] = arith.subf %[[HALF_REXP]], %[[HALF_EXP]] fastmath<nnan,contract>
+// CHECK-DAG: %[[COS_IMAG:.*]] = arith.mulf %[[EXP_DIFF]], %[[SIN]] fastmath<nnan,contract>
+// CHECK-DAG: %[[COS_COMP:.*]] = complex.create %[[COS_REAL]], %[[COS_IMAG]] : complex<f32>
+
+// CHECK-DAG: %[[REAL:.*]] = complex.re %[[ARG]]
+// CHECK-DAG: %[[IMAG:.*]] = complex.im %[[ARG]]
+// CHECK-DAG: %[[HALF:.*]] = arith.constant 5.000000e-01 : f32
+// CHECK-DAG: %[[EXP:.*]] = math.exp %[[IMAG]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[HALF_EXP:.*]] = arith.mulf %[[HALF]], %[[EXP]] fastmath<nnan,contract>
+// CHECK-DAG: %[[HALF_REXP:.*]] = arith.divf %[[HALF]], %[[EXP]] fastmath<nnan,contract>
+// CHECK-DAG: %[[SIN:.*]] = math.sin %[[REAL]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[COS:.*]] = math.cos %[[REAL]] fastmath<nnan,contract> : f32
+// CHECK-DAG: %[[EXP_SUM:.*]] = arith.addf %[[HALF_EXP]], %[[HALF_REXP]] fastmath<nnan,contract>
+// CHECK-DAG: %[[SIN_REAL:.*]] = arith.mulf %[[EXP_SUM]], %[[SIN]] fastmath<nnan,contract>
+// CHECK-DAG: %[[EXP_DIFF:.*]] = arith.subf %[[HALF_EXP]], %[[HALF_REXP]] fastmath<nnan,contract>
+// CHECK-DAG: %[[SIN_IMAG:.*]] = arith.mulf %[[EXP_DIFF]], %[[COS]] fastmath<nnan,contract>
+// CHECK-DAG: %[[SIN_COMP:.*]] = complex.create %[[SIN_REAL]], %[[SIN_IMAG]] : complex<f32>
+
+// CHECK: %[[LHS_REAL:.*]] = complex.re %[[SIN_COMP]] : complex<f32>
+// CHECK: %[[LHS_IMAG:.*]] = complex.im %[[SIN_COMP]] : complex<f32>
+// CHECK: %[[RHS_REAL:.*]] = complex.re %[[COS_COMP]] : complex<f32>
+// CHECK: %[[RHS_IMAG:.*]] = complex.im %[[COS_COMP]] : complex<f32>
+
+// CHECK: %[[RHS_REAL_IMAG_RATIO:.*]] = arith.divf %[[RHS_REAL]], %[[RHS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[RHS_REAL_TIMES_RHS_REAL_IMAG_RATIO:.*]] = arith.mulf %[[RHS_REAL_IMAG_RATIO]], %[[RHS_REAL]] fastmath<nnan,contract> : f32
+// CHECK: %[[RHS_REAL_IMAG_DENOM:.*]] = arith.addf %[[RHS_IMAG]], %[[RHS_REAL_TIMES_RHS_REAL_IMAG_RATIO]] fastmath<nnan,contract> : f32
+// CHECK: %[[LHS_REAL_TIMES_RHS_REAL_IMAG_RATIO:.*]] = arith.mulf %[[LHS_REAL]], %[[RHS_REAL_IMAG_RATIO]] fastmath<nnan,contract> : f32
+// CHECK: %[[REAL_NUMERATOR_1:.*]] = arith.addf %[[LHS_REAL_TIMES_RHS_REAL_IMAG_RATIO]], %[[LHS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[RESULT_REAL_1:.*]] = arith.divf %[[REAL_NUMERATOR_1]], %[[RHS_REAL_IMAG_DENOM]] fastmath<nnan,contract> : f32
+// CHECK: %[[LHS_IMAG_TIMES_RHS_REAL_IMAG_RATIO:.*]] = arith.mulf %[[LHS_IMAG]], %[[RHS_REAL_IMAG_RATIO]] fastmath<nnan,contract> : f32
+// CHECK: %[[IMAG_NUMERATOR_1:.*]] = arith.subf %[[LHS_IMAG_TIMES_RHS_REAL_IMAG_RATIO]], %[[LHS_REAL]] fastmath<nnan,contract> : f32
+// CHECK: %[[RESULT_IMAG_1:.*]] = arith.divf %[[IMAG_NUMERATOR_1]], %[[RHS_REAL_IMAG_DENOM]] fastmath<nnan,contract> : f32
+
+// CHECK: %[[RHS_IMAG_REAL_RATIO:.*]] = arith.divf %[[RHS_IMAG]], %[[RHS_REAL]] fastmath<nnan,contract> : f32
+// CHECK: %[[RHS_IMAG_TIMES_RHS_IMAG_REAL_RATIO:.*]] = arith.mulf %[[RHS_IMAG_REAL_RATIO]], %[[RHS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[RHS_IMAG_REAL_DENOM:.*]] = arith.addf %[[RHS_REAL]], %[[RHS_IMAG_TIMES_RHS_IMAG_REAL_RATIO]] fastmath<nnan,contract> : f32
+// CHECK: %[[LHS_IMAG_TIMES_RHS_IMAG_REAL_RATIO:.*]] = arith.mulf %[[LHS_IMAG]], %[[RHS_IMAG_REAL_RATIO]] fastmath<nnan,contract> : f32
+// CHECK: %[[REAL_NUMERATOR_2:.*]] = arith.addf %[[LHS_REAL]], %[[LHS_IMAG_TIMES_RHS_IMAG_REAL_RATIO]] fastmath<nnan,contract> : f32
+// CHECK: %[[RESULT_REAL_2:.*]] = arith.divf %[[REAL_NUMERATOR_2]], %[[RHS_IMAG_REAL_DENOM]] fastmath<nnan,contract> : f32
+// CHECK: %[[LHS_REAL_TIMES_RHS_IMAG_REAL_RATIO:.*]] = arith.mulf %[[LHS_REAL]], %[[RHS_IMAG_REAL_RATIO]] fastmath<nnan,contract> : f32
+// CHECK: %[[IMAG_NUMERATOR_2:.*]] = arith.subf %[[LHS_IMAG]], %[[LHS_REAL_TIMES_RHS_IMAG_REAL_RATIO]] fastmath<nnan,contract> : f32
+// CHECK: %[[RESULT_IMAG_2:.*]] = arith.divf %[[IMAG_NUMERATOR_2]], %[[RHS_IMAG_REAL_DENOM]] fastmath<nnan,contract> : f32
+
+// Case 1. Zero denominator, numerator contains at most one NaN value.
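+// Editorial note, inferred from the checks rather than stated by the patch:
+// %[[RESULT_REAL_1]]/%[[RESULT_IMAG_1]] and %[[RESULT_REAL_2]]/%[[RESULT_IMAG_2]]
+// above are the two branches of Smith's algorithm for complex division, each
+// scaling by whichever ratio of the denominator's components avoids overflow.
+// The special cases that follow then handle IEEE-754 edge values: Case 1 turns
+// a zero denominator into a correctly signed infinity, Case 2 keeps an
+// infinite numerator over a finite denominator infinite, and Case 3 collapses
+// a finite numerator over an infinite denominator to a signed zero, before the
+// final NaN-driven selects pick which result to return.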
+// CHECK: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[RHS_REAL_ABS:.*]] = math.absf %[[RHS_REAL]] fastmath<nnan,contract> : f32
+// CHECK: %[[RHS_REAL_ABS_IS_ZERO:.*]] = arith.cmpf oeq, %[[RHS_REAL_ABS]], %[[ZERO]] : f32
+// CHECK: %[[RHS_IMAG_ABS:.*]] = math.absf %[[RHS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[RHS_IMAG_ABS_IS_ZERO:.*]] = arith.cmpf oeq, %[[RHS_IMAG_ABS]], %[[ZERO]] : f32
+// CHECK: %[[LHS_REAL_IS_NOT_NAN:.*]] = arith.cmpf ord, %[[LHS_REAL]], %[[ZERO]] : f32
+// CHECK: %[[LHS_IMAG_IS_NOT_NAN:.*]] = arith.cmpf ord, %[[LHS_IMAG]], %[[ZERO]] : f32
+// CHECK: %[[LHS_CONTAINS_NOT_NAN_VALUE:.*]] = arith.ori %[[LHS_REAL_IS_NOT_NAN]], %[[LHS_IMAG_IS_NOT_NAN]] : i1
+// CHECK: %[[RHS_IS_ZERO:.*]] = arith.andi %[[RHS_REAL_ABS_IS_ZERO]], %[[RHS_IMAG_ABS_IS_ZERO]] : i1
+// CHECK: %[[RESULT_IS_INFINITY:.*]] = arith.andi %[[LHS_CONTAINS_NOT_NAN_VALUE]], %[[RHS_IS_ZERO]] : i1
+// CHECK: %[[INF:.*]] = arith.constant 0x7F800000 : f32
+// CHECK: %[[INF_WITH_SIGN_OF_RHS_REAL:.*]] = math.copysign %[[INF]], %[[RHS_REAL]] : f32
+// CHECK: %[[INFINITY_RESULT_REAL:.*]] = arith.mulf %[[INF_WITH_SIGN_OF_RHS_REAL]], %[[LHS_REAL]] fastmath<nnan,contract> : f32
+// CHECK: %[[INFINITY_RESULT_IMAG:.*]] = arith.mulf %[[INF_WITH_SIGN_OF_RHS_REAL]], %[[LHS_IMAG]] fastmath<nnan,contract> : f32
+
+// Case 2. Infinite numerator, finite denominator.
+// CHECK: %[[RHS_REAL_FINITE:.*]] = arith.cmpf one, %[[RHS_REAL_ABS]], %[[INF]] : f32
+// CHECK: %[[RHS_IMAG_FINITE:.*]] = arith.cmpf one, %[[RHS_IMAG_ABS]], %[[INF]] : f32
+// CHECK: %[[RHS_IS_FINITE:.*]] = arith.andi %[[RHS_REAL_FINITE]], %[[RHS_IMAG_FINITE]] : i1
+// CHECK: %[[LHS_REAL_ABS:.*]] = math.absf %[[LHS_REAL]] fastmath<nnan,contract> : f32
+// CHECK: %[[LHS_REAL_INFINITE:.*]] = arith.cmpf oeq, %[[LHS_REAL_ABS]], %[[INF]] : f32
+// CHECK: %[[LHS_IMAG_ABS:.*]] = math.absf %[[LHS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[LHS_IMAG_INFINITE:.*]] = arith.cmpf oeq, %[[LHS_IMAG_ABS]], %[[INF]] : f32
+// CHECK: %[[LHS_IS_INFINITE:.*]] = arith.ori %[[LHS_REAL_INFINITE]], %[[LHS_IMAG_INFINITE]] : i1
+// CHECK: %[[INF_NUM_FINITE_DENOM:.*]] = arith.andi %[[LHS_IS_INFINITE]], %[[RHS_IS_FINITE]] : i1
+// CHECK: %[[ONE:.*]] = arith.constant 1.000000e+00 : f32
+// CHECK: %[[LHS_REAL_IS_INF:.*]] = arith.select %[[LHS_REAL_INFINITE]], %[[ONE]], %[[ZERO]] : f32
+// CHECK: %[[LHS_REAL_IS_INF_WITH_SIGN:.*]] = math.copysign %[[LHS_REAL_IS_INF]], %[[LHS_REAL]] : f32
+// CHECK: %[[LHS_IMAG_IS_INF:.*]] = arith.select %[[LHS_IMAG_INFINITE]], %[[ONE]], %[[ZERO]] : f32
+// CHECK: %[[LHS_IMAG_IS_INF_WITH_SIGN:.*]] = math.copysign %[[LHS_IMAG_IS_INF]], %[[LHS_IMAG]] : f32
+// CHECK: %[[LHS_REAL_IS_INF_WITH_SIGN_TIMES_RHS_REAL:.*]] = arith.mulf %[[LHS_REAL_IS_INF_WITH_SIGN]], %[[RHS_REAL]] fastmath<nnan,contract> : f32
+// CHECK: %[[LHS_IMAG_IS_INF_WITH_SIGN_TIMES_RHS_IMAG:.*]] = arith.mulf %[[LHS_IMAG_IS_INF_WITH_SIGN]], %[[RHS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[INF_MULTIPLICATOR_1:.*]] = arith.addf %[[LHS_REAL_IS_INF_WITH_SIGN_TIMES_RHS_REAL]], %[[LHS_IMAG_IS_INF_WITH_SIGN_TIMES_RHS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[RESULT_REAL_3:.*]] = arith.mulf %[[INF]], %[[INF_MULTIPLICATOR_1]] fastmath<nnan,contract> : f32
+// CHECK: %[[LHS_REAL_IS_INF_WITH_SIGN_TIMES_RHS_IMAG:.*]] = arith.mulf %[[LHS_REAL_IS_INF_WITH_SIGN]], %[[RHS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[LHS_IMAG_IS_INF_WITH_SIGN_TIMES_RHS_REAL:.*]] = arith.mulf %[[LHS_IMAG_IS_INF_WITH_SIGN]], %[[RHS_REAL]] fastmath<nnan,contract> : f32
+// CHECK: %[[INF_MULTIPLICATOR_2:.*]] = arith.subf %[[LHS_IMAG_IS_INF_WITH_SIGN_TIMES_RHS_REAL]], %[[LHS_REAL_IS_INF_WITH_SIGN_TIMES_RHS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[RESULT_IMAG_3:.*]] = arith.mulf %[[INF]], %[[INF_MULTIPLICATOR_2]] fastmath<nnan,contract> : f32
+
+// Case 3. Finite numerator, infinite denominator.
+// CHECK: %[[LHS_REAL_FINITE:.*]] = arith.cmpf one, %[[LHS_REAL_ABS]], %[[INF]] : f32
+// CHECK: %[[LHS_IMAG_FINITE:.*]] = arith.cmpf one, %[[LHS_IMAG_ABS]], %[[INF]] : f32
+// CHECK: %[[LHS_IS_FINITE:.*]] = arith.andi %[[LHS_REAL_FINITE]], %[[LHS_IMAG_FINITE]] : i1
+// CHECK: %[[RHS_REAL_INFINITE:.*]] = arith.cmpf oeq, %[[RHS_REAL_ABS]], %[[INF]] : f32
+// CHECK: %[[RHS_IMAG_INFINITE:.*]] = arith.cmpf oeq, %[[RHS_IMAG_ABS]], %[[INF]] : f32
+// CHECK: %[[RHS_IS_INFINITE:.*]] = arith.ori %[[RHS_REAL_INFINITE]], %[[RHS_IMAG_INFINITE]] : i1
+// CHECK: %[[FINITE_NUM_INFINITE_DENOM:.*]] = arith.andi %[[LHS_IS_FINITE]], %[[RHS_IS_INFINITE]] : i1
+// CHECK: %[[RHS_REAL_IS_INF:.*]] = arith.select %[[RHS_REAL_INFINITE]], %[[ONE]], %[[ZERO]] : f32
+// CHECK: %[[RHS_REAL_IS_INF_WITH_SIGN:.*]] = math.copysign %[[RHS_REAL_IS_INF]], %[[RHS_REAL]] : f32
+// CHECK: %[[RHS_IMAG_IS_INF:.*]] = arith.select %[[RHS_IMAG_INFINITE]], %[[ONE]], %[[ZERO]] : f32
+// CHECK: %[[RHS_IMAG_IS_INF_WITH_SIGN:.*]] = math.copysign %[[RHS_IMAG_IS_INF]], %[[RHS_IMAG]] : f32
+// CHECK: %[[RHS_REAL_IS_INF_WITH_SIGN_TIMES_LHS_REAL:.*]] = arith.mulf %[[LHS_REAL]], %[[RHS_REAL_IS_INF_WITH_SIGN]] fastmath<nnan,contract> : f32
+// CHECK: %[[RHS_IMAG_IS_INF_WITH_SIGN_TIMES_LHS_IMAG:.*]] = arith.mulf %[[LHS_IMAG]], %[[RHS_IMAG_IS_INF_WITH_SIGN]] fastmath<nnan,contract> : f32
+// CHECK: %[[ZERO_MULTIPLICATOR_1:.*]] = arith.addf %[[RHS_REAL_IS_INF_WITH_SIGN_TIMES_LHS_REAL]], %[[RHS_IMAG_IS_INF_WITH_SIGN_TIMES_LHS_IMAG]] fastmath<nnan,contract> : f32
+// CHECK: %[[RESULT_REAL_4:.*]] = arith.mulf %[[ZERO]], %[[ZERO_MULTIPLICATOR_1]] fastmath<nnan,contract> : f32
+// CHECK: %[[RHS_REAL_IS_INF_WITH_SIGN_TIMES_LHS_IMAG:.*]] = arith.mulf %[[LHS_IMAG]], %[[RHS_REAL_IS_INF_WITH_SIGN]] fastmath<nnan,contract> : f32
+// CHECK: %[[RHS_IMAG_IS_INF_WITH_SIGN_TIMES_LHS_REAL:.*]] = arith.mulf %[[LHS_REAL]], %[[RHS_IMAG_IS_INF_WITH_SIGN]] fastmath<nnan,contract> : f32
+// CHECK: %[[ZERO_MULTIPLICATOR_2:.*]] = arith.subf %[[RHS_REAL_IS_INF_WITH_SIGN_TIMES_LHS_IMAG]], %[[RHS_IMAG_IS_INF_WITH_SIGN_TIMES_LHS_REAL]] fastmath<nnan,contract> : f32
+// CHECK: %[[RESULT_IMAG_4:.*]] = arith.mulf %[[ZERO]], %[[ZERO_MULTIPLICATOR_2]] fastmath<nnan,contract> : f32
+
+// CHECK: %[[REAL_ABS_SMALLER_THAN_IMAG_ABS:.*]] = arith.cmpf olt, %[[RHS_REAL_ABS]], %[[RHS_IMAG_ABS]] : f32
+// CHECK: %[[RESULT_REAL:.*]] = arith.select %[[REAL_ABS_SMALLER_THAN_IMAG_ABS]], %[[RESULT_REAL_1]], %[[RESULT_REAL_2]] : f32
+// CHECK: %[[RESULT_IMAG:.*]] = arith.select %[[REAL_ABS_SMALLER_THAN_IMAG_ABS]], %[[RESULT_IMAG_1]], %[[RESULT_IMAG_2]] : f32
+// CHECK: %[[RESULT_REAL_SPECIAL_CASE_3:.*]] = arith.select %[[FINITE_NUM_INFINITE_DENOM]], %[[RESULT_REAL_4]], %[[RESULT_REAL]] : f32
+// CHECK: %[[RESULT_IMAG_SPECIAL_CASE_3:.*]] = arith.select %[[FINITE_NUM_INFINITE_DENOM]], %[[RESULT_IMAG_4]], %[[RESULT_IMAG]] : f32
+// CHECK: %[[RESULT_REAL_SPECIAL_CASE_2:.*]] = arith.select %[[INF_NUM_FINITE_DENOM]], %[[RESULT_REAL_3]], %[[RESULT_REAL_SPECIAL_CASE_3]] : f32
+// CHECK: %[[RESULT_IMAG_SPECIAL_CASE_2:.*]] = arith.select %[[INF_NUM_FINITE_DENOM]], %[[RESULT_IMAG_3]], %[[RESULT_IMAG_SPECIAL_CASE_3]] : f32
+// CHECK: %[[RESULT_REAL_SPECIAL_CASE_1:.*]] = arith.select %[[RESULT_IS_INFINITY]], %[[INFINITY_RESULT_REAL]], %[[RESULT_REAL_SPECIAL_CASE_2]] : f32
+// CHECK: %[[RESULT_IMAG_SPECIAL_CASE_1:.*]] = arith.select %[[RESULT_IS_INFINITY]], %[[INFINITY_RESULT_IMAG]], %[[RESULT_IMAG_SPECIAL_CASE_2]] : f32
+// CHECK: %[[RESULT_REAL_IS_NAN:.*]] = arith.cmpf uno, %[[RESULT_REAL]], %[[ZERO]] : f32
+// CHECK: %[[RESULT_IMAG_IS_NAN:.*]] = arith.cmpf uno, %[[RESULT_IMAG]], %[[ZERO]] : f32
+// CHECK: %[[RESULT_IS_NAN:.*]] = arith.andi %[[RESULT_REAL_IS_NAN]], %[[RESULT_IMAG_IS_NAN]] : i1
+// CHECK: %[[RESULT_REAL_WITH_SPECIAL_CASES:.*]] = arith.select %[[RESULT_IS_NAN]], %[[RESULT_REAL_SPECIAL_CASE_1]], %[[RESULT_REAL]] : f32
+// CHECK: %[[RESULT_IMAG_WITH_SPECIAL_CASES:.*]] = arith.select %[[RESULT_IS_NAN]], %[[RESULT_IMAG_SPECIAL_CASE_1]], %[[RESULT_IMAG]] : f32
+// CHECK: %[[RESULT:.*]] = complex.create %[[RESULT_REAL_WITH_SPECIAL_CASES]], %[[RESULT_IMAG_WITH_SPECIAL_CASES]] : complex<f32>
+// CHECK: return %[[RESULT]] : complex<f32>
+
diff --git a/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir b/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir
index ffc2939afe7ff..ffef12250595f 100644
--- a/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir
+++ b/mlir/test/Conversion/MathToLibm/convert-to-libm.mlir
@@ -14,8 +14,24 @@
 // CHECK-DAG: @atanhf(f32) -> f32 attributes {llvm.readnone}
 // CHECK-DAG: @erf(f64) -> f64 attributes {llvm.readnone}
 // CHECK-DAG: @erff(f32) -> f32 attributes {llvm.readnone}
+// CHECK-DAG: @exp(f64) -> f64 attributes {llvm.readnone}
+// CHECK-DAG: @expf(f32) -> f32 attributes {llvm.readnone}
+// CHECK-DAG: @exp2(f64) -> f64 attributes {llvm.readnone}
+// CHECK-DAG: @exp2f(f32) -> f32 attributes {llvm.readnone}
 // CHECK-DAG: @expm1(f64) -> f64 attributes {llvm.readnone}
 // CHECK-DAG: @expm1f(f32) -> f32 attributes {llvm.readnone}
+// CHECK-DAG: @log(f64) -> f64 attributes {llvm.readnone}
+// CHECK-DAG: @logf(f32) -> f32 attributes {llvm.readnone}
+// CHECK-DAG: @log2(f64) -> f64 attributes {llvm.readnone}
+// CHECK-DAG: @log2f(f32) -> f32 attributes {llvm.readnone}
+// CHECK-DAG: @log10(f64) -> f64 attributes {llvm.readnone}
+// CHECK-DAG: @log10f(f32) -> f32 attributes {llvm.readnone}
+// CHECK-DAG: @log1p(f64) -> f64 attributes {llvm.readnone}
+// CHECK-DAG: @log1pf(f32) -> f32 attributes {llvm.readnone}
+// CHECK-DAG: @fabs(f64) -> f64 attributes {llvm.readnone}
+// CHECK-DAG: @fabsf(f32) -> f32 attributes {llvm.readnone}
+// CHECK-DAG: @fma(f64, f64, f64) -> f64 attributes {llvm.readnone}
+// CHECK-DAG: @fmaf(f32, f32, f32) -> f32 attributes {llvm.readnone}
 // CHECK-DAG: @atan2(f64, f64) -> f64 attributes {llvm.readnone}
 // CHECK-DAG: @atan2f(f32, f32) -> f32 attributes {llvm.readnone}
 // CHECK-DAG: @cbrt(f64) -> f64 attributes {llvm.readnone}
@@ -40,6 +56,47 @@
 // CHECK-DAG: @floorf(f32) -> f32 attributes {llvm.readnone}
 // CHECK-DAG: @ceil(f64) -> f64 attributes {llvm.readnone}
 // CHECK-DAG: @ceilf(f32) -> f32 attributes {llvm.readnone}
+// CHECK-DAG: @sqrt(f64) -> f64 attributes {llvm.readnone}
+// CHECK-DAG: @sqrtf(f32) -> f32 attributes {llvm.readnone}
+// CHECK-DAG: @pow(f64, f64) -> f64 attributes {llvm.readnone}
+// CHECK-DAG: @powf(f32, f32) -> f32 attributes {llvm.readnone}
+
+// CHECK-LABEL: func @absf_caller
+// CHECK-SAME: %[[FLOAT:.*]]: f32
+// CHECK-SAME: %[[DOUBLE:.*]]: f64
+func.func @absf_caller(%float: f32, %double: f64) -> (f32, f64) {
+ // CHECK-DAG: %[[FLOAT_RESULT:.*]] = call @fabsf(%[[FLOAT]]) : (f32) -> f32
+ %float_result = math.absf %float : f32
+ // CHECK-DAG: %[[DOUBLE_RESULT:.*]] = call @fabs(%[[DOUBLE]]) : (f64) -> f64
+ %double_result = math.absf %double : f64
+ // CHECK: return %[[FLOAT_RESULT]], %[[DOUBLE_RESULT]]
+ return %float_result, %double_result : f32, f64
+}
+
+// CHECK-LABEL: func @absf_vec_caller(
+// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>,
+// CHECK-SAME: %[[VAL_1:.*]]:
vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { +// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> +// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> +// CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> +// CHECK: %[[OUT0_F32:.*]] = call @fabsf(%[[IN0_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> +// CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> +// CHECK: %[[OUT1_F32:.*]] = call @fabsf(%[[IN1_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> +// CHECK: %[[OUT0_F64:.*]] = call @fabs(%[[IN0_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> +// CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> +// CHECK: %[[OUT1_F64:.*]] = call @fabs(%[[IN1_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> +// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: } +func.func @absf_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { + %float_result = math.absf %float : vector<2xf32> + %double_result = math.absf %double : vector<2xf64> + return %float_result, %double_result : vector<2xf32>, vector<2xf64> +} // CHECK-LABEL: func @acos_caller // CHECK-SAME: %[[FLOAT:.*]]: f32 @@ -379,6 +436,191 @@ func.func @erf_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vec return %float_result, %double_result : vector<2xf32>, vector<2xf64> } +// CHECK-LABEL: func @exp_caller +// CHECK-SAME: %[[FLOAT:.*]]: f32 +// CHECK-SAME: %[[DOUBLE:.*]]: f64 +func.func @exp_caller(%float: f32, %double: f64) -> (f32, f64) { + // CHECK-DAG: %[[FLOAT_RESULT:.*]] = call @expf(%[[FLOAT]]) : (f32) -> f32 + %float_result = math.exp %float : f32 + // CHECK-DAG: %[[DOUBLE_RESULT:.*]] = call @exp(%[[DOUBLE]]) : (f64) -> f64 + %double_result = math.exp %double : f64 + // CHECK: return %[[FLOAT_RESULT]], %[[DOUBLE_RESULT]] + return %float_result, %double_result : f32, f64 +} + +func.func @exp_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { + %float_result = math.exp %float : vector<2xf32> + %double_result = math.exp %double : vector<2xf64> + return %float_result, %double_result : vector<2xf32>, vector<2xf64> +} +// CHECK-LABEL: func @exp_vec_caller( +// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { +// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> +// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> +// CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> +// CHECK: %[[OUT0_F32:.*]] = call @expf(%[[IN0_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> +// CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> +// CHECK: %[[OUT1_F32:.*]] = call @expf(%[[IN1_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> +// CHECK: 
%[[OUT0_F64:.*]] = call @exp(%[[IN0_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> +// CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> +// CHECK: %[[OUT1_F64:.*]] = call @exp(%[[IN1_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> +// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: } + +// CHECK-LABEL: func @exp2_caller +// CHECK-SAME: %[[FLOAT:.*]]: f32 +// CHECK-SAME: %[[DOUBLE:.*]]: f64 +func.func @exp2_caller(%float: f32, %double: f64) -> (f32, f64) { + // CHECK-DAG: %[[FLOAT_RESULT:.*]] = call @exp2f(%[[FLOAT]]) : (f32) -> f32 + %float_result = math.exp2 %float : f32 + // CHECK-DAG: %[[DOUBLE_RESULT:.*]] = call @exp2(%[[DOUBLE]]) : (f64) -> f64 + %double_result = math.exp2 %double : f64 + // CHECK: return %[[FLOAT_RESULT]], %[[DOUBLE_RESULT]] + return %float_result, %double_result : f32, f64 +} + +func.func @exp2_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { + %float_result = math.exp2 %float : vector<2xf32> + %double_result = math.exp2 %double : vector<2xf64> + return %float_result, %double_result : vector<2xf32>, vector<2xf64> +} +// CHECK-LABEL: func @exp2_vec_caller( +// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { +// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> +// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> +// CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> +// CHECK: %[[OUT0_F32:.*]] = call @exp2f(%[[IN0_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> +// CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> +// CHECK: %[[OUT1_F32:.*]] = call @exp2f(%[[IN1_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> +// CHECK: %[[OUT0_F64:.*]] = call @exp2(%[[IN0_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> +// CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> +// CHECK: %[[OUT1_F64:.*]] = call @exp2(%[[IN1_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> +// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: } + +// CHECK-LABEL: func @log_caller +// CHECK-SAME: %[[FLOAT:.*]]: f32 +// CHECK-SAME: %[[DOUBLE:.*]]: f64 +func.func @log_caller(%float: f32, %double: f64) -> (f32, f64) { + // CHECK-DAG: %[[FLOAT_RESULT:.*]] = call @logf(%[[FLOAT]]) : (f32) -> f32 + %float_result = math.log %float : f32 + // CHECK-DAG: %[[DOUBLE_RESULT:.*]] = call @log(%[[DOUBLE]]) : (f64) -> f64 + %double_result = math.log %double : f64 + // CHECK: return %[[FLOAT_RESULT]], %[[DOUBLE_RESULT]] + return %float_result, %double_result : f32, f64 +} + +func.func @log_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { + %float_result = math.log %float : vector<2xf32> + %double_result = math.log %double : vector<2xf64> + return %float_result, %double_result : vector<2xf32>, vector<2xf64> +} +// CHECK-LABEL: func 
@log_vec_caller( +// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { +// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> +// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> +// CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> +// CHECK: %[[OUT0_F32:.*]] = call @logf(%[[IN0_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> +// CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> +// CHECK: %[[OUT1_F32:.*]] = call @logf(%[[IN1_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> +// CHECK: %[[OUT0_F64:.*]] = call @log(%[[IN0_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> +// CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> +// CHECK: %[[OUT1_F64:.*]] = call @log(%[[IN1_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> +// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: } + +// CHECK-LABEL: func @log2_caller +// CHECK-SAME: %[[FLOAT:.*]]: f32 +// CHECK-SAME: %[[DOUBLE:.*]]: f64 +func.func @log2_caller(%float: f32, %double: f64) -> (f32, f64) { + // CHECK-DAG: %[[FLOAT_RESULT:.*]] = call @log2f(%[[FLOAT]]) : (f32) -> f32 + %float_result = math.log2 %float : f32 + // CHECK-DAG: %[[DOUBLE_RESULT:.*]] = call @log2(%[[DOUBLE]]) : (f64) -> f64 + %double_result = math.log2 %double : f64 + // CHECK: return %[[FLOAT_RESULT]], %[[DOUBLE_RESULT]] + return %float_result, %double_result : f32, f64 +} + +func.func @log2_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { + %float_result = math.log2 %float : vector<2xf32> + %double_result = math.log2 %double : vector<2xf64> + return %float_result, %double_result : vector<2xf32>, vector<2xf64> +} +// CHECK-LABEL: func @log2_vec_caller( +// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { +// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> +// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> +// CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> +// CHECK: %[[OUT0_F32:.*]] = call @log2f(%[[IN0_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> +// CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> +// CHECK: %[[OUT1_F32:.*]] = call @log2f(%[[IN1_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> +// CHECK: %[[OUT0_F64:.*]] = call @log2(%[[IN0_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> +// CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> +// CHECK: %[[OUT1_F64:.*]] = call @log2(%[[IN1_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> +// CHECK: return 
%[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: } + +// CHECK-LABEL: func @log10_caller +// CHECK-SAME: %[[FLOAT:.*]]: f32 +// CHECK-SAME: %[[DOUBLE:.*]]: f64 +func.func @log10_caller(%float: f32, %double: f64) -> (f32, f64) { + // CHECK-DAG: %[[FLOAT_RESULT:.*]] = call @log10f(%[[FLOAT]]) : (f32) -> f32 + %float_result = math.log10 %float : f32 + // CHECK-DAG: %[[DOUBLE_RESULT:.*]] = call @log10(%[[DOUBLE]]) : (f64) -> f64 + %double_result = math.log10 %double : f64 + // CHECK: return %[[FLOAT_RESULT]], %[[DOUBLE_RESULT]] + return %float_result, %double_result : f32, f64 +} + +func.func @log10_vec_caller(%float: vector<2xf32>, %double: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { + %float_result = math.log10 %float : vector<2xf32> + %double_result = math.log10 %double : vector<2xf64> + return %float_result, %double_result : vector<2xf32>, vector<2xf64> +} +// CHECK-LABEL: func @log10_vec_caller( +// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { +// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> +// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> +// CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> +// CHECK: %[[OUT0_F32:.*]] = call @log10f(%[[IN0_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> +// CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> +// CHECK: %[[OUT1_F32:.*]] = call @log10f(%[[IN1_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> +// CHECK: %[[OUT0_F64:.*]] = call @log10(%[[IN0_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> +// CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> +// CHECK: %[[OUT1_F64:.*]] = call @log10(%[[IN1_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> +// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: } + // CHECK-LABEL: func @expm1_caller // CHECK-SAME: %[[FLOAT:.*]]: f32 // CHECK-SAME: %[[DOUBLE:.*]]: f64 @@ -438,6 +680,52 @@ func.func @expm1_multidim_vec_caller(%float: vector<2x2xf32>) -> (vector<2x2xf32 // CHECK: return %[[VAL_4]] : vector<2x2xf32> // CHECK: } +// CHECK-LABEL: func @fma_caller( +// CHECK-SAME: %[[FLOATA:.*]]: f32, %[[FLOATB:.*]]: f32, %[[FLOATC:.*]]: f32 +// CHECK-SAME: %[[DOUBLEA:.*]]: f64, %[[DOUBLEB:.*]]: f64, %[[DOUBLEC:.*]]: f64 +func.func @fma_caller(%float_a: f32, %float_b: f32, %float_c: f32, %double_a: f64, %double_b: f64, %double_c: f64) -> (f32, f64) { + // CHECK-DAG: %[[FLOAT_RESULT:.*]] = call @fmaf(%[[FLOATA]], %[[FLOATB]], %[[FLOATC]]) : (f32, f32, f32) -> f32 + %float_result = math.fma %float_a, %float_b, %float_c : f32 + // CHECK-DAG: %[[DOUBLE_RESULT:.*]] = call @fma(%[[DOUBLEA]], %[[DOUBLEB]], %[[DOUBLEC]]) : (f64, f64, f64) -> f64 + %double_result = math.fma %double_a, %double_b, %double_c : f64 + // CHECK: return %[[FLOAT_RESULT]], %[[DOUBLE_RESULT]] + return %float_result, %double_result : f32, f64 +} + +func.func @fma_vec_caller(%float_a: vector<2xf32>, %float_b: vector<2xf32>, %float_c: vector<2xf32>, %double_a: vector<2xf64>, %double_b: vector<2xf64>, %double_c: vector<2xf64>) 
-> (vector<2xf32>, vector<2xf64>) { + %float_result = math.fma %float_a, %float_b, %float_c : vector<2xf32> + %double_result = math.fma %double_a, %double_b, %double_c : vector<2xf64> + return %float_result, %double_result : vector<2xf32>, vector<2xf64> +} +// CHECK-LABEL: func @fma_vec_caller( +// CHECK-SAME: %[[VAL_0A:.*]]: vector<2xf32>, %[[VAL_0B:.*]]: vector<2xf32>, %[[VAL_0C:.*]]: vector<2xf32>, +// CHECK-SAME: %[[VAL_1A:.*]]: vector<2xf64>, %[[VAL_1B:.*]]: vector<2xf64>, %[[VAL_1C:.*]]: vector<2xf64> +// CHECK-SAME: ) -> (vector<2xf32>, vector<2xf64>) { +// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> +// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> +// CHECK: %[[IN0_F32A:.*]] = vector.extract %[[VAL_0A]][0] : f32 from vector<2xf32> +// CHECK: %[[IN0_F32B:.*]] = vector.extract %[[VAL_0B]][0] : f32 from vector<2xf32> +// CHECK: %[[IN0_F32C:.*]] = vector.extract %[[VAL_0C]][0] : f32 from vector<2xf32> +// CHECK: %[[OUT0_F32:.*]] = call @fmaf(%[[IN0_F32A]], %[[IN0_F32B]], %[[IN0_F32C]]) : (f32, f32, f32) -> f32 +// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> +// CHECK: %[[IN1_F32A:.*]] = vector.extract %[[VAL_0A]][1] : f32 from vector<2xf32> +// CHECK: %[[IN1_F32B:.*]] = vector.extract %[[VAL_0B]][1] : f32 from vector<2xf32> +// CHECK: %[[IN1_F32C:.*]] = vector.extract %[[VAL_0C]][1] : f32 from vector<2xf32> +// CHECK: %[[OUT1_F32:.*]] = call @fmaf(%[[IN1_F32A]], %[[IN1_F32B]], %[[IN1_F32C]]) : (f32, f32, f32) -> f32 +// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[IN0_F64A:.*]] = vector.extract %[[VAL_1A]][0] : f64 from vector<2xf64> +// CHECK: %[[IN0_F64B:.*]] = vector.extract %[[VAL_1B]][0] : f64 from vector<2xf64> +// CHECK: %[[IN0_F64C:.*]] = vector.extract %[[VAL_1C]][0] : f64 from vector<2xf64> +// CHECK: %[[OUT0_F64:.*]] = call @fma(%[[IN0_F64A]], %[[IN0_F64B]], %[[IN0_F64C]]) : (f64, f64, f64) -> f64 +// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> +// CHECK: %[[IN1_F64A:.*]] = vector.extract %[[VAL_1A]][1] : f64 from vector<2xf64> +// CHECK: %[[IN1_F64B:.*]] = vector.extract %[[VAL_1B]][1] : f64 from vector<2xf64> +// CHECK: %[[IN1_F64C:.*]] = vector.extract %[[VAL_1C]][1] : f64 from vector<2xf64> +// CHECK: %[[OUT1_F64:.*]] = call @fma(%[[IN1_F64A]], %[[IN1_F64B]], %[[IN1_F64C]]) : (f64, f64, f64) -> f64 +// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> +// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: } + // CHECK-LABEL: func @round_caller // CHECK-SAME: %[[FLOAT:.*]]: f32 // CHECK-SAME: %[[DOUBLE:.*]]: f64 @@ -673,3 +961,82 @@ func.func @ceil_caller(%float: f32, %double: f64) -> (f32, f64) { // CHECK: return %[[FLOAT_RESULT]], %[[DOUBLE_RESULT]] return %float_result, %double_result : f32, f64 } + +// CHECK-LABEL: func @sqrt_caller +// CHECK-SAME: %[[FLOAT:.*]]: f32 +// CHECK-SAME: %[[DOUBLE:.*]]: f64 +func.func @sqrt_caller(%float: f32, %double: f64) -> (f32, f64) { + // CHECK-DAG: %[[FLOAT_RESULT:.*]] = call @sqrtf(%[[FLOAT]]) : (f32) -> f32 + %float_result = math.sqrt %float : f32 + // CHECK-DAG: %[[DOUBLE_RESULT:.*]] = call @sqrt(%[[DOUBLE]]) : (f64) -> f64 + %double_result = math.sqrt %double : f64 + // CHECK: return %[[FLOAT_RESULT]], %[[DOUBLE_RESULT]] + return %float_result, %double_result : f32, f64 +} + +func.func @sqrt_vec_caller(%float: vector<2xf32>, %double: 
vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { + %float_result = math.sqrt %float : vector<2xf32> + %double_result = math.sqrt %double : vector<2xf64> + return %float_result, %double_result : vector<2xf32>, vector<2xf64> +} +// CHECK-LABEL: func @sqrt_vec_caller( +// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, +// CHECK-SAME: %[[VAL_1:.*]]: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { +// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> +// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> +// CHECK: %[[IN0_F32:.*]] = vector.extract %[[VAL_0]][0] : f32 from vector<2xf32> +// CHECK: %[[OUT0_F32:.*]] = call @sqrtf(%[[IN0_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> +// CHECK: %[[IN1_F32:.*]] = vector.extract %[[VAL_0]][1] : f32 from vector<2xf32> +// CHECK: %[[OUT1_F32:.*]] = call @sqrtf(%[[IN1_F32]]) : (f32) -> f32 +// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32> +// CHECK: %[[IN0_F64:.*]] = vector.extract %[[VAL_1]][0] : f64 from vector<2xf64> +// CHECK: %[[OUT0_F64:.*]] = call @sqrt(%[[IN0_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64> +// CHECK: %[[IN1_F64:.*]] = vector.extract %[[VAL_1]][1] : f64 from vector<2xf64> +// CHECK: %[[OUT1_F64:.*]] = call @sqrt(%[[IN1_F64]]) : (f64) -> f64 +// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64> +// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64> +// CHECK: } + +// CHECK-LABEL: func @powf_caller( +// CHECK-SAME: %[[FLOATA:.*]]: f32, %[[FLOATB:.*]]: f32 +// CHECK-SAME: %[[DOUBLEA:.*]]: f64, %[[DOUBLEB:.*]]: f64 +func.func @powf_caller(%float_a: f32, %float_b: f32, %double_a: f64, %double_b: f64) -> (f32, f64) { + // CHECK-DAG: %[[FLOAT_RESULT:.*]] = call @powf(%[[FLOATA]], %[[FLOATB]]) : (f32, f32) -> f32 + %float_result = math.powf %float_a, %float_b : f32 + // CHECK-DAG: %[[DOUBLE_RESULT:.*]] = call @pow(%[[DOUBLEA]], %[[DOUBLEB]]) : (f64, f64) -> f64 + %double_result = math.powf %double_a, %double_b : f64 + // CHECK: return %[[FLOAT_RESULT]], %[[DOUBLE_RESULT]] + return %float_result, %double_result : f32, f64 +} + +func.func @powf_vec_caller(%float_a: vector<2xf32>, %float_b: vector<2xf32>, %double_a: vector<2xf64>, %double_b: vector<2xf64>) -> (vector<2xf32>, vector<2xf64>) { + %float_result = math.powf %float_a, %float_b : vector<2xf32> + %double_result = math.powf %double_a, %double_b : vector<2xf64> + return %float_result, %double_result : vector<2xf32>, vector<2xf64> +} +// CHECK-LABEL: func @powf_vec_caller( +// CHECK-SAME: %[[VAL_0A:.*]]: vector<2xf32>, %[[VAL_0B:.*]]: vector<2xf32>, +// CHECK-SAME: %[[VAL_1A:.*]]: vector<2xf64>, %[[VAL_1B:.*]]: vector<2xf64> +// CHECK-SAME: ) -> (vector<2xf32>, vector<2xf64>) { +// CHECK-DAG: %[[CVF:.*]] = arith.constant dense<0.000000e+00> : vector<2xf32> +// CHECK-DAG: %[[CVD:.*]] = arith.constant dense<0.000000e+00> : vector<2xf64> +// CHECK: %[[IN0_F32A:.*]] = vector.extract %[[VAL_0A]][0] : f32 from vector<2xf32> +// CHECK: %[[IN0_F32B:.*]] = vector.extract %[[VAL_0B]][0] : f32 from vector<2xf32> +// CHECK: %[[OUT0_F32:.*]] = call @powf(%[[IN0_F32A]], %[[IN0_F32B]]) : (f32, f32) -> f32 +// CHECK: %[[VAL_8:.*]] = vector.insert %[[OUT0_F32]], %[[CVF]] [0] : f32 into vector<2xf32> +// CHECK: %[[IN1_F32A:.*]] = vector.extract %[[VAL_0A]][1] : f32 from vector<2xf32> +// CHECK: 
%[[IN1_F32B:.*]] = vector.extract %[[VAL_0B]][1] : f32 from vector<2xf32>
+// CHECK: %[[OUT1_F32:.*]] = call @powf(%[[IN1_F32A]], %[[IN1_F32B]]) : (f32, f32) -> f32
+// CHECK: %[[VAL_11:.*]] = vector.insert %[[OUT1_F32]], %[[VAL_8]] [1] : f32 into vector<2xf32>
+// CHECK: %[[IN0_F64A:.*]] = vector.extract %[[VAL_1A]][0] : f64 from vector<2xf64>
+// CHECK: %[[IN0_F64B:.*]] = vector.extract %[[VAL_1B]][0] : f64 from vector<2xf64>
+// CHECK: %[[OUT0_F64:.*]] = call @pow(%[[IN0_F64A]], %[[IN0_F64B]]) : (f64, f64) -> f64
+// CHECK: %[[VAL_14:.*]] = vector.insert %[[OUT0_F64]], %[[CVD]] [0] : f64 into vector<2xf64>
+// CHECK: %[[IN1_F64A:.*]] = vector.extract %[[VAL_1A]][1] : f64 from vector<2xf64>
+// CHECK: %[[IN1_F64B:.*]] = vector.extract %[[VAL_1B]][1] : f64 from vector<2xf64>
+// CHECK: %[[OUT1_F64:.*]] = call @pow(%[[IN1_F64A]], %[[IN1_F64B]]) : (f64, f64) -> f64
+// CHECK: %[[VAL_17:.*]] = vector.insert %[[OUT1_F64]], %[[VAL_14]] [1] : f64 into vector<2xf64>
+// CHECK: return %[[VAL_11]], %[[VAL_17]] : vector<2xf32>, vector<2xf64>
+// CHECK: }
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index e94e51d49a98b..1712d3d745b76 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -738,6 +738,18 @@ func.func @extract_element_with_value_1d(%arg0: vector<16xf32>, %arg1: index) -> f32 {

 // -----

+func.func @extract_element_with_value_2d(%arg0: vector<1x16xf32>, %arg1: index) -> f32 {
+ %0 = vector.extract %arg0[0, %arg1]: f32 from vector<1x16xf32>
+ return %0 : f32
+}
+
+// Multi-dim vectors are not supported but this test shouldn't crash.
+
+// CHECK-LABEL: @extract_element_with_value_2d(
+// CHECK: vector.extract
+
+// -----
+
 // CHECK-LABEL: @insert_element_0d
 // CHECK-SAME: %[[A:.*]]: f32,
 func.func @insert_element_0d(%a: f32, %b: vector<f32>) -> vector<f32> {
@@ -853,6 +865,19 @@ func.func @insert_element_with_value_1d(%arg0: vector<16xf32>, %arg1: f32, %arg2

 // -----

+func.func @insert_element_with_value_2d(%base: vector<1x16xf32>, %value: f32, %idx: index)
+ -> vector<1x16xf32> {
+ %0 = vector.insert %value, %base[0, %idx]: f32 into vector<1x16xf32>
+ return %0 : vector<1x16xf32>
+}
+
+// Multi-dim vectors are not supported but this test shouldn't crash.
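(Editorial aside on the two `*_with_value_2d` tests in this hunk: the property being locked in is that the VectorToLLVM patterns bail out cleanly, rather than assert, when vector.extract or vector.insert uses a dynamic position on a multi-dimensional vector. The C++ below sketches that kind of guard in a rewrite pattern; it is an illustration under assumptions — the struct name and exact guard condition are invented here, not the code this diff changes.)

#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/PatternMatch.h"

using namespace mlir;

// Hypothetical guard pattern: refuse rank > 1 vectors with a dynamic
// position so the op is left untouched instead of hitting an assert.
struct ExtractWithDynamicIndexSketch
    : public OpRewritePattern<vector::ExtractOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(vector::ExtractOp op,
                                PatternRewriter &rewriter) const override {
    // A 1-D lowering to llvm.extractelement cannot handle a dynamic index
    // into a multi-dimensional vector; signal a clean match failure.
    if (op.getSourceVectorType().getRank() > 1 &&
        !op.getDynamicPosition().empty())
      return rewriter.notifyMatchFailure(
          op, "dynamic position on a multi-dim vector is unsupported");
    // ... the supported 1-D lowering would go here ...
    return failure();
  }
};

(Returning a match failure leaves the op in the IR, which is exactly what the `// CHECK: vector.extract` and `// CHECK: vector.insert` lines assert.)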
+ +// CHECK-LABEL: @insert_element_with_value_2d( +// CHECK: vector.insert + +// ----- + func.func @vector_type_cast(%arg0: memref<8x8x8xf32>) -> memref> { %0 = vector.type_cast %arg0: memref<8x8x8xf32> to memref> return %0 : memref> diff --git a/mlir/test/Dialect/Affine/loop-coalescing.mlir b/mlir/test/Dialect/Affine/loop-coalescing.mlir index 9c17fb24be690..ae0adf5a0a02d 100644 --- a/mlir/test/Dialect/Affine/loop-coalescing.mlir +++ b/mlir/test/Dialect/Affine/loop-coalescing.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -allow-unregistered-dialect -affine-loop-coalescing %s | FileCheck %s +// RUN: mlir-opt -split-input-file -allow-unregistered-dialect -affine-loop-coalescing --cse %s | FileCheck %s // CHECK-LABEL: @one_3d_nest func.func @one_3d_nest() { @@ -239,19 +239,15 @@ func.func @coalesce_affine_for(%arg0: memref) { } return } -// CHECK: %[[T0:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK: %[[T1:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK: %[[T2:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK-DAG: %[[T3:.*]] = affine.apply #[[IDENTITY]]()[%[[T0]]] -// CHECK-DAG: %[[T4:.*]] = affine.apply #[[IDENTITY]]()[%[[T1]]] -// CHECK-DAG: %[[T5:.*]] = affine.apply #[[PRODUCT]](%[[T3]])[%[[T4]]] -// CHECK-DAG: %[[T6:.*]] = affine.apply #[[IDENTITY]]()[%[[T2]]] -// CHECK-DAG: %[[T7:.*]] = affine.apply #[[PRODUCT]](%[[T5]])[%[[T6]]] -// CHECK: affine.for %[[IV:.*]] = 0 to %[[T7]] -// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T6]]] -// CHECK-DAG: %[[T9:.*]] = affine.apply #[[FLOOR]](%[[IV]])[%[[T6]]] -// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T9]])[%[[T4]]] -// CHECK-DAG: %[[I:.*]] = affine.apply #[[FLOOR]](%[[T9]])[%[[T4]]] +// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref +// CHECK-DAG: %[[T0:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]] +// CHECK-DAG: %[[T1:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T0]]] +// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T1]])[%[[T0]]] +// CHECK: affine.for %[[IV:.*]] = 0 to %[[T2]] +// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T0]]] +// CHECK-DAG: %[[T9:.*]] = affine.apply #[[FLOOR]](%[[IV]])[%[[T0]]] +// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T9]])[%[[T0]]] +// CHECK-DAG: %[[I:.*]] = affine.apply #[[FLOOR]](%[[T9]])[%[[T0]]] // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) // CHECK-NEXT: } // CHECK-NEXT: return @@ -277,18 +273,16 @@ func.func @coalesce_affine_for(%arg0: memref) { } return } -// CHECK: %[[T0:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK: %[[T1:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK-DAG: %[[T2:.*]] = affine.apply #[[IDENTITY]]()[%[[T0]]] -// CHECK-DAG: %[[T3:.*]] = affine.apply #[[IDENTITY]]()[%[[T1]]] -// CHECK-DAG: %[[T4:.*]] = affine.apply #[[PRODUCT]](%[[T2]])[%[[T3]]] -// CHECK-DAG: %[[T5:.*]] = affine.apply #[[SIXTY_FOUR]]() -// CHECK-DAG: %[[T6:.*]] = affine.apply #[[PRODUCT]](%[[T4]])[%[[T5]]] -// CHECK: affine.for %[[IV:.*]] = 0 to %[[T6]] -// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T5]]] -// CHECK-DAG: %[[T8:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T5]]] -// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T8]])[%[[T3]]] -// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T8]])[%[[T3]]] +// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref +// CHECK-DAG: %[[T0:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]] +// CHECK-DAG: %[[T1:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T0]]] +// CHECK-DAG: %[[T2:.*]] = affine.apply #[[SIXTY_FOUR]]() +// CHECK-DAG: 
%[[T3:.*]] = affine.apply #[[PRODUCT]](%[[T1]])[%[[T2]]] +// CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]] +// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T2]]] +// CHECK-DAG: %[[T5:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T2]]] +// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T5]])[%[[T0]]] +// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T5]])[%[[T0]]] // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) // CHECK-NEXT: } // CHECK-NEXT: return @@ -316,19 +310,16 @@ func.func @coalesce_affine_for(%arg0: memref) { } return } -// CHECK: %[[T0:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK: %[[T1:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK: %[[T2:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK-DAG: %[[T3:.*]] = affine.min #[[MAP0]]()[%[[T0]]] -// CHECK-DAG: %[[T4:.*]] = affine.apply #[[IDENTITY]]()[%[[T1]]] -// CHECK-DAG: %[[T5:.*]] = affine.apply #[[PRODUCT]](%[[T3]])[%[[T4]]] -// CHECK-DAG: %[[T6:.*]] = affine.apply #[[IDENTITY]]()[%[[T2]]] -// CHECK-DAG: %[[T7:.*]] = affine.apply #[[PRODUCT]](%[[T5]])[%[[T6]]] -// CHECK: affine.for %[[IV:.*]] = 0 to %[[T7]] -// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T6]]] -// CHECK-DAG: %[[T9:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T6]]] -// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T9]])[%[[T4]]] -// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T9]])[%[[T4]]] +// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref +// CHECK-DAG: %[[T0:.*]] = affine.min #[[MAP0]]()[%[[DIM]]] +// CHECK-DAG: %[[T1:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]] +// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T1]]] +// CHECK-DAG: %[[T3:.*]] = affine.apply #[[PRODUCT]](%[[T2]])[%[[T1]]] +// CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]] +// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T1]]] +// CHECK-DAG: %[[T5:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T1]]] +// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T5]])[%[[T1]]] +// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T5]])[%[[T1]]] // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) // CHECK-NEXT: } // CHECK-NEXT: return @@ -342,12 +333,14 @@ func.func @coalesce_affine_for(%arg0: memref) { func.func @test_loops_do_not_get_coalesced() { affine.for %i = 0 to 7 { affine.for %j = #map0(%i) to min #map1(%i) { + "use"(%i, %j) : (index, index) -> () } } return } // CHECK: affine.for %[[IV0:.*]] = 0 to 7 // CHECK-NEXT: affine.for %[[IV1:.*]] = #[[MAP0]](%[[IV0]]) to min #[[MAP1]](%[[IV0]]) +// CHECK-NEXT: "use"(%[[IV0]], %[[IV1]]) // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: return diff --git a/mlir/test/Dialect/Arith/invalid.mlir b/mlir/test/Dialect/Arith/invalid.mlir index 6d8ac0ada52be..ada849220bb83 100644 --- a/mlir/test/Dialect/Arith/invalid.mlir +++ b/mlir/test/Dialect/Arith/invalid.mlir @@ -64,6 +64,24 @@ func.func @constant_out_of_range() { // ----- +func.func @constant_invalid_scalable_1d_vec_initialization() { +^bb0: + // expected-error@+1 {{'arith.constant' op intializing scalable vectors with elements attribute is not supported unless it's a vector splat}} + %c = arith.constant dense<[0, 1]> : vector<[2] x i32> + return +} + +// ----- + +func.func @constant_invalid_scalable_2d_vec_initialization() { +^bb0: + // expected-error@+1 {{'arith.constant' op intializing scalable vectors with elements attribute is not supported unless it's a vector splat}} + %c = arith.constant dense<[[3, 3], [1, 1]]> : vector<2 x [2] x i32> + return +} + +// ----- + func.func @constant_wrong_type() { ^bb: %x = 
"arith.constant"(){value = 10.} : () -> f32 // expected-error {{'arith.constant' op failed to verify that all of {value, result} have same type}} diff --git a/mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir index 83d5f1c9c9e86..8fb3ba1a1ecce 100644 --- a/mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir @@ -74,3 +74,34 @@ func.func @arith_const() -> index { %0 = "test.reify_bound"(%c5) : (index) -> (index) return %0 : index } + +// ----- + +// CHECK-LABEL: func @arith_select( +func.func @arith_select(%c: i1) -> (index, index) { + // CHECK: arith.constant 5 : index + %c5 = arith.constant 5 : index + // CHECK: arith.constant 9 : index + %c9 = arith.constant 9 : index + %r = arith.select %c, %c5, %c9 : index + // CHECK: %[[c5:.*]] = arith.constant 5 : index + // CHECK: %[[c10:.*]] = arith.constant 10 : index + %0 = "test.reify_bound"(%r) {type = "LB"} : (index) -> (index) + %1 = "test.reify_bound"(%r) {type = "UB"} : (index) -> (index) + // CHECK: return %[[c5]], %[[c10]] + return %0, %1 : index, index +} + +// ----- + +// CHECK-LABEL: func @arith_select_elementwise( +// CHECK-SAME: %[[a:.*]]: tensor, %[[b:.*]]: tensor, %[[c:.*]]: tensor) +func.func @arith_select_elementwise(%a: tensor, %b: tensor, %c: tensor) -> index { + %r = arith.select %c, %a, %b : tensor, tensor + // CHECK: %[[c0:.*]] = arith.constant 0 : index + // CHECK: %[[dim:.*]] = tensor.dim %[[a]], %[[c0]] + %0 = "test.reify_bound"(%r) {type = "EQ", dim = 0} + : (tensor) -> (index) + // CHECK: return %[[dim]] + return %0 : index +} diff --git a/mlir/test/Dialect/Arith/vscale_constants.mlir b/mlir/test/Dialect/Arith/vscale_constants.mlir new file mode 100644 index 0000000000000..324766f49980f --- /dev/null +++ b/mlir/test/Dialect/Arith/vscale_constants.mlir @@ -0,0 +1,14 @@ +// RUN: mlir-opt %s | FileCheck %s + +// Note: This test is checking value names (so deliberately is not using a regex match). 
+ +func.func @test_vscale_constant_names() { + %vscale = vector.vscale + %c8 = arith.constant 8 : index + // CHECK: %c8_vscale = arith.muli + %0 = arith.muli %vscale, %c8 : index + %c10 = arith.constant 10 : index + // CHECK: %c10_vscale = arith.muli + %1 = arith.muli %c10, %vscale : index + return +} diff --git a/mlir/test/Dialect/ControlFlow/ops.mlir b/mlir/test/Dialect/ControlFlow/ops.mlir index 8453c2b7038f1..c9317c7613972 100644 --- a/mlir/test/Dialect/ControlFlow/ops.mlir +++ b/mlir/test/Dialect/ControlFlow/ops.mlir @@ -38,3 +38,16 @@ func.func @switch_i64(%flag : i64, %caseOperand : i32) { ^bb3(%bb3arg : i32): return } + +// CHECK-LABEL: func @switch_result_number +func.func @switch_result_number(%arg0: i32) { + %0:2 = "test.op_with_two_results"() : () -> (i32, i32) + cf.switch %arg0 : i32, [ + default: ^bb2, + 0: ^bb1(%0#0 : i32) + ] + ^bb1(%1: i32): + return + ^bb2: + return +} diff --git a/mlir/test/Dialect/LLVMIR/canonicalize.mlir b/mlir/test/Dialect/LLVMIR/canonicalize.mlir index 5e26fa37b681d..6b265bbbdbfb2 100644 --- a/mlir/test/Dialect/LLVMIR/canonicalize.mlir +++ b/mlir/test/Dialect/LLVMIR/canonicalize.mlir @@ -8,6 +8,8 @@ llvm.func @fold_icmp_eq(%arg0 : i32) -> i1 { llvm.return %0 : i1 } +// ----- + // CHECK-LABEL: @fold_icmp_ne llvm.func @fold_icmp_ne(%arg0 : vector<2xi32>) -> vector<2xi1> { // CHECK: %[[C0:.*]] = llvm.mlir.constant(dense : vector<2xi1>) : vector<2xi1> @@ -16,6 +18,8 @@ llvm.func @fold_icmp_ne(%arg0 : vector<2xi32>) -> vector<2xi1> { llvm.return %0 : vector<2xi1> } +// ----- + // CHECK-LABEL: @fold_icmp_alloca llvm.func @fold_icmp_alloca() -> i1 { // CHECK: %[[C0:.*]] = llvm.mlir.constant(true) : i1 @@ -83,16 +87,18 @@ llvm.func @fold_unrelated_extractvalue(%arr: !llvm.array<4 x f32>) -> f32 { // ----- // CHECK-LABEL: fold_bitcast -// CHECK-SAME: %[[a0:arg[0-9]+]] -// CHECK-NEXT: llvm.return %[[a0]] +// CHECK-SAME: %[[ARG:[[:alnum:]]+]] +// CHECK-NEXT: llvm.return %[[ARG]] llvm.func @fold_bitcast(%x : !llvm.ptr) -> !llvm.ptr { %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr llvm.return %c : !llvm.ptr } +// ----- + // CHECK-LABEL: fold_bitcast2 -// CHECK-SAME: %[[a0:arg[0-9]+]] -// CHECK-NEXT: llvm.return %[[a0]] +// CHECK-SAME: %[[ARG:[[:alnum:]]+]] +// CHECK-NEXT: llvm.return %[[ARG]] llvm.func @fold_bitcast2(%x : i32) -> i32 { %c = llvm.bitcast %x : i32 to f32 %d = llvm.bitcast %c : f32 to i32 @@ -101,17 +107,31 @@ llvm.func @fold_bitcast2(%x : i32) -> i32 { // ----- +// CHECK-LABEL: fold_bitcast_chain +// CHECK-SAME: %[[ARG:[[:alnum:]]+]] +llvm.func @fold_bitcast_chain(%x : i32) -> vector<2xi16> { + %c = llvm.bitcast %x : i32 to f32 + %d = llvm.bitcast %c : f32 to vector<2xi16> + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[ARG]] : i32 to vector<2xi16> + // CHECK: llvm.return %[[BITCAST]] + llvm.return %d : vector<2xi16> +} + +// ----- + // CHECK-LABEL: fold_addrcast -// CHECK-SAME: %[[a0:arg[0-9]+]] -// CHECK-NEXT: llvm.return %[[a0]] +// CHECK-SAME: %[[ARG:[[:alnum:]]+]] +// CHECK-NEXT: llvm.return %[[ARG]] llvm.func @fold_addrcast(%x : !llvm.ptr) -> !llvm.ptr { %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr llvm.return %c : !llvm.ptr } +// ----- + // CHECK-LABEL: fold_addrcast2 -// CHECK-SAME: %[[a0:arg[0-9]+]] -// CHECK-NEXT: llvm.return %[[a0]] +// CHECK-SAME: %[[ARG:[[:alnum:]]+]] +// CHECK-NEXT: llvm.return %[[ARG]] llvm.func @fold_addrcast2(%x : !llvm.ptr) -> !llvm.ptr { %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr<5> %d = llvm.addrspacecast %c : !llvm.ptr<5> to !llvm.ptr @@ -120,18 +140,32 @@ llvm.func @fold_addrcast2(%x : !llvm.ptr) 
-> !llvm.ptr { // ----- +// CHECK-LABEL: fold_addrcast_chain +// CHECK-SAME: %[[ARG:[[:alnum:]]+]] +llvm.func @fold_addrcast_chain(%x : !llvm.ptr) -> !llvm.ptr<2> { + %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr<1> + %d = llvm.addrspacecast %c : !llvm.ptr<1> to !llvm.ptr<2> + // CHECK: %[[ADDRCAST:.*]] = llvm.addrspacecast %[[ARG]] : !llvm.ptr to !llvm.ptr<2> + // CHECK: llvm.return %[[ADDRCAST]] + llvm.return %d : !llvm.ptr<2> +} + +// ----- + // CHECK-LABEL: fold_gep -// CHECK-SAME: %[[a0:arg[0-9]+]] -// CHECK-NEXT: llvm.return %[[a0]] +// CHECK-SAME: %[[ARG:[[:alnum:]]+]] +// CHECK-NEXT: llvm.return %[[ARG]] llvm.func @fold_gep(%x : !llvm.ptr) -> !llvm.ptr { %c0 = arith.constant 0 : i32 %c = llvm.getelementptr %x[%c0] : (!llvm.ptr, i32) -> !llvm.ptr, i8 llvm.return %c : !llvm.ptr } +// ----- + // CHECK-LABEL: fold_gep_neg -// CHECK-SAME: %[[a0:arg[0-9]+]] -// CHECK-NEXT: %[[RES:.*]] = llvm.getelementptr inbounds %[[a0]][0, 1] +// CHECK-SAME: %[[ARG:[[:alnum:]]+]] +// CHECK-NEXT: %[[RES:.*]] = llvm.getelementptr inbounds %[[ARG]][0, 1] // CHECK-NEXT: llvm.return %[[RES]] llvm.func @fold_gep_neg(%x : !llvm.ptr) -> !llvm.ptr { %c0 = arith.constant 0 : i32 @@ -139,9 +173,11 @@ llvm.func @fold_gep_neg(%x : !llvm.ptr) -> !llvm.ptr { llvm.return %0 : !llvm.ptr } +// ----- + // CHECK-LABEL: fold_gep_canon -// CHECK-SAME: %[[a0:arg[0-9]+]] -// CHECK-NEXT: %[[RES:.*]] = llvm.getelementptr %[[a0]][2] +// CHECK-SAME: %[[ARG:[[:alnum:]]+]] +// CHECK-NEXT: %[[RES:.*]] = llvm.getelementptr %[[ARG]][2] // CHECK-NEXT: llvm.return %[[RES]] llvm.func @fold_gep_canon(%x : !llvm.ptr) -> !llvm.ptr { %c2 = arith.constant 2 : i32 @@ -175,6 +211,8 @@ llvm.func @load_dce(%x : !llvm.ptr) { llvm.return } +// ----- + llvm.mlir.global external @fp() : !llvm.ptr // CHECK-LABEL: addr_dce @@ -184,6 +222,8 @@ llvm.func @addr_dce(%x : !llvm.ptr) { llvm.return } +// ----- + // CHECK-LABEL: alloca_dce // CHECK-NEXT: llvm.return llvm.func @alloca_dce() { diff --git a/mlir/test/Dialect/LLVMIR/mem2reg-dbginfo.mlir b/mlir/test/Dialect/LLVMIR/mem2reg-dbginfo.mlir index f7ddb4a7abe5a..b7cbd787f06e4 100644 --- a/mlir/test/Dialect/LLVMIR/mem2reg-dbginfo.mlir +++ b/mlir/test/Dialect/LLVMIR/mem2reg-dbginfo.mlir @@ -29,6 +29,27 @@ llvm.func @basic_store_load(%arg0: i64) -> i64 { llvm.return %2 : i64 } +// CHECK-LABEL: llvm.func @multiple_store_load +llvm.func @multiple_store_load(%arg0: i64) -> i64 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: = llvm.alloca + %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr + // CHECK-NOT: llvm.intr.dbg.declare + llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr + // CHECK-NOT: llvm.store + llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr + // CHECK-NOT: llvm.intr.dbg.declare + llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr + // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[LOADED:.*]] : i64 + // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[LOADED]] : i64 + // CHECK-NOT: llvm.intr.dbg.value + // CHECK-NOT: llvm.intr.dbg.declare + // CHECK-NOT: llvm.store + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64 + // CHECK: llvm.return %[[LOADED]] : i64 + llvm.return %2 : i64 +} + // CHECK-LABEL: llvm.func @block_argument_value // CHECK-SAME: (%[[ARG0:.*]]: i64, {{.*}}) llvm.func @block_argument_value(%arg0: i64, %arg1: i1) -> i64 { diff --git a/mlir/test/Dialect/LLVMIR/mem2reg.mlir b/mlir/test/Dialect/LLVMIR/mem2reg.mlir index 90e56c1166edf..fa5d842302d0f 100644 --- a/mlir/test/Dialect/LLVMIR/mem2reg.mlir +++ 
b/mlir/test/Dialect/LLVMIR/mem2reg.mlir @@ -697,3 +697,249 @@ llvm.func @transitive_reaching_def() -> !llvm.ptr { %3 = llvm.load %1 {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr llvm.return %3 : !llvm.ptr } + +// ----- + +// CHECK-LABEL: @load_int_from_float +llvm.func @load_int_from_float() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x f32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[UNDEF]] : f32 to i32 + // CHECK: llvm.return %[[BITCAST:.*]] + llvm.return %2 : i32 +} + +// ----- + +// CHECK-LABEL: @load_float_from_int +llvm.func @load_float_from_int() -> f32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> f32 + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[UNDEF]] : i32 to f32 + // CHECK: llvm.return %[[BITCAST:.*]] + llvm.return %2 : f32 +} + +// ----- + +// CHECK-LABEL: @load_int_from_vector +llvm.func @load_int_from_vector() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x vector<2xi16> : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[UNDEF]] : vector<2xi16> to i32 + // CHECK: llvm.return %[[BITCAST:.*]] + llvm.return %2 : i32 +} + +// ----- + +// LLVM arrays cannot be bitcasted, so the following cannot be promoted. + +// CHECK-LABEL: @load_int_from_array +llvm.func @load_int_from_array() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: llvm.alloca + %1 = llvm.alloca %0 x !llvm.array<2 x i16> : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK-NOT: llvm.bitcast + llvm.return %2 : i32 +} + +// ----- + +// CHECK-LABEL: @store_int_to_float +// CHECK-SAME: %[[ARG:.*]]: i32 +llvm.func @store_int_to_float(%arg: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x f32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + llvm.store %arg, %1 {alignment = 4 : i64} : i32, !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: llvm.return %[[ARG]] + llvm.return %2 : i32 +} + +// ----- + +// CHECK-LABEL: @store_float_to_int +// CHECK-SAME: %[[ARG:.*]]: f32 +llvm.func @store_float_to_int(%arg: f32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + llvm.store %arg, %1 {alignment = 4 : i64} : f32, !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[ARG]] : f32 to i32 + // CHECK: llvm.return %[[BITCAST]] + llvm.return %2 : i32 +} + +// ----- + +// CHECK-LABEL: @store_int_to_vector +// CHECK-SAME: %[[ARG:.*]]: i32 +llvm.func @store_int_to_vector(%arg: i32) -> vector<4xi8> { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x vector<2xi16> {alignment = 4 : i64} : (i32) -> !llvm.ptr + llvm.store %arg, %1 {alignment = 4 : i64} : i32, !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> vector<4xi8> + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[ARG]] : i32 to vector<4xi8> + // CHECK: llvm.return 
%[[BITCAST]] + llvm.return %2 : vector<4xi8> +} + +// ----- + +// CHECK-LABEL: @load_ptr_from_int +llvm.func @load_ptr_from_int() -> !llvm.ptr { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x i64 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> !llvm.ptr + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef + // CHECK: %[[CAST:.*]] = llvm.inttoptr %[[UNDEF]] : i64 to !llvm.ptr + // CHECK: llvm.return %[[CAST:.*]] + llvm.return %2 : !llvm.ptr +} + +// ----- + +// CHECK-LABEL: @load_int_from_ptr +llvm.func @load_int_from_ptr() -> i64 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x !llvm.ptr {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64 + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef + // CHECK: %[[CAST:.*]] = llvm.ptrtoint %[[UNDEF]] : !llvm.ptr to i64 + // CHECK: llvm.return %[[CAST:.*]] + llvm.return %2 : i64 +} + +// ----- + +// CHECK-LABEL: @load_ptr_addrspace_cast +llvm.func @load_ptr_addrspace_cast() -> !llvm.ptr<2> { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x !llvm.ptr<1> {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> !llvm.ptr<2> + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef + // CHECK: %[[CAST:.*]] = llvm.addrspacecast %[[UNDEF]] : !llvm.ptr<1> to !llvm.ptr<2> + // CHECK: llvm.return %[[CAST:.*]] + llvm.return %2 : !llvm.ptr<2> +} + +// ----- + +// CHECK-LABEL: @stores_with_different_types +// CHECK-SAME: %[[ARG0:.*]]: i64 +// CHECK-SAME: %[[ARG1:.*]]: f64 +llvm.func @stores_with_different_types(%arg0: i64, %arg1: f64, %cond: i1) -> f64 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x i64 {alignment = 4 : i64} : (i32) -> !llvm.ptr + llvm.cond_br %cond, ^bb1, ^bb2 +^bb1: + llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr + // CHECK: llvm.br ^[[BB3:.*]](%[[ARG0]] + llvm.br ^bb3 +^bb2: + llvm.store %arg1, %1 {alignment = 4 : i64} : f64, !llvm.ptr + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[ARG1]] : f64 to i64 + // CHECK: llvm.br ^[[BB3]](%[[BITCAST]] + llvm.br ^bb3 +// CHECK: ^[[BB3]](%[[BLOCK_ARG:.*]]: i64) +^bb3: + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> f64 + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[BLOCK_ARG]] : i64 to f64 + // CHECK: llvm.return %[[BITCAST]] + llvm.return %2 : f64 +} + +// ----- + +// Verifies that stores with smaller bitsize inputs are not replaced. A trivial +// implementation will be incorrect due to endianness considerations. 
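+// (For example, an f32 store into the i64 slot defines only 4 of its 8 bytes,
+// and which i64 bits those bytes correspond to depends on the target's
+// endianness, so no single bitcast-based rewrite is unconditionally correct.)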
+ +// CHECK-LABEL: @stores_with_different_type_sizes +llvm.func @stores_with_different_type_sizes(%arg0: i64, %arg1: f32, %cond: i1) -> f64 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: llvm.alloca + %1 = llvm.alloca %0 x i64 {alignment = 4 : i64} : (i32) -> !llvm.ptr + llvm.cond_br %cond, ^bb1, ^bb2 +^bb1: + llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr + llvm.br ^bb3 +^bb2: + llvm.store %arg1, %1 {alignment = 4 : i64} : f32, !llvm.ptr + llvm.br ^bb3 +^bb3: + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> f64 + llvm.return %2 : f64 +} + +// ----- + +// CHECK-LABEL: @load_smaller_int +llvm.func @load_smaller_int() -> i16 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: llvm.alloca + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i16 + llvm.return %2 : i16 +} + +// ----- + +// CHECK-LABEL: @load_different_type_smaller +llvm.func @load_different_type_smaller() -> f32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: llvm.alloca + %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> f32 + llvm.return %2 : f32 +} + +// ----- + +// This alloca is too small for the load, still, mem2reg should not touch it. + +// CHECK-LABEL: @impossible_load +llvm.func @impossible_load() -> f64 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: llvm.alloca + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> f64 + llvm.return %2 : f64 +} + +// ----- + +// Verifies that mem2reg does not introduce address space casts of pointers +// with different bitsize. + +module attributes { dlti.dl_spec = #dlti.dl_spec< + #dlti.dl_entry, dense<[32, 64, 64]> : vector<3xi64>>, + #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi64>> +>} { + + // CHECK-LABEL: @load_ptr_addrspace_cast_different_size + llvm.func @load_ptr_addrspace_cast_different_size() -> !llvm.ptr<2> { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: llvm.alloca + %1 = llvm.alloca %0 x !llvm.ptr<1> {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> !llvm.ptr<2> + llvm.return %2 : !llvm.ptr<2> + } +} diff --git a/mlir/test/Dialect/LLVMIR/type-consistency.mlir b/mlir/test/Dialect/LLVMIR/type-consistency.mlir index a6176142f1746..c9c1355d16df9 100644 --- a/mlir/test/Dialect/LLVMIR/type-consistency.mlir +++ b/mlir/test/Dialect/LLVMIR/type-consistency.mlir @@ -157,8 +157,7 @@ llvm.func @coalesced_store_floats(%arg: i64) { // CHECK: %[[SHR:.*]] = llvm.lshr %[[ARG]], %[[CST32]] : i64 // CHECK: %[[TRUNC:.*]] = llvm.trunc %[[SHR]] : i64 to i32 // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"foo", (f32, f32)> - // CHECK: %[[BIT_CAST:.*]] = llvm.bitcast %[[TRUNC]] : i32 to f32 - // CHECK: llvm.store %[[BIT_CAST]], %[[GEP]] + // CHECK: llvm.store %[[TRUNC]], %[[GEP]] llvm.store %arg, %1 : i64, !llvm.ptr // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]] llvm.return @@ -327,21 +326,6 @@ llvm.func @vector_write_split_struct(%arg: vector<2xi64>) { // ----- -// CHECK-LABEL: llvm.func @bitcast_insertion -// CHECK-SAME: %[[ARG:.*]]: i32 -llvm.func @bitcast_insertion(%arg: i32) { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x f32 - %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr - // CHECK: %[[BIT_CAST:.*]] = llvm.bitcast %[[ARG]] : i32 to f32 - // CHECK: llvm.store 
%[[BIT_CAST]], %[[ALLOCA]] - llvm.store %arg, %1 : i32, !llvm.ptr - // CHECK-NOT: llvm.store %[[ARG]], %[[ALLOCA]] - llvm.return -} - -// ----- - // CHECK-LABEL: llvm.func @gep_split // CHECK-SAME: %[[ARG:.*]]: i64 llvm.func @gep_split(%arg: i64) { diff --git a/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir b/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir index 5143be3930663..e7d9815ab222b 100644 --- a/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir +++ b/mlir/test/Dialect/Linalg/transform-ops-invalid.mlir @@ -71,3 +71,24 @@ transform.sequence failures(propagate) { : (!transform.any_op) -> !transform.op<"linalg.generic"> } + +// ----- + +transform.sequence failures(propagate) { +^bb0(%arg0: !transform.any_op): + %0 = transform.param.constant 2 : i64 -> !transform.param + // expected-error@below {{custom op 'transform.structured.vectorize' expected 2 operand type(s)}} + transform.structured.vectorize %arg0 vector_sizes [%0, 2] : !transform.any_op, !transform.param, !transform.param + +} + +// ----- + +transform.sequence failures(propagate) { +^bb0(%arg0: !transform.any_op): + %0 = transform.param.constant 2 : i64 -> !transform.param + // expected-error@below {{expected ']' in dynamic index list}} + // expected-error@below {{custom op 'transform.structured.vectorize' expected SSA value or integer}} + transform.structured.vectorize %arg0 vector_sizes [%0 : !transform.param, 2] : !transform.any_op, !transform.param + +} diff --git a/mlir/test/Dialect/Linalg/transform-ops.mlir b/mlir/test/Dialect/Linalg/transform-ops.mlir index 6b276e69a595d..8f6274fd22c21 100644 --- a/mlir/test/Dialect/Linalg/transform-ops.mlir +++ b/mlir/test/Dialect/Linalg/transform-ops.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s | mlir-opt | FileCheck %s +// RUN: mlir-opt %s --split-input-file | mlir-opt | FileCheck %s transform.sequence failures(propagate) { ^bb1(%arg0: !transform.any_op): @@ -57,3 +57,12 @@ transform.sequence failures(propagate) { %1:2 = transform.structured.fuse_into_containing_op %arg2 into %loop : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) } + +// ----- + +transform.sequence failures(propagate) { +^bb0(%arg0: !transform.any_op): + // CHECK: transform.structured.vectorize %arg0 : !transform.any_op + transform.structured.vectorize %arg0 vector_sizes [] : !transform.any_op + +} diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index 2d01d57304013..fd7d3b4767eb2 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -36,6 +36,81 @@ module attributes {transform.with_named_sequence} { // ----- +func.func @vectorize_dynamic_identity_with_constant(%arg0: tensor, + %arg1: tensor, + %arg2: tensor) -> tensor { + %c4 = arith.constant 4 : index + %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>, + affine_map<(d0) -> (d0)>, + affine_map<(d0) -> (d0)>], + iterator_types = ["parallel"] } + ins(%arg0, %arg1 : tensor, tensor) + outs(%arg2 : tensor) { + ^bb(%in0: f32, %in1: f32, %out: f32) : + %0 = arith.addf %in0, %in1 : f32 + linalg.yield %0 : f32 + } -> tensor + return %0 : tensor +} + +// CHECK-LABEL: @vectorize_dynamic_identity_with_constant +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor +// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_4]] : vector<4xi1> +// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, 
vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<4xf32> +// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = [true]} : vector<4xf32>, tensor } : vector<4xi1> -> tensor + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %size = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op + transform.structured.vectorize %0 vector_sizes [%size] : !transform.any_op, !transform.any_op + transform.yield + } +} + +// ----- + +func.func @vectorize_dynamic_identity_with_param(%arg0: tensor, + %arg1: tensor, + %arg2: tensor) -> tensor { + %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>, + affine_map<(d0) -> (d0)>, + affine_map<(d0) -> (d0)>], + iterator_types = ["parallel"] } + ins(%arg0, %arg1 : tensor, tensor) + outs(%arg2 : tensor) { + ^bb(%in0: f32, %in1: f32, %out: f32) : + %0 = arith.addf %in0, %in1 : f32 + linalg.yield %0 : f32 + } -> tensor + return %0 : tensor +} + +// CHECK-LABEL: @vectorize_dynamic_identity_with_param +// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_4:.*]] = tensor.dim %{{.*}}, %[[VAL_3]] : tensor +// CHECK: %[[VAL_7:.*]] = vector.create_mask %[[VAL_4]] : vector<4xi1> +// CHECK: %[[VAL_8:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_10:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_12:.*]] = vector.mask %[[VAL_7]] { vector.transfer_read %{{.*}} {in_bounds = [true]} : tensor, vector<4xf32> } : vector<4xi1> -> vector<4xf32> +// CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_8]], %[[VAL_10]] : vector<4xf32> +// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_7]] { vector.transfer_write %{{.*}} {in_bounds = [true]} : vector<4xf32>, tensor } : vector<4xi1> -> tensor + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %vector_size = transform.param.constant 4 : i64 -> !transform.param + transform.structured.vectorize %0 vector_sizes [%vector_size] : !transform.any_op, !transform.param + transform.yield + } +} + +// ----- + func.func @vectorize_dynamic_1d_broadcast(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { @@ -231,6 +306,49 @@ module attributes {transform.with_named_sequence} { // ----- +func.func @vectorize_dynamic_transpose_reduction_with_params(%arg0: tensor, + %arg1: tensor) -> tensor { + %0 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, + affine_map<(d0, d1, d2) -> (d2, d1)>], + iterator_types = ["reduction", "parallel", "parallel"] } + ins(%arg0 : tensor) + outs(%arg1 : tensor) { + ^bb(%in: f32, %out: 
f32) : + %0 = arith.addf %in, %out : f32 + linalg.yield %0 : f32 + } -> tensor + return %0 : tensor +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %vector_size_0 = transform.param.constant 4 : i64 -> !transform.param + %vector_size_2 = transform.param.constant 16 : i64 -> !transform.param + transform.structured.vectorize %0 vector_sizes + [%vector_size_0, 8, %vector_size_2] : !transform.any_op, !transform.param, !transform.param + transform.yield + } +} + +// CHECK-LABEL: @vectorize_dynamic_transpose_reduction_with_params( +// CHECK-SAME: %[[VAL_0:.*]]: tensor, +// CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor +// CHECK: %[[VAL_6:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor +// CHECK: %[[VAL_10:.*]] = vector.create_mask %[[VAL_3]], %[[VAL_5]], %[[VAL_7]] : vector<4x8x16xi1> +// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_0]]{{.*}} {in_bounds = [true, true, true]} : tensor, vector<4x8x16xf32> } : vector<4x8x16xi1> -> vector<4x8x16xf32> +// CHECK: %[[VAL_13:.*]] = vector.create_mask %[[VAL_7]], %[[VAL_5]] : vector<16x8xi1> +// CHECK: %[[VAL_14:.*]] = vector.mask %[[VAL_13]] { vector.transfer_read %[[VAL_1]]{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : tensor, vector<8x16xf32> } : vector<16x8xi1> -> vector<8x16xf32> +// CHECK: %[[VAL_15:.*]] = vector.mask %[[VAL_10]] { vector.multi_reduction , %[[VAL_11]], %[[VAL_14]] [0] : vector<4x8x16xf32> to vector<8x16xf32> } : vector<4x8x16xi1> -> vector<8x16xf32> +// CHECK: %[[VAL_17:.*]] = vector.mask %[[VAL_13]] { vector.transfer_write %[[VAL_15]], %{{.*}} {in_bounds = [true, true], permutation_map = #{{.*}}} : vector<8x16xf32>, tensor } : vector<16x8xi1> -> tensor + +// ----- + func.func @vectorize_partial_dynamic_identity(%arg0: tensor<8x?xf32>, %arg1: tensor<8x?xf32>, %arg2: tensor<8x?xf32>) -> tensor<8x?xf32> { diff --git a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir index 2d59331b72cf6..4dc3e4ea0ef45 100644 --- a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir +++ b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics | FileCheck %s +// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics -allow-unregistered-dialect --cse | FileCheck %s func.func @coalesce_inner() { %c0 = arith.constant 0 : index @@ -14,7 +14,7 @@ func.func @coalesce_inner() { scf.for %k = %i to %j step %c1 { // Inner loop must have been removed. 
scf.for %l = %i to %j step %c1 { - arith.addi %i, %j : index + "use"(%i, %j) : (index, index) -> () } } {coalesce} } @@ -33,13 +33,19 @@ module attributes {transform.with_named_sequence} { // ----- +// CHECK-DAG: #[[MAP:.+]] = affine_map<() -> (64)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 * s0)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> (d0 floordiv s0)> func.func @coalesce_outer(%arg1: memref<64x64xf32, 1>, %arg2: memref<64x64xf32, 1>, %arg3: memref<64x64xf32, 1>) attributes {} { + // CHECK: %[[T0:.+]] = affine.apply #[[MAP]]() + // CHECK: %[[UB:.+]] = affine.apply #[[MAP1]](%[[T0]])[%[[T0]]] // CHECK: affine.for %[[IV1:.+]] = 0 to %[[UB:.+]] { // CHECK-NOT: affine.for %[[IV2:.+]] affine.for %arg4 = 0 to 64 { affine.for %arg5 = 0 to 64 { - // CHECK: %[[IDX0:.+]] = affine.apply #[[MAP0:.+]](%[[IV1]])[%{{.+}}] - // CHECK: %[[IDX1:.+]] = affine.apply #[[MAP1:.+]](%[[IV1]])[%{{.+}}] + // CHECK: %[[IDX0:.+]] = affine.apply #[[MAP2]](%[[IV1]])[%{{.+}}] + // CHECK: %[[IDX1:.+]] = affine.apply #[[MAP3]](%[[IV1]])[%{{.+}}] // CHECK-NEXT: %{{.+}} = affine.load %{{.+}}[%[[IDX1]], %[[IDX0]]] : memref<64x64xf32, 1> %0 = affine.load %arg1[%arg4, %arg5] : memref<64x64xf32, 1> %1 = affine.load %arg2[%arg4, %arg5] : memref<64x64xf32, 1> @@ -96,3 +102,200 @@ module attributes {transform.with_named_sequence} { transform.yield } } + +// ----- + +func.func @tensor_loops(%arg0 : tensor, %lb0 : index, %ub0 : index, %step0 : index, + %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> tensor { + %0 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg1 = %arg0) -> tensor { + %1 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg2 = %arg1) -> tensor { + %2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg3 = %arg2) -> tensor { + %3 = "use"(%arg3, %i, %j, %k) : (tensor, index, index, index) -> (tensor) + scf.yield %3 : tensor + } + scf.yield %2 : tensor + } + scf.yield %1 : tensor + } {coalesce} + return %0 : tensor +} +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for"> + %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">) + transform.yield + } +} +// CHECK: func.func @tensor_loops( +// CHECK-SAME: %[[ARG0:.+]]: tensor +// CHECK-SAME: %[[LB0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[LB1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[LB2:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index +// CHECK: %[[NEWUB0_DIFF:.+]] = arith.subi %[[UB0]], %[[LB0]] +// CHECK-DAG: %[[NEWUB0:.+]] = arith.ceildivsi %[[NEWUB0_DIFF]], %[[STEP0]] +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 +// CHECK: %[[NEWUB1_DIFF:.+]] = arith.subi %[[UB1]], %[[LB1]] +// CHECK-DAG: %[[NEWUB1:.+]] = arith.ceildivsi %[[NEWUB1_DIFF]], %[[STEP1]] +// CHECK: %[[NEWUB2_DIFF:.+]] = arith.subi %[[UB2]], %[[LB2]] +// CHECK-DAG: %[[NEWUB2:.+]] = arith.ceildivsi %[[NEWUB2_DIFF]], %[[STEP2]] +// CHECK: 
%[[PROD1:.+]] = arith.muli %[[NEWUB0]], %[[NEWUB1]]
+// CHECK: %[[NEWUB:.+]] = arith.muli %[[PROD1]], %[[NEWUB2]]
+// CHECK: %[[RESULT:.+]] = scf.for %[[IV:[a-zA-Z0-9]+]] = %[[C0]] to %[[NEWUB]] step %[[C1]] iter_args(%[[ITER_ARG:.+]] = %[[ARG0]])
+// CHECK: %[[IV2:.+]] = arith.remsi %[[IV]], %[[NEWUB2]]
+// CHECK: %[[PREVIOUS:.+]] = arith.divsi %[[IV]], %[[NEWUB2]]
+// CHECK: %[[IV1:.+]] = arith.remsi %[[PREVIOUS]], %[[NEWUB1]]
+// CHECK: %[[IV0:.+]] = arith.divsi %[[PREVIOUS]], %[[NEWUB1]]
+// CHECK: %[[K_STEP:.+]] = arith.muli %[[IV2]], %[[STEP2]]
+// CHECK: %[[K:.+]] = arith.addi %[[K_STEP]], %[[LB2]]
+// CHECK: %[[J_STEP:.+]] = arith.muli %[[IV1]], %[[STEP1]]
+// CHECK: %[[J:.+]] = arith.addi %[[J_STEP]], %[[LB1]]
+// CHECK: %[[I_STEP:.+]] = arith.muli %[[IV0]], %[[STEP0]]
+// CHECK: %[[I:.+]] = arith.addi %[[I_STEP]], %[[LB0]]
+// CHECK: %[[USE:.+]] = "use"(%[[ITER_ARG]], %[[I]], %[[J]], %[[K]])
+// CHECK: scf.yield %[[USE]]
+// CHECK: return %[[RESULT]]
+
+// -----
+
+// Coalesce only the first two loops, but not the last, since the iter_args don't line up.
+func.func @tensor_loops_first_two(%arg0 : tensor, %arg1 : tensor, %lb0 : index, %ub0 : index, %step0 : index,
+    %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> (tensor, tensor) {
+  %0:2 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (tensor, tensor) {
+    %1:2 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (tensor, tensor) {
+      %2:2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg6 = %arg5, %arg7 = %arg4) -> (tensor, tensor) {
+        %3:2 = "use"(%arg3, %i, %j, %k) : (tensor, index, index, index) -> (tensor, tensor)
+        scf.yield %3#0, %3#1 : tensor, tensor
+      }
+      scf.yield %2#0, %2#1 : tensor, tensor
+    }
+    scf.yield %1#0, %1#1 : tensor, tensor
+  } {coalesce}
+  return %0#0, %0#1 : tensor, tensor
+}
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
+    %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
+    transform.yield
+  }
+}
+// CHECK: func.func @tensor_loops_first_two(
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor
+// CHECK-SAME: %[[LB0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[STEP0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[LB1:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[STEP1:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[LB2:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index
+// CHECK: scf.for
+// CHECK: arith.remsi
+// CHECK: arith.divsi
+// CHECK: scf.for %{{[a-zA-Z0-9]+}} = %[[LB2]] to %[[UB2]] step %[[STEP2]]
+// CHECK-NOT: scf.for
+// CHECK: transform.named_sequence
+
+// -----
+
+// Coalesce only the first two loops, but not the last, since the yields don't match up.
+func.func @tensor_loops_first_two_2(%arg0 : tensor, %arg1 : tensor, %lb0 : index, %ub0 : index, %step0 : index,
+    %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> (tensor, tensor) {
+  %0:2 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (tensor, tensor) {
+    %1:2 
= scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (tensor, tensor) {
+      %2:2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg6 = %arg4, %arg7 = %arg5) -> (tensor, tensor) {
+        %3:2 = "use"(%arg3, %i, %j, %k) : (tensor, index, index, index) -> (tensor, tensor)
+        scf.yield %3#0, %3#1 : tensor, tensor
+      }
+      scf.yield %2#1, %2#0 : tensor, tensor
+    }
+    scf.yield %1#0, %1#1 : tensor, tensor
+  } {coalesce}
+  return %0#0, %0#1 : tensor, tensor
+}
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
+    %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
+    transform.yield
+  }
+}
+// CHECK: func.func @tensor_loops_first_two_2(
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor
+// CHECK-SAME: %[[LB0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[STEP0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[LB1:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[STEP1:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[LB2:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index
+// CHECK: scf.for
+// CHECK: arith.remsi
+// CHECK: arith.divsi
+// CHECK: scf.for %{{[a-zA-Z0-9]+}} = %[[LB2]] to %[[UB2]] step %[[STEP2]]
+// CHECK-NOT: scf.for
+// CHECK: transform.named_sequence
+
+// -----
+
+// Coalesce only the last two loops, but not the first, since the yields don't match up.
+func.func @tensor_loops_last_two(%arg0 : tensor, %arg1 : tensor, %lb0 : index, %ub0 : index, %step0 : index,
+    %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> (tensor, tensor) {
+  %0:2 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (tensor, tensor) {
+    %1:2 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (tensor, tensor) {
+      %2:2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg6 = %arg4, %arg7 = %arg5) -> (tensor, tensor) {
+        %3:2 = "use"(%arg3, %i, %j, %k) : (tensor, index, index, index) -> (tensor, tensor)
+        scf.yield %3#0, %3#1 : tensor, tensor
+      }
+      scf.yield %2#0, %2#1 : tensor, tensor
+    }
+    scf.yield %1#1, %1#0 : tensor, tensor
+  } {coalesce}
+  return %0#0, %0#1 : tensor, tensor
+}
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for">
+    %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">)
+    transform.yield
+  }
+}
+// CHECK: func.func @tensor_loops_last_two(
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor
+// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor
+// CHECK-SAME: %[[LB0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[STEP0:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[LB1:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[STEP1:[a-zA-Z0-9_]+]]: index
+// CHECK-SAME: %[[LB2:[a-zA-Z0-9_]+]]: index
+// 
CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index +// CHECK: scf.for %{{[a-zA-Z0-9]+}} = %[[LB0]] to %[[UB0]] step %[[STEP0]] +// CHECK: arith.subi +// CHECK: arith.ceildivsi +// CHECK: arith.subi +// CHECK: arith.ceildivsi +// CHECK: scf.for +// CHECK: arith.remsi +// CHECK: arith.divsi +// CHECK-NOT: scf.for +// CHECK: transform.named_sequence + diff --git a/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir b/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir index e4d7141592499..0ea06737886d4 100644 --- a/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir +++ b/mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir @@ -1,5 +1,5 @@ -// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \ -// RUN: -split-input-file | FileCheck %s +// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args" \ +// RUN: -verify-diagnostics -split-input-file | FileCheck %s // CHECK-LABEL: func @scf_for( // CHECK-SAME: %[[a:.*]]: index, %[[b:.*]]: index, %[[c:.*]]: index @@ -104,3 +104,118 @@ func.func @scf_for_swapping_yield(%t1: tensor, %t2: tensor, %a: in "test.some_use"(%reify1) : (index) -> () return } + +// ----- + +// CHECK-LABEL: func @scf_if_constant( +func.func @scf_if_constant(%c : i1) { + // CHECK: arith.constant 4 : index + // CHECK: arith.constant 9 : index + %c4 = arith.constant 4 : index + %c9 = arith.constant 9 : index + %r = scf.if %c -> index { + scf.yield %c4 : index + } else { + scf.yield %c9 : index + } + + // CHECK: %[[c4:.*]] = arith.constant 4 : index + // CHECK: %[[c10:.*]] = arith.constant 10 : index + %reify1 = "test.reify_bound"(%r) {type = "LB"} : (index) -> (index) + %reify2 = "test.reify_bound"(%r) {type = "UB"} : (index) -> (index) + // CHECK: "test.some_use"(%[[c4]], %[[c10]]) + "test.some_use"(%reify1, %reify2) : (index, index) -> () + return +} + +// ----- + +// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 + s1)> +// CHECK: #[[$map1:.*]] = affine_map<()[s0, s1] -> (s0 + s1 + 5)> +// CHECK-LABEL: func @scf_if_dynamic( +// CHECK-SAME: %[[a:.*]]: index, %[[b:.*]]: index, %{{.*}}: i1) +func.func @scf_if_dynamic(%a: index, %b: index, %c : i1) { + %c4 = arith.constant 4 : index + %r = scf.if %c -> index { + %add1 = arith.addi %a, %b : index + scf.yield %add1 : index + } else { + %add2 = arith.addi %b, %c4 : index + %add3 = arith.addi %add2, %a : index + scf.yield %add3 : index + } + + // CHECK: %[[lb:.*]] = affine.apply #[[$map]]()[%[[a]], %[[b]]] + // CHECK: %[[ub:.*]] = affine.apply #[[$map1]]()[%[[a]], %[[b]]] + %reify1 = "test.reify_bound"(%r) {type = "LB"} : (index) -> (index) + %reify2 = "test.reify_bound"(%r) {type = "UB"} : (index) -> (index) + // CHECK: "test.some_use"(%[[lb]], %[[ub]]) + "test.some_use"(%reify1, %reify2) : (index, index) -> () + return +} + +// ----- + +func.func @scf_if_no_affine_bound(%a: index, %b: index, %c : i1) { + %r = scf.if %c -> index { + scf.yield %a : index + } else { + scf.yield %b : index + } + // The reified bound would be min(%a, %b). min/max expressions are not + // supported in reified bounds. 
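+  // (Expressing this lower bound would take an affine.min of %a and %b, which
+  // bound reification does not emit.)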
+ // expected-error @below{{could not reify bound}} + %reify1 = "test.reify_bound"(%r) {type = "LB"} : (index) -> (index) + "test.some_use"(%reify1) : (index) -> () + return +} + +// ----- + +// CHECK-LABEL: func @scf_if_tensor_dim( +func.func @scf_if_tensor_dim(%c : i1) { + // CHECK: arith.constant 4 : index + // CHECK: arith.constant 9 : index + %c4 = arith.constant 4 : index + %c9 = arith.constant 9 : index + %t1 = tensor.empty(%c4) : tensor + %t2 = tensor.empty(%c9) : tensor + %r = scf.if %c -> tensor { + scf.yield %t1 : tensor + } else { + scf.yield %t2 : tensor + } + + // CHECK: %[[c4:.*]] = arith.constant 4 : index + // CHECK: %[[c10:.*]] = arith.constant 10 : index + %reify1 = "test.reify_bound"(%r) {type = "LB", dim = 0} + : (tensor) -> (index) + %reify2 = "test.reify_bound"(%r) {type = "UB", dim = 0} + : (tensor) -> (index) + // CHECK: "test.some_use"(%[[c4]], %[[c10]]) + "test.some_use"(%reify1, %reify2) : (index, index) -> () + return +} + +// ----- + +// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 + s1)> +// CHECK-LABEL: func @scf_if_eq( +// CHECK-SAME: %[[a:.*]]: index, %[[b:.*]]: index, %{{.*}}: i1) +func.func @scf_if_eq(%a: index, %b: index, %c : i1) { + %c0 = arith.constant 0 : index + %r = scf.if %c -> index { + %add1 = arith.addi %a, %b : index + scf.yield %add1 : index + } else { + %add2 = arith.addi %b, %c0 : index + %add3 = arith.addi %add2, %a : index + scf.yield %add3 : index + } + + // CHECK: %[[eq:.*]] = affine.apply #[[$map]]()[%[[a]], %[[b]]] + %reify1 = "test.reify_bound"(%r) {type = "EQ"} : (index) -> (index) + // CHECK: "test.some_use"(%[[eq]]) + "test.some_use"(%reify1) : (index) -> () + return +} diff --git a/mlir/test/Dialect/Vector/linearize.mlir b/mlir/test/Dialect/Vector/linearize.mlir index 212541c79565b..22be78cd68205 100644 --- a/mlir/test/Dialect/Vector/linearize.mlir +++ b/mlir/test/Dialect/Vector/linearize.mlir @@ -153,14 +153,3 @@ func.func @test_0d_vector() -> vector { // ALL: return %[[CST]] return %0 : vector } - -// ----- - -func.func @test_scalable_no_linearize(%arg0: vector<2x[2]xf32>) -> vector<2x[2]xf32> { - // expected-error@+1 {{failed to legalize operation 'arith.constant' that was explicitly marked illegal}} - %0 = arith.constant dense<[[1., 1.], [3., 3.]]> : vector<2x[2]xf32> - %1 = math.sin %arg0 : vector<2x[2]xf32> - %2 = arith.addf %0, %1 : vector<2x[2]xf32> - - return %2 : vector<2x[2]xf32> -} diff --git a/mlir/test/Dialect/Vector/vector-multi-reduction-pass-lowering.mlir b/mlir/test/Dialect/Vector/vector-multi-reduction-pass-lowering.mlir new file mode 100644 index 0000000000000..4cb6fba9b691a --- /dev/null +++ b/mlir/test/Dialect/Vector/vector-multi-reduction-pass-lowering.mlir @@ -0,0 +1,45 @@ +// RUN: mlir-opt -lower-vector-multi-reduction="lowering-strategy=inner-reduction" -split-input-file %s | FileCheck %s --check-prefixes=ALL,INNER-REDUCTION +// RUN: mlir-opt -lower-vector-multi-reduction="lowering-strategy=inner-parallel" -split-input-file %s | FileCheck %s --check-prefixes=ALL,INNER-PARALLEL +// RUN: mlir-opt -lower-vector-multi-reduction -split-input-file %s | FileCheck %s --check-prefixes=ALL,INNER-PARALLEL + +func.func @vector_multi_reduction(%arg0: vector<2x4xf32>, %acc: vector<2xf32>) -> vector<2xf32> { + %0 = vector.multi_reduction , %arg0, %acc [1] : vector<2x4xf32> to vector<2xf32> + return %0 : vector<2xf32> +} +// ALL-LABEL: func @vector_multi_reduction +// ALL-SAME: %[[INPUT:.+]]: vector<2x4xf32>, %[[ACC:.*]]: vector<2xf32>) +// INNER-REDUCTION-DAG: %[[RESULT_VEC_0:.+]] = arith.constant dense<{{.*}}> 
: vector<2xf32> +// INNER-REDUCTION-DAG: %[[C0:.+]] = arith.constant 0 : index +// INNER-REDUCTION-DAG: %[[C1:.+]] = arith.constant 1 : index +// INNER-REDUCTION: %[[V0:.+]] = vector.extract %[[INPUT]][0] +// INNER-REDUCTION: %[[ACC0:.+]] = vector.extract %[[ACC]][0] +// INNER-REDUCTION: %[[RV0:.+]] = vector.reduction , %[[V0]], %[[ACC0]] : vector<4xf32> into f32 +// INNER-REDUCTION: %[[RESULT_VEC_1:.+]] = vector.insertelement %[[RV0:.+]], %[[RESULT_VEC_0]][%[[C0]] : index] : vector<2xf32> +// INNER-REDUCTION: %[[V1:.+]] = vector.extract %[[INPUT]][1] +// INNER-REDUCTION: %[[ACC1:.+]] = vector.extract %[[ACC]][1] +// INNER-REDUCTION: %[[RV1:.+]] = vector.reduction , %[[V1]], %[[ACC1]] : vector<4xf32> into f32 +// INNER-REDUCTION: %[[RESULT_VEC:.+]] = vector.insertelement %[[RV1:.+]], %[[RESULT_VEC_1]][%[[C1]] : index] : vector<2xf32> +// INNER-REDUCTION: return %[[RESULT_VEC]] + +// INNER-PARALLEL: %[[TRANSPOSED:.+]] = vector.transpose %[[INPUT]], [1, 0] : vector<2x4xf32> to vector<4x2xf32> +// INNER-PARALLEL: %[[V0:.+]] = vector.extract %[[TRANSPOSED]][0] : vector<2xf32> from vector<4x2xf32> +// INNER-PARALLEL: %[[RV0:.+]] = arith.mulf %[[V0]], %[[ACC]] : vector<2xf32> +// INNER-PARALLEL: %[[V1:.+]] = vector.extract %[[TRANSPOSED]][1] : vector<2xf32> from vector<4x2xf32> +// INNER-PARALLEL: %[[RV01:.+]] = arith.mulf %[[V1]], %[[RV0]] : vector<2xf32> +// INNER-PARALLEL: %[[V2:.+]] = vector.extract %[[TRANSPOSED]][2] : vector<2xf32> from vector<4x2xf32> +// INNER-PARALLEL: %[[RV012:.+]] = arith.mulf %[[V2]], %[[RV01]] : vector<2xf32> +// INNER-PARALLEL: %[[V3:.+]] = vector.extract %[[TRANSPOSED]][3] : vector<2xf32> from vector<4x2xf32> +// INNER-PARALLEL: %[[RESULT_VEC:.+]] = arith.mulf %[[V3]], %[[RV012]] : vector<2xf32> +// INNER-PARALLEL: return %[[RESULT_VEC]] : vector<2xf32> + +// ----- + +func.func @vector_multi_reduction_parallel_middle(%arg0: vector<3x4x5xf32>, %acc: vector<4xf32>) -> vector<4xf32> { + %0 = vector.multi_reduction , %arg0, %acc [0, 2] : vector<3x4x5xf32> to vector<4xf32> + return %0 : vector<4xf32> +} + +// ALL-LABEL: func @vector_multi_reduction_parallel_middle +// ALL-SAME: %[[INPUT:.+]]: vector<3x4x5xf32>, %[[ACC:.+]]: vector<4xf32> +// INNER-REDUCTION: vector.transpose %[[INPUT]], [1, 0, 2] : vector<3x4x5xf32> to vector<4x3x5xf32> +// INNER-PARALLEL: vector.transpose %[[INPUT]], [0, 2, 1] : vector<3x4x5xf32> to vector<3x5x4xf32> diff --git a/mlir/test/IR/affine-walk.mlir b/mlir/test/IR/affine-walk.mlir index 1de675ac70be2..0ee7abf9415cf 100644 --- a/mlir/test/IR/affine-walk.mlir +++ b/mlir/test/IR/affine-walk.mlir @@ -7,3 +7,8 @@ "test.check_first_mod"() {"map" = #map} : () -> () // expected-remark@-1 {{mod expression}} + +#map_rhs_mod = affine_map<(i, j) -> (i + i mod 2, j)> + +"test.check_first_mod"() {"map" = #map_rhs_mod} : () -> () +// expected-remark@-1 {{mod expression}} diff --git a/mlir/test/Target/LLVMIR/Import/debug-info.ll b/mlir/test/Target/LLVMIR/Import/debug-info.ll index 959a5a1cd9717..245cf300d2c1a 100644 --- a/mlir/test/Target/LLVMIR/Import/debug-info.ll +++ b/mlir/test/Target/LLVMIR/Import/debug-info.ll @@ -197,7 +197,7 @@ define void @composite_type() !dbg !3 { ; // ----- ; CHECK-DAG: #[[FILE:.+]] = #llvm.di_file<"debug-info.ll" in "/"> -; CHECK-DAG: #[[CU:.+]] = #llvm.di_compile_unit, sourceLanguage = DW_LANG_C, file = #[[FILE]], isOptimized = false, emissionKind = None> +; CHECK-DAG: #[[CU:.+]] = #llvm.di_compile_unit, sourceLanguage = DW_LANG_C, file = #[[FILE]], isOptimized = false, emissionKind = None, nameTableKind = None> ; Verify an 
empty subroutine types list is supported.
 ; CHECK-DAG: #[[SP_TYPE:.+]] = #llvm.di_subroutine_type
 ; CHECK-DAG: #[[SP:.+]] = #llvm.di_subprogram, compileUnit = #[[CU]], scope = #[[FILE]], name = "subprogram", linkageName = "subprogram", file = #[[FILE]], line = 42, scopeLine = 42, subprogramFlags = Definition, type = #[[SP_TYPE]]>
@@ -209,7 +209,7 @@ define void @subprogram() !dbg !3 {
 !llvm.dbg.cu = !{!1}
 !llvm.module.flags = !{!0}
 !0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2)
+!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2, nameTableKind: None)
 !2 = !DIFile(filename: "debug-info.ll", directory: "/")
 !3 = distinct !DISubprogram(name: "subprogram", linkageName: "subprogram", scope: !2, file: !2, line: 42, scopeLine: 42, spFlags: DISPFlagDefinition, unit: !1, type: !4)
 !4 = !DISubroutineType(cc: DW_CC_normal, types: !5)
@@ -607,3 +607,146 @@ declare !dbg !1 void @declaration()
 !0 = !{i32 2, !"Debug Info Version", i32 3}
 !1 = !DISubprogram(name: "declaration", scope: !2, file: !2, flags: DIFlagPrototyped, spFlags: 0)
 !2 = !DIFile(filename: "debug-info.ll", directory: "/")
+
+; // -----
+
+; Ensure that a repeated occurrence of a recursive subtree does not result in
+; duplicate MLIR entries.
+;
+; +--> B:B1 ----+
+; |    ^        v
+; A <---+------ B
+; |    v        ^
+; +--> B:B2 ----+
+; This should result in only one B instance.
+
+; CHECK-DAG: #[[B1_INNER:.+]] = #llvm.di_derived_type<{{.*}}name = "B:B1", baseType = #[[B_SELF:.+]]>
+; CHECK-DAG: #[[B2_INNER:.+]] = #llvm.di_derived_type<{{.*}}name = "B:B2", baseType = #[[B_SELF]]>
+; CHECK-DAG: #[[B_INNER:.+]] = #llvm.di_composite_type<{{.*}}recId = [[B_RECID:.+]], {{.*}}name = "B", {{.*}}elements = #[[B1_INNER]], #[[B2_INNER]]
+
+; CHECK-DAG: #[[B1_OUTER:.+]] = #llvm.di_derived_type<{{.*}}name = "B:B1", baseType = #[[B_INNER]]>
+; CHECK-DAG: #[[B2_OUTER:.+]] = #llvm.di_derived_type<{{.*}}name = "B:B2", baseType = #[[B_INNER]]>
+; CHECK-DAG: #[[A_OUTER:.+]] = #llvm.di_composite_type<{{.*}}recId = [[A_RECID:.+]], {{.*}}name = "A", {{.*}}elements = #[[B1_OUTER]], #[[B2_OUTER]]
+
+; CHECK-DAG: #[[A_SELF:.+]] = #llvm.di_composite_type<{{.*}}recId = [[A_RECID]]
+; CHECK-DAG: #[[B_SELF:.+]] = #llvm.di_composite_type<{{.*}}recId = [[B_RECID]]
+
+; CHECK: #llvm.di_subprogram<{{.*}}scope = #[[A_OUTER]]
+
+define void @class_field(ptr %arg1) !dbg !18 {
+  ret void
+}
+
+!llvm.dbg.cu = !{!1}
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2)
+!2 = !DIFile(filename: "debug-info.ll", directory: "/")
+
+!3 = !DICompositeType(tag: DW_TAG_class_type, name: "A", file: !2, line: 42, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !4)
+!4 = !{!7, !8}
+
+!5 = !DICompositeType(tag: DW_TAG_class_type, name: "B", scope: !3, file: !2, line: 42, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !9)
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "B:B1", file: !2, baseType: !5)
+!8 = !DIDerivedType(tag: DW_TAG_member, name: "B:B2", file: !2, baseType: !5)
+!9 = !{!7, !8}
+
+!18 = distinct !DISubprogram(name: "A", scope: !3, file: !2, spFlags: DISPFlagDefinition, unit: !1)
+
+; // -----
+
+; Ensure that recursive cycles with multiple entry points are cached correctly.
+;
+; +---- A ----+
+; v           v
+; B <-------> C
+; This should result in a cached instance of B --> C --> B_SELF to be reused
+; when visiting B from C (after visiting B from A).
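+; In particular, C as reached from A must point at the same B instance that
+; was built on the A --> B path, not a second copy.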
+ +; CHECK-DAG: #[[A:.+]] = #llvm.di_composite_type<{{.*}}name = "A", {{.*}}elements = #[[TO_B_OUTER:.+]], #[[TO_C_OUTER:.+]]> +; CHECK-DAG: #llvm.di_subprogram<{{.*}}scope = #[[A]], + +; CHECK-DAG: #[[TO_B_OUTER]] = #llvm.di_derived_type<{{.*}}name = "->B", {{.*}}baseType = #[[B_OUTER:.+]]> +; CHECK-DAG: #[[B_OUTER]] = #llvm.di_composite_type<{{.*}}recId = [[B_RECID:.+]], {{.*}}name = "B", {{.*}}elements = #[[TO_C_INNER:.+]]> +; CHECK-DAG: #[[TO_C_INNER]] = #llvm.di_derived_type<{{.*}}name = "->C", {{.*}}baseType = #[[C_INNER:.+]]> +; CHECK-DAG: #[[C_INNER]] = #llvm.di_composite_type<{{.*}}name = "C", {{.*}}elements = #[[TO_B_SELF:.+]]> +; CHECK-DAG: #[[TO_B_SELF]] = #llvm.di_derived_type<{{.*}}name = "->B", {{.*}}baseType = #[[B_SELF:.+]]> +; CHECK-DAG: #[[B_SELF]] = #llvm.di_composite_type<{{.*}}recId = [[B_RECID]]> + +; CHECK-DAG: #[[TO_C_OUTER]] = #llvm.di_derived_type<{{.*}}name = "->C", {{.*}}baseType = #[[C_OUTER:.+]]> +; CHECK-DAG: #[[C_OUTER]] = #llvm.di_composite_type<{{.*}}name = "C", {{.*}}elements = #[[TO_B_OUTER]]> + +define void @class_field(ptr %arg1) !dbg !18 { + ret void +} + +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2) +!2 = !DIFile(filename: "debug-info.ll", directory: "/") + +!3 = !DICompositeType(tag: DW_TAG_class_type, name: "A", file: !2, line: 42, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !4) +!5 = !DICompositeType(tag: DW_TAG_class_type, name: "B", file: !2, line: 42, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !10) +!6 = !DICompositeType(tag: DW_TAG_class_type, name: "C", file: !2, line: 42, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !9) + +!7 = !DIDerivedType(tag: DW_TAG_member, name: "->B", file: !2, baseType: !5) +!8 = !DIDerivedType(tag: DW_TAG_member, name: "->C", file: !2, baseType: !6) +!4 = !{!7, !8} +!9 = !{!7} +!10 = !{!8} + +!18 = distinct !DISubprogram(name: "SP", scope: !3, file: !2, spFlags: DISPFlagDefinition, unit: !1) + +; // ----- + +; Ensures that replacing a nested mutually recursive decl does not result in +; nested duplicate recursive decls. 
+; +; A ---> B <--> C +; ^ ^ +; +-------------+ + +; CHECK-DAG: #[[A:.+]] = #llvm.di_composite_type<{{.*}}recId = [[A_RECID:.+]], {{.*}}name = "A", {{.*}}elements = #[[A_TO_B:.+]], #[[A_TO_C:.+]]> +; CHECK-DAG: #llvm.di_subprogram<{{.*}}scope = #[[A]], +; CHECK-DAG: #[[A_TO_B]] = #llvm.di_derived_type<{{.*}}name = "->B", {{.*}}baseType = #[[B_FROM_A:.+]]> +; CHECK-DAG: #[[A_TO_C]] = #llvm.di_derived_type<{{.*}}name = "->C", {{.*}}baseType = #[[C_FROM_A:.+]]> + +; CHECK-DAG: #[[B_FROM_A]] = #llvm.di_composite_type<{{.*}}recId = [[B_RECID:.+]], {{.*}}name = "B", {{.*}}elements = #[[B_TO_C:.+]]> +; CHECK-DAG: #[[B_TO_C]] = #llvm.di_derived_type<{{.*}}name = "->C", {{.*}}baseType = #[[C_FROM_B:.+]]> +; CHECK-DAG: #[[C_FROM_B]] = #llvm.di_composite_type<{{.*}}recId = [[C_RECID:.+]], {{.*}}name = "C", {{.*}}elements = #[[TO_A_SELF:.+]], #[[TO_B_SELF:.+]], #[[TO_C_SELF:.+]]> + +; CHECK-DAG: #[[C_FROM_A]] = #llvm.di_composite_type<{{.*}}recId = [[C_RECID]], {{.*}}name = "C", {{.*}}elements = #[[TO_A_SELF]], #[[TO_B_INNER:.+]], #[[TO_C_SELF]] +; CHECK-DAG: #[[TO_B_INNER]] = #llvm.di_derived_type<{{.*}}name = "->B", {{.*}}baseType = #[[B_INNER:.+]]> +; CHECK-DAG: #[[B_INNER]] = #llvm.di_composite_type<{{.*}}name = "B", {{.*}}elements = #[[TO_C_SELF]]> + +; CHECK-DAG: #[[TO_A_SELF]] = #llvm.di_derived_type<{{.*}}name = "->A", {{.*}}baseType = #[[A_SELF:.+]]> +; CHECK-DAG: #[[TO_B_SELF]] = #llvm.di_derived_type<{{.*}}name = "->B", {{.*}}baseType = #[[B_SELF:.+]]> +; CHECK-DAG: #[[TO_C_SELF]] = #llvm.di_derived_type<{{.*}}name = "->C", {{.*}}baseType = #[[C_SELF:.+]]> +; CHECK-DAG: #[[A_SELF]] = #llvm.di_composite_type<{{.*}}recId = [[A_RECID]]> +; CHECK-DAG: #[[B_SELF]] = #llvm.di_composite_type<{{.*}}recId = [[B_RECID]]> +; CHECK-DAG: #[[C_SELF]] = #llvm.di_composite_type<{{.*}}recId = [[C_RECID]]> + +define void @class_field(ptr %arg1) !dbg !18 { + ret void +} + +!llvm.dbg.cu = !{!1} +!llvm.module.flags = !{!0} +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = distinct !DICompileUnit(language: DW_LANG_C, file: !2) +!2 = !DIFile(filename: "debug-info.ll", directory: "/") + +!3 = !DICompositeType(tag: DW_TAG_class_type, name: "A", file: !2, line: 42, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !9) +!4 = !DICompositeType(tag: DW_TAG_class_type, name: "B", file: !2, line: 42, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !10) +!5 = !DICompositeType(tag: DW_TAG_class_type, name: "C", file: !2, line: 42, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !11) + +!6 = !DIDerivedType(tag: DW_TAG_member, name: "->A", file: !2, baseType: !3) +!7 = !DIDerivedType(tag: DW_TAG_member, name: "->B", file: !2, baseType: !4) +!8 = !DIDerivedType(tag: DW_TAG_member, name: "->C", file: !2, baseType: !5) + +!9 = !{!7, !8} ; A -> B, C +!10 = !{!8} ; B -> C +!11 = !{!6, !7, !8} ; C -> A, B, C + +!18 = distinct !DISubprogram(name: "SP", scope: !3, file: !2, spFlags: DISPFlagDefinition, unit: !1) diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll index 0cefb4f8983aa..81a6eadbadd3f 100644 --- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll +++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll @@ -641,10 +641,12 @@ define void @expect_with_probability(i16 %0) { ret void } +@tls_var = dso_local thread_local global i32 0, align 4 + ; CHECK-LABEL: llvm.func @threadlocal_test -define void @threadlocal_test(ptr %0) { +define void @threadlocal_test() { ; CHECK: "llvm.intr.threadlocal.address"(%{{.*}}) : (!llvm.ptr) -> !llvm.ptr - %local 
= call ptr @llvm.threadlocal.address.p0(ptr %0) + %local = call ptr @llvm.threadlocal.address.p0(ptr @tls_var) ret void } diff --git a/mlir/test/Target/LLVMIR/Import/test.ll b/mlir/test/Target/LLVMIR/Import/test.ll new file mode 100644 index 0000000000000..a3165d6020104 --- /dev/null +++ b/mlir/test/Target/LLVMIR/Import/test.ll @@ -0,0 +1,11 @@ +; RUN: mlir-translate -test-import-llvmir %s | FileCheck %s + +; CHECK-LABEL: @custom_load +; CHECK-SAME: %[[PTR:[[:alnum:]]+]] +define double @custom_load(ptr %ptr) { + ; CHECK: %[[LOAD:[0-9]+]] = llvm.load %[[PTR]] : !llvm.ptr -> f64 + ; CHECK: %[[TEST:[0-9]+]] = "test.same_operand_element_type"(%[[LOAD]], %[[LOAD]]) : (f64, f64) -> f64 + %1 = load double, ptr %ptr + ; CHECK: llvm.return %[[TEST]] : f64 + ret double %1 +} diff --git a/mlir/test/Target/LLVMIR/llvmir-debug.mlir b/mlir/test/Target/LLVMIR/llvmir-debug.mlir index 785a525caab8c..f4c18bf6bd53c 100644 --- a/mlir/test/Target/LLVMIR/llvmir-debug.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-debug.mlir @@ -37,7 +37,8 @@ llvm.func @func_no_debug() { > #cu = #llvm.di_compile_unit< id = distinct[0]<>, sourceLanguage = DW_LANG_C, file = #file, - producer = "MLIR", isOptimized = true, emissionKind = Full + producer = "MLIR", isOptimized = true, emissionKind = Full, + nameTableKind = None > #composite = #llvm.di_composite_type< tag = DW_TAG_structure_type, name = "composite", file = #file, @@ -127,7 +128,7 @@ llvm.func @empty_types() { llvm.return } loc(fused<#sp1>["foo.mlir":2:1]) -// CHECK: ![[CU_LOC:.*]] = distinct !DICompileUnit(language: DW_LANG_C, file: ![[CU_FILE_LOC:.*]], producer: "MLIR", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +// CHECK: ![[CU_LOC:.*]] = distinct !DICompileUnit(language: DW_LANG_C, file: ![[CU_FILE_LOC:.*]], producer: "MLIR", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, nameTableKind: None) // CHECK: ![[CU_FILE_LOC]] = !DIFile(filename: "foo.mlir", directory: "/test/") // CHECK: ![[FUNC_LOC]] = distinct !DISubprogram(name: "func_with_debug", linkageName: "func_with_debug", scope: ![[NESTED_NAMESPACE:.*]], file: ![[CU_FILE_LOC]], line: 3, type: ![[FUNC_TYPE:.*]], scopeLine: 3, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: ![[CU_LOC]]) @@ -423,3 +424,31 @@ llvm.mlir.global @global_variable() {dbg_expr = #di_global_variable_expression} // CHECK: ![[SCOPE]] = !DISubprogram({{.*}}type: ![[SUBROUTINE:[0-9]+]], // CHECK: ![[SUBROUTINE]] = !DISubroutineType(types: ![[SR_TYPES:[0-9]+]]) // CHECK: ![[SR_TYPES]] = !{![[COMP]]} + +// ----- + +// Ensures nested recursive decls work. +// The output should be identical to what would be produced if the inner +// composite type decl were replaced with the recursive self reference.
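To make the recursion-id scheme exercised above concrete, here is a self-contained C++ sketch (a simplified model for illustration, not MLIR's implementation; every type and function name below is invented). The importer tracks which types are currently open on the import stack; revisiting an open type closes the cycle with a lightweight self-reference carrying only its recursion id, which is exactly the structure the recId-based CHECK-DAG lines assert:

#include <cassert>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

// Minimal model of a cyclic type graph: each node has a name and member edges.
struct TypeNode {
  std::string name;
  std::vector<const TypeNode *> members;
};

// Imported form: either a full definition or a self-reference that carries
// only the recursion id (the "recId" in the attribute dumps above).
struct ImportedType {
  std::string name;
  int recId = -1;
  bool isSelfRef = false;
  std::vector<const ImportedType *> members;
};

class Importer {
public:
  const ImportedType *importType(const TypeNode *node) {
    // A node already on the import stack closes a cycle: emit a
    // self-reference instead of recursing forever.
    if (open.count(node))
      return make(node->name, idFor(node), /*isSelfRef=*/true);
    ImportedType *def = make(node->name, idFor(node), /*isSelfRef=*/false);
    open.insert(node);
    for (const TypeNode *member : node->members)
      def->members.push_back(importType(member));
    open.erase(node);
    return def;
  }

private:
  // Recursion ids are assigned once per node and reused on re-import, which
  // is why both expansions of "C" above share [[C_RECID]].
  int idFor(const TypeNode *node) {
    auto [it, inserted] = ids.try_emplace(node, nextId);
    if (inserted)
      ++nextId;
    return it->second;
  }

  ImportedType *make(const std::string &name, int recId, bool isSelfRef) {
    auto owned = std::make_unique<ImportedType>();
    owned->name = name;
    owned->recId = recId;
    owned->isSelfRef = isSelfRef;
    ImportedType *raw = owned.get();
    storage.push_back(std::move(owned));
    return raw;
  }

  int nextId = 0;
  std::map<const TypeNode *, int> ids;
  std::set<const TypeNode *> open;
  std::vector<std::unique_ptr<ImportedType>> storage;
};

int main() {
  // A -> {B, C}; B -> {C}; C -> {A, B, C}: the shape of the test above.
  TypeNode a{"A"}, b{"B"}, c{"C"};
  a.members = {&b, &c};
  b.members = {&c};
  c.members = {&a, &b, &c};

  Importer importer;
  const ImportedType *importedA = importer.importType(&a);
  // The C reached directly from A must point back at A through a
  // self-reference with A's recursion id (the #[[TO_A_SELF]] check).
  const ImportedType *cFromA = importedA->members[1];
  assert(cFromA->members[0]->isSelfRef);
  assert(cFromA->members[0]->recId == importedA->recId);
  return 0;
}

On the A/B/C graph from the import test, every back edge into an open type collapses to a self-reference, while re-importing C from A yields a second expansion that reuses C's recursion id, matching the #[[C_FROM_A]] and #[[TO_A_SELF]]/#[[TO_B_SELF]]/#[[TO_C_SELF]] attributes checked above.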
+ +#di_file = #llvm.di_file<"test.mlir" in "/"> +#di_composite_type_self = #llvm.di_composite_type> + +#di_subroutine_type_inner = #llvm.di_subroutine_type +#di_subprogram_inner = #llvm.di_subprogram +#di_composite_type_inner = #llvm.di_composite_type, scope = #di_subprogram_inner> + +#di_subroutine_type = #llvm.di_subroutine_type +#di_subprogram = #llvm.di_subprogram +#di_composite_type = #llvm.di_composite_type, scope = #di_subprogram> + +#di_global_variable = #llvm.di_global_variable +#di_global_variable_expression = #llvm.di_global_variable_expression + +llvm.mlir.global @global_variable() {dbg_expr = #di_global_variable_expression} : !llvm.struct<()> + +// CHECK: distinct !DIGlobalVariable({{.*}}type: ![[COMP:[0-9]+]], +// CHECK: ![[COMP]] = distinct !DICompositeType({{.*}}scope: ![[SCOPE:[0-9]+]], +// CHECK: ![[SCOPE]] = !DISubprogram({{.*}}type: ![[SUBROUTINE:[0-9]+]], +// CHECK: ![[SUBROUTINE]] = !DISubroutineType(types: ![[SR_TYPES:[0-9]+]]) +// CHECK: ![[SR_TYPES]] = !{![[COMP]]} diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir index 0013522582a72..db5184a63d983 100644 --- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir @@ -576,10 +576,13 @@ llvm.func @expect_with_probability(%arg0: i16) { llvm.return } +llvm.mlir.global external thread_local @tls_var(0 : i32) {addr_space = 0 : i32, alignment = 4 : i64, dso_local} : i32 + // CHECK-LABEL: @threadlocal_test -llvm.func @threadlocal_test(%arg0 : !llvm.ptr) { - // CHECK: call ptr @llvm.threadlocal.address.p0(ptr %{{.*}}) - "llvm.intr.threadlocal.address"(%arg0) : (!llvm.ptr) -> !llvm.ptr +llvm.func @threadlocal_test() { + // CHECK: call ptr @llvm.threadlocal.address.p0(ptr @tls_var) + %0 = llvm.mlir.addressof @tls_var : !llvm.ptr + "llvm.intr.threadlocal.address"(%0) : (!llvm.ptr) -> !llvm.ptr llvm.return } diff --git a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir index 2baa20010d055..244c0315c2dbc 100644 --- a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-host.mlir @@ -6,15 +6,15 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe // CHECK-DAG: @_QMtest_0Earray_1d = global [3 x i32] [i32 1, i32 2, i32 3] // CHECK-DAG: @_QMtest_0Earray_1d_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Earray_1d - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Earray_1d_decl_tgt_ref_ptr\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Earray_1d_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Earray_1d_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Earray_1d_decl_tgt_ref_ptr\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Earray_1d_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Earray_1d_decl_tgt_ref_ptr, ptr @.offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Earray_1d_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Earray_1d(dense<[1, 2, 3]> : tensor<3xi32>) {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : !llvm.array<3 x 
i32> // CHECK-DAG: @_QMtest_0Earray_2d = global [2 x [2 x i32]] {{.*}} // CHECK-DAG: @_QMtest_0Earray_2d_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Earray_2d - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Earray_2d_decl_tgt_ref_ptr\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Earray_2d_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Earray_2d_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Earray_2d_decl_tgt_ref_ptr\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Earray_2d_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Earray_2d_decl_tgt_ref_ptr, ptr @.offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Earray_2d_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Earray_2d() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : !llvm.array<2 x array<2 x i32>> { %0 = llvm.mlir.undef : !llvm.array<2 x array<2 x i32>> @@ -33,8 +33,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe // CHECK-DAG: @_QMtest_0Edata_extended_link_1 = global float 2.000000e+00 // CHECK-DAG: @_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Edata_extended_link_1 - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [48 x i8] c"_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [48 x i8] c"_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr, ptr @.offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_link_1_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Edata_extended_link_1() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : f32 { %0 = llvm.mlir.constant(2.000000e+00 : f32) : f32 @@ -43,8 +43,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe // CHECK-DAG: @_QMtest_0Edata_extended_link_2 = global float 3.000000e+00 // CHECK-DAG: @_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Edata_extended_link_2 - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [48 x i8] c"_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [48 x i8] 
c"_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr, ptr @.offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_link_2_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Edata_extended_link_2() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : f32 { %0 = llvm.mlir.constant(3.000000e+00 : f32) : f32 @@ -52,8 +52,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe } // CHECK-DAG: @_QMtest_0Edata_extended_to_1 = global float 2.000000e+00 - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [29 x i8] c"_QMtest_0Edata_extended_to_1\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_to_1 = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_to_1, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [29 x i8] c"_QMtest_0Edata_extended_to_1\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Edata_extended_to_1 = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_to_1, ptr @.offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_to_1", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Edata_extended_to_1() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : f32 { %0 = llvm.mlir.constant(2.000000e+00 : f32) : f32 @@ -61,8 +61,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe } // CHECK-DAG: @_QMtest_0Edata_extended_enter_1 = global float 2.000000e+00 - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [32 x i8] c"_QMtest_0Edata_extended_enter_1\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_enter_1 = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_enter_1, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [32 x i8] c"_QMtest_0Edata_extended_enter_1\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Edata_extended_enter_1 = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_enter_1, ptr @.offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_enter_1", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Edata_extended_enter_1() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : f32 { %0 = llvm.mlir.constant(2.000000e+00 : f32) : f32 @@ -70,8 +70,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe } // CHECK-DAG: @_QMtest_0Edata_extended_to_2 = global float 3.000000e+00 - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [29 x i8] c"_QMtest_0Edata_extended_to_2\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_to_2 = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_to_2, ptr @.omp_offloading.entry_name{{.*}}, i64 4, 
i32 0, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [29 x i8] c"_QMtest_0Edata_extended_to_2\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Edata_extended_to_2 = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_to_2, ptr @.offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_to_2", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Edata_extended_to_2() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : f32 { %0 = llvm.mlir.constant(3.000000e+00 : f32) : f32 @@ -79,8 +79,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe } // CHECK-DAG: @_QMtest_0Edata_extended_enter_2 = global float 3.000000e+00 - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [32 x i8] c"_QMtest_0Edata_extended_enter_2\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_extended_enter_2 = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_enter_2, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [32 x i8] c"_QMtest_0Edata_extended_enter_2\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Edata_extended_enter_2 = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_extended_enter_2, ptr @.offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_extended_enter_2", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Edata_extended_enter_2() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : f32 { %0 = llvm.mlir.constant(3.000000e+00 : f32) : f32 @@ -89,8 +89,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe // CHECK-DAG: @_QMtest_0Edata_int = global i32 1 // CHECK-DAG: @_QMtest_0Edata_int_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Edata_int - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Edata_int_decl_tgt_ref_ptr\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_int_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Edata_int_decl_tgt_ref_ptr\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Edata_int_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_decl_tgt_ref_ptr, ptr @.offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_int_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Edata_int() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : i32 { %0 = llvm.mlir.constant(10 : i32) : i32 @@ -98,8 +98,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe } // CHECK-DAG: @_QMtest_0Edata_int_clauseless_to = global i32 1 - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [33 x i8] c"_QMtest_0Edata_int_clauseless_to\00" - // CHECK-DAG: 
@.omp_offloading.entry._QMtest_0Edata_int_clauseless_to = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_clauseless_to, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [33 x i8] c"_QMtest_0Edata_int_clauseless_to\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Edata_int_clauseless_to = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_clauseless_to, ptr @.offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_int_clauseless_to", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Edata_int_clauseless_to() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : i32 { %0 = llvm.mlir.constant(1 : i32) : i32 @@ -107,8 +107,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe } // CHECK-DAG: @_QMtest_0Edata_int_clauseless_enter = global i32 1 - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Edata_int_clauseless_enter\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_int_clauseless_enter = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_clauseless_enter, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [36 x i8] c"_QMtest_0Edata_int_clauseless_enter\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Edata_int_clauseless_enter = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_clauseless_enter, ptr @.offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_int_clauseless_enter", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Edata_int_clauseless_enter() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : i32 { %0 = llvm.mlir.constant(1 : i32) : i32 @@ -116,8 +116,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe } // CHECK-DAG: @_QMtest_0Edata_int_to = global i32 5 - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [22 x i8] c"_QMtest_0Edata_int_to\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Edata_int_to = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_to, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [22 x i8] c"_QMtest_0Edata_int_to\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Edata_int_to = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_to, ptr @.offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_int_to", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Edata_int_to() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : i32 { %0 = llvm.mlir.constant(5 : i32) : i32 @@ -125,8 +125,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe } // CHECK-DAG: @_QMtest_0Edata_int_enter = global i32 5 - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [25 x i8] c"_QMtest_0Edata_int_enter\00" - // CHECK-DAG: 
@.omp_offloading.entry._QMtest_0Edata_int_enter = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_enter, ptr @.omp_offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [25 x i8] c"_QMtest_0Edata_int_enter\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Edata_int_enter = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Edata_int_enter, ptr @.offloading.entry_name{{.*}}, i64 4, i32 0, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Edata_int_enter", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Edata_int_enter() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : i32 { %0 = llvm.mlir.constant(5 : i32) : i32 @@ -135,8 +135,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe // CHECK-DAG: @_QMtest_0Ept1 = global { ptr, i64, i32, i8, i8, i8, i8 } { ptr null, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 20180515, i8 0, i8 9, i8 1, i8 0 } // CHECK-DAG: @_QMtest_0Ept1_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Ept1 - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [31 x i8] c"_QMtest_0Ept1_decl_tgt_ref_ptr\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Ept1_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Ept1_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [31 x i8] c"_QMtest_0Ept1_decl_tgt_ref_ptr\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Ept1_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Ept1_decl_tgt_ref_ptr, ptr @.offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Ept1_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Ept1() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> { %0 = llvm.mlir.zero : !llvm.ptr @@ -166,8 +166,8 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_targe // CHECK-DAG: @_QMtest_0Ept2_tar = global i32 5 // CHECK-DAG: @_QMtest_0Ept2_tar_decl_tgt_ref_ptr = weak global ptr @_QMtest_0Ept2_tar - // CHECK-DAG: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [35 x i8] c"_QMtest_0Ept2_tar_decl_tgt_ref_ptr\00" - // CHECK-DAG: @.omp_offloading.entry._QMtest_0Ept2_tar_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Ept2_tar_decl_tgt_ref_ptr, ptr @.omp_offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 + // CHECK-DAG: @.offloading.entry_name{{.*}} = internal unnamed_addr constant [35 x i8] c"_QMtest_0Ept2_tar_decl_tgt_ref_ptr\00" + // CHECK-DAG: @.offloading.entry._QMtest_0Ept2_tar_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { ptr @_QMtest_0Ept2_tar_decl_tgt_ref_ptr, ptr @.offloading.entry_name{{.*}}, i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 // CHECK-DAG: !{{.*}} = !{i32 {{.*}}, !"_QMtest_0Ept2_tar_decl_tgt_ref_ptr", i32 {{.*}}, i32 {{.*}}} llvm.mlir.global external @_QMtest_0Ept2_tar() {addr_space = 0 : i32, omp.declare_target = #omp.declaretarget} : i32 { %0 = llvm.mlir.constant(5 : i32) : i32 diff --git 
a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir index 8ab50f05f0716..b0fe642238f14 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir @@ -4,10 +4,10 @@ // for nested omp do loop inside omp target region module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { - llvm.func @target_parallel_wsloop(%arg0: !llvm.ptr) attributes { + llvm.func @target_parallel_wsloop(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget, target_cpu = "gfx90a", - target_features = #llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]> - } { + target_features = #llvm.target_features<["+gfx9-insts", "+wavefrontsize64"]>} + { omp.parallel { %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir b/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir new file mode 100644 index 0000000000000..3d18e608d857e --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-target-inside-task.mlir @@ -0,0 +1,41 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +module attributes {omp.is_target_device = true, omp.is_gpu = true} { + llvm.func @omp_target_region_() { + %0 = llvm.mlir.constant(20 : i32) : i32 + %1 = llvm.mlir.constant(10 : i32) : i32 + %2 = llvm.mlir.constant(1 : i64) : i64 + %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr + %4 = llvm.mlir.constant(1 : i64) : i64 + %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr + %6 = llvm.mlir.constant(1 : i64) : i64 + %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array<i32: 0, 0>, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr + llvm.store %1, %3 : i32, !llvm.ptr + llvm.store %0, %5 : i32, !llvm.ptr + omp.task { + %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} + omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr): + %8 = llvm.load %arg0 : !llvm.ptr -> i32 + %9 = llvm.load %arg1 : !llvm.ptr -> i32 + %10 = llvm.add %8, %9 : i32 + llvm.store %10, %arg2 : i32, !llvm.ptr + omp.terminator + } + omp.terminator + } + llvm.return + } + + llvm.func @omp_target_no_map() { + omp.target { + omp.terminator + } + llvm.return + } +} + +// CHECK: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}_omp_target_region__l19 +// CHECK: ret void diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir index 96cced7a1d584..c5f89eb2c3274 100644 ---
a/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-teams-llvm.mlir @@ -5,7 +5,7 @@ module attributes {omp.is_target_device = true} { llvm.func @foo(i32) - llvm.func @omp_target_teams_shared_simple(%arg0 : i32) { + llvm.func @omp_target_teams_shared_simple(%arg0 : i32) attributes {omp.declare_target = #omp.declaretarget} { omp.teams { llvm.call @foo(%arg0) : (i32) -> () omp.terminator diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir index e246c551886cf..0d77423abcb4f 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir @@ -4,7 +4,7 @@ // for nested omp do loop with collapse clause inside omp target region module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { - llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) { + llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget} { %loop_ub = llvm.mlir.constant(99 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : index) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir index 220eb85b3483e..0f3f503dfa537 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop.mlir @@ -4,7 +4,7 @@ // for nested omp do loop inside omp target region module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { - llvm.func @target_wsloop(%arg0: !llvm.ptr ){ + llvm.func @target_wsloop(%arg0: !llvm.ptr ) attributes {omp.declare_target = #omp.declaretarget} { %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : i32) : i32 @@ -16,7 +16,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo llvm.return } - llvm.func @target_empty_wsloop(){ + llvm.func @target_empty_wsloop() attributes {omp.declare_target = #omp.declaretarget} { %loop_ub = llvm.mlir.constant(9 : i32) : i32 %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : i32) : i32 diff --git a/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir b/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir new file mode 100644 index 0000000000000..d41429a6de066 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir @@ -0,0 +1,61 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// This test checks that a target op inside a data op is handled correctly. +// We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash.
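The omp.declare_target attributes added throughout these device-module tests are what keep the functions alive under device translation. As a rough stand-alone model of that filtering rule (an illustrative C++ sketch with invented names, not the actual OpenMP translation logic), when a module is translated for a target device only functions that are marked declare-target or that contain a target region have code generated, and host-only functions are skipped:

#include <cassert>
#include <string>
#include <vector>

// Toy view of a module: each function may carry a declare-target marker and
// may contain target regions that need device code generated for them.
struct Func {
  std::string name;
  bool declareTarget = false;
  bool hasTargetRegion = false;
};

// Device-side filtering rule: emit a function if it is declare-target or if
// it owns a target region to outline; skip host-only functions.
std::vector<std::string> emitForDevice(const std::vector<Func> &module) {
  std::vector<std::string> emitted;
  for (const Func &func : module)
    if (func.declareTarget || func.hasTargetRegion)
      emitted.push_back(func.name);
  return emitted;
}

int main() {
  std::vector<Func> module = {
      {"target_wsloop", /*declareTarget=*/true, /*hasTargetRegion=*/false},
      {"host_only_helper", /*declareTarget=*/false, /*hasTargetRegion=*/false},
      {"_QQmain", /*declareTarget=*/false, /*hasTargetRegion=*/true},
  };
  std::vector<std::string> emitted = emitForDevice(module);
  assert(emitted.size() == 2);
  assert(emitted[0] == "target_wsloop" && emitted[1] == "_QQmain");
  return 0;
}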
+// CHECK: {{.*}} = add i32 {{.*}}, 1 +module attributes { } { + llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32 + llvm.func @_QQmain() attributes {fir.bindc_name = "main", omp.declare_target = #omp.declaretarget} { + %0 = llvm.mlir.constant(99 : index) : i64 + %1 = llvm.mlir.constant(0 : index) : i64 + %2 = llvm.mlir.constant(1 : index) : i64 + %3 = llvm.mlir.constant(100 : index) : i64 + %4 = llvm.mlir.constant(1 : i64) : i64 + %5 = llvm.alloca %4 x i32 {bindc_name = "array_length"} : (i64) -> !llvm.ptr<5> + %6 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr + %7 = llvm.mlir.constant(1 : i64) : i64 + %8 = llvm.alloca %7 x i32 {bindc_name = "index_"} : (i64) -> !llvm.ptr<5> + %9 = llvm.addrspacecast %8 : !llvm.ptr<5> to !llvm.ptr + %10 = llvm.mlir.addressof @_QFEint_array : !llvm.ptr + %11 = omp.map.bounds lower_bound(%1 : i64) upper_bound(%0 : i64) extent(%3 : i64) stride(%2 : i64) start_idx(%2 : i64) + %12 = omp.map.info var_ptr(%10 : !llvm.ptr, !llvm.array<100 x i32>) map_clauses(from) capture(ByRef) bounds(%11) -> !llvm.ptr {name = "int_array"} + omp.target_data map_entries(%12 : !llvm.ptr) { + %13 = omp.map.info var_ptr(%10 : !llvm.ptr, !llvm.array<100 x i32>) map_clauses(from) capture(ByRef) bounds(%11) -> !llvm.ptr {name = "int_array"} + %14 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "index_"} + omp.target map_entries(%13 -> %arg0, %14 -> %arg1 : !llvm.ptr, !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %15 = llvm.mlir.constant(100 : i32) : i32 + %16 = llvm.mlir.constant(1 : i32) : i32 + %17 = llvm.mlir.constant(100 : index) : i64 + omp.parallel { + %18 = llvm.mlir.constant(1 : i64) : i64 + %19 = llvm.alloca %18 x i32 {pinned} : (i64) -> !llvm.ptr<5> + %20 = llvm.addrspacecast %19 : !llvm.ptr<5> to !llvm.ptr + omp.wsloop for (%arg2) : i32 = (%16) to (%15) inclusive step (%16) { + llvm.store %arg2, %20 : i32, !llvm.ptr + %21 = llvm.load %20 : !llvm.ptr -> i32 + %22 = llvm.sext %21 : i32 to i64 + %23 = llvm.mlir.constant(1 : i64) : i64 + %24 = llvm.mlir.constant(0 : i64) : i64 + %25 = llvm.sub %22, %23 overflow<nsw> : i64 + %26 = llvm.mul %25, %23 overflow<nsw> : i64 + %27 = llvm.mul %26, %23 overflow<nsw> : i64 + %28 = llvm.add %27, %24 overflow<nsw> : i64 + %29 = llvm.mul %23, %17 overflow<nsw> : i64 + %30 = llvm.getelementptr %arg0[%28] : (!llvm.ptr, i64) -> !llvm.ptr, i32 + llvm.store %21, %30 : i32, !llvm.ptr + omp.yield + } + omp.terminator + } + omp.terminator + } + omp.terminator + } + llvm.return + } + llvm.mlir.global internal @_QFEint_array() {addr_space = 0 : i32} : !llvm.array<100 x i32> { + %0 = llvm.mlir.zero : !llvm.array<100 x i32> + llvm.return %0 : !llvm.array<100 x i32> + } +} diff --git a/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir b/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir new file mode 100644 index 0000000000000..b4c848beef690 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-task-target-device.mlir @@ -0,0 +1,27 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// This tests the fix for https://github.com/llvm/llvm-project/issues/84606 +// We are only interested in ensuring that the -mlir-to-llvmir pass doesn't crash.
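A minimal stand-alone model of the traversal behavior this fix relies on (an illustrative C++ sketch with invented names; the real logic lives in the OpenMP-to-LLVM translation): on the device path, target regions must be found and outlined wherever they sit, whether directly in a function, after a task, or nested inside one:

#include <cassert>
#include <memory>
#include <string>
#include <vector>

// Toy op tree: an op has a kind and a body of nested ops.
struct Op {
  std::string kind;
  std::vector<std::unique_ptr<Op>> body;
  explicit Op(std::string kind) : kind(std::move(kind)) {}
  Op *add(std::string childKind) {
    body.push_back(std::make_unique<Op>(std::move(childKind)));
    return body.back().get();
  }
};

// Collect every target region regardless of nesting depth; each target body
// is outlined as one unit, so the walk does not descend into it further.
void collectTargetRegions(const Op &op, std::vector<const Op *> &out) {
  if (op.kind == "target") {
    out.push_back(&op);
    return;
  }
  for (const std::unique_ptr<Op> &child : op.body)
    collectTargetRegions(*child, out);
}

int main() {
  // func { task { store, target { add } }, target { load } }: covers both
  // the target-inside-task and the task-then-target shapes from these tests.
  Op func("func");
  Op *task = func.add("task");
  task->add("store");
  task->add("target")->add("add");
  func.add("target")->add("load");

  std::vector<const Op *> regions;
  collectTargetRegions(func, regions);
  assert(regions.size() == 2);
  return 0;
}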
+// CHECK: {{.*}} = add i32 {{.*}}, 5 +module attributes {omp.is_target_device = true } { + llvm.func @_QQmain() attributes {fir.bindc_name = "main", omp.declare_target = #omp.declaretarget} { + %0 = llvm.mlir.constant(0 : i32) : i32 + %1 = llvm.mlir.constant(1 : i64) : i64 + %2 = llvm.alloca %1 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr<5> + %3 = llvm.addrspacecast %2 : !llvm.ptr<5> to !llvm.ptr + omp.task { + llvm.store %0, %3 : i32, !llvm.ptr + omp.terminator + } + %4 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "a"} + omp.target map_entries(%4 -> %arg0 : !llvm.ptr) { + ^bb0(%arg0: !llvm.ptr): + %5 = llvm.mlir.constant(5 : i32) : i32 + %6 = llvm.load %arg0 : !llvm.ptr -> i32 + %7 = llvm.add %6, %5 : i32 + llvm.store %7, %arg0 : i32, !llvm.ptr + omp.terminator + } + llvm.return + } +} diff --git a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir index 660d7edb2fbb3..d1c23d584f92b 100644 --- a/mlir/test/Transforms/parallel-loop-collapsing.mlir +++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize))' | FileCheck %s -// CHECK-LABEL: func @parallel_many_dims() { +// CHECK: func @parallel_many_dims() { func.func @parallel_many_dims() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -28,19 +28,19 @@ func.func @parallel_many_dims() { return } -// CHECK-DAG: [[C12:%.*]] = arith.constant 12 : index -// CHECK-DAG: [[C10:%.*]] = arith.constant 10 : index -// CHECK-DAG: [[C9:%.*]] = arith.constant 9 : index -// CHECK-DAG: [[C6:%.*]] = arith.constant 6 : index -// CHECK-DAG: [[C4:%.*]] = arith.constant 4 : index -// CHECK-DAG: [[C3:%.*]] = arith.constant 3 : index -// CHECK-DAG: [[C2:%.*]] = arith.constant 2 : index -// CHECK-DAG: [[C1:%.*]] = arith.constant 1 : index -// CHECK-DAG: [[C0:%.*]] = arith.constant 0 : index -// CHECK: scf.parallel ([[NEW_I0:%.*]]) = ([[C0]]) to ([[C4]]) step ([[C1]]) { -// CHECK: [[V0:%.*]] = arith.remsi [[NEW_I0]], [[C2]] : index -// CHECK: [[I0:%.*]] = arith.divsi [[NEW_I0]], [[C2]] : index -// CHECK: [[V2:%.*]] = arith.muli [[V0]], [[C10]] : index -// CHECK: [[I3:%.*]] = arith.addi [[V2]], [[C9]] : index -// CHECK: "magic.op"([[I0]], [[C3]], [[C6]], [[I3]], [[C12]]) : (index, index, index, index, index) -> index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index +// CHECK-DAG: %[[C12:.*]] = arith.constant 12 : index +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C9:.*]] = arith.constant 9 : index +// CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK: scf.parallel (%[[NEW_I0:.*]]) = (%[[C0]]) to (%[[C4]]) step (%[[C1]]) { +// CHECK: %[[V0:.*]] = arith.remsi %[[NEW_I0]], %[[C2]] : index +// CHECK: %[[I0:.*]] = arith.divsi %[[NEW_I0]], %[[C2]] : index +// CHECK: %[[V2:.*]] = arith.muli %[[V0]], %[[C10]] +// CHECK: %[[I3:.*]] = arith.addi %[[V2]], %[[C9]] +// CHECK: "magic.op"(%[[I0]], %[[C3]], %[[C6]], %[[I3]], %[[C12]]) : (index, index, index, index, index) -> index // CHECK: scf.reduce diff --git a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir 
b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir index 542786b5fa5e5..4eed61a65aa47 100644 --- a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir +++ b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir @@ -13,22 +13,22 @@ func.func @collapse_to_single() { return } -// CHECK-LABEL: func @collapse_to_single() { -// CHECK-DAG: [[C18:%.*]] = arith.constant 18 : index -// CHECK-DAG: [[C6:%.*]] = arith.constant 6 : index -// CHECK-DAG: [[C3:%.*]] = arith.constant 3 : index -// CHECK-DAG: [[C7:%.*]] = arith.constant 7 : index -// CHECK-DAG: [[C4:%.*]] = arith.constant 4 : index -// CHECK-DAG: [[C1:%.*]] = arith.constant 1 : index -// CHECK-DAG: [[C0:%.*]] = arith.constant 0 : index -// CHECK: scf.parallel ([[NEW_I:%.*]]) = ([[C0]]) to ([[C18]]) step ([[C1]]) { -// CHECK: [[I0_COUNT:%.*]] = arith.remsi [[NEW_I]], [[C6]] : index -// CHECK: [[I1_COUNT:%.*]] = arith.divsi [[NEW_I]], [[C6]] : index -// CHECK: [[V0:%.*]] = arith.muli [[I0_COUNT]], [[C4]] : index -// CHECK: [[I1:%.*]] = arith.addi [[V0]], [[C7]] : index -// CHECK: [[V1:%.*]] = arith.muli [[I1_COUNT]], [[C3]] : index -// CHECK: [[I0:%.*]] = arith.addi [[V1]], [[C3]] : index -// CHECK: "magic.op"([[I0]], [[I1]]) : (index, index) -> index +// CHECK: func @collapse_to_single() { +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index +// CHECK-DAG: %[[C18:.*]] = arith.constant 18 : index +// CHECK: scf.parallel (%[[NEW_I:.*]]) = (%[[C0]]) to (%[[C18]]) step (%[[C1]]) { +// CHECK: %[[I0_COUNT:.*]] = arith.remsi %[[NEW_I]], %[[C6]] : index +// CHECK: %[[I1_COUNT:.*]] = arith.divsi %[[NEW_I]], %[[C6]] : index +// CHECK: %[[V0:.*]] = arith.muli %[[I0_COUNT]], %[[C4]] +// CHECK: %[[I1:.*]] = arith.addi %[[V0]], %[[C7]] +// CHECK: %[[V1:.*]] = arith.muli %[[I1_COUNT]], %[[C3]] +// CHECK: %[[I0:.*]] = arith.addi %[[V1]], %[[C3]] +// CHECK: "magic.op"(%[[I0]], %[[I1]]) : (index, index) -> index // CHECK: scf.reduce // CHECK-NEXT: } // CHECK-NEXT: return diff --git a/mlir/test/lib/Dialect/Test/CMakeLists.txt b/mlir/test/lib/Dialect/Test/CMakeLists.txt index b82b1631eead5..47ddcf6524748 100644 --- a/mlir/test/lib/Dialect/Test/CMakeLists.txt +++ b/mlir/test/lib/Dialect/Test/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_OPTIONAL_SOURCES TestDialect.cpp TestPatterns.cpp TestTraits.cpp + TestFromLLVMIRTranslation.cpp TestToLLVMIRTranslation.cpp ) @@ -86,6 +87,23 @@ add_mlir_library(MLIRTestDialect MLIRTransforms ) +add_mlir_translation_library(MLIRTestFromLLVMIRTranslation + TestFromLLVMIRTranslation.cpp + + EXCLUDE_FROM_LIBMLIR + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRIR + MLIRLLVMDialect + MLIRTestDialect + MLIRSupport + MLIRTargetLLVMIRImport + MLIRLLVMIRToLLVMTranslation +) + add_mlir_translation_library(MLIRTestToLLVMIRTranslation TestToLLVMIRTranslation.cpp diff --git a/mlir/test/lib/Dialect/Test/TestFromLLVMIRTranslation.cpp b/mlir/test/lib/Dialect/Test/TestFromLLVMIRTranslation.cpp new file mode 100644 index 0000000000000..3673d62bea2c9 --- /dev/null +++ b/mlir/test/lib/Dialect/Test/TestFromLLVMIRTranslation.cpp @@ -0,0 +1,111 @@ +//===- TestFromLLVMIRTranslation.cpp - Import Test dialect from LLVM IR ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a translation between LLVM IR and the MLIR Test dialect. +// +//===----------------------------------------------------------------------===// + +#include "TestDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.h" +#include "mlir/Target/LLVMIR/Import.h" +#include "mlir/Target/LLVMIR/ModuleImport.h" +#include "mlir/Tools/mlir-translate/Translation.h" + +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/SourceMgr.h" + +using namespace mlir; +using namespace test; + +static ArrayRef<unsigned> getSupportedInstructionsImpl() { + static unsigned instructions[] = {llvm::Instruction::Load}; + return instructions; +} + +static LogicalResult convertLoad(OpBuilder &builder, llvm::Instruction *inst, + ArrayRef<llvm::Value *> llvmOperands, + LLVM::ModuleImport &moduleImport) { + FailureOr<Value> addr = moduleImport.convertValue(llvmOperands[0]); + if (failed(addr)) + return failure(); + // Create the LoadOp and map the original instruction to a test op that + // consumes the loaded value twice. + Value loadOp = builder.create<LLVM::LoadOp>( + moduleImport.translateLoc(inst->getDebugLoc()), + moduleImport.convertType(inst->getType()), *addr); + moduleImport.mapValue(inst) = builder.create<SameOperandElementTypeOp>( + loadOp.getLoc(), loadOp.getType(), loadOp, loadOp); + return success(); +} + +namespace { +class TestDialectLLVMImportDialectInterface + : public LLVMImportDialectInterface { +public: + using LLVMImportDialectInterface::LLVMImportDialectInterface; + + LogicalResult + convertInstruction(OpBuilder &builder, llvm::Instruction *inst, + ArrayRef<llvm::Value *> llvmOperands, + LLVM::ModuleImport &moduleImport) const override { + switch (inst->getOpcode()) { + case llvm::Instruction::Load: + return convertLoad(builder, inst, llvmOperands, moduleImport); + default: + break; + } + return failure(); + } + + ArrayRef<unsigned> getSupportedInstructions() const override { + return getSupportedInstructionsImpl(); + } +}; +} // namespace + +namespace mlir { +void registerTestFromLLVMIR() { + TranslateToMLIRRegistration registration( + "test-import-llvmir", "test dialect from LLVM IR", + [](llvm::SourceMgr &sourceMgr, + MLIRContext *context) -> OwningOpRef<Operation *> { + llvm::SMDiagnostic err; + llvm::LLVMContext llvmContext; + std::unique_ptr<llvm::Module> llvmModule = + llvm::parseIR(*sourceMgr.getMemoryBuffer(sourceMgr.getMainFileID()), + err, llvmContext); + if (!llvmModule) { + std::string errStr; + llvm::raw_string_ostream errStream(errStr); + err.print(/*ProgName=*/"", errStream); + emitError(UnknownLoc::get(context)) << errStream.str(); + return {}; + } + if (llvm::verifyModule(*llvmModule, &llvm::errs())) + return nullptr; + + return translateLLVMIRToModule(std::move(llvmModule), context, false); + }, + [](DialectRegistry &registry) { + registry.insert<test::TestDialect>(); + registry.insert<LLVM::LLVMDialect>(); + registerLLVMDialectImport(registry); + registry.addExtension( + +[](MLIRContext *ctx, test::TestDialect *dialect) { + dialect->addInterfaces<TestDialectLLVMImportDialectInterface>(); + }); + }); +} +} // namespace mlir diff --git a/mlir/test/mlir-tblgen/op-attribute.td b/mlir/test/mlir-tblgen/op-attribute.td index b5b8619e7c9be..6f2d430fb6db6 100644 --- a/mlir/test/mlir-tblgen/op-attribute.td +++ b/mlir/test/mlir-tblgen/op-attribute.td @@ -98,26 +98,26 @@ def AOp : NS_Op<"a_op", []> { // 
Test getter methods // --- -// DEF: some-attr-kind AOp::getAAttrAttr() -// DEF-NEXT: ::llvm::cast(::mlir::impl::getAttrFromSortedRange((*this)->getAttrs().begin() + 0, (*this)->getAttrs().end() - 0, getAAttrAttrName())) +// DECL: some-attr-kind getAAttrAttr() +// DECL-NEXT: ::llvm::cast(::mlir::impl::getAttrFromSortedRange((*this)->getAttrs().begin() + 0, (*this)->getAttrs().end() - 0, getAAttrAttrName())) // DEF: some-return-type AOp::getAAttr() { // DEF-NEXT: auto attr = getAAttrAttr() // DEF-NEXT: return attr.some-convert-from-storage(); -// DEF: some-attr-kind AOp::getBAttrAttr() -// DEF-NEXT: ::llvm::dyn_cast_or_null(::mlir::impl::getAttrFromSortedRange((*this)->getAttrs().begin() + 1, (*this)->getAttrs().end() - 0, getBAttrAttrName())) +// DECL: some-attr-kind getBAttrAttr() +// DECL-NEXT: ::llvm::dyn_cast_or_null(::mlir::impl::getAttrFromSortedRange((*this)->getAttrs().begin() + 1, (*this)->getAttrs().end() - 0, getBAttrAttrName())) // DEF: some-return-type AOp::getBAttr() { // DEF-NEXT: auto attr = getBAttrAttr(); // DEF-NEXT: return attr.some-convert-from-storage(); -// DEF: some-attr-kind AOp::getCAttrAttr() -// DEF-NEXT: ::llvm::dyn_cast_or_null(::mlir::impl::getAttrFromSortedRange((*this)->getAttrs().begin() + 1, (*this)->getAttrs().end() - 0, getCAttrAttrName())) +// DECL: some-attr-kind getCAttrAttr() +// DECL-NEXT: ::llvm::dyn_cast_or_null(::mlir::impl::getAttrFromSortedRange((*this)->getAttrs().begin() + 1, (*this)->getAttrs().end() - 0, getCAttrAttrName())) // DEF: ::std::optional AOp::getCAttr() { // DEF-NEXT: auto attr = getCAttrAttr() // DEF-NEXT: return attr ? ::std::optional(attr.some-convert-from-storage()) : (::std::nullopt); -// DEF: some-attr-kind AOp::getDAttrAttr() -// DEF-NEXT: ::llvm::dyn_cast_or_null(::mlir::impl::getAttrFromSortedRange((*this)->getAttrs().begin() + 1, (*this)->getAttrs().end() - 0, getDAttrAttrName())) +// DECL: some-attr-kind getDAttrAttr() +// DECL-NEXT: ::llvm::dyn_cast_or_null(::mlir::impl::getAttrFromSortedRange((*this)->getAttrs().begin() + 1, (*this)->getAttrs().end() - 0, getDAttrAttrName())) // DEF: some-return-type AOp::getDAttr() { // DEF-NEXT: auto attr = getDAttrAttr(); // DEF-NEXT: if (!attr) @@ -127,16 +127,16 @@ def AOp : NS_Op<"a_op", []> { // Test setter methods // --- -// DEF: void AOp::setAAttrAttr(some-attr-kind attr) { -// DEF-NEXT: (*this)->setAttr(getAAttrAttrName(), attr); +// DECL: void setAAttrAttr(some-attr-kind attr) { +// DECL-NEXT: (*this)->setAttr(getAAttrAttrName(), attr); // DEF: void AOp::setAAttr(some-return-type attrValue) { // DEF-NEXT: (*this)->setAttr(getAAttrAttrName(), some-const-builder-call(::mlir::Builder((*this)->getContext()), attrValue)); -// DEF: void AOp::setBAttrAttr(some-attr-kind attr) { -// DEF-NEXT: (*this)->setAttr(getBAttrAttrName(), attr); +// DECL: void setBAttrAttr(some-attr-kind attr) { +// DECL-NEXT: (*this)->setAttr(getBAttrAttrName(), attr); // DEF: void AOp::setBAttr(some-return-type attrValue) { // DEF-NEXT: (*this)->setAttr(getBAttrAttrName(), some-const-builder-call(::mlir::Builder((*this)->getContext()), attrValue)); -// DEF: void AOp::setCAttrAttr(some-attr-kind attr) { -// DEF-NEXT: (*this)->setAttr(getCAttrAttrName(), attr); +// DECL: void setCAttrAttr(some-attr-kind attr) { +// DECL-NEXT: (*this)->setAttr(getCAttrAttrName(), attr); // DEF: void AOp::setCAttr(::std::optional attrValue) { // DEF-NEXT: if (attrValue) // DEF-NEXT: return (*this)->setAttr(getCAttrAttrName(), some-const-builder-call(::mlir::Builder((*this)->getContext()), *attrValue)); @@ -145,8 +145,8 @@ def 
AOp : NS_Op<"a_op", []> { // Test remove methods // --- -// DEF: ::mlir::Attribute AOp::removeCAttrAttr() { -// DEF-NEXT: return (*this)->removeAttr(getCAttrAttrName()); +// DECL: ::mlir::Attribute removeCAttrAttr() { +// DECL-NEXT: return (*this)->removeAttr(getCAttrAttrName()); // Test build methods // --- @@ -236,22 +236,22 @@ def AgetOp : Op { // Test getter methods // --- -// DEF: some-attr-kind AgetOp::getAAttrAttr() -// DEF-NEXT: ::llvm::cast(::mlir::impl::getAttrFromSortedRange({{.*}})) +// DECL: some-attr-kind getAAttrAttr() +// DECL-NEXT: ::llvm::cast(::mlir::impl::getAttrFromSortedRange({{.*}})) // DEF: some-return-type AgetOp::getAAttr() { // DEF-NEXT: auto attr = getAAttrAttr() // DEF-NEXT: return attr.some-convert-from-storage(); -// DEF: some-attr-kind AgetOp::getBAttrAttr() -// DEF-NEXT: return ::llvm::dyn_cast_or_null(::mlir::impl::getAttrFromSortedRange({{.*}})) +// DECL: some-attr-kind getBAttrAttr() +// DECL-NEXT: return ::llvm::dyn_cast_or_null(::mlir::impl::getAttrFromSortedRange({{.*}})) // DEF: some-return-type AgetOp::getBAttr() { // DEF-NEXT: auto attr = getBAttrAttr(); // DEF-NEXT: if (!attr) // DEF-NEXT: return some-const-builder-call(::mlir::Builder((*this)->getContext()), 4.2).some-convert-from-storage(); // DEF-NEXT: return attr.some-convert-from-storage(); -// DEF: some-attr-kind AgetOp::getCAttrAttr() -// DEF-NEXT: return ::llvm::dyn_cast_or_null(::mlir::impl::getAttrFromSortedRange({{.*}})) +// DECL: some-attr-kind getCAttrAttr() +// DECL-NEXT: return ::llvm::dyn_cast_or_null(::mlir::impl::getAttrFromSortedRange({{.*}})) // DEF: ::std::optional AgetOp::getCAttr() { // DEF-NEXT: auto attr = getCAttrAttr() // DEF-NEXT: return attr ? ::std::optional(attr.some-convert-from-storage()) : (::std::nullopt); @@ -259,18 +259,18 @@ def AgetOp : Op { // Test setter methods // --- -// DEF: void AgetOp::setAAttrAttr(some-attr-kind attr) { -// DEF-NEXT: (*this)->setAttr(getAAttrAttrName(), attr); -// DEF: void AgetOp::setBAttrAttr(some-attr-kind attr) { -// DEF-NEXT: (*this)->setAttr(getBAttrAttrName(), attr); -// DEF: void AgetOp::setCAttrAttr(some-attr-kind attr) { -// DEF-NEXT: (*this)->setAttr(getCAttrAttrName(), attr); +// DECL: void setAAttrAttr(some-attr-kind attr) { +// DECL-NEXT: (*this)->setAttr(getAAttrAttrName(), attr); +// DECL: void setBAttrAttr(some-attr-kind attr) { +// DECL-NEXT: (*this)->setAttr(getBAttrAttrName(), attr); +// DECL: void setCAttrAttr(some-attr-kind attr) { +// DECL-NEXT: (*this)->setAttr(getCAttrAttrName(), attr); // Test remove methods // --- -// DEF: ::mlir::Attribute AgetOp::removeCAttrAttr() { -// DEF-NEXT: return (*this)->removeAttr(getCAttrAttrName()); +// DECL: ::mlir::Attribute removeCAttrAttr() { +// DECL-NEXT: return (*this)->removeAttr(getCAttrAttrName()); // Test build methods // --- @@ -476,9 +476,6 @@ def NamespaceOp : NS_Op<"namespace_op", []> { SomeAttrDef:$AttrDef ); } -// DECL: NamespaceOp -// DECL: foobar::SomeAttrAttr getAttrDef() - // Test mixing operands and attributes in arbitrary order // --- @@ -487,6 +484,14 @@ def MixOperandsAndAttrs : NS_Op<"mix_operands_and_attrs", []> { let arguments = (ins F32Attr:$attr, F32:$operand, F32Attr:$otherAttr, F32:$otherArg); } +// DECL-LABEL: MixOperandsAndAttrs declarations +// DECL-DAG: ::mlir::TypedValue<::mlir::FloatType> getOperand() +// DECL-DAG: ::mlir::TypedValue<::mlir::FloatType> getOtherArg() + +// DECL-LABEL: NamespaceOp declarations +// DECL: foobar::SomeAttrAttr getAttrDef() + + def OpWithDefaultAndRegion : NS_Op<"default_with_region", []> { let arguments = (ins 
DefaultValuedAttr:$dv_bool_attr @@ -509,11 +514,9 @@ def OpWithDefaultAndSuccessor : NS_Op<"default_with_succ", []> { // We should not have a default attribute in this case. // DECL-LABEL: OpWithDefaultAndSuccessor declarations -// DECL: static void build({{.*}}, bool dv_bool_attr, ::mlir::BlockRange succ) +// DECL-DAG: static void build({{.*}}, bool dv_bool_attr, ::mlir::BlockRange succ) // DEF-LABEL: MixOperandsAndAttrs definitions -// DEF-DAG: ::mlir::TypedValue<::mlir::FloatType> MixOperandsAndAttrs::getOperand() -// DEF-DAG: ::mlir::TypedValue<::mlir::FloatType> MixOperandsAndAttrs::getOtherArg() // DEF-DAG: void MixOperandsAndAttrs::build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::FloatAttr attr, ::mlir::Value operand, ::mlir::FloatAttr otherAttr, ::mlir::Value otherArg) // DEF-DAG: ::llvm::APFloat MixOperandsAndAttrs::getAttr() // DEF-DAG: ::llvm::APFloat MixOperandsAndAttrs::getOtherAttr() @@ -529,14 +532,13 @@ def UnitAttrOp : NS_Op<"unit_attr_op", []> { // DEF: bool UnitAttrOp::getAttr() { // DEF: return {{.*}} != nullptr -// DEF: ::mlir::Attribute UnitAttrOp::removeAttrAttr() { -// DEF-NEXT: (*this)->removeAttr(getAttrAttrName()); // DEF: build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, /*optional*/::mlir::UnitAttr attr) // DEF: build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, /*optional*/bool attr) // DECL-LABEL: UnitAttrOp declarations -// DECL-NOT: declarations +// DECL: ::mlir::Attribute removeAttrAttr() { +// DECL-NEXT: (*this)->removeAttr(getAttrAttrName()); // DECL: build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, /*optional*/bool attr = false) diff --git a/mlir/test/mlir-tblgen/op-decl-and-defs.td b/mlir/test/mlir-tblgen/op-decl-and-defs.td index ca133fafdcb57..499e3ceecaf04 100644 --- a/mlir/test/mlir-tblgen/op-decl-and-defs.td +++ b/mlir/test/mlir-tblgen/op-decl-and-defs.td @@ -63,8 +63,8 @@ def NS_AOp : NS_Op<"a_op", [IsolatedFromAbove, IsolatedFromAbove]> { // CHECK: uint32_t getAttr1(); // CHECK: ::mlir::FloatAttr getSomeAttr2Attr(); // CHECK: ::std::optional< ::llvm::APFloat > getSomeAttr2(); -// CHECK: ::mlir::Region &getSomeRegion(); -// CHECK: ::mlir::RegionRange getSomeRegions(); +// CHECK: ::mlir::Region &getSomeRegion() { +// CHECK: ::mlir::RegionRange getSomeRegions() { // CHECK: }; // CHECK: } @@ -94,20 +94,20 @@ def NS_AOp : NS_Op<"a_op", [IsolatedFromAbove, IsolatedFromAbove]> { // CHECK: static constexpr ::llvm::StringLiteral getOperationName() { // CHECK: return ::llvm::StringLiteral("test.a_op"); // CHECK: } -// CHECK: ::mlir::Operation::operand_range getODSOperands(unsigned index); -// CHECK: ::mlir::TypedValue<::mlir::IntegerType> getA(); -// CHECK: ::mlir::Operation::operand_range getB(); -// CHECK: ::mlir::OpOperand &getAMutable(); +// CHECK: ::mlir::Operation::operand_range getODSOperands(unsigned index) { +// CHECK: ::mlir::TypedValue<::mlir::IntegerType> getA() { +// CHECK: ::mlir::Operation::operand_range getB() { +// CHECK: ::mlir::OpOperand &getAMutable() { // CHECK: ::mlir::MutableOperandRange getBMutable(); -// CHECK: ::mlir::Operation::result_range getODSResults(unsigned index); -// CHECK: ::mlir::TypedValue<::mlir::IntegerType> getR(); -// CHECK: ::mlir::Region &getSomeRegion(); -// CHECK: ::mlir::MutableArrayRef<::mlir::Region> getSomeRegions(); -// CHECK: ::mlir::IntegerAttr getAttr1Attr() +// CHECK: ::mlir::Operation::result_range getODSResults(unsigned index) { +// CHECK: ::mlir::TypedValue<::mlir::IntegerType> getR() { +// CHECK: 
::mlir::Region &getSomeRegion() { +// CHECK: ::mlir::MutableArrayRef<::mlir::Region> getSomeRegions() { +// CHECK: ::mlir::IntegerAttr getAttr1Attr() { // CHECK: uint32_t getAttr1(); -// CHECK: ::mlir::FloatAttr getSomeAttr2Attr() +// CHECK: ::mlir::FloatAttr getSomeAttr2Attr() { // CHECK: ::std::optional< ::llvm::APFloat > getSomeAttr2(); -// CHECK: ::mlir::Attribute removeSomeAttr2Attr(); +// CHECK: ::mlir::Attribute removeSomeAttr2Attr() { // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, Value val); // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, int integer = 0); // CHECK{LITERAL}: [[deprecated("the deprecation message")]] @@ -137,9 +137,9 @@ def NS_AOp : NS_Op<"a_op", [IsolatedFromAbove, IsolatedFromAbove]> { // DEFS-SAME: p.getProperties() // DEFS-SAME: op->getRegions() -// DEFS: ::mlir::RegionRange AOpGenericAdaptorBase::getSomeRegions() -// DEFS-NEXT: return odsRegions.drop_front(1); -// DEFS: ::mlir::RegionRange AOpGenericAdaptorBase::getRegions() +// DECLS: ::mlir::RegionRange AOpGenericAdaptorBase::getSomeRegions() +// DECLS-NEXT: return odsRegions.drop_front(1); +// DECLS: ::mlir::RegionRange AOpGenericAdaptorBase::getRegions() // Check AttrSizedOperandSegments // --- @@ -196,9 +196,9 @@ def NS_EOp : NS_Op<"op_with_optionals", []> { } // CHECK-LABEL: NS::EOp declarations -// CHECK: ::mlir::TypedValue<::mlir::IntegerType> getA(); +// CHECK: ::mlir::TypedValue<::mlir::IntegerType> getA() { // CHECK: ::mlir::MutableOperandRange getAMutable(); -// CHECK: ::mlir::TypedValue<::mlir::FloatType> getB(); +// CHECK: ::mlir::TypedValue<::mlir::FloatType> getB() { // CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, /*optional*/::mlir::Type b, /*optional*/::mlir::Value a) // Check that all types match constraint results in generating builder. 
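The CHECK churn above comes from a single mechanical change: OpDefinitionsGen now emits trivial accessor bodies inline in the generated declarations via addInlineMethod, so the header matches "getSomeRegion() {" where it previously held a bare prototype, and the matching AOp::-qualified definitions drop out of the definitions output. Here is a compact stand-alone C++ sketch of that declaration/definition split (a simplified stand-in for tblgen's Method abstraction, not the real class):

#include <iostream>
#include <sstream>
#include <string>

// Toy model of the tablegen Method abstraction: a method knows its signature
// and body, and whether it should be defined inline in the declaration.
class Method {
public:
  Method(std::string retType, std::string name, bool isInline)
      : retType(std::move(retType)), name(std::move(name)),
        inlineDef(isInline) {}

  std::ostringstream &body() { return bodyStream; }

  // In the header: inline methods print "signature { body }"; others print
  // only "signature;" and rely on a separate out-of-line definition.
  void writeDecl(std::ostream &os) const {
    os << retType << " " << name << "()";
    if (inlineDef)
      os << " {\n" << bodyStream.str() << "\n}\n";
    else
      os << ";\n";
  }

  // In the definitions file: only out-of-line methods get emitted here.
  void writeDef(std::ostream &os, const std::string &className) const {
    if (inlineDef)
      return;
    os << retType << " " << className << "::" << name << "() {\n"
       << bodyStream.str() << "\n}\n";
  }

private:
  std::string retType;
  std::string name;
  bool inlineDef;
  std::ostringstream bodyStream;
};

int main() {
  Method getter("::mlir::Region &", "getSomeRegion", /*isInline=*/true);
  getter.body() << "  return (*this)->getRegion(0);";
  // The declaration now carries the brace-enclosed body, which is why the
  // CHECK lines above match "getSomeRegion() {" in the declarations output.
  getter.writeDecl(std::cout);
  getter.writeDef(std::cout, "AOp"); // prints nothing for an inline method
  return 0;
}

Out-of-line methods keep the old two-file pattern, which is why the remaining DEF lines still match the ClassName:: qualifier.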
diff --git a/mlir/test/mlir-tblgen/op-operand.td b/mlir/test/mlir-tblgen/op-operand.td index 68a9def83c2e0..a749708244798 100644 --- a/mlir/test/mlir-tblgen/op-operand.td +++ b/mlir/test/mlir-tblgen/op-operand.td @@ -1,4 +1,5 @@ // RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL include "mlir/IR/OpBase.td" @@ -39,11 +40,11 @@ def OpD : NS_Op<"mix_variadic_and_normal_inputs_op", [SameVariadicOperandSize]> let arguments = (ins Variadic<AnyTensor>:$input1, AnyTensor:$input2, Variadic<AnyTensor>:$input3); } -// CHECK-LABEL: ::mlir::Operation::operand_range OpD::getInput1 -// CHECK-NEXT: return getODSOperands(0); +// DECL-LABEL: ::mlir::Operation::operand_range getInput1 +// DECL-NEXT: return getODSOperands(0); -// CHECK-LABEL: ::mlir::TypedValue<::mlir::TensorType> OpD::getInput2 -// CHECK-NEXT: return ::llvm::cast<::mlir::TypedValue<::mlir::TensorType>>(*getODSOperands(1).begin()); +// DECL-LABEL: ::mlir::TypedValue<::mlir::TensorType> getInput2 +// DECL-NEXT: return ::llvm::cast<::mlir::TypedValue<::mlir::TensorType>>(*getODSOperands(1).begin()); // CHECK-LABEL: OpD::build // CHECK-NEXT: odsState.addOperands(input1); diff --git a/mlir/test/mlir-tblgen/op-properties.td b/mlir/test/mlir-tblgen/op-properties.td new file mode 100644 index 0000000000000..7b0ee6b2a1bd8 --- /dev/null +++ b/mlir/test/mlir-tblgen/op-properties.td @@ -0,0 +1,21 @@ +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s + +include "mlir/IR/AttrTypeBase.td" +include "mlir/IR/EnumAttr.td" +include "mlir/IR/OpBase.td" + +def Test_Dialect : Dialect { + let name = "test"; + let cppNamespace = "foobar"; +} +class NS_Op<string mnemonic, list<Trait> traits = []> : + Op<Test_Dialect, mnemonic, traits>; + +def OpWithAttr : NS_Op<"op_with_attr">{ + let arguments = (ins AnyAttr:$attr, OptionalAttr<AnyAttr>:$optional); +} + +// CHECK: void setAttrAttr(::mlir::Attribute attr) +// CHECK-NEXT: getProperties().attr = attr +// CHECK: void setOptionalAttr(::mlir::Attribute attr) +// CHECK-NEXT: getProperties().optional = attr diff --git a/mlir/test/mlir-tblgen/op-result.td b/mlir/test/mlir-tblgen/op-result.td index a4a3764aae2b2..0ca570cf8cafb 100644 --- a/mlir/test/mlir-tblgen/op-result.td +++ b/mlir/test/mlir-tblgen/op-result.td @@ -1,4 +1,5 @@ // RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s +// RUN: mlir-tblgen -gen-op-decls -I %S/../../include %s | FileCheck %s --check-prefix=DECL include "mlir/IR/OpBase.td" include "mlir/Interfaces/InferTypeOpInterface.td" @@ -97,11 +98,11 @@ def OpI : NS_Op<"mix_variadic_and_normal_results_op", [SameVariadicResultSize]> let results = (outs Variadic<AnyTensor>:$output1, AnyTensor:$output2, Variadic<AnyTensor>:$output3); } -// CHECK-LABEL: ::mlir::Operation::result_range OpI::getOutput1 -// CHECK-NEXT: return getODSResults(0); +// DECL-LABEL: ::mlir::Operation::result_range getOutput1 +// DECL-NEXT: return getODSResults(0); -// CHECK-LABEL: ::mlir::TypedValue<::mlir::TensorType> OpI::getOutput2 -// CHECK-NEXT: return ::llvm::cast<::mlir::TypedValue<::mlir::TensorType>>(*getODSResults(1).begin()); +// DECL-LABEL: ::mlir::TypedValue<::mlir::TensorType> getOutput2 +// DECL-NEXT: return ::llvm::cast<::mlir::TypedValue<::mlir::TensorType>>(*getODSResults(1).begin()); // CHECK-LABEL: OpI::build // CHECK-NEXT: odsState.addTypes(output1); diff --git a/mlir/test/python/dialects/transform_structured_ext.py b/mlir/test/python/dialects/transform_structured_ext.py index c9b7802e1cc45..91ecd0fc38e17 100644 --- a/mlir/test/python/dialects/transform_structured_ext.py +++ 
b/mlir/test/python/dialects/transform_structured_ext.py @@ -210,7 +210,17 @@ def testVectorizeMixed(target): # CHECK: transform.sequence # CHECK: %[[V0:.*]] = transform.structured.match # CHECK: transform.structured.vectorize - # CHECK-SAME: vector_sizes [%[[V0]] : !transform.any_op, 4] + # CHECK-SAME: vector_sizes [%[[V0]], 4] + + +@run +@create_sequence +def testVectorizeEmpty(target): + structured.VectorizeOp(target, []) + # CHECK-LABEL: TEST: testVectorizeEmpty + # CHECK: transform.sequence + # CHECK: transform.structured.vectorize + # CHECK-NOT: vector_sizes @run @@ -223,7 +233,7 @@ def testVectorizeScalable(target): # CHECK: transform.sequence # CHECK-DAG: %[[V0:.*]] = transform.structured.match # CHECK-DAG: transform.structured.vectorize - # CHECK-SAME: vector_sizes [16, [%[[V0]] : !transform.any_op], [4], [8]] + # CHECK-SAME: vector_sizes [16, [%[[V0]]], [4], [8]] @run diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 3a697520dfad5..53ed5cb7c043e 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -1712,7 +1712,9 @@ void OpEmitter::genAttrGetters() { // having to use the string interface for better compile time verification. auto emitAttrWithStorageType = [&](StringRef name, StringRef attrName, Attribute attr) { - auto *method = opClass.addMethod(attr.getStorageType(), name + "Attr"); + // The method body for this getter is trivial. Emit it inline. + auto *method = + opClass.addInlineMethod(attr.getStorageType(), name + "Attr"); if (!method) return; method->body() << formatv( @@ -1804,23 +1806,37 @@ void OpEmitter::genAttrGetters() { } void OpEmitter::genAttrSetters() { + bool useProperties = op.getDialect().usePropertiesForAttributes(); + + // Generate the code to set an attribute. + auto emitSetAttr = [&](Method *method, StringRef getterName, + StringRef attrName, StringRef attrVar) { + if (useProperties) { + method->body() << formatv(" getProperties().{0} = {1};", attrName, + attrVar); + } else { + method->body() << formatv(" (*this)->setAttr({0}AttrName(), {1});", + getterName, attrVar); + } + }; + // Generate raw named setter type. This is a wrapper class that allows setting // to the attributes via setters instead of having to use the string interface // for better compile time verification. auto emitAttrWithStorageType = [&](StringRef setterName, StringRef getterName, - Attribute attr) { + StringRef attrName, Attribute attr) { + // This method body is trivial, so emit it inline. auto *method = - opClass.addMethod("void", setterName + "Attr", - MethodParameter(attr.getStorageType(), "attr")); + opClass.addInlineMethod("void", setterName + "Attr", + MethodParameter(attr.getStorageType(), "attr")); if (method) - method->body() << formatv(" (*this)->setAttr({0}AttrName(), attr);", - getterName); + emitSetAttr(method, getterName, attrName, "attr"); }; // Generate a setter that accepts the underlying C++ type as opposed to the // attribute type. auto emitAttrWithReturnType = [&](StringRef setterName, StringRef getterName, - Attribute attr) { + StringRef attrName, Attribute attr) { Attribute baseAttr = attr.getBaseAttr(); if (!canUseUnwrappedRawValue(baseAttr)) return; @@ -1849,9 +1865,8 @@ void OpEmitter::genAttrSetters() { // If the value isn't optional, just set it directly. 
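// Illustration (a sketch mirroring the new op-properties.td test earlier in
// this patch; not verbatim generator output): for an attribute $attr,
// emitSetAttr gives the generated setter one of two bodies, depending on
// whether the dialect stores attributes as properties:
//   getProperties().attr = attr;                 // properties path
//   (*this)->setAttr(getAttrAttrName(), attr);   // dictionary path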
if (!isOptional) { - method->body() << formatv( - " (*this)->setAttr({0}AttrName(), {1});", getterName, - constBuildAttrFromParam(attr, fctx, "attrValue")); + emitSetAttr(method, getterName, attrName, + constBuildAttrFromParam(attr, fctx, "attrValue")); return; } @@ -1862,13 +1877,25 @@ // optional but not in the same way as the others (i.e. it uses bool over // std::optional<>). StringRef paramStr = isUnitAttr ? "attrValue" : "*attrValue"; - const char *optionalCodeBody = R"( + if (!useProperties) { + const char *optionalCodeBody = R"( if (attrValue) return (*this)->setAttr({0}AttrName(), {1}); (*this)->removeAttr({0}AttrName());)"; - method->body() << formatv( - optionalCodeBody, getterName, - constBuildAttrFromParam(baseAttr, fctx, paramStr)); + method->body() << formatv( + optionalCodeBody, getterName, + constBuildAttrFromParam(baseAttr, fctx, paramStr)); + } else { + const char *optionalCodeBody = R"( + auto &odsProp = getProperties().{0}; + if (attrValue) + odsProp = {1}; + else + odsProp = nullptr;)"; + method->body() << formatv( + optionalCodeBody, attrName, + constBuildAttrFromParam(baseAttr, fctx, paramStr)); + } }; for (const NamedAttribute &namedAttr : op.getAttributes()) { @@ -1876,8 +1903,10 @@ continue; std::string setterName = op.getSetterName(namedAttr.name); std::string getterName = op.getGetterName(namedAttr.name); - emitAttrWithStorageType(setterName, getterName, namedAttr.attr); - emitAttrWithReturnType(setterName, getterName, namedAttr.attr); + emitAttrWithStorageType(setterName, getterName, namedAttr.name, + namedAttr.attr); + emitAttrWithReturnType(setterName, getterName, namedAttr.name, + namedAttr.attr); } } @@ -1886,8 +1915,8 @@ void OpEmitter::genOptionalAttrRemovers() { // use the string interface. Enables better compile time verification. auto emitRemoveAttr = [&](StringRef name, bool useProperties) { auto upperInitial = name.take_front().upper(); - auto *method = opClass.addMethod("::mlir::Attribute", - op.getRemoverName(name) + "Attr"); + auto *method = opClass.addInlineMethod("::mlir::Attribute", + op.getRemoverName(name) + "Attr"); if (!method) return; if (useProperties) { @@ -1926,7 +1955,11 @@ static void generateValueRangeStartAndEnd( rangeSizeCall = "odsOperandsSize"; } + // The method is trivial if the operation does not have any variadic operands. + // In that case, make sure to generate it inline. auto *method = opClass.addMethod("std::pair<unsigned, unsigned>", methodName, + numVariadic == 0 ? Method::Properties::Inline + : Method::Properties::None, parameters); if (!method) return; @@ -2028,17 +2061,19 @@ generateNamedOperandGetters(const Operator &op, Class &opClass, // Generate trampoline for calling 'getODSOperandIndexAndLength' with just // the index. This just calls the implementation in the base class but // passes the operand size as parameter. - Method *method = opClass.addMethod("std::pair<unsigned, unsigned>", - "getODSOperandIndexAndLength", - MethodParameter("unsigned", "index")); + Method *method = opClass.addInlineMethod( + "std::pair<unsigned, unsigned>", "getODSOperandIndexAndLength", + MethodParameter("unsigned", "index")); ERROR_IF_PRUNED(method, "getODSOperandIndexAndLength", op); MethodBody &body = method->body(); body.indent() << formatv( "return Base::getODSOperandIndexAndLength(index, {0});", rangeSizeCall); } - auto *m = opClass.addMethod(rangeType, "getODSOperands", - MethodParameter("unsigned", "index")); + // The implementation of this method is trivial and it is very load-bearing. + // Generate it inline.
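// Illustration (a sketch of the resulting inline method in the generated
// header; the exact body comes from valueRangeReturnCode and may differ):
//   ::mlir::Operation::operand_range getODSOperands(unsigned index) {
//     auto valueRange = getODSOperandIndexAndLength(index);
//     return {std::next(getOperation()->operand_begin(), valueRange.first),
//             std::next(getOperation()->operand_begin(),
//                       valueRange.first + valueRange.second)};
//   }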
+ auto *m = opClass.addInlineMethod(rangeType, "getODSOperands", + MethodParameter("unsigned", "index")); ERROR_IF_PRUNED(m, "getODSOperands", op); auto &body = m->body(); body << formatv(valueRangeReturnCode, rangeBeginCall, @@ -2052,10 +2087,10 @@ generateNamedOperandGetters(const Operator &op, Class &opClass, continue; std::string name = op.getGetterName(operand.name); if (operand.isOptional()) { - m = opClass.addMethod(isGenericAdaptorBase - ? rangeElementType - : generateTypeForGetter(operand), - name); + m = opClass.addInlineMethod(isGenericAdaptorBase + ? rangeElementType + : generateTypeForGetter(operand), + name); ERROR_IF_PRUNED(m, name, op); m->body().indent() << formatv("auto operands = getODSOperands({0});\n" "return operands.empty() ? {1}{{} : ", @@ -2074,19 +2109,19 @@ generateNamedOperandGetters(const Operator &op, Class &opClass, continue; } - m = opClass.addMethod("::mlir::OperandRangeRange", name); + m = opClass.addInlineMethod("::mlir::OperandRangeRange", name); ERROR_IF_PRUNED(m, name, op); m->body() << " return getODSOperands(" << i << ").split(" << segmentAttr << "Attr());"; } else if (operand.isVariadic()) { - m = opClass.addMethod(rangeType, name); + m = opClass.addInlineMethod(rangeType, name); ERROR_IF_PRUNED(m, name, op); m->body() << " return getODSOperands(" << i << ");"; } else { - m = opClass.addMethod(isGenericAdaptorBase - ? rangeElementType - : generateTypeForGetter(operand), - name); + m = opClass.addInlineMethod(isGenericAdaptorBase + ? rangeElementType + : generateTypeForGetter(operand), + name); ERROR_IF_PRUNED(m, name, op); m->body().indent() << "return "; if (!isGenericAdaptorBase) @@ -2138,12 +2173,16 @@ void OpEmitter::genNamedOperandSetters() { } else { returnType = "::mlir::OpOperand &"; } - auto *m = opClass.addMethod(returnType, name + "Mutable"); + bool isVariadicOperand = + operand.isVariadicOfVariadic() || operand.isVariableLength(); + auto *m = opClass.addMethod(returnType, name + "Mutable", + isVariadicOperand ? Method::Properties::None + : Method::Properties::Inline); ERROR_IF_PRUNED(m, name, op); auto &body = m->body(); body << " auto range = getODSOperandIndexAndLength(" << i << ");\n"; - if (!operand.isVariadicOfVariadic() && !operand.isVariableLength()) { + if (!isVariadicOperand) { // In case of a single operand, return a single OpOperand. body << " return getOperation()->getOpOperand(range.first);\n"; continue; @@ -2228,9 +2267,11 @@ void OpEmitter::genNamedResultGetters() { numVariadicResults, numNormalResults, "getOperation()->getNumResults()", attrSizedResults, attrSizeInitCode, op.getResults()); - auto *m = - opClass.addMethod("::mlir::Operation::result_range", "getODSResults", - MethodParameter("unsigned", "index")); + // The implementation of this method is trivial and it is very load-bearing. + // Generate it inline. 
+ auto *m = opClass.addInlineMethod("::mlir::Operation::result_range", + "getODSResults", + MethodParameter("unsigned", "index")); ERROR_IF_PRUNED(m, "getODSResults", op); m->body() << formatv(valueRangeReturnCode, "getOperation()->result_begin()", "getODSResultIndexAndLength(index)"); @@ -2241,7 +2282,7 @@ void OpEmitter::genNamedResultGetters() { continue; std::string name = op.getGetterName(result.name); if (result.isOptional()) { - m = opClass.addMethod(generateTypeForGetter(result), name); + m = opClass.addInlineMethod(generateTypeForGetter(result), name); ERROR_IF_PRUNED(m, name, op); m->body() << " auto results = getODSResults(" << i << ");\n" << llvm::formatv(" return results.empty()" @@ -2249,11 +2290,11 @@ void OpEmitter::genNamedResultGetters() { " : ::llvm::cast<{0}>(*results.begin());", m->getReturnType()); } else if (result.isVariadic()) { - m = opClass.addMethod("::mlir::Operation::result_range", name); + m = opClass.addInlineMethod("::mlir::Operation::result_range", name); ERROR_IF_PRUNED(m, name, op); m->body() << " return getODSResults(" << i << ");"; } else { - m = opClass.addMethod(generateTypeForGetter(result), name); + m = opClass.addInlineMethod(generateTypeForGetter(result), name); ERROR_IF_PRUNED(m, name, op); m->body() << llvm::formatv( " return ::llvm::cast<{0}>(*getODSResults({1}).begin());", @@ -2272,15 +2313,15 @@ void OpEmitter::genNamedRegionGetters() { // Generate the accessors for a variadic region. if (region.isVariadic()) { - auto *m = - opClass.addMethod("::mlir::MutableArrayRef<::mlir::Region>", name); + auto *m = opClass.addInlineMethod( + "::mlir::MutableArrayRef<::mlir::Region>", name); ERROR_IF_PRUNED(m, name, op); m->body() << formatv(" return (*this)->getRegions().drop_front({0});", i); continue; } - auto *m = opClass.addMethod("::mlir::Region &", name); + auto *m = opClass.addInlineMethod("::mlir::Region &", name); ERROR_IF_PRUNED(m, name, op); m->body() << formatv(" return (*this)->getRegion({0});", i); } @@ -2295,7 +2336,7 @@ void OpEmitter::genNamedSuccessorGetters() { std::string name = op.getGetterName(successor.name); // Generate the accessors for a variadic successor list. if (successor.isVariadic()) { - auto *m = opClass.addMethod("::mlir::SuccessorRange", name); + auto *m = opClass.addInlineMethod("::mlir::SuccessorRange", name); ERROR_IF_PRUNED(m, name, op); m->body() << formatv( " return {std::next((*this)->successor_begin(), {0}), " @@ -2304,7 +2345,7 @@ void OpEmitter::genNamedSuccessorGetters() { continue; } - auto *m = opClass.addMethod("::mlir::Block *", name); + auto *m = opClass.addInlineMethod("::mlir::Block *", name); ERROR_IF_PRUNED(m, name, op); m->body() << formatv(" return (*this)->getSuccessor({0});", i); } @@ -4122,8 +4163,12 @@ OpOperandAdaptorEmitter::OpOperandAdaptorEmitter( // Generate named accessor with Attribute return type. auto emitAttrWithStorageType = [&](StringRef name, StringRef emitName, Attribute attr) { - auto *method = - genericAdaptorBase.addMethod(attr.getStorageType(), emitName + "Attr"); + // The method body is trivial if the attribute does not have a default + // value, in which case the default value may be arbitrary code. + auto *method = genericAdaptorBase.addMethod( + attr.getStorageType(), emitName + "Attr", + attr.hasDefaultValue() || !useProperties ? 
Method::Properties::None + : Method::Properties::Inline); ERROR_IF_PRUNED(method, "Adaptor::" + emitName + "Attr", op); auto &body = method->body().indent(); if (!useProperties) @@ -4153,8 +4198,8 @@ OpOperandAdaptorEmitter::OpOperandAdaptorEmitter( m->body() << " return properties;"; } { - auto *m = - genericAdaptorBase.addMethod("::mlir::DictionaryAttr", "getAttributes"); + auto *m = genericAdaptorBase.addInlineMethod("::mlir::DictionaryAttr", + "getAttributes"); ERROR_IF_PRUNED(m, "Adaptor::getAttributes", op); m->body() << " return odsAttrs;"; } @@ -4177,21 +4222,21 @@ OpOperandAdaptorEmitter::OpOperandAdaptorEmitter( // Generate the accessors for a variadic region. std::string name = op.getGetterName(region.name); if (region.isVariadic()) { - auto *m = genericAdaptorBase.addMethod("::mlir::RegionRange", name); + auto *m = genericAdaptorBase.addInlineMethod("::mlir::RegionRange", name); ERROR_IF_PRUNED(m, "Adaptor::" + name, op); m->body() << formatv(" return odsRegions.drop_front({0});", i); continue; } - auto *m = genericAdaptorBase.addMethod("::mlir::Region &", name); + auto *m = genericAdaptorBase.addInlineMethod("::mlir::Region &", name); ERROR_IF_PRUNED(m, "Adaptor::" + name, op); m->body() << formatv(" return *odsRegions[{0}];", i); } if (numRegions > 0) { // Any invalid overlap for `getRegions` will have been diagnosed before // here already. - if (auto *m = - genericAdaptorBase.addMethod("::mlir::RegionRange", "getRegions")) + if (auto *m = genericAdaptorBase.addInlineMethod("::mlir::RegionRange", + "getRegions")) m->body() << " return odsRegions;"; } diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 1ffac059f1981..c8e0476d45b9a 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -508,7 +508,7 @@ const char *const optionalOperandParserCode = R"( )"; const char *const operandParserCode = R"( {0}OperandsLoc = parser.getCurrentLocation(); - if (parser.parseOperand({0}RawOperands[0])) + if (parser.parseOperand({0}RawOperand)) return ::mlir::failure(); )"; /// The code snippet used to generate a parser call for a VariadicOfVariadic @@ -564,11 +564,11 @@ const char *const typeParserCode = R"( {0} type; if (parser.parseCustomTypeWithFallback(type)) return ::mlir::failure(); - {1}RawTypes[0] = type; + {1}RawType = type; } )"; const char *const qualifiedTypeParserCode = R"( - if (parser.parseType({1}RawTypes[0])) + if (parser.parseType({1}RawType)) return ::mlir::failure(); )"; @@ -842,9 +842,9 @@ static void genElementParserStorage(FormatElement *element, const Operator &op, } } else { body << " ::mlir::OpAsmParser::UnresolvedOperand " << name - << "RawOperands[1];\n" + << "RawOperand{};\n" << " ::llvm::ArrayRef<::mlir::OpAsmParser::UnresolvedOperand> " - << name << "Operands(" << name << "RawOperands);"; + << name << "Operands(&" << name << "RawOperand, 1);"; } body << llvm::formatv(" ::llvm::SMLoc {0}OperandsLoc;\n" " (void){0}OperandsLoc;\n", @@ -879,10 +879,11 @@ static void genElementParserStorage(FormatElement *element, const Operator &op, if (lengthKind != ArgumentLengthKind::Single) body << " ::llvm::SmallVector<::mlir::Type, 1> " << name << "Types;\n"; else - body << llvm::formatv(" ::mlir::Type {0}RawTypes[1];\n", name) - << llvm::formatv( - " ::llvm::ArrayRef<::mlir::Type> {0}Types({0}RawTypes);\n", - name); + body + << llvm::formatv(" ::mlir::Type {0}RawType{{};\n", name) + << llvm::formatv( + " ::llvm::ArrayRef<::mlir::Type> {0}Types(&{0}RawType, 1);\n", + name); } else if 
(auto *dir = dyn_cast(element)) { ArgumentLengthKind ignored; body << " ::llvm::ArrayRef<::mlir::Type> " @@ -910,7 +911,7 @@ static void genCustomParameterParser(FormatElement *param, MethodBody &body) { else if (lengthKind == ArgumentLengthKind::Optional) body << llvm::formatv("{0}Operand", name); else - body << formatv("{0}RawOperands[0]", name); + body << formatv("{0}RawOperand", name); } else if (auto *region = dyn_cast(param)) { StringRef name = region->getVar()->name; @@ -939,7 +940,7 @@ static void genCustomParameterParser(FormatElement *param, MethodBody &body) { else if (lengthKind == ArgumentLengthKind::Optional) body << llvm::formatv("{0}Type", listName); else - body << formatv("{0}RawTypes[0]", listName); + body << formatv("{0}RawType", listName); } else if (auto *string = dyn_cast(param)) { FmtContext ctx; diff --git a/mlir/tools/mlir-translate/mlir-translate.cpp b/mlir/tools/mlir-translate/mlir-translate.cpp index 4f9661c058c2d..309def888a073 100644 --- a/mlir/tools/mlir-translate/mlir-translate.cpp +++ b/mlir/tools/mlir-translate/mlir-translate.cpp @@ -23,6 +23,7 @@ void registerTestRoundtripSPIRV(); void registerTestRoundtripDebugSPIRV(); #ifdef MLIR_INCLUDE_TESTS void registerTestToLLVMIR(); +void registerTestFromLLVMIR(); #endif } // namespace mlir @@ -31,6 +32,7 @@ static void registerTestTranslations() { registerTestRoundtripDebugSPIRV(); #ifdef MLIR_INCLUDE_TESTS registerTestToLLVMIR(); + registerTestFromLLVMIR(); #endif } diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index f05bcabb44174..701c35150f304 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -10,12 +10,19 @@ include(ExtendPath) +# The generated headers will be placed in clang's resource directory if present. +if(OPENMP_STANDALONE_BUILD OR NOT LLVM_RUNTIMES_BUILD) + set(LIBOMP_HEADERS_INTDIR ${CMAKE_CURRENT_BINARY_DIR}) +else() + set(LIBOMP_HEADERS_INTDIR ${LLVM_BINARY_DIR}/${LIBOMP_HEADERS_INSTALL_PATH}) +endif() + # Configure omp.h, kmp_config.h and omp-tools.h if necessary -configure_file(${LIBOMP_INC_DIR}/omp.h.var omp.h @ONLY) -configure_file(${LIBOMP_INC_DIR}/ompx.h.var ompx.h @ONLY) -configure_file(kmp_config.h.cmake kmp_config.h @ONLY) +configure_file(${LIBOMP_INC_DIR}/omp.h.var ${LIBOMP_HEADERS_INTDIR}/omp.h @ONLY) +configure_file(${LIBOMP_INC_DIR}/ompx.h.var ${LIBOMP_HEADERS_INTDIR}/ompx.h @ONLY) +configure_file(kmp_config.h.cmake ${LIBOMP_HEADERS_INTDIR}/kmp_config.h @ONLY) if(${LIBOMP_OMPT_SUPPORT}) - configure_file(${LIBOMP_INC_DIR}/omp-tools.h.var omp-tools.h @ONLY) + configure_file(${LIBOMP_INC_DIR}/omp-tools.h.var ${LIBOMP_HEADERS_INTDIR}/omp-tools.h @ONLY) endif() # Generate message catalog files: kmp_i18n_id.inc and kmp_i18n_default.inc @@ -48,6 +55,7 @@ include_directories( ${LIBOMP_SRC_DIR}/i18n ${LIBOMP_INC_DIR} ${LIBOMP_SRC_DIR}/thirdparty/ittnotify + ${LIBOMP_HEADERS_INTDIR} ) # Building with time profiling support requires LLVM directory includes. 
@@ -419,15 +427,15 @@ else() endif() install( FILES - ${CMAKE_CURRENT_BINARY_DIR}/omp.h - ${CMAKE_CURRENT_BINARY_DIR}/ompx.h + ${LIBOMP_HEADERS_INTDIR}/omp.h + ${LIBOMP_HEADERS_INTDIR}/ompx.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} ) if(${LIBOMP_OMPT_SUPPORT}) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH}) + install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH}) # install under legacy name ompt.h - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) - set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE) + install(FILES ${LIBOMP_HEADERS_INTDIR}/omp-tools.h DESTINATION ${LIBOMP_HEADERS_INSTALL_PATH} RENAME ompt.h) + set(LIBOMP_OMP_TOOLS_INCLUDE_DIR ${LIBOMP_HEADERS_INTDIR} PARENT_SCOPE) endif() if(${BUILD_FORTRAN_MODULES}) set (destination ${LIBOMP_HEADERS_INSTALL_PATH}) diff --git a/openmp/runtime/test/affinity/kmp-abs-hw-subset.c b/openmp/runtime/test/affinity/kmp-abs-hw-subset.c index 7b3493f1e5c49..025a239a1b839 100644 --- a/openmp/runtime/test/affinity/kmp-abs-hw-subset.c +++ b/openmp/runtime/test/affinity/kmp-abs-hw-subset.c @@ -6,6 +6,12 @@ // RUN: env OMP_PLACES=threads %libomp-run 3 1 // RUN: env OMP_PLACES=threads %libomp-run 3 2 // REQUIRES: linux +// +// The test requires topologies with sockets, cores, threads layers where +// the socket layer contains multiple threads. +// The s390x architecture does not produce this topology and seems to have +// one thread per socket. +// UNSUPPORTED: s390x-target-arch #include #include diff --git a/polly/lib/Analysis/ScopDetectionDiagnostic.cpp b/polly/lib/Analysis/ScopDetectionDiagnostic.cpp index 30fbd17c78bfe..d2fbcf7319856 100644 --- a/polly/lib/Analysis/ScopDetectionDiagnostic.cpp +++ b/polly/lib/Analysis/ScopDetectionDiagnostic.cpp @@ -45,7 +45,7 @@ using namespace llvm; #define DEBUG_TYPE "polly-detect" #define SCOP_STAT(NAME, DESC) \ - { "polly-detect", "NAME", "Number of rejected regions: " DESC } + {"polly-detect", "NAME", "Number of rejected regions: " DESC} static Statistic RejectStatistics[] = { SCOP_STAT(CFG, ""), diff --git a/polly/lib/Support/GICHelper.cpp b/polly/lib/Support/GICHelper.cpp index 0e491944c162e..04d422cf99462 100644 --- a/polly/lib/Support/GICHelper.cpp +++ b/polly/lib/Support/GICHelper.cpp @@ -83,10 +83,10 @@ APInt polly::APIntFromVal(__isl_take isl_val *Val) { } template -static inline std::string stringFromIslObjInternal(__isl_keep ISLTy *isl_obj, - ISL_CTX_GETTER ctx_getter_fn, - ISL_PRINTER printer_fn, - std::string DefaultValue) { +static inline std::string +stringFromIslObjInternal(__isl_keep ISLTy *isl_obj, + ISL_CTX_GETTER ctx_getter_fn, ISL_PRINTER printer_fn, + const std::string &DefaultValue) { if (!isl_obj) return DefaultValue; isl_ctx *ctx = ctx_getter_fn(isl_obj); @@ -134,12 +134,11 @@ ISL_C_OBJECT_TO_STRING(union_map) ISL_C_OBJECT_TO_STRING(union_pw_aff) ISL_C_OBJECT_TO_STRING(union_pw_multi_aff) -static void replace(std::string &str, const std::string &find, - const std::string &replace) { +static void replace(std::string &str, StringRef find, StringRef replace) { size_t pos = 0; while ((pos = str.find(find, pos)) != std::string::npos) { - str.replace(pos, find.length(), replace); - pos += replace.length(); + str.replace(pos, find.size(), replace); + pos += replace.size(); } } diff --git a/polly/lib/Transform/MatmulOptimizer.cpp b/polly/lib/Transform/MatmulOptimizer.cpp index 51ae5a778e4fa..ff1683b2d63c5 100644 --- 
a/polly/lib/Transform/MatmulOptimizer.cpp +++ b/polly/lib/Transform/MatmulOptimizer.cpp @@ -598,7 +598,7 @@ createMacroKernel(isl::schedule_node Node, /// @param MMI Parameters of the matrix multiplication operands. /// @return The size of the widest type of the matrix multiplication operands /// in bytes, including alignment padding. -static uint64_t getMatMulAlignTypeSize(MatMulInfoTy MMI) { +static uint64_t getMatMulAlignTypeSize(const MatMulInfoTy &MMI) { auto *S = MMI.A->getStatement()->getParent(); auto &DL = S->getFunction().getParent()->getDataLayout(); auto ElementSizeA = DL.getTypeAllocSize(MMI.A->getElementType()); @@ -613,7 +613,7 @@ static uint64_t getMatMulAlignTypeSize(MatMulInfoTy MMI) { /// @param MMI Parameters of the matrix multiplication operands. /// @return The size of the widest type of the matrix multiplication operands /// in bits. -static uint64_t getMatMulTypeSize(MatMulInfoTy MMI) { +static uint64_t getMatMulTypeSize(const MatMulInfoTy &MMI) { auto *S = MMI.A->getStatement()->getParent(); auto &DL = S->getFunction().getParent()->getDataLayout(); auto ElementSizeA = DL.getTypeSizeInBits(MMI.A->getElementType()); @@ -635,7 +635,7 @@ static uint64_t getMatMulTypeSize(MatMulInfoTy MMI) { /// @return The structure of type MicroKernelParamsTy. /// @see MicroKernelParamsTy static MicroKernelParamsTy getMicroKernelParams(const TargetTransformInfo *TTI, - MatMulInfoTy MMI) { + const MatMulInfoTy &MMI) { assert(TTI && "The target transform info should be provided."); // Nvec - Number of double-precision floating-point numbers that can be hold @@ -712,7 +712,7 @@ static void getTargetCacheParameters(const llvm::TargetTransformInfo *TTI) { static MacroKernelParamsTy getMacroKernelParams(const llvm::TargetTransformInfo *TTI, const MicroKernelParamsTy &MicroKernelParams, - MatMulInfoTy MMI) { + const MatMulInfoTy &MMI) { getTargetCacheParameters(TTI); // According to www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf, // it requires information about the first two levels of a cache to determine diff --git a/utils/bazel/.bazelrc index 1d7cf4a4df1b1..5a6d1889076af 100644 --- a/utils/bazel/.bazelrc +++ b/utils/bazel/.bazelrc @@ -9,10 +9,16 @@ # Prevent invalid caching if input files are modified during a build. build --experimental_guard_against_concurrent_changes +# Automatically enable --config=(linux|macos|windows) based on the host +build --enable_platform_specific_config + # In opt mode, bazel by default builds both PIC and non-PIC object files for # tests vs binaries. We don't need this feature and it slows down opt builds # considerably. -build --force_pic +# TODO: Remove platform specifics once we're on bazel 7.x https://github.com/bazelbuild/bazel/issues/12439 +# Apple platforms always enable pic so this flag is unnecessary anyway +build:linux --force_pic +build:windows --force_pic # Shared objects take up more space. With fast linkers and binaries that aren't # super large, the benefits of shared objects are minimal. @@ -34,6 +40,9 @@ build --incompatible_no_implicit_file_export # eventually become the default common --incompatible_disallow_empty_glob +# TODO: Remove once we move to bazel 7.x +build --experimental_cc_shared_library + ############################################################################### # Options to select different strategies for linking potential dependent # libraries. The default leaves it disabled.
diff --git a/utils/bazel/configure.bzl b/utils/bazel/configure.bzl index d6cd6aa0813e4..717b86d7d6e8a 100644 --- a/utils/bazel/configure.bzl +++ b/utils/bazel/configure.bzl @@ -4,8 +4,6 @@ """Helper macros to configure the LLVM overlay project.""" -load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") - # Directory of overlay files relative to WORKSPACE DEFAULT_OVERLAY_PATH = "llvm-project-overlay" @@ -77,6 +75,7 @@ def _extract_cmake_settings(repository_ctx, llvm_cmake): "LLVM_VERSION_MAJOR": None, "LLVM_VERSION_MINOR": None, "LLVM_VERSION_PATCH": None, + "LLVM_VERSION_SUFFIX": None, } # It would be easier to use external commands like sed(1) and python. @@ -126,6 +125,13 @@ def _extract_cmake_settings(repository_ctx, llvm_cmake): c["LLVM_VERSION_PATCH"], ) + c["PACKAGE_VERSION"] = "{}.{}.{}{}".format( + c["LLVM_VERSION_MAJOR"], + c["LLVM_VERSION_MINOR"], + c["LLVM_VERSION_PATCH"], + c["LLVM_VERSION_SUFFIX"], + ) + return c def _write_dict_to_file(repository_ctx, filepath, header, vars): diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 14f2b453a7660..2c3c39e5b53df 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -2,7 +2,6 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -load("@bazel_skylib//rules:expand_template.bzl", "expand_template") load( "//:vars.bzl", "LLVM_VERSION", @@ -13,6 +12,7 @@ load( load("//:workspace_root.bzl", "workspace_root") load("//llvm:binary_alias.bzl", "binary_alias") load("//llvm:cc_plugin_library.bzl", "cc_plugin_library") +load("//llvm:driver.bzl", "llvm_driver_cc_binary") load("//llvm:tblgen.bzl", "gentbl") package( @@ -736,14 +736,6 @@ gentbl( "-gen-clang-attr-impl", "include/clang/AST/AttrImpl.inc", ), - ( - "-gen-clang-attr-can-print-left-list", - "include/clang/Basic/AttrLeftSideCanPrintList.inc", - ), - ( - "-gen-clang-attr-must-print-left-list", - "include/clang/Basic/AttrLeftSideMustPrintList.inc", - ), ], tblgen = ":clang-tblgen", td_file = "include/clang/Basic/Attr.td", @@ -2042,6 +2034,7 @@ cc_library( name = "install_api", srcs = glob([ "lib/InstallAPI/*.cpp", + "lib/InstallAPI/*.h", ]), hdrs = glob([ "include/clang/InstallAPI/*.h", @@ -2320,20 +2313,9 @@ cc_binary( ], ) -expand_template( - name = "clang_main", - out = "clang-driver.cpp", - substitutions = { - "@TOOL_NAME@": "clang", - }, - template = "//llvm:cmake/modules/llvm-driver-template.cpp.in", -) - cc_library( name = "clang-driver", - srcs = glob([ - "tools/driver/*.cpp", - ]) + ["clang-driver.cpp"], + srcs = glob(["tools/driver/*.cpp"]), copts = [ # Disable stack frame size checks in the driver because # clang::ensureStackAddressSpace allocates a large array on the stack. 
@@ -2371,13 +2353,10 @@ cc_library( ], ) -cc_binary( +llvm_driver_cc_binary( name = "clang", - srcs = [], stamp = 0, - deps = [ - ":clang-driver", - ], + deps = [":clang-driver"], ) cc_binary( @@ -2622,19 +2601,9 @@ gentbl( td_srcs = ["//llvm:include/llvm/Option/OptParser.td"], ) -expand_template( - name = "clang-scan-deps-main", - out = "clang-scan-deps-driver.cpp", - substitutions = { - "@TOOL_NAME@": "clang_scan_deps", - }, - template = "//llvm:cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "clang-scan-deps", - srcs = glob(["tools/clang-scan-deps/*.cpp"]) + ["clang-scan-deps-driver.cpp"], - stamp = 0, +cc_library( + name = "clang-scan-deps-lib", + srcs = glob(["tools/clang-scan-deps/*.cpp"]), deps = [ ":ScanDepsTableGen", ":driver", @@ -2646,6 +2615,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "clang-scan-deps", + stamp = 0, + deps = [":clang-scan-deps-lib"], +) + cc_library( name = "extract_api", srcs = glob([ diff --git a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel index 577e6c033b4e8..9a4e103be01e2 100644 --- a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel @@ -54,3 +54,57 @@ cc_library( ":config", ], ) + +cc_library( + name = "orc_rt_common_headers", + hdrs = [ + "lib/orc/adt.h", + "lib/orc/bitmask_enum.h", + "lib/orc/common.h", + "lib/orc/compiler.h", + "lib/orc/debug.h", + "lib/orc/endianness.h", + "lib/orc/error.h", + "lib/orc/executor_address.h", + "lib/orc/executor_symbol_def.h", + "lib/orc/extensible_rtti.h", + "lib/orc/interval_map.h", + "lib/orc/interval_set.h", + "lib/orc/simple_packed_serialization.h", + "lib/orc/stl_extras.h", + "lib/orc/string_pool.h", + "lib/orc/wrapper_function_utils.h", + ], + strip_include_prefix = "lib/orc", +) + +cc_library( + name = "orc_rt", + srcs = [ + "lib/orc/debug.cpp", + "lib/orc/dlfcn_wrapper.cpp", + "lib/orc/extensible_rtti.cpp", + "lib/orc/log_error_to_stderr.cpp", + "lib/orc/run_program_wrapper.cpp", + ] + select({ + "@platforms//os:macos": [ + "lib/orc/macho_platform.cpp", + "lib/orc/macho_platform.h", + "lib/orc/macho_tlv.arm64.S", + "lib/orc/macho_tlv.x86-64.S", + ], + "@platforms//os:linux": [ + "lib/orc/elfnix_platform.cpp", + "lib/orc/elfnix_platform.h", + "lib/orc/elfnix_tls.aarch64.S", + "lib/orc/elfnix_tls.ppc64.S", + "lib/orc/elfnix_tls.x86-64.S", + ], + }), + hdrs = glob(["include/orc_rt/*.h"]), + includes = ["include"], + linkstatic = True, + deps = [ + ":orc_rt_common_headers", + ], +) diff --git a/utils/bazel/llvm-project-overlay/compiler-rt/lib/orc/tests/unit/BUILD.bazel b/utils/bazel/llvm-project-overlay/compiler-rt/lib/orc/tests/unit/BUILD.bazel new file mode 100644 index 0000000000000..520bf2ab397a0 --- /dev/null +++ b/utils/bazel/llvm-project-overlay/compiler-rt/lib/orc/tests/unit/BUILD.bazel @@ -0,0 +1,28 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], +) + +licenses(["notice"]) + +[ + cc_test( + name = "{}".format(file.split(".")[0]), + srcs = [file] + glob(["*.h"]), + deps = [ + "//compiler-rt:orc_rt", + "//compiler-rt:orc_rt_common_headers", + "//third-party/unittest:gtest", + "//third-party/unittest:gtest_main", + ], + ) + for file in glob( + ["*.cpp"], + # TODO: Broken with older libc++ versions, currently unused anyways + exclude = ["interval_set_test.cpp"], + ) +] diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 9dfe4c48184e3..d38dc3029f74f 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -68,7 +68,6 @@ libc_support_library( name = "llvm_libc_macros_math_macros", hdrs = ["include/llvm-libc-macros/math-macros.h"], deps = [":llvm_libc_macros_limits_macros"], - defines = ["__FP_LOGBNAN_MIN"], ) libc_support_library( @@ -95,6 +94,10 @@ libc_support_library( libc_support_library( name = "llvm_libc_types_float128", hdrs = ["include/llvm-libc-types/float128.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), deps = [":llvm_libc_macros_float_macros"], ) @@ -103,6 +106,18 @@ libc_support_library( hdrs = ["include/llvm-libc-macros/linux/fcntl-macros.h"], ) +############################ Proxy Header Files ################################ + +libc_support_library( + name = "hdr_math_macros", + hdrs = ["hdr/math_macros.h"], +) + +libc_support_library( + name = "hdr_fenv_macros", + hdrs = ["hdr/fenv_macros.h"], +) + ############################### Support libraries ############################## libc_support_library( @@ -473,12 +488,12 @@ libc_support_library( "src/__support/ryu_long_double_constants.h", ], deps = [ + ":__support_big_int", ":__support_common", ":__support_cpp_type_traits", ":__support_fputil_dyadic_float", ":__support_fputil_fp_bits", ":__support_libc_assert", - ":__support_uint", ], ) @@ -491,8 +506,8 @@ libc_support_library( ) libc_support_library( - name = "__support_uint", - hdrs = ["src/__support/UInt.h"], + name = "__support_big_int", + hdrs = ["src/__support/big_int.h"], deps = [ ":__support_cpp_array", ":__support_cpp_bit", @@ -517,11 +532,11 @@ libc_support_library( libc_support_library( name = "__support_uint128", - hdrs = ["src/__support/UInt128.h"], + hdrs = ["src/__support/uint128.h"], deps = [ + ":__support_big_int", ":__support_macros_attributes", ":__support_macros_properties_types", - ":__support_uint", ], ) @@ -550,6 +565,7 @@ libc_support_library( name = "__support_integer_to_string", hdrs = ["src/__support/integer_to_string.h"], deps = [ + ":__support_big_int", ":__support_common", ":__support_cpp_algorithm", ":__support_cpp_bit", @@ -558,7 +574,6 @@ libc_support_library( ":__support_cpp_span", ":__support_cpp_string_view", ":__support_cpp_type_traits", - ":__support_uint", ], ) @@ -692,7 +707,6 @@ libc_support_library( ":__support_cpp_type_traits", ":__support_fputil_fenv_impl", ":__support_fputil_fp_bits", - ":math_utils", ], ) @@ -733,7 +747,8 @@ libc_support_library( ":__support_macros_properties_architectures", ":__support_macros_sanitizer", ":errno", - ":llvm_libc_macros_math_macros", + ":hdr_fenv_macros", + ":hdr_math_macros", ], ) @@ -742,6 +757,7 @@ libc_support_library( hdrs = ["src/__support/FPUtil/rounding_mode.h"], deps = [ 
":__support_macros_attributes", + ":hdr_fenv_macros", ], ) @@ -807,7 +823,7 @@ libc_support_library( ":__support_fputil_normal_float", ":__support_macros_optimization", ":__support_uint128", - ":llvm_libc_macros_math_macros", + ":hdr_math_macros", ], ) @@ -821,7 +837,7 @@ libc_support_library( ":__support_fputil_fp_bits", ":__support_fputil_rounding_mode", ":__support_macros_attributes", - ":llvm_libc_macros_math_macros", + ":hdr_math_macros", ], ) @@ -962,22 +978,28 @@ libc_support_library( name = "__support_fputil_dyadic_float", hdrs = ["src/__support/FPUtil/dyadic_float.h"], deps = [ + ":__support_big_int", ":__support_common", ":__support_fputil_fp_bits", ":__support_fputil_multiply_add", ":__support_macros_optimization", - ":__support_uint", ], ) libc_support_library( name = "__support_osutil_syscall", hdrs = ["src/__support/OSUtil/syscall.h"], - textual_hdrs = [ - "src/__support/OSUtil/linux/syscall.h", - "src/__support/OSUtil/linux/aarch64/syscall.h", - "src/__support/OSUtil/linux/x86_64/syscall.h", - ], + textual_hdrs = select({ + "@platforms//os:macos": [ + "src/__support/OSUtil/darwin/syscall.h", + "src/__support/OSUtil/darwin/arm/syscall.h", + ], + "@platforms//os:linux": [ + "src/__support/OSUtil/linux/syscall.h", + "src/__support/OSUtil/linux/aarch64/syscall.h", + "src/__support/OSUtil/linux/x86_64/syscall.h", + ], + }), deps = [ ":__support_common", ":__support_cpp_bit", @@ -987,9 +1009,10 @@ libc_support_library( libc_support_library( name = "__support_osutil_io", hdrs = ["src/__support/OSUtil/io.h"], - textual_hdrs = [ - "src/__support/OSUtil/linux/io.h", - ], + textual_hdrs = select({ + "@platforms//os:macos": ["src/__support/OSUtil/darwin/io.h"], + "@platforms//os:linux": ["src/__support/OSUtil/linux/io.h"], + }), deps = [ ":__support_common", ":__support_cpp_string_view", @@ -1000,8 +1023,12 @@ libc_support_library( libc_support_library( name = "__support_osutil_quick_exit", - hdrs = ["src/__support/OSUtil/quick_exit.h"], srcs = ["src/__support/OSUtil/linux/quick_exit.cpp"], + hdrs = ["src/__support/OSUtil/quick_exit.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), deps = [ ":__support_osutil_syscall", ], @@ -1020,6 +1047,10 @@ libc_support_library( "src/__support/StringUtil/error_to_string.h", "src/__support/StringUtil/signal_to_string.h", ], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), deps = [ ":__support_cpp_array", ":__support_cpp_span", @@ -1037,6 +1068,10 @@ libc_support_library( "src/__support/threads/mutex.h", "src/__support/threads/mutex_common.h", ], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), textual_hdrs = [ "src/__support/threads/linux/mutex.h", "src/__support/threads/linux/futex_word.h", @@ -1172,6 +1207,16 @@ libc_function( ], ) +libc_function( + name = "fesetexcept", + srcs = ["src/fenv/fesetexcept.cpp"], + hdrs = ["src/fenv/fesetexcept.h"], + deps = [ + ":__support_common", + ":__support_fputil_fenv_impl", + ], +) + libc_function( name = "fegetexceptflag", srcs = ["src/fenv/fegetexceptflag.cpp"], @@ -1204,19 +1249,6 @@ libc_function( ################################ math targets ################################ -libc_support_library( - name = "math_utils", - srcs = ["src/math/generic/math_utils.cpp"], - hdrs = ["src/math/generic/math_utils.h"], - deps = [ - "__support_cpp_bit", - 
"__support_cpp_type_traits", - ":__support_common", - ":errno", - ":llvm_libc_macros_math_macros", - ], -) - libc_support_library( name = "common_constants", srcs = ["src/math/generic/common_constants.cpp"], @@ -1265,7 +1297,6 @@ libc_support_library( ":__support_fputil_nearest_integer", ":__support_fputil_polyeval", ":common_constants", - ":math_utils", ], ) @@ -1673,7 +1704,6 @@ libc_math_function( ":__support_fputil_rounding_mode", ":__support_macros_optimization", ":inv_trigf_utils", - ":math_utils", ], ) @@ -1701,12 +1731,16 @@ libc_math_function(name = "fabsf") libc_math_function(name = "fabsl") +libc_math_function(name = "fabsf128") + libc_math_function(name = "fdim") libc_math_function(name = "fdimf") libc_math_function(name = "fdiml") +libc_math_function(name = "fdimf128") + libc_math_function( name = "ceil", specializations = [ @@ -1730,6 +1764,8 @@ libc_math_function( ], ) +libc_math_function(name = "ceilf128") + libc_math_function( name = "floor", specializations = [ @@ -1748,12 +1784,16 @@ libc_math_function( libc_math_function(name = "floorl") +libc_math_function(name = "floorf128") + libc_math_function(name = "ldexp") libc_math_function(name = "ldexpf") libc_math_function(name = "ldexpl") +libc_math_function(name = "ldexpf128") + libc_math_function( name = "trunc", specializations = [ @@ -1772,6 +1812,8 @@ libc_math_function( libc_math_function(name = "truncl") +libc_math_function(name = "truncf128") + libc_math_function( name = "round", specializations = [ @@ -1790,6 +1832,8 @@ libc_math_function( libc_math_function(name = "roundl") +libc_math_function(name = "roundf128") + libc_math_function( name = "fmod", additional_deps = [ @@ -1810,6 +1854,8 @@ libc_math_function(name = "frexpf") libc_math_function(name = "frexpl") +libc_math_function(name = "frexpf128") + libc_math_function(name = "hypot") libc_math_function( @@ -1825,12 +1871,16 @@ libc_math_function(name = "logbf") libc_math_function(name = "logbl") +libc_math_function(name = "logbf128") + libc_math_function(name = "modf") libc_math_function(name = "modff") libc_math_function(name = "modfl") +libc_math_function(name = "modff128") + libc_math_function(name = "remquo") libc_math_function(name = "remquof") @@ -1849,12 +1899,16 @@ libc_math_function(name = "fminf") libc_math_function(name = "fminl") +libc_math_function(name = "fminf128") + libc_math_function(name = "fmax") libc_math_function(name = "fmaxf") libc_math_function(name = "fmaxl") +libc_math_function(name = "fmaxf128") + libc_math_function( name = "cosf", additional_deps = [ @@ -1927,13 +1981,25 @@ libc_math_function( ], ) +libc_math_function( + name = "sqrtf128", + additional_deps = [ + ":__support_fputil_sqrt", + ], +) + libc_math_function(name = "copysign") libc_math_function(name = "copysignf") libc_math_function(name = "copysignl") -libc_math_function(name = "copysignf128") +libc_math_function( + name = "copysignf128", + additional_deps = [ + ":llvm_libc_types_float128", + ], +) libc_math_function(name = "ilogb") @@ -1941,36 +2007,48 @@ libc_math_function(name = "ilogbf") libc_math_function(name = "ilogbl") +libc_math_function(name = "ilogbf128") + libc_math_function(name = "rint") libc_math_function(name = "rintf") libc_math_function(name = "rintl") +libc_math_function(name = "rintf128") + libc_math_function(name = "lrint") libc_math_function(name = "lrintf") libc_math_function(name = "lrintl") +libc_math_function(name = "lrintf128") + libc_math_function(name = "llrint") libc_math_function(name = "llrintf") libc_math_function(name = "llrintl") 
+libc_math_function(name = "llrintf128") + libc_math_function(name = "lround") libc_math_function(name = "lroundf") libc_math_function(name = "lroundl") +libc_math_function(name = "lroundf128") + libc_math_function(name = "llround") libc_math_function(name = "llroundf") libc_math_function(name = "llroundl") +libc_math_function(name = "llroundf128") + libc_math_function( name = "nan", additional_deps = [ @@ -1995,6 +2073,14 @@ libc_math_function( ], ) +libc_math_function( + name = "nanf128", + additional_deps = [ + ":__support_str_to_float", + ":errno", + ], +) + libc_math_function(name = "nearbyint") libc_math_function(name = "nearbyintf") @@ -2007,6 +2093,8 @@ libc_math_function(name = "nextafterf") libc_math_function(name = "nextafterl") +libc_math_function(name = "nextafterf128") + libc_math_function(name = "nexttoward") libc_math_function(name = "nexttowardf") @@ -2598,6 +2686,10 @@ libc_function( name = "open", srcs = ["src/fcntl/linux/open.cpp"], hdrs = ["src/fcntl/open.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), deps = [ ":__support_common", ":__support_osutil_syscall", @@ -2609,6 +2701,10 @@ libc_function( name = "openat", srcs = ["src/fcntl/linux/openat.cpp"], hdrs = ["src/fcntl/openat.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), deps = [ ":__support_common", ":__support_osutil_syscall", @@ -2688,6 +2784,10 @@ libc_function( name = "dup3", srcs = ["src/unistd/linux/dup3.cpp"], hdrs = ["src/unistd/dup3.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), deps = [ ":__support_common", ":__support_osutil_syscall", @@ -2857,6 +2957,10 @@ libc_function( name = "pread", srcs = ["src/unistd/linux/pread.cpp"], hdrs = ["src/unistd/pread.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), deps = [ ":__support_common", ":__support_osutil_syscall", @@ -2868,6 +2972,10 @@ libc_function( name = "pwrite", srcs = ["src/unistd/linux/pwrite.cpp"], hdrs = ["src/unistd/pwrite.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), deps = [ ":__support_common", ":__support_osutil_syscall", @@ -3105,6 +3213,7 @@ libc_support_library( ], defines = PRINTF_COPTS, deps = [ + ":__support_big_int", ":__support_common", ":__support_cpp_limits", ":__support_cpp_span", @@ -3115,7 +3224,6 @@ libc_support_library( ":__support_fputil_rounding_mode", ":__support_integer_to_string", ":__support_libc_assert", - ":__support_uint", ":__support_uint128", ":printf_config", ":printf_core_structs", @@ -3271,6 +3379,10 @@ libc_function( name = "rename", srcs = ["src/stdio/linux/rename.cpp"], hdrs = ["src/stdio/rename.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), deps = [ ":__support_common", ":__support_osutil_syscall", @@ -3309,6 +3421,10 @@ libc_function( name = "epoll_wait", srcs = ["src/sys/epoll/linux/epoll_wait.cpp"], hdrs = ["src/sys/epoll/epoll_wait.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), weak = True, deps = [ ":__support_osutil_syscall", @@ -3320,6 +3436,10 @@ libc_function( name = "epoll_pwait", srcs = 
["src/sys/epoll/linux/epoll_pwait.cpp"], hdrs = ["src/sys/epoll/epoll_pwait.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), weak = True, deps = [ ":__support_osutil_syscall", diff --git a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel index d2087a3d528f4..82c015a7eeda0 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel @@ -15,11 +15,11 @@ libc_support_library( srcs = ["TestLogger.cpp"], hdrs = ["TestLogger.h"], deps = [ + "//libc:__support_big_int", "//libc:__support_cpp_string", "//libc:__support_cpp_string_view", "//libc:__support_macros_properties_types", "//libc:__support_osutil_io", - "//libc:__support_uint", "//libc:__support_uint128", ], ) @@ -85,7 +85,8 @@ libc_support_library( "//libc:__support_fputil_fp_bits", "//libc:__support_fputil_fpbits_str", "//libc:__support_fputil_rounding_mode", - "//libc:llvm_libc_macros_math_macros", + "//libc:hdr_math_macros", + "//libc:hdr_fenv_macros", ], ) @@ -128,8 +129,8 @@ libc_support_library( "StringUtils.h", ], deps = [ + "//libc:__support_big_int", "//libc:__support_cpp_string", "//libc:__support_cpp_type_traits", - "//libc:__support_uint", ], ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel index c0d402a89ea3c..3980ef60c197e 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel @@ -64,12 +64,12 @@ libc_test( name = "integer_to_string_test", srcs = ["integer_to_string_test.cpp"], deps = [ + "//libc:__support_big_int", "//libc:__support_cpp_limits", "//libc:__support_cpp_span", "//libc:__support_cpp_string_view", "//libc:__support_integer_literals", "//libc:__support_integer_to_string", - "//libc:__support_uint", "//libc:__support_uint128", ], ) @@ -83,13 +83,14 @@ libc_test( ) libc_test( - name = "uint_test", - srcs = ["uint_test.cpp"], + name = "big_int_test", + srcs = ["big_int_test.cpp"], deps = [ + "//libc:__support_big_int", "//libc:__support_cpp_optional", "//libc:__support_integer_literals", "//libc:__support_macros_properties_types", - "//libc:__support_uint", + "//libc:hdr_math_macros", "//libc:llvm_libc_macros_math_macros", ], ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/CPP/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/CPP/BUILD.bazel index dad1c7708e448..96dafbc6da485 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/CPP/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/CPP/BUILD.bazel @@ -26,9 +26,9 @@ libc_test( name = "bit_test", srcs = ["bit_test.cpp"], deps = [ + "//libc:__support_big_int", "//libc:__support_cpp_bit", "//libc:__support_macros_properties_types", - "//libc:__support_uint", ], ) @@ -48,9 +48,9 @@ libc_test( name = "limits_test", srcs = ["limits_test.cpp"], deps = [ + "//libc:__support_big_int", "//libc:__support_cpp_limits", "//libc:__support_macros_properties_types", - "//libc:__support_uint", ], ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/FPUtil/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/FPUtil/BUILD.bazel index 18683e42724a5..ff3b035f64ade 100644 --- 
a/utils/bazel/llvm-project-overlay/libc/test/src/__support/FPUtil/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/FPUtil/BUILD.bazel @@ -26,8 +26,8 @@ libc_test( srcs = ["dyadic_float_test.cpp"], copts = ["-frounding-math"], deps = [ + "//libc:__support_big_int", "//libc:__support_fputil_dyadic_float", - "//libc:__support_uint", "//libc:__support_uint128", "//libc/test/UnitTest:fp_test_helpers", "//libc/utils/MPFRWrapper:mpfr_wrapper", @@ -41,5 +41,6 @@ libc_test( "//libc:__support_fputil_rounding_mode", "//libc:__support_uint128", "//libc/utils/MPFRWrapper:mpfr_wrapper", + "//libc:hdr_fenv_macros", ], ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/fenv/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/fenv/BUILD.bazel index f64f78c113c01..bce1dd786a850 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/fenv/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/fenv/BUILD.bazel @@ -16,10 +16,12 @@ libc_test( libc_function_deps = [ "//libc:feclearexcept", "//libc:feraiseexcept", + "//libc:fesetexcept", "//libc:fetestexcept", ], deps = [ "//libc:__support_fputil_fenv_impl", + "//libc:hdr_fenv_macros", ], ) @@ -30,6 +32,7 @@ libc_test( "//libc:fegetround", "//libc:fesetround", ], + deps = ["//libc:hdr_fenv_macros"], ) libc_test( @@ -46,6 +49,7 @@ libc_test( "//libc:__support_fputil_fenv_impl", "//libc:__support_macros_properties_architectures", "//libc/test/UnitTest:fp_test_helpers", + "//libc:hdr_fenv_macros", ], ) @@ -84,6 +88,7 @@ libc_test( ], deps = [ "//libc:__support_fputil_fenv_impl", + "//libc:hdr_fenv_macros", ], ) @@ -98,6 +103,7 @@ libc_test( deps = [ "//libc:__support_common", "//libc:__support_macros_properties_architectures", + "//libc:hdr_fenv_macros", ], ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel index 15e367f0aca2d..e30c8bf023cf2 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/math/BUILD.bazel @@ -178,7 +178,7 @@ libc_support_library( deps = [ "//libc:__support_fputil_basic_operations", "//libc:__support_fputil_fp_bits", - "//libc:llvm_libc_macros_math_macros", + "//libc:hdr_math_macros", "//libc/test/UnitTest:LibcUnitTest", "//libc/test/UnitTest:fp_test_helpers", "//libc/utils/MPFRWrapper:mpfr_wrapper", @@ -297,7 +297,7 @@ libc_support_library( "//libc:__support_cpp_limits", "//libc:__support_fputil_fp_bits", "//libc:__support_fputil_manipulation_functions", - "//libc:llvm_libc_macros_math_macros", + "//libc:hdr_math_macros", "//libc/test/UnitTest:LibcUnitTest", ], ) @@ -324,7 +324,7 @@ libc_support_library( "//libc:__support_fputil_basic_operations", "//libc:__support_fputil_fenv_impl", "//libc:__support_fputil_fp_bits", - "//libc:llvm_libc_macros_math_macros", + "//libc:hdr_math_macros", "//libc/test/UnitTest:LibcUnitTest", "//libc/test/UnitTest:fp_test_helpers", ], @@ -352,7 +352,7 @@ libc_support_library( "//libc:__support_cpp_limits", "//libc:__support_fputil_fp_bits", "//libc:__support_fputil_normal_float", - "//libc:llvm_libc_macros_math_macros", + "//libc:hdr_math_macros", "//libc/test/UnitTest:LibcUnitTest", "//libc/test/UnitTest:fp_test_helpers", ], @@ -379,7 +379,8 @@ libc_support_library( deps = [ "//libc:__support_fputil_fenv_impl", "//libc:__support_fputil_fp_bits", - "//libc:llvm_libc_macros_math_macros", + "//libc:hdr_fenv_macros", + "//libc:hdr_math_macros", "//libc/test/UnitTest:LibcUnitTest", 
"//libc/test/UnitTest:fp_test_helpers", "//libc/utils/MPFRWrapper:mpfr_wrapper", @@ -416,7 +417,7 @@ libc_support_library( deps = [ "//libc:__support_fputil_fenv_impl", "//libc:__support_fputil_fp_bits", - "//libc:llvm_libc_macros_math_macros", + "//libc:hdr_math_macros", "//libc/test/UnitTest:LibcUnitTest", "//libc/test/UnitTest:fp_test_helpers", "//libc/utils/MPFRWrapper:mpfr_wrapper", @@ -528,7 +529,7 @@ libc_support_library( "//libc:__support_cpp_type_traits", "//libc:__support_fputil_basic_operations", "//libc:__support_fputil_fp_bits", - "//libc:llvm_libc_macros_math_macros", + "//libc:hdr_math_macros", "//libc/test/UnitTest:LibcUnitTest", "//libc/test/UnitTest:fp_test_helpers", ], diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/math/libc_math_test_rules.bzl b/utils/bazel/llvm-project-overlay/libc/test/src/math/libc_math_test_rules.bzl index 1a5868d242e80..da4964bd89824 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/math/libc_math_test_rules.bzl +++ b/utils/bazel/llvm-project-overlay/libc/test/src/math/libc_math_test_rules.bzl @@ -34,7 +34,7 @@ def math_test(name, hdrs = [], deps = [], **kwargs): "//libc:__support_math_extras", "//libc:__support_uint128", "//libc/test/UnitTest:fp_test_helpers", - "//libc:llvm_libc_macros_math_macros", + "//libc:hdr_math_macros", ] + deps, **kwargs ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel new file mode 100644 index 0000000000000..7d4b9978db3f2 --- /dev/null +++ b/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel @@ -0,0 +1,148 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# Smoke tests for LLVM libc math.h functions. 
+ +load("//libc:libc_build_rules.bzl", "libc_support_library") +load("//libc/test/src/math:libc_math_test_rules.bzl", "math_test") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +math_test( + name = "fabsf128", + hdrs = ["FAbsTest.h"], +) + +math_test( + name = "ceilf128", + hdrs = ["CeilTest.h"], +) + +math_test( + name = "floorf128", + hdrs = ["FloorTest.h"], +) + +math_test( + name = "truncf128", + hdrs = ["TruncTest.h"], +) + +math_test( + name = "roundf128", + hdrs = ["RoundTest.h"], +) + +math_test( + name = "frexpf128", + hdrs = ["FrexpTest.h"], +) + +math_test( + name = "logbf128", + hdrs = ["LogbTest.h"], +) + +math_test( + name = "modff128", + hdrs = ["ModfTest.h"], +) + +math_test( + name = "fminf128", + hdrs = ["FMinTest.h"], +) + +math_test( + name = "fmaxf128", + hdrs = ["FMaxTest.h"], +) + +math_test( + name = "sqrtf128", + hdrs = ["SqrtTest.h"], + deps = ["//libc:__support_cpp_bit"], +) + +math_test( + name = "copysignf128", + hdrs = ["CopySignTest.h"], +) + +math_test( + name = "ilogbf128", + hdrs = ["ILogbTest.h"], + deps = ["//libc:__support_cpp_limits"], +) + +math_test( + name = "fdimf128", + hdrs = ["FDimTest.h"], +) + +libc_support_library( + name = "ldexp_test_template", + hdrs = ["LdExpTest.h"], + deps = [ + "//libc:__support_cpp_limits", + "//libc:__support_fputil_fp_bits", + "//libc:__support_fputil_normal_float", + "//libc:hdr_math_macros", + "//libc/test/UnitTest:LibcUnitTest", + "//libc/test/UnitTest:fp_test_helpers", + ], +) + +math_test( + name = "ldexpf128", + hdrs = ["LdExpTest.h"], + deps = ["//libc:__support_cpp_limits"], +) + +math_test( + name = "rintf128", + hdrs = ["RIntTest.h"], + deps = ["//libc:hdr_fenv_macros"], +) + +math_test( + name = "lrintf128", + hdrs = ["RoundToIntegerTest.h"], +) + +math_test( + name = "llrintf128", + hdrs = ["RoundToIntegerTest.h"], +) +math_test( + name = "lroundf128", + hdrs = ["RoundToIntegerTest.h"], +) + +math_test( + name = "llroundf128", + hdrs = ["RoundToIntegerTest.h"], +) + +libc_support_library( + name = "nextafter_test_template", + hdrs = ["NextAfterTest.h"], + deps = [ + "//libc:__support_cpp_array", + "//libc:__support_cpp_bit", + "//libc:__support_cpp_type_traits", + "//libc:__support_fputil_basic_operations", + "//libc:__support_fputil_fp_bits", + "//libc:hdr_math_macros", + "//libc/test/UnitTest:LibcUnitTest", + "//libc/test/UnitTest:fp_test_helpers", + ], +) + +math_test( + name = "nextafterf128", + deps = [":nextafter_test_template"], +) diff --git a/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel index 5f59d70ecc16d..53a8c9b9476f3 100644 --- a/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/utils/MPFRWrapper/BUILD.bazel @@ -46,7 +46,7 @@ libc_support_library( "//libc:__support_cpp_type_traits", "//libc:__support_fputil_fp_bits", "//libc:__support_fputil_fpbits_str", - "//libc:llvm_libc_macros_math_macros", + "//libc:hdr_math_macros", "//libc/test/UnitTest:LibcUnitTest", "//libc/test/UnitTest:fp_test_helpers", "//libc/utils/MPFRWrapper:mpfr_impl", diff --git a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel index 5a494a13acea2..2ccd6fcae635d 100644 --- a/utils/bazel/llvm-project-overlay/lld/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lld/BUILD.bazel @@ -2,12 +2,12 @@ # See https://llvm.org/LICENSE.txt for license information. 
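# Illustrative sketch (not part of the patch): in the lld hunk that follows,
# the hand-rolled expand_template + cc_binary driver setup is replaced by the
# shared llvm_driver_cc_binary rule from //llvm:driver.bzl, with the
# tool-specific sources demoted to a plain library. The migration shape, for
# a hypothetical tool "mytool":
load("//llvm:driver.bzl", "llvm_driver_cc_binary")

cc_library(
    name = "mytool-lib",                   # hypothetical tool library
    srcs = glob(["tools/mytool/*.cpp"]),
)

llvm_driver_cc_binary(
    name = "mytool",                       # supplies the generated driver main
    deps = [":mytool-lib"],
)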
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -load("@bazel_skylib//rules:expand_template.bzl", "expand_template") load( "//:vars.bzl", "LLVM_VERSION", ) load("//llvm:binary_alias.bzl", "binary_alias") +load("//llvm:driver.bzl", "llvm_driver_cc_binary") load("//llvm:tblgen.bzl", "gentbl") package( @@ -282,20 +282,9 @@ cc_library( ], ) -expand_template( - name = "lld_main", - out = "lld-driver.cpp", - substitutions = { - "@TOOL_NAME@": "lld", - }, - template = "//llvm:cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "lld", - srcs = glob([ - "tools/lld/*.cpp", - ]) + ["lld-driver.cpp"], +cc_library( + name = "lld-lib", + srcs = glob(["tools/lld/*.cpp"]), deps = [ ":COFF", ":Common", @@ -308,6 +297,11 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "lld", + deps = [":lld-lib"], +) + # These are the required names for lld running under different environs. # # Unix/Linux require that the binary be named "ld.lld". diff --git a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel new file mode 100644 index 0000000000000..6dfe8085b9285 --- /dev/null +++ b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel @@ -0,0 +1,854 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +load("@bazel_skylib//lib:selects.bzl", "selects") +load("@bazel_skylib//rules:common_settings.bzl", "bool_flag") +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") +load("//:vars.bzl", "LLVM_VERSION_MAJOR", "LLVM_VERSION_MINOR", "LLVM_VERSION_PATCH", "LLVM_VERSION_SUFFIX", "PACKAGE_VERSION") +load("//lldb/source/Plugins:plugin_config.bzl", "DEFAULT_PLUGINS", "DEFAULT_SCRIPT_PLUGINS", "OBJCPP_COPTS") +load("//mlir:tblgen.bzl", "gentbl_cc_library", "td_library") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], +) + +licenses(["notice"]) + +exports_files(["LICENSE.TXT"]) + +bool_flag( + name = "enable_curses", + build_setting_default = False, +) + +config_setting( + name = "curses_enabled_setting", + flag_values = {":enable_curses": "true"}, +) + +selects.config_setting_group( + name = "curses_enabled", + match_any = [ + ":curses_enabled_setting", + "@platforms//os:macos", + ], +) + +bool_flag( + name = "enable_libedit", + build_setting_default = False, +) + +config_setting( + name = "libedit_enabled_setting", + flag_values = {":enable_libedit": "true"}, +) + +selects.config_setting_group( + name = "libedit_enabled", + match_any = [ + ":libedit_enabled_setting", + "@platforms//os:macos", + ], +) + +_VERSION_SUBSTITUTIONS = { + "@LLDB_VERSION@": PACKAGE_VERSION, + "@LLDB_VERSION_MAJOR@": LLVM_VERSION_MAJOR, + "@LLDB_VERSION_MINOR@": LLVM_VERSION_MINOR, + "@LLDB_VERSION_PATCH@": LLVM_VERSION_PATCH, + "@LLDB_VERSION_SUFFIX@": LLVM_VERSION_SUFFIX, + '#cmakedefine LLDB_FULL_VERSION_STRING "@LLDB_FULL_VERSION_STRING@"': "/* #undef LLDB_FULL_VERSION_STRING */", +} + +genrule( + name = "vcs_version_gen", + outs = ["VCSVersion.inc"], + cmd = "echo '#undef LLDB_REVISION' >> $@\n" + + "echo '#undef LLDB_REPOSITORY' >> $@\n", +) + +expand_template( + name = "version_inc_gen", + out = "Version/Version.inc", + substitutions = _VERSION_SUBSTITUTIONS, + template = "include/lldb/Version/Version.inc.in", +) + +cc_library( + name = "Version", + srcs = [ + "source/Version/Version.cpp", + ":vcs_version_gen", + ":version_inc_gen", + ], + hdrs = 
["include/lldb/Version/Version.h"], + features = ["-layering_check"], # Version.inc breaks this unintentionally + strip_include_prefix = "include", + deps = ["//clang:basic"], +) + +expand_template( + name = "ConfigHeader", + out = "include/lldb/Host/Config.h", + substitutions = { + "#cmakedefine01 HAVE_PTSNAME_R": "#define HAVE_PTSNAME_R 1", + "#cmakedefine01 LLDB_ENABLE_TERMIOS": "#define LLDB_ENABLE_TERMIOS 1", + + # TODO: Add LZMA support by including the library in bazel + "#cmakedefine01 LLDB_ENABLE_LZMA": "#define LLDB_ENABLE_LZMA 0", + + # TODO: lua support + "#cmakedefine01 LLDB_ENABLE_LUA": "#define LLDB_ENABLE_LUA 0", + + # TODO: python support + "#cmakedefine01 LLDB_ENABLE_PYTHON": "#define LLDB_ENABLE_PYTHON 0", + # Only enabled by default on Windows + "#cmakedefine01 LLDB_EMBED_PYTHON_HOME": "#define LLDB_EMBED_PYTHON_HOME 0", + # Only used if LLDB_EMBED_PYTHON_HOME is true + "#cmakedefine LLDB_PYTHON_HOME R\"(${LLDB_PYTHON_HOME})\"": "#define LLDB_PYTHON_HOME \"\"", + + # Unsupported + "#cmakedefine01 CURSES_HAVE_NCURSES_CURSES_H": "#define CURSES_HAVE_NCURSES_CURSES_H 0", + "#cmakedefine01 LLDB_ENABLE_FBSDVMCORE": "#define LLDB_ENABLE_FBSDVMCORE 0", + + # Defaults that could be configurable if needed + "#cmakedefine01 LLDB_ENABLE_POSIX": "#define LLDB_ENABLE_POSIX 1", + "#cmakedefine LLDB_GLOBAL_INIT_DIRECTORY R\"(${LLDB_GLOBAL_INIT_DIRECTORY})\"": "#define LLDB_GLOBAL_INIT_DIRECTORY \"\"", + "${LLDB_INSTALL_LIBDIR_BASENAME}": "lib", + "${LLDB_BUG_REPORT_URL}": "", + } | select({ + "@platforms//os:macos": { + "#cmakedefine HAVE_LIBCOMPRESSION": "#define HAVE_LIBCOMPRESSION", + "#cmakedefine01 HAVE_NR_PROCESS_VM_READV": "#define HAVE_NR_PROCESS_VM_READV 0", + "#cmakedefine01 HAVE_PPOLL": "#define HAVE_PPOLL 0", + "#cmakedefine01 HAVE_PROCESS_VM_READV": "#define HAVE_PROCESS_VM_READV 0", + "#cmakedefine01 HAVE_SYS_EVENT_H": "#define HAVE_SYS_EVENT_H 1", + "#cmakedefine01 LLDB_ENABLE_LIBXML2": "#define LLDB_ENABLE_LIBXML2 1", + "#cmakedefine01 LLDB_HAVE_EL_RFUNC_T": "#define LLDB_HAVE_EL_RFUNC_T 0", + }, + "@platforms//os:linux": { + "#cmakedefine HAVE_LIBCOMPRESSION": "/* #undef HAVE_LIBCOMPRESSION */", + "#cmakedefine01 HAVE_NR_PROCESS_VM_READV": "#define HAVE_NR_PROCESS_VM_READV 1", + "#cmakedefine01 HAVE_PPOLL": "#define HAVE_PPOLL 1", + "#cmakedefine01 HAVE_PROCESS_VM_READV": "#define HAVE_PROCESS_VM_READV 1", + "#cmakedefine01 HAVE_SYS_EVENT_H": "#define HAVE_SYS_EVENT_H 0", + "#cmakedefine01 LLDB_ENABLE_LIBXML2": "#define LLDB_ENABLE_LIBXML2 0", + "#cmakedefine01 LLDB_HAVE_EL_RFUNC_T": "#define LLDB_HAVE_EL_RFUNC_T 1", + }, + }) | select({ + ":curses_enabled": { + "#cmakedefine01 LLDB_ENABLE_CURSES": "#define LLDB_ENABLE_CURSES 1", + }, + "//conditions:default": { + "#cmakedefine01 LLDB_ENABLE_CURSES": "#define LLDB_ENABLE_CURSES 0", + }, + }) | select({ + ":libedit_enabled": { + "#cmakedefine01 LLDB_EDITLINE_USE_WCHAR": "#define LLDB_EDITLINE_USE_WCHAR 1", + "#cmakedefine01 LLDB_ENABLE_LIBEDIT": "#define LLDB_ENABLE_LIBEDIT 1", + }, + "//conditions:default": { + "#cmakedefine01 LLDB_EDITLINE_USE_WCHAR": "#define LLDB_EDITLINE_USE_WCHAR 0", + "#cmakedefine01 LLDB_ENABLE_LIBEDIT": "#define LLDB_ENABLE_LIBEDIT 0", + }, + }), + template = "include/lldb/Host/Config.h.cmake", +) + +cc_library( + name = "Config", + hdrs = [":ConfigHeader"], + include_prefix = "lldb/Host", +) + +cc_binary( + name = "lldb-tblgen", + srcs = glob([ + "utils/TableGen/*.cpp", + "utils/TableGen/*.h", + ]), + deps = [ + "//llvm:CodeGenTypes", + "//llvm:Support", + "//llvm:TableGen", + 
"//llvm:TargetParser", + "//llvm:config", + ], +) + +cc_library( + name = "API", + srcs = glob([ + "source/API/**/*.cpp", + "source/API/**/*.h", + ]), + hdrs = glob(["include/lldb/API/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":Breakpoint", + ":Commands", + ":Core", + ":DataFormatters", + ":Expression", + ":Headers", + ":Host", + ":Initialization", + ":InterpreterHeaders", + ":Symbol", + ":SymbolHeaders", + ":Target", + ":TargetHeaders", + ":Utility", + ":Version", + "//lldb/source/Plugins:PluginExpressionParserClangHeaders", + "//lldb/source/Plugins:PluginsConfig", + "//llvm:ExecutionEngine", + "//llvm:MCJIT", + "//llvm:Support", + "//llvm:config", + ], +) + +cc_library( + name = "Breakpoint", + srcs = glob(["source/Breakpoint/**/*.cpp"]), + hdrs = glob(["include/lldb/Breakpoint/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":Core", + ":DataFormattersHeaders", + ":Expression", + ":Headers", + ":InterpreterHeaders", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "DataFormatters", + srcs = glob(["source/DataFormatters/**/*.cpp"]), + hdrs = glob(["include/lldb/DataFormatters/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":CoreHeaders", + ":Headers", + ":InterpreterHeaders", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "Expression", + srcs = glob(["source/Expression/**/*.cpp"]), + hdrs = glob(["include/lldb/Expression/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":Core", + ":Headers", + ":Host", + ":InterpreterHeaders", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + "//lldb/source/Plugins:PluginSymbolFileDWARFHeaders", + "//llvm:Core", + "//llvm:DebugInfoDWARF", + "//llvm:ExecutionEngine", + "//llvm:Support", + ], +) + +cc_library( + name = "Initialization", + srcs = glob(["source/Initialization/**/*.cpp"]), + hdrs = glob(["include/lldb/Initialization/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":Core", + ":Headers", + ":Host", + ":TargetHeaders", + ":Utility", + ":Version", + "//lldb/source/Plugins:PluginProcessGDBRemote", + "//lldb/source/Plugins:PluginProcessPOSIX", + "//llvm:Support", + ], +) + +gentbl_cc_library( + name = "InterpreterProperties", + strip_include_prefix = "source/Interpreter", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "source/Interpreter/InterpreterProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "source/Interpreter/InterpreterPropertiesEnum.inc", + ), + ], + tblgen = ":lldb-tblgen", + td_file = "source/Interpreter/InterpreterProperties.td", + deps = [":CoreTdFiles"], +) + +cc_library( + name = "APIHeaders", + hdrs = glob(["include/lldb/API/**/*.h"]), + strip_include_prefix = "include", +) + +cc_library( + name = "InterpreterHeaders", + hdrs = glob(["include/lldb/Interpreter/**/*.h"]), + strip_include_prefix = "include", + deps = [":APIHeaders"], +) + +cc_library( + name = "BreakpointHeaders", + hdrs = glob(["include/lldb/Breakpoint/**/*.h"]), + strip_include_prefix = "include", +) + +cc_library( + name = "ExpressionHeaders", + hdrs = glob(["include/lldb/Expression/**/*.h"]), + strip_include_prefix = "include", + deps = ["//llvm:ExecutionEngine"], +) + +cc_library( + name = "DataFormattersHeaders", + hdrs = glob(["include/lldb/DataFormatters/**/*.h"]), + strip_include_prefix = "include", +) + +cc_library( + name = "Interpreter", + srcs = glob(["source/Interpreter/**/*.cpp"]), + deps = [ + ":API", + ":Commands", + ":Core", + ":DataFormatters", + 
":Headers", + ":Host", + ":InterpreterHeaders", + ":InterpreterProperties", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + "//llvm:Support", + ], +) + +td_library( + name = "CommandsTdFiles", + srcs = glob(["source/Commands/**/*.td"]), +) + +gentbl_cc_library( + name = "CommandOptions", + strip_include_prefix = "source/Commands", + tbl_outs = [ + ( + ["-gen-lldb-option-defs"], + "source/Commands/CommandOptions.inc", + ), + ], + tblgen = ":lldb-tblgen", + td_file = "source/Commands/Options.td", + deps = [":CommandsTdFiles"], +) + +cc_library( + name = "Commands", + srcs = glob(["source/Commands/**/*.cpp"]), + hdrs = glob(["source/Commands/**/*.h"]), + strip_include_prefix = "source", + deps = [ + ":Breakpoint", + ":CommandOptions", + ":Core", + ":DataFormatters", + ":Expression", + ":Headers", + ":Host", + ":InterpreterHeaders", + ":SymbolHeaders", + ":Target", + ":TargetHeaders", + ":Utility", + ":Version", + "//clang:codegen", + "//clang:frontend", + "//llvm:Support", + ], +) + +cc_library( + name = "SymbolHeaders", + hdrs = glob(["include/lldb/Symbol/**/*.h"]), + strip_include_prefix = "include", +) + +cc_library( + name = "Symbol", + srcs = glob(["source/Symbol/**/*.cpp"]), + deps = [ + ":Core", + ":Expression", + ":Headers", + ":Host", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + ":UtilityPrivateHeaders", + "//llvm:DebugInfo", + "//llvm:DebugInfoDWARF", + "//llvm:Support", + ], +) + +cc_library( + name = "HostMacOSXHeaders", + hdrs = glob([ + "include/lldb/Host/*.h", + "include/lldb/Host/macosx/*.h", + "include/lldb/Host/posix/*.h", + ]), + strip_include_prefix = "include", + deps = [":Utility"], +) + +cc_library( + name = "HostMacOSXPrivateHeaders", + hdrs = glob([ + "source/Host/macosx/cfcpp/*.h", + "source/Host/macosx/objcxx/*.h", + ]), + strip_include_prefix = "source", + tags = ["nobuildkite"], + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [":Utility"], +) + +objc_library( + name = "HostMacOSXObjCXX", + srcs = glob([ + "source/Host/macosx/objcxx/*.mm", + ]), + copts = OBJCPP_COPTS, + tags = ["nobuildkite"], + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + ":HostMacOSXHeaders", + ":HostMacOSXPrivateHeaders", + ], +) + +cc_library( + name = "Host", + srcs = glob([ + "source/Host/common/**/*.cpp", + ]) + select({ + "@platforms//os:linux": glob( + [ + "source/Host/posix/**/*.cpp", + "source/Host/linux/**/*.cpp", + ], + exclude = ["source/Host/linux/android/**/*.cpp"], + ), + "@platforms//os:macos": glob( + [ + "source/Host/macosx/cfcpp/*.cpp", + "source/Host/posix/**/*.cpp", + ], + ), + }), + hdrs = [":ConfigHeader"] + glob([ + "include/lldb/Host/*.h", + "include/lldb/Host/common/*.h", + ]) + select({ + "@platforms//os:macos": glob([ + "include/lldb/Host/macosx/*.h", + "include/lldb/Host/posix/*.h", + ]), + "@platforms//os:linux": glob([ + "include/lldb/Host/linux/*.h", + "include/lldb/Host/posix/*.h", + ]), + }), + # TODO: Move this to Config library when https://github.com/bazelbuild/bazel/issues/21884 is fixed + linkopts = select({ + "@platforms//os:macos": [ + "-lcompression", + "-lxml2", + "-Wl,-framework,CoreServices", + "-Wl,-framework,Security", + ], + "//conditions:default": [], + }) + select({ + ":curses_enabled": [ + "-lcurses", + "-lpanel", + ], + "//conditions:default": [], + }) + select({ + ":libedit_enabled": [ + "-ledit", + ], + "//conditions:default": [], + }), + 
strip_include_prefix = "include", + deps = [ + ":Headers", + ":Utility", + "//llvm:Object", + "//llvm:Support", + "//llvm:TargetParser", + "//llvm:config", + ] + select({ + "@platforms//os:macos": [":HostMacOSXObjCXX"], + "//conditions:default": [], + }), +) + +td_library( + name = "CoreTdFiles", + srcs = glob([ + "source/Core/**/*.td", + "include/lldb/Core/*.td", + ]), +) + +gentbl_cc_library( + name = "CoreProperties", + strip_include_prefix = "source/Core", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "source/Core/CoreProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "source/Core/CorePropertiesEnum.inc", + ), + ], + tblgen = ":lldb-tblgen", + td_file = "source/Core/CoreProperties.td", + deps = [":CoreTdFiles"], +) + +cc_library( + name = "CoreHeaders", + hdrs = glob(["include/lldb/Core/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":BreakpointHeaders", + ":CoreProperties", + ":DataFormattersHeaders", + ":ExpressionHeaders", + ":Host", + ":InterpreterHeaders", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + "//clang:driver", + "//llvm:Demangle", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "Core", + srcs = glob(["source/Core/**/*.cpp"]), + hdrs = glob(["include/lldb/Core/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":BreakpointHeaders", + ":CoreHeaders", + ":CoreProperties", + ":DataFormattersHeaders", + ":ExpressionHeaders", + ":Headers", + ":Host", + ":InterpreterHeaders", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + "//clang:driver", + "//lldb/source/Plugins:PluginCPlusPlusLanguageHeaders", + "//lldb/source/Plugins:PluginObjCLanguageHeaders", + "//llvm:Demangle", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +gentbl_cc_library( + name = "TargetProperties", + strip_include_prefix = "source/Target", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "source/Target/TargetProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "source/Target/TargetPropertiesEnum.inc", + ), + ], + tblgen = ":lldb-tblgen", + td_file = "source/Target/TargetProperties.td", + deps = [":CoreTdFiles"], +) + +cc_library( + name = "AppleArm64ExceptionClass", + hdrs = ["include/lldb/Target/AppleArm64ExceptionClass.def"], + strip_include_prefix = "include/lldb/Target", +) + +cc_library( + name = "TargetHeaders", + hdrs = glob(["include/lldb/Target/**/*.h"]), + strip_include_prefix = "include", + deps = [":AppleArm64ExceptionClass"], +) + +cc_library( + name = "Target", + srcs = glob(["source/Target/**/*.cpp"]), + deps = [ + ":BreakpointHeaders", + ":Core", + ":DataFormattersHeaders", + ":ExpressionHeaders", + ":Headers", + ":Host", + ":InterpreterHeaders", + ":Symbol", + ":SymbolHeaders", + ":TargetHeaders", + ":TargetProperties", + ":Utility", + "//lldb/source/Plugins:PluginProcessUtility", + "//llvm:MC", + "//llvm:Support", + ], +) + +cc_library( + name = "Headers", + hdrs = glob(["include/lldb/lldb-*.h"]), + strip_include_prefix = "include", +) + +cc_library( + name = "UtilityPrivateHeaders", + hdrs = glob(["source/Utility/**/*.h"]), + strip_include_prefix = "source", + deps = [":Headers"], +) + +cc_library( + name = "Utility", + srcs = glob(["source/Utility/**/*.cpp"]), + hdrs = glob(["include/lldb/Utility/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":Headers", + ":UtilityPrivateHeaders", + "//llvm:BinaryFormat", + "//llvm:Support", + "//llvm:TargetParser", + "//llvm:config", + ], +) + +cc_library( + name = "liblldb.static", + deps = [ + ":API", + ":Host", + ":Interpreter", + 
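# Illustrative sketch (not part of the patch): Core, Target, and Interpreter
# above each pair a .td file with a gentbl_cc_library that runs lldb-tblgen
# twice, once for the property defs and once for the property enum defs,
# producing the .inc files their .cpp sources include. The recurring shape,
# for a hypothetical component "Foo":
load("//mlir:tblgen.bzl", "gentbl_cc_library")

gentbl_cc_library(
    name = "FooProperties",                # hypothetical
    strip_include_prefix = "source/Foo",
    tbl_outs = [
        (
            ["-gen-lldb-property-defs"],
            "source/Foo/FooProperties.inc",
        ),
        (
            ["-gen-lldb-property-enum-defs"],
            "source/Foo/FooPropertiesEnum.inc",
        ),
    ],
    tblgen = ":lldb-tblgen",
    td_file = "source/Foo/FooProperties.td",
    deps = [":CoreTdFiles"],
)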
"//llvm:AllTargetsDisassemblers", + ] + [ + "//lldb/source/Plugins:Plugin{}".format(x) + for x in DEFAULT_PLUGINS + DEFAULT_SCRIPT_PLUGINS + ] + select({ + "@platforms//os:macos": [ + "//lldb/source/Plugins:PluginProcessMacOSXKernel", + "//lldb/source/Plugins:PluginSymbolLocatorDebugSymbols", + "//lldb/source/Plugins:PluginSymbolVendorMacOSX", + ], + "//conditions:default": [], + }), +) + +cc_shared_library( + name = "liblldb", + # TODO: Remove once fixed https://github.com/bazelbuild/bazel/issues/21893 + additional_linker_inputs = select({ + "@platforms//os:macos": [ + ":HostMacOSXObjCXX", + "//lldb/source/Plugins:PluginPlatformMacOSXObjCXX", + ], + "//conditions:default": [], + }), + shared_lib_name = select({ + "@platforms//os:macos": "liblldb{}.dylib".format(PACKAGE_VERSION), + "@platforms//os:linux": "liblldb{}.so".format(PACKAGE_VERSION), + }), + # TODO: Remove once fixed https://github.com/bazelbuild/bazel/issues/21893 + user_link_flags = select({ + "@platforms//os:macos": [ + "$(location :HostMacOSXObjCXX)", + "$(location //lldb/source/Plugins:PluginPlatformMacOSXObjCXX)", + ], + "//conditions:default": [], + }), + deps = [":liblldb.static"], +) + +gentbl_cc_library( + name = "lldb_options_inc_gen", + strip_include_prefix = ".", + tbl_outs = [( + ["-gen-opt-parser-defs"], + "Options.inc", + )], + tblgen = "//llvm:llvm-tblgen", + td_file = "tools/driver/Options.td", + deps = ["//llvm:OptParserTdFiles"], +) + +cc_binary( + name = "lldb", + srcs = glob([ + "tools/driver/*.cpp", + "tools/driver/*.h", + ]), + data = [ + ":lldb-argdumper", + ] + select({ + "@platforms//os:macos": [":debugserver"], + "//conditions:default": [], + }), + deps = [ + ":APIHeaders", + ":Host", + ":liblldb.static", + ":lldb_options_inc_gen", + "//llvm:Option", + "//llvm:Support", + ], +) + +cc_library( + name = "DebugServerCommonMacOSXHeaders", + hdrs = glob(["tools/debugserver/source/MacOSX/**/*.h"]), + strip_include_prefix = "tools/debugserver/source/MacOSX", +) + +cc_library( + name = "DebugServerCommonHeaders", + hdrs = glob(["tools/debugserver/source/**/*.h"]), + strip_include_prefix = "tools/debugserver/source", + deps = [":DebugServerCommonMacOSXHeaders"], +) + +objc_library( + name = "DebugServerMacOSX", + srcs = glob(["tools/debugserver/source/MacOSX/*.mm"]), + copts = OBJCPP_COPTS, + tags = ["nobuildkite"], + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + ":DebugServerCommonHeaders", + ":DebugServerCommonMacOSXHeaders", + ], +) + +cc_library( + name = "DebugServerCommon", + srcs = glob( + ["tools/debugserver/source/**/*.cpp"], + exclude = ["tools/debugserver/source/debugserver.cpp"], + ), + tags = ["nobuildkite"], + local_defines = ["LLDB_USE_OS_LOG"], + deps = [ + ":DebugServerCommonHeaders", + ":DebugServerCommonMacOSXHeaders", + ":DebugServerMacOSX", + ":Host", + ], +) + +genrule( + name = "mach_gen", + srcs = ["tools/debugserver/source/MacOSX/dbgnub-mig.defs"], + outs = [ + "mach_exc.h", + "mach_excServer.c", + "mach_excUser.c", + ], + cmd = "mig -header $(location :mach_exc.h) -server $(location :mach_excServer.c) -user $(location :mach_excUser.c) $(SRCS)", + tags = ["nobuildkite"], + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), +) + +expand_template( + name = "debugserver_version_gen", + out = "debugserver_vers.c", + substitutions = _VERSION_SUBSTITUTIONS, + template = "tools/debugserver/source/debugserver_vers.c.in", +) + 
+cc_binary( + name = "debugserver", + srcs = [ + "tools/debugserver/source/debugserver.cpp", + ":debugserver_version_gen", + ":mach_gen", + ], + tags = ["nobuildkite"], + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [":DebugServerCommon"], +) + +cc_binary( + name = "lldb-argdumper", + srcs = glob(["tools/argdumper/*.cpp"]), + deps = ["//llvm:Support"], +) diff --git a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel new file mode 100644 index 0000000000000..bbc523f54a190 --- /dev/null +++ b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel @@ -0,0 +1,2323 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") +load("//mlir:tblgen.bzl", "gentbl_cc_library") +load(":plugin_config.bzl", "DEFAULT_PLUGINS", "DEFAULT_SCRIPT_PLUGINS", "OBJCPP_COPTS") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], +) + +licenses(["notice"]) + +cc_library( + name = "PluginClangCommon", + srcs = glob(["Language/ClangCommon/*.cpp"]), + hdrs = glob(["Language/ClangCommon/*.h"]), + include_prefix = "Plugins", + deps = [ + "//clang:basic", + "//clang:lex", + "//lldb:CoreHeaders", + "//lldb:Host", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjCLanguageHeaders", + hdrs = glob(["Language/ObjC/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginClangCommon", + ":PluginExpressionParserClangHeaders", + "//lldb:CoreHeaders", + ], +) + +cc_library( + name = "PluginObjCLanguage", + srcs = glob(["Language/ObjC/*.cpp"]), + include_prefix = "Plugins", + deps = [ + ":PluginAppleObjCRuntime", + ":PluginExpressionParserClangHeaders", + ":PluginObjCLanguageHeaders", + ":PluginObjCRuntime", + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//clang:basic", + "//lldb:CoreHeaders", + "//lldb:DataFormattersHeaders", + "//lldb:ExpressionHeaders", + "//lldb:Host", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginTypeSystemClangHeaders", + hdrs = glob(["TypeSystem/Clang/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginExpressionParserClangHeaders", + "//clang:frontend", + "//lldb:CoreHeaders", + ], +) + +cc_library( + name = "PluginCPPRuntime", + srcs = glob(["LanguageRuntime/CPlusPlus/*.cpp"]), + hdrs = glob(["LanguageRuntime/CPlusPlus/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:CoreHeaders", + "//lldb:Headers", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjCRuntime", + srcs = glob(["LanguageRuntime/ObjC/*.cpp"]), + hdrs = glob(["LanguageRuntime/ObjC/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//lldb:BreakpointHeaders", + "//lldb:CoreHeaders", + "//lldb:Headers", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginAppleObjCRuntime", + srcs = glob(["LanguageRuntime/ObjC/AppleObjCRuntime/*.cpp"]), + hdrs = glob(["LanguageRuntime/ObjC/AppleObjCRuntime/*.h"]), + include_prefix = "Plugins", + deps = [ + 
":PluginCPPRuntime", + ":PluginExpressionParserClangHeaders", + ":PluginObjCLanguageHeaders", + ":PluginObjCRuntime", + ":PluginProcessUtility", + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//clang:basic", + "//lldb:BreakpointHeaders", + "//lldb:CoreHeaders", + "//lldb:DataFormattersHeaders", + "//lldb:ExpressionHeaders", + "//lldb:Headers", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginTypeSystemClang", + srcs = glob(["TypeSystem/Clang/*.cpp"]), + deps = [ + ":PluginExpressionParserClangHeaders", + ":PluginObjCRuntime", + ":PluginSymbolFileDWARF", + ":PluginSymbolFileDWARFHeaders", + ":PluginSymbolFileNativePDBHeaders", + ":PluginSymbolFilePDBHeaders", + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//clang:basic", + "//clang:frontend", + "//clang:lex", + "//clang:sema", + "//lldb:CoreHeaders", + "//lldb:Host", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginExpressionParserClangHeaders", + hdrs = glob(["ExpressionParser/Clang/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:CoreHeaders", + "//lldb:DataFormattersHeaders", + ], +) + +cc_library( + name = "PluginExpressionParserClang", + srcs = glob(["ExpressionParser/Clang/*.cpp"]), + include_prefix = "Plugins", + deps = [ + ":PluginCPPRuntime", + ":PluginCPlusPlusLanguageHeaders", + ":PluginExpressionParserClangHeaders", + ":PluginObjCRuntime", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//clang:basic", + "//clang:codegen", + "//clang:config", + "//clang:driver", + "//clang:edit", + "//clang:frontend", + "//clang:frontend_rewrite", + "//clang:lex", + "//clang:parse", + "//clang:rewrite", + "//clang:sema", + "//clang:serialization", + "//lldb:Core", + "//lldb:DataFormatters", + "//lldb:ExpressionHeaders", + "//lldb:Headers", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Core", + "//llvm:ExecutionEngine", + "//llvm:IPO", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +gentbl_cc_library( + name = "PlatformMacOSXProperties", + strip_include_prefix = "Platform/MacOSX", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "Platform/MacOSX/PlatformMacOSXProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "Platform/MacOSX/PlatformMacOSXPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "Platform/MacOSX/PlatformMacOSXProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginPlatformMacOSXObjCXXHeaders", + hdrs = glob(["Platform/MacOSX/objcxx/*.h"]), + include_prefix = "Plugins", + tags = ["nobuildkite"], + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = ["//lldb:Host"], +) + +objc_library( + name = "PluginPlatformMacOSXObjCXX", + srcs = glob(["Platform/MacOSX/objcxx/*.mm"]), + copts = OBJCPP_COPTS, + tags = ["nobuildkite"], + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [":PluginPlatformMacOSXObjCXXHeaders"], +) + +cc_library( + name = "PluginPlatformMacOSX", + srcs = glob( + ["Platform/MacOSX/*.cpp"], + exclude = ["Platform/MacOSX/PlatformAppleSimulator.cpp"], + ) + + select({ + "@platforms//os:macos": 
["Platform/MacOSX/PlatformAppleSimulator.cpp"], + "//conditions:default": [], + }), + hdrs = glob(["Platform/MacOSX/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PlatformMacOSXProperties", + ":PluginDynamicLoaderDarwinKernelHeaders", + ":PluginObjectContainerMachOFileset", + ":PluginPlatformPOSIX", + "//clang:driver_options_inc_gen", + "//lldb:BreakpointHeaders", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + "//llvm:TargetParser", + ] + select({ + "@platforms//os:macos": [":PluginPlatformMacOSXObjCXX"], + "//conditions:default": [], + }), +) + +gentbl_cc_library( + name = "SymbolFileDWARFProperties", + strip_include_prefix = "SymbolFile/DWARF", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "SymbolFile/DWARF/SymbolFileDWARFProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "SymbolFile/DWARF/SymbolFileDWARFPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "SymbolFile/DWARF/SymbolFileDWARFProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginSymbolFileDWARFHeaders", + hdrs = glob(["SymbolFile/DWARF/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginTypeSystemClangHeaders", + "//lldb:Core", + ], +) + +cc_library( + name = "PluginSymbolFileDWARF", + srcs = glob(["SymbolFile/DWARF/*.cpp"]), + deps = [ + ":PluginCPlusPlusLanguageHeaders", + ":PluginExpressionParserClangHeaders", + ":PluginObjCLanguageHeaders", + ":PluginSymbolFileDWARFHeaders", + ":PluginTypeSystemClangHeaders", + ":SymbolFileDWARFProperties", + "//clang:ast", + "//lldb:Core", + "//lldb:ExpressionHeaders", + "//lldb:Headers", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:DebugInfoDWARF", + "//llvm:Demangle", + "//llvm:Object", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginProcessUtility", + srcs = glob(["Process/Utility/*.cpp"]), + hdrs = glob(["Process/Utility/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:BreakpointHeaders", + "//lldb:Core", + "//lldb:ExpressionHeaders", + "//lldb:Headers", + "//lldb:Host", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginObjectFilePDB", + srcs = glob(["ObjectFile/PDB/*.cpp"]), + hdrs = glob(["ObjectFile/PDB/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:DebugInfoPDB", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginSymbolFileNativePDBHeaders", + hdrs = glob(["SymbolFile/NativePDB/*.h"]), + include_prefix = "Plugins", + deps = ["//lldb:Core"], +) + +cc_library( + name = "PluginSymbolFileNativePDB", + srcs = glob(["SymbolFile/NativePDB/*.cpp"]), + deps = [ + ":PluginCPlusPlusLanguageHeaders", + ":PluginExpressionParserClangHeaders", + ":PluginObjectFilePDB", + ":PluginProcessUtility", + ":PluginSymbolFileNativePDBHeaders", + ":PluginSymbolFilePDBHeaders", + ":PluginTypeSystemClangHeaders", + "//lldb:Core", + "//lldb:ExpressionHeaders", + "//lldb:Headers", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:DebugInfoCodeView", + "//llvm:DebugInfoMSF", + "//llvm:DebugInfoPDB", + "//llvm:Demangle", + "//llvm:Object", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginSymbolFilePDBHeaders", 
+ hdrs = glob(["SymbolFile/PDB/*.h"]), + include_prefix = "Plugins", + deps = ["//lldb:Core"], +) + +cc_library( + name = "PluginSymbolFilePDB", + srcs = glob(["SymbolFile/PDB/*.cpp"]), + deps = [ + ":PluginCPlusPlusLanguageHeaders", + ":PluginExpressionParserClangHeaders", + ":PluginSymbolFileNativePDB", + ":PluginSymbolFileNativePDBHeaders", + ":PluginSymbolFilePDBHeaders", + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//clang:lex", + "//lldb:Core", + "//lldb:ExpressionHeaders", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:DebugInfoCodeView", + "//llvm:DebugInfoPDB", + ], +) + +gentbl_cc_library( + name = "ProcessGDBRemoteProperties", + strip_include_prefix = "Process/gdb-remote", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "Process/gdb-remote/ProcessGDBRemoteProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "Process/gdb-remote/ProcessGDBRemotePropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "Process/gdb-remote/ProcessGDBRemoteProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginProcessGDBRemote", + srcs = glob(["Process/gdb-remote/*.cpp"]), + hdrs = glob(["Process/gdb-remote/*.h"]) + [ + "Process/gdb-remote/GDBRemoteErrno.def", + ], + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + ":ProcessGDBRemoteProperties", + "//lldb:BreakpointHeaders", + "//lldb:CoreHeaders", + "//lldb:DataFormattersHeaders", + "//lldb:Headers", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:Support", + "//llvm:TargetParser", + "@llvm_zlib//:zlib", + ], +) + +cc_library( + name = "PluginObjectContainerMachOArchive", + srcs = glob(["ObjectContainer/Universal-Mach-O/*.cpp"]), + hdrs = glob(["ObjectContainer/Universal-Mach-O/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginObjectContainerBSDArchive", + srcs = glob(["ObjectContainer/BSD-Archive/*.cpp"]), + hdrs = glob(["ObjectContainer/BSD-Archive/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Object", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjectContainerMachOFileset", + srcs = glob(["ObjectContainer/Mach-O-Fileset/*.cpp"]), + hdrs = glob(["ObjectContainer/Mach-O-Fileset/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +gentbl_cc_library( + name = "StructuredDataDarwinLogProperties", + strip_include_prefix = "StructuredData/DarwinLog", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "StructuredData/DarwinLog/StructuredDataDarwinLogProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "StructuredData/DarwinLog/StructuredDataDarwinLogPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "StructuredData/DarwinLog/StructuredDataDarwinLogProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginStructuredDataDarwinLog", + srcs = glob(["StructuredData/DarwinLog/*.cpp"]), + hdrs = glob(["StructuredData/DarwinLog/*.h"]), + include_prefix = "Plugins", + deps = [ + ":StructuredDataDarwinLogProperties", + 
"//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginTraceCommon", + srcs = glob(["Trace/common/*.cpp"]), + hdrs = glob(["Trace/common/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Target", + "//lldb:TargetHeaders", + ], +) + +cc_library( + name = "PluginPlatformPOSIX", + srcs = glob(["Platform/POSIX/*.cpp"]), + hdrs = glob(["Platform/POSIX/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginPlatformGDB", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +gentbl_cc_library( + name = "PlatformQemuUserProperties", + strip_include_prefix = "Platform/QemuUser", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "Platform/QemuUser/PlatformQemuUserProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "Platform/QemuUser/PlatformQemuUserPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "Platform/QemuUser/PlatformQemuUserProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginPlatformQemuUser", + srcs = glob(["Platform/QemuUser/*.cpp"]), + hdrs = glob(["Platform/QemuUser/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PlatformQemuUserProperties", + ":PluginProcessGDBRemote", + "//lldb:CoreHeaders", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginPlatformGDB", + srcs = glob(["Platform/gdb-server/*.cpp"]), + hdrs = glob(["Platform/gdb-server/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessGDBRemote", + ":PluginProcessUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Host", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginPlatformLinux", + srcs = glob(["Platform/Linux/*.cpp"]), + hdrs = glob(["Platform/Linux/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginPlatformPOSIX", + ":PluginTypeSystemClangHeaders", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + ], +) + +gentbl_cc_library( + name = "PlatformAndroidProperties", + strip_include_prefix = "Platform/Android", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "Platform/Android/PlatformAndroidProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "Platform/Android/PlatformAndroidPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "Platform/Android/PlatformAndroidProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginPlatformAndroid", + srcs = glob(["Platform/Android/*.cpp"]), + hdrs = glob(["Platform/Android/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PlatformAndroidProperties", + ":PluginPlatformGDB", + ":PluginPlatformLinux", + ":PluginPlatformPOSIX", + "//lldb:Core", + "//lldb:Host", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginPlatformWindows", + srcs = glob(["Platform/Windows/*.cpp"]), + hdrs = 
glob(["Platform/Windows/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginPlatformGDB", + ":PluginTypeSystemClangHeaders", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:ExpressionHeaders", + "//lldb:Host", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginMemoryHistoryASan", + srcs = glob(["MemoryHistory/asan/*.cpp"]), + hdrs = glob(["MemoryHistory/asan/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Headers", + "//lldb:Target", + "//lldb:TargetHeaders", + ], +) + +cc_library( + name = "PluginClangREPL", + srcs = glob(["REPL/Clang/*.cpp"]), + hdrs = glob(["REPL/Clang/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginCPPRuntime", + ":PluginClangCommon", + ":PluginTypeSystemClang", + "//lldb:Core", + "//lldb:DataFormatters", + "//lldb:ExpressionHeaders", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:Target", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginSymbolVendorWasm", + srcs = glob(["SymbolVendor/wasm/*.cpp"]), + hdrs = glob(["SymbolVendor/wasm/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileWasm", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginSymbolVendorMacOSX", + srcs = glob(["SymbolVendor/MacOSX/*.cpp"]), + hdrs = glob(["SymbolVendor/MacOSX/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileMachO", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginSymbolVendorPECOFF", + srcs = glob(["SymbolVendor/PECOFF/*.cpp"]), + hdrs = glob(["SymbolVendor/PECOFF/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFilePECOFF", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginSymbolVendorELF", + srcs = glob(["SymbolVendor/ELF/*.cpp"]), + hdrs = glob(["SymbolVendor/ELF/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileELF", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginObjCPlusPlusLanguage", + srcs = glob(["Language/ObjCPlusPlus/*.cpp"]), + hdrs = glob(["Language/ObjCPlusPlus/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginClangCommon", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginCPlusPlusLanguageHeaders", + hdrs = glob(["Language/CPlusPlus/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginClangCommon", + ":PluginExpressionParserClangHeaders", + "//lldb:CoreHeaders", + ], +) + +cc_library( + name = "PluginCPlusPlusLanguage", + srcs = glob(["Language/CPlusPlus/*.cpp"]), + include_prefix = "Plugins", + deps = [ + ":PluginCPPRuntime", + ":PluginCPlusPlusLanguageHeaders", + ":PluginClangCommon", + ":PluginExpressionParserClangHeaders", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//clang:basic", + "//lldb:Core", + "//lldb:DataFormatters", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + 
"//lldb:Utility", + "//llvm:Demangle", + "//llvm:Support", + ], +) + +gentbl_cc_library( + name = "TraceExporterCTFOptions", + strip_include_prefix = "TraceExporter/ctf", + tbl_outs = [( + ["-gen-lldb-option-defs"], + "TraceExporter/ctf/TraceExporterCTFCommandOptions.inc", + )], + tblgen = "//lldb:lldb-tblgen", + td_file = "TraceExporter/ctf/TraceExporterCTFOptions.td", + deps = [ + "//lldb:CommandsTdFiles", + "//lldb:CoreTdFiles", + ], +) + +cc_library( + name = "PluginTraceExporterCTF", + srcs = glob(["TraceExporter/ctf/*.cpp"]), + hdrs = glob(["TraceExporter/ctf/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginTraceExporterCommon", + ":TraceExporterCTFOptions", + "//lldb:Core", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:Target", + "//lldb:TargetHeaders", + ], +) + +cc_library( + name = "PluginTraceExporterCommon", + srcs = glob(["TraceExporter/common/*.cpp"]), + hdrs = glob(["TraceExporter/common/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginABIPowerPC", + srcs = glob(["ABI/PowerPC/*.cpp"]), + hdrs = glob(["ABI/PowerPC/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIHexagon", + srcs = glob(["ABI/Hexagon/*.cpp"]), + hdrs = glob(["ABI/Hexagon/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Core", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIMips", + srcs = glob(["ABI/Mips/*.cpp"]), + hdrs = glob(["ABI/Mips/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIMSP430", + srcs = glob(["ABI/MSP430/*.cpp"]), + hdrs = glob(["ABI/MSP430/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Core", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIX86", + srcs = glob(["ABI/X86/*.cpp"]), + hdrs = glob(["ABI/X86/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIARM", + srcs = glob(["ABI/ARM/*.cpp"]), + hdrs = glob(["ABI/ARM/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIARC", + srcs = glob(["ABI/ARC/*.cpp"]), + hdrs = glob(["ABI/ARC/*.h"]), + 
include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Core", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIRISCV", + srcs = glob(["ABI/RISCV/*.cpp"]), + hdrs = glob(["ABI/RISCV/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Core", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABISystemZ", + srcs = glob(["ABI/SystemZ/*.cpp"]), + hdrs = glob(["ABI/SystemZ/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIAArch64", + srcs = glob(["ABI/AArch64/*.cpp"]), + hdrs = glob(["ABI/AArch64/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginDynamicLoaderPosixDYLDHeaders", + hdrs = glob(["DynamicLoader/POSIX-DYLD/*.h"]), + include_prefix = "Plugins", +) + +cc_library( + name = "PluginDynamicLoaderPosixDYLD", + srcs = glob(["DynamicLoader/POSIX-DYLD/*.cpp"]), + include_prefix = "Plugins", + deps = [ + ":PluginDynamicLoaderPosixDYLDHeaders", + ":PluginProcessElfCore", + ":PluginProcessUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginDynamicLoaderWindowsDYLD", + srcs = glob(["DynamicLoader/Windows-DYLD/*.cpp"]), + hdrs = glob(["DynamicLoader/Windows-DYLD/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginDynamicLoaderHexagonDYLD", + srcs = glob(["DynamicLoader/Hexagon-DYLD/*.cpp"]), + hdrs = glob(["DynamicLoader/Hexagon-DYLD/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginDynamicLoaderWasmDYLD", + srcs = glob(["DynamicLoader/wasm-DYLD/*.cpp"]), + hdrs = glob(["DynamicLoader/wasm-DYLD/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileWasm", + "//lldb:Core", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginDynamicLoaderStatic", + srcs = glob(["DynamicLoader/Static/*.cpp"]), + hdrs = glob(["DynamicLoader/Static/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginDynamicLoaderMacOSXDYLD", + srcs = glob(["DynamicLoader/MacOSX-DYLD/*.cpp"]), + hdrs = glob(["DynamicLoader/MacOSX-DYLD/*.h"]), + include_prefix = "Plugins", + deps = [ + 
":PluginObjCRuntime", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:TargetParser", + ], +) + +gentbl_cc_library( + name = "DynamicLoaderDarwinKernelProperties", + strip_include_prefix = "DynamicLoader/Darwin-Kernel", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernelProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernelPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernelProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginDynamicLoaderDarwinKernelHeaders", + hdrs = glob(["DynamicLoader/Darwin-Kernel/*.h"]), + include_prefix = "Plugins", +) + +cc_library( + name = "PluginDynamicLoaderDarwinKernel", + srcs = glob(["DynamicLoader/Darwin-Kernel/*.cpp"]), + include_prefix = "Plugins", + deps = [ + ":DynamicLoaderDarwinKernelProperties", + ":PluginDynamicLoaderDarwinKernelHeaders", + ":PluginObjectFileMachO", + ":PluginPlatformMacOSX", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginArchitecturePPC64", + srcs = glob(["Architecture/PPC64/*.cpp"]), + hdrs = glob(["Architecture/PPC64/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + ], +) + +cc_library( + name = "PluginArchitectureMips", + srcs = glob(["Architecture/Mips/*.cpp"]), + hdrs = glob(["Architecture/Mips/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginArchitectureArm", + srcs = glob(["Architecture/Arm/*.cpp"]), + hdrs = glob(["Architecture/Arm/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginArchitectureAArch64", + srcs = glob(["Architecture/AArch64/*.cpp"]), + hdrs = glob(["Architecture/AArch64/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginDisassemblerLLVMC", + srcs = glob(["Disassembler/LLVMC/*.cpp"]), + hdrs = glob(["Disassembler/LLVMC/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:MC", + "//llvm:MCDisassembler", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginSymbolFileSymtab", + srcs = glob(["SymbolFile/Symtab/*.cpp"]), + hdrs = glob(["SymbolFile/Symtab/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginSymbolFileCTF", + srcs = glob(["SymbolFile/CTF/*.cpp"]), + hdrs = glob(["SymbolFile/CTF/*.h"]), + 
include_prefix = "Plugins", + deps = [ + ":PluginExpressionParserClangHeaders", + ":PluginTypeSystemClangHeaders", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + "@llvm_zlib//:zlib", + ], +) + +cc_library( + name = "PluginSymbolFileJSON", + srcs = glob(["SymbolFile/JSON/*.cpp"]), + hdrs = glob(["SymbolFile/JSON/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileJSON", + "//lldb:Core", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginSymbolFileBreakpad", + srcs = glob(["SymbolFile/Breakpad/*.cpp"]), + hdrs = glob(["SymbolFile/Breakpad/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileBreakpad", + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstructionPPC64", + srcs = glob(["Instruction/PPC64/*.cpp"]), + hdrs = glob(["Instruction/PPC64/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginInstructionLoongArch", + srcs = glob(["Instruction/LoongArch/*.cpp"]), + hdrs = glob(["Instruction/LoongArch/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstructionMIPS", + srcs = glob(["Instruction/MIPS/*.cpp"]), + hdrs = glob(["Instruction/MIPS/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:MC", + "//llvm:MCDisassembler", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstructionMIPS64", + srcs = glob(["Instruction/MIPS64/*.cpp"]), + hdrs = glob(["Instruction/MIPS64/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:MC", + "//llvm:MCDisassembler", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstructionARM", + srcs = glob(["Instruction/ARM/*.cpp"]), + hdrs = glob(["Instruction/ARM/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstructionRISCV", + srcs = glob(["Instruction/RISCV/*.cpp"]), + hdrs = glob(["Instruction/RISCV/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstructionARM64", + srcs = glob(["Instruction/ARM64/*.cpp"]), + hdrs = glob(["Instruction/ARM64/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + 
"//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstrumentationRuntimeASanLibsanitizers", + srcs = glob(["InstrumentationRuntime/ASanLibsanitizers/*.cpp"]), + hdrs = glob(["InstrumentationRuntime/ASanLibsanitizers/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginInstrumentationRuntimeUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginInstrumentationRuntimeTSan", + srcs = glob(["InstrumentationRuntime/TSan/*.cpp"]), + hdrs = glob(["InstrumentationRuntime/TSan/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginInstrumentationRuntimeASan", + srcs = glob(["InstrumentationRuntime/ASan/*.cpp"]), + hdrs = glob(["InstrumentationRuntime/ASan/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginInstrumentationRuntimeUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginInstrumentationRuntimeMainThreadChecker", + srcs = glob(["InstrumentationRuntime/MainThreadChecker/*.cpp"]), + hdrs = glob(["InstrumentationRuntime/MainThreadChecker/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Headers", + "//lldb:Interpreter", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginInstrumentationRuntimeUBSan", + srcs = glob(["InstrumentationRuntime/UBSan/*.cpp"]), + hdrs = glob(["InstrumentationRuntime/UBSan/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginInstrumentationRuntimeUtility", + srcs = glob(["InstrumentationRuntime/Utility/*.cpp"]), + hdrs = glob(["InstrumentationRuntime/Utility/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Symbol", + "//lldb:Target", + "//lldb:TargetHeaders", + ], +) + +gentbl_cc_library( + name = "JITLoaderGDBProperties", + strip_include_prefix = "JITLoader/GDB", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "JITLoader/GDB/JITLoaderGDBProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "JITLoader/GDB/JITLoaderGDBPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "JITLoader/GDB/JITLoaderGDBProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginJITLoaderGDB", + srcs = glob(["JITLoader/GDB/*.cpp"]), + hdrs = glob(["JITLoader/GDB/*.h"]), + include_prefix = "Plugins", + deps = [ + ":JITLoaderGDBProperties", + ":PluginObjectFileMachO", + "//lldb:Breakpoint", + "//lldb:Core", + 
"//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginSymbolLocatorDefault", + srcs = glob(["SymbolLocator/Default/*.cpp"]), + hdrs = glob(["SymbolLocator/Default/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileWasm", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +gentbl_cc_library( + name = "SymbolLocatorDebuginfodProperties", + strip_include_prefix = "SymbolLocator/Debuginfod", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "SymbolLocator/Debuginfod/SymbolLocatorDebuginfodProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "SymbolLocator/Debuginfod/SymbolLocatorDebuginfodPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "SymbolLocator/Debuginfod/SymbolLocatorDebuginfodProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginSymbolLocatorDebuginfod", + srcs = glob(["SymbolLocator/Debuginfod/*.cpp"]), + hdrs = glob(["SymbolLocator/Debuginfod/*.h"]), + include_prefix = "Plugins", + deps = [ + ":SymbolLocatorDebuginfodProperties", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Debuginfod", + ], +) + +cc_library( + name = "PluginSymbolLocatorDebugSymbols", + srcs = glob(["SymbolLocator/DebugSymbols/*.cpp"]), + hdrs = glob(["SymbolLocator/DebugSymbols/*.h"]), + include_prefix = "Plugins", + tags = ["nobuildkite"], + deps = [ + ":PluginObjectFileWasm", + "//lldb:Core", + "//lldb:Host", + "//lldb:HostMacOSXPrivateHeaders", + "//lldb:Symbol", + ], +) + +# TODO: python support +# cc_library( +# name = "PluginOperatingSystemPython", +# srcs = glob(["OperatingSystem/Python/*.cpp"]), +# hdrs = glob(["OperatingSystem/Python/*.h"]), +# include_prefix = "Plugins", +# deps = [ +# "//lldb:Core", +# "//lldb:Interpreter", +# ":PluginProcessUtility", +# "//lldb:Symbol", +# "//lldb:Target", +# ], +# ) +# cc_library( +# name = "PluginScriptInterpreterPythonInterfaces", +# srcs = glob(["ScriptInterpreter/Python/Interfaces/*.cpp"]), +# hdrs = glob(["ScriptInterpreter/Python/Interfaces/*.h"]), +# include_prefix = "Plugins", +# deps = [ +# "//lldb:Core", +# "//lldb:Host", +# "//lldb:Interpreter", +# "//lldb:Target", +# "@rules_python//python/cc:current_py_cc_headers", +# "@rules_python//python/cc:current_py_cc_libs", +# ], +# ) +# cc_library( +# name = "PluginScriptInterpreterPythonHeaders", +# hdrs = glob(["ScriptInterpreter/Python/*.h"]), +# include_prefix = "Plugins", +# deps = [ +# "//lldb:Host", +# ], +# ) +# cc_library( +# name = "PluginScriptInterpreterPython", +# srcs = glob(["ScriptInterpreter/Python/*.cpp"]), +# local_defines = [ +# 'LLDB_PYTHON_EXE_RELATIVE_PATH=\\"bin/python3\\"', +# # Must be kept in sync with WORKSPACE python version +# 'LLDB_PYTHON_RELATIVE_LIBDIR=\\"lib/python3.11/site-packages\\"', +# ], +# include_prefix = "Plugins", +# deps = [ +# "//lldb:Breakpoint", +# "//lldb:Core", +# "//lldb:DataFormatters", +# "//lldb:Host", +# "//lldb:Interpreter", +# ":PluginScriptInterpreterPythonHeaders", +# ":PluginScriptInterpreterPythonInterfaces", +# "//lldb:Target", +# ], +# ) + +# TODO: lua support +# cc_library( +# name = "PluginScriptInterpreterLua", +# srcs = glob(["ScriptInterpreter/Lua/*.cpp"]), +# 
hdrs = glob(["ScriptInterpreter/Lua/*.h"]), +# include_prefix = "Plugins", +# deps = [ +# "//lldb:Core", +# "//lldb:Interpreter", +# ], +# ) + +cc_library( + name = "PluginScriptInterpreterNone", + srcs = glob(["ScriptInterpreter/None/*.cpp"]), + hdrs = glob(["ScriptInterpreter/None/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginSystemRuntimeMacOSX", + srcs = glob(["SystemRuntime/MacOSX/*.cpp"]), + hdrs = glob(["SystemRuntime/MacOSX/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginObjectFileCOFF", + srcs = glob(["ObjectFile/COFF/*.cpp"]), + hdrs = glob(["ObjectFile/COFF/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:Utility", + "//llvm:Object", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjectFileWasm", + srcs = glob(["ObjectFile/wasm/*.cpp"]), + hdrs = glob(["ObjectFile/wasm/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjectFileJSON", + srcs = glob(["ObjectFile/JSON/*.cpp"]), + hdrs = glob(["ObjectFile/JSON/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjectFilePlaceholder", + srcs = glob(["ObjectFile/Placeholder/*.cpp"]), + hdrs = glob(["ObjectFile/Placeholder/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginObjectFileMachO", + srcs = glob(["ObjectFile/Mach-O/*.cpp"]), + hdrs = glob(["ObjectFile/Mach-O/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjectFileMinidump", + srcs = glob(["ObjectFile/Minidump/*.cpp"]), + hdrs = glob(["ObjectFile/Minidump/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessMinidump", + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:Object", + "//llvm:Support", + ], +) + +gentbl_cc_library( + name = "ObjectFilePECOFFProperties", + strip_include_prefix = "ObjectFile/PECOFF", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "ObjectFile/PECOFF/ObjectFilePECOFFProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "ObjectFile/PECOFF/ObjectFilePECOFFPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "ObjectFile/PECOFF/ObjectFilePECOFFProperties.td", + deps = 
["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginObjectFilePECOFF", + srcs = glob(["ObjectFile/PECOFF/*.cpp"]), + hdrs = glob(["ObjectFile/PECOFF/*.h"]), + include_prefix = "Plugins", + deps = [ + ":ObjectFilePECOFFProperties", + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:Object", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginObjectFileBreakpad", + srcs = glob(["ObjectFile/Breakpad/*.cpp"]), + hdrs = glob(["ObjectFile/Breakpad/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginObjectFileELF", + srcs = glob(["ObjectFile/ELF/*.cpp"]), + hdrs = glob(["ObjectFile/ELF/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:Object", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginUnwindAssemblyX86", + srcs = glob(["UnwindAssembly/x86/*.cpp"]), + hdrs = glob(["UnwindAssembly/x86/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:MCDisassembler", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginUnwindAssemblyInstEmulation", + srcs = glob(["UnwindAssembly/InstEmulation/*.cpp"]), + hdrs = glob(["UnwindAssembly/InstEmulation/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginProcessPOSIX", + srcs = glob(["Process/POSIX/*.cpp"]), + hdrs = glob(["Process/POSIX/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Headers", + "//lldb:Host", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginScriptedProcess", + srcs = glob(["Process/scripted/*.cpp"]), + hdrs = glob(["Process/scripted/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginProcessMachCore", + srcs = glob(["Process/mach-core/*.cpp"]), + hdrs = glob(["Process/mach-core/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginDynamicLoaderDarwinKernelHeaders", + ":PluginDynamicLoaderMacOSXDYLD", + ":PluginDynamicLoaderStatic", + ":PluginObjectFileMachO", + ":PluginPlatformMacOSX", + ":PluginProcessUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginProcessElfCore", + srcs = glob(["Process/elf-core/*.cpp"]), + hdrs = glob(["Process/elf-core/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginDynamicLoaderPosixDYLDHeaders", + ":PluginObjectFileELF", + ":PluginProcessUtility", + 
"//lldb:Core", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:Support", + ], +) + +gentbl_cc_library( + name = "ProcessKDPProperties", + strip_include_prefix = "Process/MacOSX-Kernel", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "Process/MacOSX-Kernel/ProcessKDPProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "Process/MacOSX-Kernel/ProcessKDPPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "Process/MacOSX-Kernel/ProcessKDPProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginProcessMacOSXKernel", + srcs = glob(["Process/MacOSX-Kernel/*.cpp"]), + hdrs = glob(["Process/MacOSX-Kernel/*.h"]), + include_prefix = "Plugins", + tags = ["nobuildkite"], + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + ":PluginDynamicLoaderDarwinKernel", + ":PluginDynamicLoaderDarwinKernelHeaders", + ":PluginDynamicLoaderStatic", + ":PluginProcessUtility", + ":ProcessKDPProperties", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginProcessMinidump", + srcs = glob(["Process/minidump/*.cpp"]), + hdrs = glob(["Process/minidump/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFilePlaceholder", + ":PluginProcessElfCore", + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Headers", + "//lldb:InterpreterHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:BinaryFormat", + "//llvm:Object", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginCXXItaniumABI", + srcs = glob(["LanguageRuntime/CPlusPlus/ItaniumABI/*.cpp"]), + hdrs = glob(["LanguageRuntime/CPlusPlus/ItaniumABI/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginCPPRuntime", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:DataFormattersHeaders", + "//lldb:ExpressionHeaders", + "//lldb:Headers", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginGNUstepObjCRuntime", + srcs = glob(["LanguageRuntime/ObjC/GNUstepObjCRuntime/*.cpp"]), + hdrs = glob(["LanguageRuntime/ObjC/GNUstepObjCRuntime/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginExpressionParserClang", + ":PluginObjCRuntime", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:Symbol", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginRegisterTypeBuilderClang", + srcs = glob(["RegisterTypeBuilder/*.cpp"]), + hdrs = glob(["RegisterTypeBuilder/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Target", + "//lldb:TargetHeaders", + ], +) + +_DEFAULT_LOAD_PLUGINS = "\n".join(["LLDB_PLUGIN({})".format(x) for x in DEFAULT_PLUGINS]) + \ + "\n" + "\n".join(["LLDB_SCRIPT_PLUGIN({})".format(x) for x in 
DEFAULT_SCRIPT_PLUGINS]) + +expand_template( + name = "plugins_config_gen", + out = "Plugins.def", + substitutions = { + "@LLDB_PROCESS_WINDOWS_PLUGIN@": "", + "@LLDB_PROCESS_GDB_PLUGIN@": "LLDB_PLUGIN(ProcessGDBRemote)", + } | select({ + "@platforms//os:macos": { + "@LLDB_ENUM_PLUGINS@": _DEFAULT_LOAD_PLUGINS + """ +LLDB_PLUGIN(ProcessMacOSXKernel) +LLDB_PLUGIN(SymbolLocatorDebugSymbols) +LLDB_PLUGIN(SymbolVendorMacOSX) +""", + }, + "//conditions:default": { + "@LLDB_ENUM_PLUGINS@": _DEFAULT_LOAD_PLUGINS, + }, + }), + template = "Plugins.def.in", +) + +cc_library( + name = "PluginsConfig", + hdrs = [":plugins_config_gen"], + include_prefix = "Plugins", +) diff --git a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/plugin_config.bzl b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/plugin_config.bzl new file mode 100644 index 0000000000000..5949d2d7a504c --- /dev/null +++ b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/plugin_config.bzl @@ -0,0 +1,104 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +"""Common configuration for LLDB plugins.""" + +load("//:vars.bzl", "CMAKE_CXX_STANDARD") + +DEFAULT_PLUGINS = [ + "ABIAArch64", + "ABIARM", + "ABIHexagon", + "ABIMips", + "ABIMSP430", + "ABIPowerPC", + "ABIRISCV", + "ABISystemZ", + "ABIX86", + "AppleObjCRuntime", + "ArchitectureAArch64", + "ArchitectureArm", + "ArchitectureMips", + "ArchitecturePPC64", + "ClangREPL", + "CPlusPlusLanguage", + "CXXItaniumABI", + "DisassemblerLLVMC", + "DynamicLoaderDarwinKernel", + "DynamicLoaderHexagonDYLD", + "DynamicLoaderMacOSXDYLD", + "DynamicLoaderPosixDYLD", + "DynamicLoaderStatic", + "DynamicLoaderWasmDYLD", + "DynamicLoaderWindowsDYLD", + "GNUstepObjCRuntime", + "InstructionARM", + "InstructionARM64", + "InstructionLoongArch", + "InstructionMIPS", + "InstructionMIPS64", + "InstructionPPC64", + "InstructionRISCV", + "InstrumentationRuntimeASan", + "InstrumentationRuntimeASanLibsanitizers", + "InstrumentationRuntimeMainThreadChecker", + "InstrumentationRuntimeTSan", + "InstrumentationRuntimeUBSan", + "JITLoaderGDB", + "MemoryHistoryASan", + "ObjCLanguage", + "ObjCPlusPlusLanguage", + "ObjectContainerBSDArchive", + "ObjectContainerMachOArchive", + "ObjectContainerMachOFileset", + "ObjectFileBreakpad", + "ObjectFileCOFF", + "ObjectFileELF", + "ObjectFileJSON", + "ObjectFileMachO", + "ObjectFileMinidump", + "ObjectFilePDB", + "ObjectFilePECOFF", + "ObjectFilePlaceholder", + "ObjectFileWasm", + "PlatformAndroid", + "PlatformGDB", + "PlatformLinux", + "PlatformMacOSX", + "PlatformQemuUser", + "PlatformWindows", + "ProcessElfCore", + "ProcessMachCore", + "ProcessMinidump", + "RegisterTypeBuilderClang", + "ScriptedProcess", + "StructuredDataDarwinLog", + "SymbolFileBreakpad", + "SymbolFileCTF", + "SymbolFileDWARF", + "SymbolFileJSON", + "SymbolFilePDB", + "SymbolFileSymtab", + "SymbolLocatorDebuginfod", + "SymbolLocatorDefault", + "SymbolVendorELF", + "SymbolVendorPECOFF", + "SymbolVendorWasm", + "SystemRuntimeMacOSX", + "TraceExporterCTF", + "TypeSystemClang", + "UnwindAssemblyInstEmulation", + "UnwindAssemblyX86", +] + +DEFAULT_SCRIPT_PLUGINS = [ + "ScriptInterpreterNone", +] + +OBJCPP_COPTS = [ + "-std=c++{}".format(CMAKE_CXX_STANDARD), + "-fno-objc-exceptions", + "-fno-objc-arc", + "-Wno-shorten-64-to-32", +] diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 
9cfcb7d3838ed..af9dc26abec44 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -3214,23 +3214,13 @@ gentbl( td_srcs = ["include/llvm/Option/OptParser.td"], ) -expand_template( - name = "dsymutil_main", - out = "dsymutil-driver.cpp", - substitutions = { - "@TOOL_NAME@": "dsymutil", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "dsymutil", +cc_library( + name = "dsymutil-lib", srcs = glob([ "tools/dsymutil/*.cpp", "tools/dsymutil/*.h", - ]) + ["dsymutil-driver.cpp"], + ]), copts = llvm_copts, - stamp = 0, deps = [ ":AllTargetsCodeGens", ":BinaryFormat", @@ -3253,6 +3243,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "dsymutil", + stamp = 0, + deps = [":dsymutil-lib"], +) + cc_binary( name = "llc", srcs = glob([ @@ -3532,22 +3528,10 @@ gentbl( td_srcs = ["include/llvm/Option/OptParser.td"], ) -expand_template( - name = "cxxfilt_main", - out = "llvm-cxxfilt-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_cxxfilt", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-cxxfilt", - srcs = glob([ - "tools/llvm-cxxfilt/*.cpp", - ]) + ["llvm-cxxfilt-driver.cpp"], +cc_library( + name = "llvm-cxxfilt-lib", + srcs = glob(["tools/llvm-cxxfilt/*.cpp"]), copts = llvm_copts, - stamp = 0, deps = [ ":CxxfiltOptsTableGen", ":Demangle", @@ -3557,6 +3541,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-cxxfilt", + stamp = 0, + deps = [":llvm-cxxfilt-lib"], +) + cc_binary( name = "llvm-debuginfo-analyzer", srcs = glob([ @@ -3674,22 +3664,10 @@ gentbl( td_srcs = ["include/llvm/Option/OptParser.td"], ) -expand_template( - name = "dwp_main", - out = "llvm-dwp-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_dwp", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-dwp", - srcs = glob([ - "tools/llvm-dwp/*.cpp", - ]) + ["llvm-dwp-driver.cpp"], +cc_library( + name = "llvm-dwp-lib", + srcs = glob(["tools/llvm-dwp/*.cpp"]), copts = llvm_copts, - stamp = 0, deps = [ ":AllTargetsCodeGens", ":DWP", @@ -3700,6 +3678,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-dwp", + stamp = 0, + deps = [":llvm-dwp-lib"], +) + cc_binary( name = "llvm-exegesis", srcs = [ @@ -3754,22 +3738,10 @@ gentbl( td_srcs = ["include/llvm/Option/OptParser.td"], ) -expand_template( - name = "gsymutil_main", - out = "llvm-gsymutil-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_gsymutil", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-gsymutil", - srcs = glob([ - "tools/llvm-gsymutil/*.cpp", - ]) + ["llvm-gsymutil-driver.cpp"], +cc_library( + name = "llvm-gsymutil-lib", + srcs = glob(["tools/llvm-gsymutil/*.cpp"]), copts = llvm_copts, - stamp = 0, deps = [ ":AllTargetsCodeGens", ":DebugInfo", @@ -3785,6 +3757,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-gsymutil", + stamp = 0, + deps = [":llvm-gsymutil-lib"], +) + gentbl( name = "IfsOptionsTableGen", strip_include_prefix = "tools/llvm-ifs", @@ -3797,23 +3775,13 @@ gentbl( td_srcs = ["include/llvm/Option/OptParser.td"], ) -expand_template( - name = "ifs_main", - out = "llvm-ifs-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_ifs", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-ifs", +cc_library( + name = "llvm-ifs-lib", srcs = glob([ "tools/llvm-ifs/*.cpp", "tools/llvm-ifs/*.h", - ]) + ["llvm-ifs-driver.cpp"], + ]), copts = 
llvm_copts, - stamp = 0, deps = [ ":BinaryFormat", ":IfsOptionsTableGen", @@ -3826,6 +3794,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-ifs", + stamp = 0, + deps = [":llvm-ifs-lib"], +) + cc_binary( name = "llvm-jitlink", srcs = glob([ @@ -3884,23 +3858,13 @@ gentbl( td_srcs = ["include/llvm/Option/OptParser.td"], ) -expand_template( - name = "libtool-darwin_main", - out = "llvm-libtool-darwin-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_libtool_darwin", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-libtool-darwin", +cc_library( + name = "llvm-libtool-darwin-lib", srcs = glob([ "tools/llvm-libtool-darwin/*.cpp", "tools/llvm-libtool-darwin/*.h", - ]) + ["llvm-libtool-darwin-driver.cpp"], + ]), copts = llvm_copts, - stamp = 0, deps = [ ":AllTargetsAsmParsers", ":AllTargetsCodeGens", @@ -3914,6 +3878,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-libtool-darwin", + stamp = 0, + deps = [":llvm-libtool-darwin-lib"], +) + cc_binary( name = "llvm-link", srcs = glob([ @@ -3949,22 +3919,10 @@ gentbl( td_srcs = ["include/llvm/Option/OptParser.td"], ) -expand_template( - name = "lipo_main", - out = "llvm-lipo-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_lipo", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-lipo", - srcs = [ - "tools/llvm-lipo/llvm-lipo.cpp", - ] + ["llvm-lipo-driver.cpp"], +cc_library( + name = "llvm-lipo-lib", + srcs = ["tools/llvm-lipo/llvm-lipo.cpp"], copts = llvm_copts, - stamp = 0, deps = [ ":AllTargetsAsmParsers", ":BinaryFormat", @@ -3978,6 +3936,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-lipo", + stamp = 0, + deps = [":llvm-lipo-lib"], +) + cc_binary( name = "llvm-lto", srcs = glob([ @@ -4077,23 +4041,13 @@ gentbl( td_srcs = ["include/llvm/Option/OptParser.td"], ) -expand_template( - name = "ml_main", - out = "llvm-ml-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_ml", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-ml", +cc_library( + name = "llvm-ml-lib", srcs = glob([ "tools/llvm-ml/*.cpp", "tools/llvm-ml/*.h", - ]) + ["llvm-ml-driver.cpp"], + ]), copts = llvm_copts, - stamp = 0, deps = [ ":AllTargetsAsmParsers", ":AllTargetsCodeGens", @@ -4108,6 +4062,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-ml", + stamp = 0, + deps = [":llvm-ml-lib"], +) + cc_binary( name = "llvm-modextract", srcs = glob([ @@ -4136,22 +4096,10 @@ gentbl( td_srcs = ["include/llvm/Option/OptParser.td"], ) -expand_template( - name = "mt_main", - out = "llvm-mt-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_mt", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-mt", - srcs = glob([ - "tools/llvm-mt/*.cpp", - ]) + ["llvm-mt-driver.cpp"], +cc_library( + name = "llvm-mt-lib", + srcs = glob(["tools/llvm-mt/*.cpp"]), copts = llvm_copts, - stamp = 0, deps = [ ":MtTableGen", ":Option", @@ -4160,6 +4108,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-mt", + stamp = 0, + deps = [":llvm-mt-lib"], +) + gentbl( name = "NmOptsTableGen", strip_include_prefix = "tools/llvm-nm", @@ -4271,23 +4225,13 @@ cc_binary( ], ) -expand_template( - name = "objcopy_main", - out = "llvm-objcopy-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_objcopy", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-objcopy", +cc_library( + name = "llvm-objcopy-lib", srcs = glob([ 
"tools/llvm-objcopy/*.cpp", "tools/llvm-objcopy/*.h", - ]) + ["llvm-objcopy-driver.cpp"], + ]), copts = llvm_copts, - stamp = 0, deps = [ ":BinaryFormat", ":MC", @@ -4305,6 +4249,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-objcopy", + stamp = 0, + deps = [":llvm-objcopy-lib"], +) + binary_alias( name = "llvm-strip", binary = ":llvm-objcopy", @@ -4320,23 +4270,13 @@ binary_alias( binary = ":llvm-objcopy", ) -expand_template( - name = "objdump_main", - out = "llvm-objdump-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_objdump", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-objdump", +cc_library( + name = "llvm-objdump-lib", srcs = glob([ "tools/llvm-objdump/*.cpp", "tools/llvm-objdump/*.h", - ]) + ["llvm-objdump-driver.cpp"], + ]), copts = llvm_copts, - stamp = 0, deps = [ ":AllTargetsAsmParsers", ":AllTargetsCodeGens", @@ -4360,6 +4300,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-objdump", + stamp = 0, + deps = [":llvm-objdump-lib"], +) + gentbl( name = "ObjdumpOptsTableGen", strip_include_prefix = "tools/llvm-objdump", @@ -4425,22 +4371,10 @@ cc_binary( ], ) -expand_template( - name = "profdata_main", - out = "llvm-profdata-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_profdata", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-profdata", - srcs = glob([ - "tools/llvm-profdata/*.cpp", - ]) + ["llvm-profdata-driver.cpp"], +cc_library( + name = "llvm-profdata-lib", + srcs = glob(["tools/llvm-profdata/*.cpp"]), copts = llvm_copts, - stamp = 0, deps = [ ":Core", ":Object", @@ -4449,6 +4383,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-profdata", + stamp = 0, + deps = [":llvm-profdata-lib"], +) + cc_binary( name = "llvm-profgen", srcs = glob([ @@ -4504,23 +4444,13 @@ cc_library( textual_hdrs = glob(["tools/llvm-rc/*.def"]), ) -expand_template( - name = "rc_main", - out = "llvm-rc-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_rc", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-rc", +cc_library( + name = "llvm-rc-lib", srcs = glob([ "tools/llvm-rc/*.cpp", "tools/llvm-rc/*.h", - ]) + ["llvm-rc-driver.cpp"], + ]), copts = llvm_copts, - stamp = 0, deps = [ ":Object", ":Option", @@ -4533,6 +4463,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-rc", + stamp = 0, + deps = [":llvm-rc-lib"], +) + binary_alias( name = "llvm-windres", binary = ":llvm-rc", @@ -4550,23 +4486,13 @@ gentbl( td_srcs = ["include/llvm/Option/OptParser.td"], ) -expand_template( - name = "readobj_main", - out = "llvm-readobj-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_readobj", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-readobj", +cc_library( + name = "llvm-readobj-lib", srcs = glob([ "tools/llvm-readobj/*.cpp", "tools/llvm-readobj/*.h", - ]) + ["llvm-readobj-driver.cpp"], + ]), copts = llvm_copts, - stamp = 0, deps = [ ":AllTargetsCodeGens", ":BinaryFormat", @@ -4582,6 +4508,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-readobj", + stamp = 0, + deps = [":llvm-readobj-lib"], +) + # Create an 'llvm-readelf' named binary from the 'llvm-readobj' tool. 
binary_alias( name = "llvm-readelf", @@ -4723,22 +4655,10 @@ gentbl( td_srcs = ["include/llvm/Option/OptParser.td"], ) -expand_template( - name = "symbolizer_main", - out = "llvm-symbolizer-driver.cpp", - substitutions = { - "@TOOL_NAME@": "llvm_symbolizer", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "llvm-symbolizer", - srcs = glob([ - "tools/llvm-symbolizer/*.cpp", - ]) + ["llvm-symbolizer-driver.cpp"], +cc_library( + name = "llvm-symbolizer-lib", + srcs = glob(["tools/llvm-symbolizer/*.cpp"]), copts = llvm_copts, - stamp = 0, deps = [ ":DebugInfoDWARF", ":DebugInfoPDB", @@ -4752,6 +4672,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "llvm-symbolizer", + stamp = 0, + deps = [":llvm-symbolizer-lib"], +) + binary_alias( name = "llvm-addr2line", binary = ":llvm-symbolizer", @@ -4843,22 +4769,10 @@ gentbl( td_srcs = ["include/llvm/Option/OptParser.td"], ) -expand_template( - name = "sancov_main", - out = "sancov-driver.cpp", - substitutions = { - "@TOOL_NAME@": "sancov", - }, - template = "cmake/modules/llvm-driver-template.cpp.in", -) - -cc_binary( - name = "sancov", - srcs = glob([ - "tools/sancov/*.cpp", - ]) + ["sancov-driver.cpp"], +cc_library( + name = "sancov-lib", + srcs = glob(["tools/sancov/*.cpp"]), copts = llvm_copts, - stamp = 0, deps = [ ":AllTargetsCodeGens", ":AllTargetsDisassemblers", @@ -4874,6 +4788,12 @@ cc_binary( ], ) +llvm_driver_cc_binary( + name = "sancov", + stamp = 0, + deps = [":sancov-lib"], +) + cc_binary( name = "sanstats", srcs = glob([ diff --git a/utils/bazel/llvm-project-overlay/llvm/driver.bzl b/utils/bazel/llvm-project-overlay/llvm/driver.bzl index bd0d26d64f481..888626d7cf845 100644 --- a/utils/bazel/llvm-project-overlay/llvm/driver.bzl +++ b/utils/bazel/llvm-project-overlay/llvm/driver.bzl @@ -8,31 +8,29 @@ load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") load("@bazel_skylib//rules:expand_template.bzl", "expand_template") # Mapping from every tool to the cc_library that implements the tool's entrypoint. -# TODO: uncomment the remaining targets after splitting them -# into separate library/binary targets. 
_TOOLS = { - # "clang-scan-deps": "//clang:clang-scan-deps-lib", - # "clang": "//clang:clang-driver", - # "dsymutil": "//llvm:dsymutil-lib", - # "lld": "//lld:lld-lib", + "clang-scan-deps": "//clang:clang-scan-deps-lib", + "clang": "//clang:clang-driver", + "dsymutil": "//llvm:dsymutil-lib", + "lld": "//lld:lld-lib", "llvm-ar": "//llvm:llvm-ar-lib", - # "llvm-cxxfilt": "//llvm:llvm-cxxfilt-lib", - # "llvm-dwp": "//llvm:llvm-dwp-lib", - # "llvm-gsymutil": "//llvm:llvm-gsymutil-lib", - # "llvm-ifs": "//llvm:llvm-ifs-lib", - # "llvm-libtool-darwin": "//llvm:llvm-libtool-darwin-lib", - # "llvm-lipo": "//llvm:llvm-lipo-lib", - # "llvm-ml": "//llvm:llvm-ml-lib", - # "llvm-mt": "//llvm:llvm-mt-lib", + "llvm-cxxfilt": "//llvm:llvm-cxxfilt-lib", + "llvm-dwp": "//llvm:llvm-dwp-lib", + "llvm-gsymutil": "//llvm:llvm-gsymutil-lib", + "llvm-ifs": "//llvm:llvm-ifs-lib", + "llvm-libtool-darwin": "//llvm:llvm-libtool-darwin-lib", + "llvm-lipo": "//llvm:llvm-lipo-lib", + "llvm-ml": "//llvm:llvm-ml-lib", + "llvm-mt": "//llvm:llvm-mt-lib", "llvm-nm": "//llvm:llvm-nm-lib", - # "llvm-objcopy": "//llvm:llvm-objcopy-lib", - # "llvm-objdump": "//llvm:llvm-objdump-lib", - # "llvm-profdata": "//llvm:llvm-profdata-lib", - # "llvm-rc": "//llvm:llvm-rc-lib", - # "llvm-readobj": "//llvm:llvm-readobj-lib", + "llvm-objcopy": "//llvm:llvm-objcopy-lib", + "llvm-objdump": "//llvm:llvm-objdump-lib", + "llvm-profdata": "//llvm:llvm-profdata-lib", + "llvm-rc": "//llvm:llvm-rc-lib", + "llvm-readobj": "//llvm:llvm-readobj-lib", "llvm-size": "//llvm:llvm-size-lib", - # "llvm-symbolizer": "//llvm:llvm-symbolizer-lib", - # "sancov": "//llvm:sancov-lib", + "llvm-symbolizer": "//llvm:llvm-symbolizer-lib", + "sancov": "//llvm:sancov-lib", } # Tools automatically get their own name as an alias, but there may be additional diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h index b4fb2373d571f..e9385f45c5e5c 100644 --- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h +++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h @@ -306,6 +306,9 @@ /* Whether tools show host and target info when invoked with --version */ #define LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO 1 +/* Whether tools show optional build config flags when invoked with --version */ +#define LLVM_VERSION_PRINTER_SHOW_BUILD_CONFIG 1 + /* Define if libxml2 is supported on this platform. 
*/ /* #undef LLVM_ENABLE_LIBXML2 */ diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index ddd3e69e6ce35..497edcfceffe4 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -4275,6 +4275,7 @@ cc_library( ":AffineDialect", ":Analysis", ":ArithDialect", + ":ArithUtils", ":DialectUtils", ":FuncDialect", ":IR", diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index 30130131c4651..684b59e7f62f6 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -387,6 +387,8 @@ cc_library( ":TestOpsIncGen", ":TestOpsSyntaxIncGen", ":TestTypeDefsIncGen", + "//llvm:Core", + "//llvm:IRReader", "//llvm:Support", "//mlir:ArithDialect", "//mlir:BytecodeOpInterface", @@ -399,6 +401,7 @@ cc_library( "//mlir:DestinationStyleOpInterface", "//mlir:Dialect", "//mlir:DialectUtils", + "//mlir:FromLLVMIRTranslation", "//mlir:FuncDialect", "//mlir:FuncTransforms", "//mlir:FunctionInterfaces", @@ -407,6 +410,7 @@ cc_library( "//mlir:InferTypeOpInterface", "//mlir:InliningUtils", "//mlir:LLVMDialect", + "//mlir:LLVMIRToLLVMTranslation", "//mlir:LinalgDialect", "//mlir:LoopLikeInterface", "//mlir:Pass", @@ -414,6 +418,7 @@ cc_library( "//mlir:SideEffectInterfaces", "//mlir:Support", "//mlir:TensorDialect", + "//mlir:TranslateLib", "//mlir:TransformUtils", "//mlir:Transforms", "//mlir:ViewLikeInterface", diff --git a/utils/bazel/llvm_configs/config.h.cmake b/utils/bazel/llvm_configs/config.h.cmake index fc1f9bf342f8d..977c182e9d2b0 100644 --- a/utils/bazel/llvm_configs/config.h.cmake +++ b/utils/bazel/llvm_configs/config.h.cmake @@ -290,6 +290,9 @@ /* Whether tools show host and target info when invoked with --version */ #cmakedefine01 LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO +/* Whether tools show optional build config flags when invoked with --version */ +#cmakedefine01 LLVM_VERSION_PRINTER_SHOW_BUILD_CONFIG + /* Define if libxml2 is supported on this platform. */ #cmakedefine LLVM_ENABLE_LIBXML2 ${LLVM_ENABLE_LIBXML2}
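Note on the LLDB plugin enumeration above: the plugins_config_gen rule expands Plugins.def.in by replacing @LLDB_ENUM_PLUGINS@ with one LLDB_PLUGIN(...) line per entry of DEFAULT_PLUGINS and one LLDB_SCRIPT_PLUGIN(...) line per entry of DEFAULT_SCRIPT_PLUGINS, with three macOS-only plugins appended under @platforms//os:macos. An illustrative excerpt of the generated Plugins.def on a non-macOS host (the macros themselves are defined by the C++ file that includes it):

LLDB_PLUGIN(ABIAArch64)
LLDB_PLUGIN(ABIARM)
...
LLDB_PLUGIN(UnwindAssemblyX86)
LLDB_SCRIPT_PLUGIN(ScriptInterpreterNone)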
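The llvm tool changes above all apply one pattern: the former expand_template + cc_binary pair for each tool is replaced by a cc_library holding the tool's sources plus an llvm_driver_cc_binary that supplies the generated driver entry point, so that driver.bzl's _TOOLS map can reference the library when assembling the combined llvm driver. A minimal Starlark sketch of the pattern, using a hypothetical tool name "llvm-foo" (the name, glob, and deps are placeholders, not targets from this diff; llvm_copts and llvm_driver_cc_binary are the definitions already used in this BUILD file):

cc_library(
    name = "llvm-foo-lib",
    srcs = glob(["tools/llvm-foo/*.cpp"]),
    copts = llvm_copts,
    deps = [":Support"],
)

llvm_driver_cc_binary(
    name = "llvm-foo",
    stamp = 0,
    deps = [":llvm-foo-lib"],
)

Adding an entry such as "llvm-foo": "//llvm:llvm-foo-lib" to _TOOLS in driver.bzl would then make the tool dispatchable by name from the multicall driver, which is exactly what the uncommenting in driver.bzl does for the tools split in this diff.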